From 2f2e2663cef9205720d8b9ae153af2ed179fb64d Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 12:14:50 -0700 Subject: [PATCH 01/15] feat: openai-js url path --- .../src/instrumentation.ts | 14 +++++++++++++- js/pnpm-lock.yaml | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index d2bead6188..b6ab5bca69 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -40,7 +40,8 @@ import { EmbeddingCreateParams, } from "openai/resources"; import { assertUnreachable, isString } from "./typeUtils"; -import { isTracingSuppressed } from "@opentelemetry/core"; +import { isTracingSuppressed } from "@opentelemetry/core"; + import { OITracer, @@ -70,6 +71,8 @@ const INSTRUMENTATION_NAME = "@arizeai/openinference-instrumentation-openai"; */ let _isOpenInferencePatched = false; + + /** * function to check if instrumentation is enabled / disabled */ @@ -95,6 +98,8 @@ function getExecContext(span: Span) { return execContext; } + + /** * Gets the appropriate LLM provider based on the OpenAI client instance * Follows the same logic as the Python implementation by checking the baseURL host @@ -256,6 +261,8 @@ export class OpenAIInstrumentation extends InstrumentationBase { // eslint-disable-next-line @typescript-eslint/no-this-alias const instrumentation: OpenAIInstrumentation = this; + + // Patch create chat completions type ChatCompletionCreateType = typeof module.OpenAI.Chat.Completions.prototype.create; @@ -287,6 +294,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getLLMInputMessagesAttributes(body), ...getLLMToolsJSONSchema(body), + }, }, ); @@ -375,6 +383,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_SYSTEM]: LLMSystem.OPENAI, [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getCompletionInputValueAndMimeType(body), + }, }, ); @@ -454,6 +463,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { : MimeType.JSON, [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getEmbeddingTextAttributes(body), + }, }); const execContext = getExecContext(span); @@ -528,6 +538,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getResponsesInputMessagesAttributes(body), ...getLLMToolsJSONSchema(body), + }, }, ); @@ -614,6 +625,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { moduleVersion?: string, ) { diag.debug(`Removing patch for ${MODULE_NAME}@${moduleVersion}`); + this._unwrap(moduleExports.OpenAI.Chat.Completions.prototype, "create"); this._unwrap(moduleExports.OpenAI.Completions.prototype, "create"); this._unwrap(moduleExports.OpenAI.Embeddings.prototype, "create"); diff --git a/js/pnpm-lock.yaml b/js/pnpm-lock.yaml index 7dae76f708..669a32a7ad 100644 --- a/js/pnpm-lock.yaml +++ b/js/pnpm-lock.yaml @@ -396,6 +396,9 @@ importers: '@opentelemetry/instrumentation': specifier: ^0.46.0 version: 0.46.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-http': + specifier: ^0.46.0 + version: 0.46.0(@opentelemetry/api@1.9.0) devDependencies: '@opentelemetry/exporter-trace-otlp-proto': specifier: ^0.50.0 @@ -1729,6 +1732,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.0.0 + '@opentelemetry/instrumentation-http@0.46.0': + resolution: {integrity: sha512-t5cxgqfV9AcxVP00/OL1ggkOSZM57VXDpvlWaOidYyyfLKcUJ9e2fGbNwoVsGFboRDeH0iFo7gLA3EEvX13wCA==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation@0.46.0': resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==} engines: {node: '>=14'} @@ -7635,6 +7644,16 @@ snapshots: '@opentelemetry/sdk-trace-base': 1.30.1(@opentelemetry/api@1.9.0) '@opentelemetry/semantic-conventions': 1.28.0 + '@opentelemetry/instrumentation-http@0.46.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.19.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.46.0(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.19.0 + semver: 7.7.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 From 882f78a676f07f42797726cdf819865ce8fdb96c Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 12:39:35 -0700 Subject: [PATCH 02/15] wip --- .../package.json | 3 +- .../src/instrumentation.ts | 132 +++++++++++++++++- 2 files changed, 127 insertions(+), 8 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/package.json b/js/packages/openinference-instrumentation-openai/package.json index b595817d0f..b05b6028fb 100644 --- a/js/packages/openinference-instrumentation-openai/package.json +++ b/js/packages/openinference-instrumentation-openai/package.json @@ -30,7 +30,8 @@ "@arizeai/openinference-semantic-conventions": "workspace:*", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^1.25.1", - "@opentelemetry/instrumentation": "^0.46.0" + "@opentelemetry/instrumentation": "^0.46.0", + "@opentelemetry/instrumentation-http": "^0.46.0" }, "keywords": [], "files": [ diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index b6ab5bca69..afbe29f9b5 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -40,7 +40,9 @@ import { EmbeddingCreateParams, } from "openai/resources"; import { assertUnreachable, isString } from "./typeUtils"; -import { isTracingSuppressed } from "@opentelemetry/core"; +import { isTracingSuppressed } from "@opentelemetry/core"; + +import { getAbsoluteUrl } from "@opentelemetry/instrumentation-http/build/src/utils"; import { @@ -71,6 +73,12 @@ const INSTRUMENTATION_NAME = "@arizeai/openinference-instrumentation-openai"; */ let _isOpenInferencePatched = false; +/** + * WeakMap to store URL information for each request context + * This allows us to correlate the URL captured in the post method with the higher-level API calls + */ +const requestUrlMap = new WeakMap(); + /** @@ -98,7 +106,78 @@ function getExecContext(span: Span) { return execContext; } +/** + * Extracts URL path for debugging purposes (especially useful for Azure) + * Uses getAbsoluteUrl for proper redaction of sensitive information + * @param fullUrl The complete URL of the request + * @param baseUrl The base URL of the client + * @returns Object containing URL path for debugging + */ +function getUrlAttributes(fullUrl: string, baseUrl?: string): Record { + const attributes: Record = {}; + + try { + const url = new URL(fullUrl); + + // Use OpenTelemetry's getAbsoluteUrl for proper redaction of sensitive information + // Convert URL to the format expected by getAbsoluteUrl + const urlOptions = { + protocol: url.protocol, + hostname: url.hostname, + port: url.port || undefined, + path: url.pathname + url.search, + auth: url.username && url.password ? `${url.username}:${url.password}` : undefined, + }; + + // Get properly redacted full URL using OpenTelemetry utilities + const redactedUrl = getAbsoluteUrl(urlOptions, {}); + + // Extract the path (URL - baseURL) as requested: path = full - base_url + if (baseUrl) { + try { + const path = fullUrl.replace(baseUrl.replace(/\/$/, ''), '') || url.pathname; + // Use a simple custom attribute for the path (useful for Azure debugging) + attributes["url.path"] = path; + } catch { + // If baseURL parsing fails, use the pathname + attributes["url.path"] = url.pathname; + } + } else { + attributes["url.path"] = url.pathname; + } + + // Safely extract api_version query parameter for Azure + if (url.search) { + const queryParams = new URLSearchParams(url.search); + const apiVersion = queryParams.get("api-version"); + if (apiVersion) { + attributes["url.query.api_version"] = apiVersion; + } + } + } catch (error) { + diag.debug("Failed to extract URL attributes", error); + } + + return attributes; +} +/** + * Gets URL attributes for a client instance from stored request information + * @param clientInstance The OpenAI client instance + * @returns URL attributes object using OpenTelemetry conventions + */ +function getStoredUrlAttributes(clientInstance: unknown): Record { + try { + const instance = clientInstance as object; + const urlInfo = requestUrlMap.get(instance); + if (urlInfo) { + return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); + } + } catch (error) { + diag.debug("Failed to get stored URL attributes", error); + } + return {}; +} /** * Gets the appropriate LLM provider based on the OpenAI client instance @@ -261,7 +340,46 @@ export class OpenAIInstrumentation extends InstrumentationBase { // eslint-disable-next-line @typescript-eslint/no-this-alias const instrumentation: OpenAIInstrumentation = this; - + // Patch the post method to capture URL information + this._wrap( + module.OpenAI.prototype, + "post", + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (original: any): any => { + return function patchedPost( + this: unknown, + path: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + body?: any, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + options?: any + ) { + // Store URL information for this request context + try { + const clientInstance = this as { + baseURL?: string; + _client?: { baseURL?: string }; + }; + + let baseUrl: string | undefined; + if (clientInstance.baseURL && typeof clientInstance.baseURL === "string") { + baseUrl = clientInstance.baseURL; + } else if (clientInstance._client?.baseURL && typeof clientInstance._client.baseURL === "string") { + baseUrl = clientInstance._client.baseURL; + } + + if (baseUrl && this) { + const fullUrl = new URL(path, baseUrl).toString(); + requestUrlMap.set(this as object, { url: fullUrl, baseUrl }); + } + } catch (error) { + diag.debug("Failed to capture URL information in post method", error); + } + + return original.apply(this, [path, body, options]); + }; + } + ); // Patch create chat completions type ChatCompletionCreateType = @@ -294,7 +412,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getLLMInputMessagesAttributes(body), ...getLLMToolsJSONSchema(body), - + ...getStoredUrlAttributes(this), }, }, ); @@ -383,7 +501,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_SYSTEM]: LLMSystem.OPENAI, [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getCompletionInputValueAndMimeType(body), - + ...getStoredUrlAttributes(this), }, }, ); @@ -463,7 +581,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { : MimeType.JSON, [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getEmbeddingTextAttributes(body), - + ...getStoredUrlAttributes(this), }, }); const execContext = getExecContext(span); @@ -538,7 +656,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getResponsesInputMessagesAttributes(body), ...getLLMToolsJSONSchema(body), - + ...getStoredUrlAttributes(this), }, }, ); @@ -625,7 +743,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { moduleVersion?: string, ) { diag.debug(`Removing patch for ${MODULE_NAME}@${moduleVersion}`); - + this._unwrap(moduleExports.OpenAI.prototype, "post"); this._unwrap(moduleExports.OpenAI.Chat.Completions.prototype, "create"); this._unwrap(moduleExports.OpenAI.Completions.prototype, "create"); this._unwrap(moduleExports.OpenAI.Embeddings.prototype, "create"); From b19770abfa51fb90650f1ca22269c33e56f1b749 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:25:53 -0700 Subject: [PATCH 03/15] wip --- .../src/instrumentation.ts | 59 ++++++++++++------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index afbe29f9b5..2c433a0f84 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -42,8 +42,7 @@ import { import { assertUnreachable, isString } from "./typeUtils"; import { isTracingSuppressed } from "@opentelemetry/core"; -import { getAbsoluteUrl } from "@opentelemetry/instrumentation-http/build/src/utils"; - +import { getAbsoluteUrl } from "@opentelemetry/instrumentation-http/build/src/utils"; import { OITracer, @@ -79,8 +78,6 @@ let _isOpenInferencePatched = false; */ const requestUrlMap = new WeakMap(); - - /** * function to check if instrumentation is enabled / disabled */ @@ -113,12 +110,15 @@ function getExecContext(span: Span) { * @param baseUrl The base URL of the client * @returns Object containing URL path for debugging */ -function getUrlAttributes(fullUrl: string, baseUrl?: string): Record { +function getUrlAttributes( + fullUrl: string, + baseUrl?: string, +): Record { const attributes: Record = {}; - + try { const url = new URL(fullUrl); - + // Use OpenTelemetry's getAbsoluteUrl for proper redaction of sensitive information // Convert URL to the format expected by getAbsoluteUrl const urlOptions = { @@ -126,16 +126,20 @@ function getUrlAttributes(fullUrl: string, baseUrl?: string): Record { +function getStoredUrlAttributes( + clientInstance: unknown, +): Record { try { const instance = clientInstance as object; const urlInfo = requestUrlMap.get(instance); @@ -352,7 +358,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { // eslint-disable-next-line @typescript-eslint/no-explicit-any body?: any, // eslint-disable-next-line @typescript-eslint/no-explicit-any - options?: any + options?: any, ) { // Store URL information for this request context try { @@ -360,25 +366,34 @@ export class OpenAIInstrumentation extends InstrumentationBase { baseURL?: string; _client?: { baseURL?: string }; }; - + let baseUrl: string | undefined; - if (clientInstance.baseURL && typeof clientInstance.baseURL === "string") { + if ( + clientInstance.baseURL && + typeof clientInstance.baseURL === "string" + ) { baseUrl = clientInstance.baseURL; - } else if (clientInstance._client?.baseURL && typeof clientInstance._client.baseURL === "string") { + } else if ( + clientInstance._client?.baseURL && + typeof clientInstance._client.baseURL === "string" + ) { baseUrl = clientInstance._client.baseURL; } - + if (baseUrl && this) { const fullUrl = new URL(path, baseUrl).toString(); requestUrlMap.set(this as object, { url: fullUrl, baseUrl }); } } catch (error) { - diag.debug("Failed to capture URL information in post method", error); + diag.debug( + "Failed to capture URL information in post method", + error, + ); } - + return original.apply(this, [path, body, options]); }; - } + }, ); // Patch create chat completions From 4f6320eee4ec7dece22320e6166396db3137711d Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:30:24 -0700 Subject: [PATCH 04/15] wi[ --- .../src/instrumentation.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 2c433a0f84..3923a93a4a 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -174,8 +174,16 @@ function getStoredUrlAttributes( clientInstance: unknown, ): Record { try { - const instance = clientInstance as object; - const urlInfo = requestUrlMap.get(instance); + const instance = clientInstance as { _client?: object }; + + // Try to get URL info using the sub-resource instance first + let urlInfo = requestUrlMap.get(instance as object); + + // If not found and there's a _client property, try that (this is the actual OpenAI client) + if (!urlInfo && instance._client) { + urlInfo = requestUrlMap.get(instance._client); + } + if (urlInfo) { return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); } From fadb10489940b57ea7c7855f9f390dcdacc0b4eb Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:32:41 -0700 Subject: [PATCH 05/15] wip --- .../src/instrumentation.ts | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 3923a93a4a..5bfa3ed2a5 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -175,15 +175,15 @@ function getStoredUrlAttributes( ): Record { try { const instance = clientInstance as { _client?: object }; - + // Try to get URL info using the sub-resource instance first let urlInfo = requestUrlMap.get(instance as object); - + // If not found and there's a _client property, try that (this is the actual OpenAI client) if (!urlInfo && instance._client) { urlInfo = requestUrlMap.get(instance._client); } - + if (urlInfo) { return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); } @@ -435,7 +435,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getLLMInputMessagesAttributes(body), ...getLLMToolsJSONSchema(body), - ...getStoredUrlAttributes(this), }, }, ); @@ -473,6 +472,8 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_MODEL_NAME]: result.model, ...getChatCompletionLLMOutputMessagesAttributes(result), ...getUsageAttributes(result), + // Add URL attributes now that the request has completed + ...getStoredUrlAttributes(this), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -524,7 +525,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_SYSTEM]: LLMSystem.OPENAI, [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getCompletionInputValueAndMimeType(body), - ...getStoredUrlAttributes(this), }, }, ); @@ -560,6 +560,8 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_MODEL_NAME]: result.model, ...getCompletionOutputValueAndMimeType(result), ...getUsageAttributes(result), + // Add URL attributes now that the request has completed + ...getStoredUrlAttributes(this), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -604,7 +606,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { : MimeType.JSON, [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getEmbeddingTextAttributes(body), - ...getStoredUrlAttributes(this), }, }); const execContext = getExecContext(span); @@ -632,6 +633,8 @@ export class OpenAIInstrumentation extends InstrumentationBase { span.setAttributes({ // Do not record the output data as it can be large ...getEmbeddingEmbeddingsAttributes(result), + // Add URL attributes now that the request has completed + ...getStoredUrlAttributes(this), }); } span.setStatus({ code: SpanStatusCode.OK }); @@ -679,7 +682,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_PROVIDER]: getLLMProvider(this), ...getResponsesInputMessagesAttributes(body), ...getLLMToolsJSONSchema(body), - ...getStoredUrlAttributes(this), }, }, ); @@ -718,6 +720,8 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_MODEL_NAME]: result.model, ...getResponsesOutputMessagesAttributes(result), ...getResponsesUsageAttributes(result), + // Add URL attributes now that the request has completed + ...getStoredUrlAttributes(this), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); From 1ae0a8475e9d13f0fec1a7b1e4b58a5f40b99d09 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:49:13 -0700 Subject: [PATCH 06/15] cleanup --- .../src/instrumentation.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 5bfa3ed2a5..12614cae05 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -132,8 +132,7 @@ function getUrlAttributes( : undefined, }; - // Get properly redacted full URL using OpenTelemetry utilities - const redactedUrl = getAbsoluteUrl(urlOptions, {}); + // Note: We could use getAbsoluteUrl(urlOptions, {}) for redacted full URL if needed // Extract the path (URL - baseURL) as requested: path = full - base_url if (baseUrl) { From cb195e4983aeb4d972edef6586646f022448d7b9 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:52:05 -0700 Subject: [PATCH 07/15] wip --- .../package.json | 3 +-- .../src/instrumentation.ts | 17 +---------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/package.json b/js/packages/openinference-instrumentation-openai/package.json index b05b6028fb..b595817d0f 100644 --- a/js/packages/openinference-instrumentation-openai/package.json +++ b/js/packages/openinference-instrumentation-openai/package.json @@ -30,8 +30,7 @@ "@arizeai/openinference-semantic-conventions": "workspace:*", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^1.25.1", - "@opentelemetry/instrumentation": "^0.46.0", - "@opentelemetry/instrumentation-http": "^0.46.0" + "@opentelemetry/instrumentation": "^0.46.0" }, "keywords": [], "files": [ diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 12614cae05..745e7113c8 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -42,8 +42,6 @@ import { import { assertUnreachable, isString } from "./typeUtils"; import { isTracingSuppressed } from "@opentelemetry/core"; -import { getAbsoluteUrl } from "@opentelemetry/instrumentation-http/build/src/utils"; - import { OITracer, safelyJSONStringify, @@ -119,20 +117,7 @@ function getUrlAttributes( try { const url = new URL(fullUrl); - // Use OpenTelemetry's getAbsoluteUrl for proper redaction of sensitive information - // Convert URL to the format expected by getAbsoluteUrl - const urlOptions = { - protocol: url.protocol, - hostname: url.hostname, - port: url.port || undefined, - path: url.pathname + url.search, - auth: - url.username && url.password - ? `${url.username}:${url.password}` - : undefined, - }; - - // Note: We could use getAbsoluteUrl(urlOptions, {}) for redacted full URL if needed + // Extract URL components for debugging (path and api_version only) // Extract the path (URL - baseURL) as requested: path = full - base_url if (baseUrl) { From f0b2cf5abba723f1f4bed7695275b98890d4bcce Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:55:18 -0700 Subject: [PATCH 08/15] wip --- .../src/instrumentation.ts | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 745e7113c8..27985bc0c5 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -72,7 +72,7 @@ let _isOpenInferencePatched = false; /** * WeakMap to store URL information for each request context - * This allows us to correlate the URL captured in the post method with the higher-level API calls + * Uses the actual request arguments as the key to avoid concurrent request overwrites */ const requestUrlMap = new WeakMap(); @@ -150,26 +150,19 @@ function getUrlAttributes( } /** - * Gets URL attributes for a client instance from stored request information - * @param clientInstance The OpenAI client instance - * @returns URL attributes object using OpenTelemetry conventions + * Gets URL attributes for a request from stored request information + * @param requestBody The request body used as a unique key for this request + * @returns URL attributes object */ function getStoredUrlAttributes( - clientInstance: unknown, + requestBody: unknown, ): Record { try { - const instance = clientInstance as { _client?: object }; - - // Try to get URL info using the sub-resource instance first - let urlInfo = requestUrlMap.get(instance as object); - - // If not found and there's a _client property, try that (this is the actual OpenAI client) - if (!urlInfo && instance._client) { - urlInfo = requestUrlMap.get(instance._client); - } - - if (urlInfo) { - return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); + if (requestBody && typeof requestBody === 'object') { + const urlInfo = requestUrlMap.get(requestBody as object); + if (urlInfo) { + return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); + } } } catch (error) { diag.debug("Failed to get stored URL attributes", error); @@ -352,7 +345,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { // eslint-disable-next-line @typescript-eslint/no-explicit-any options?: any, ) { - // Store URL information for this request context + // Store URL information for this specific request try { const clientInstance = this as { baseURL?: string; @@ -372,9 +365,13 @@ export class OpenAIInstrumentation extends InstrumentationBase { baseUrl = clientInstance._client.baseURL; } - if (baseUrl && this) { + if (baseUrl) { const fullUrl = new URL(path, baseUrl).toString(); - requestUrlMap.set(this as object, { url: fullUrl, baseUrl }); + // Use the request body as a unique key for this specific request + // This avoids concurrent requests overwriting each other + if (body && typeof body === 'object') { + requestUrlMap.set(body, { url: fullUrl, baseUrl }); + } } } catch (error) { diag.debug( @@ -457,7 +454,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { ...getChatCompletionLLMOutputMessagesAttributes(result), ...getUsageAttributes(result), // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(this), + ...getStoredUrlAttributes(body), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -545,7 +542,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { ...getCompletionOutputValueAndMimeType(result), ...getUsageAttributes(result), // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(this), + ...getStoredUrlAttributes(body), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -618,7 +615,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { // Do not record the output data as it can be large ...getEmbeddingEmbeddingsAttributes(result), // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(this), + ...getStoredUrlAttributes(body), }); } span.setStatus({ code: SpanStatusCode.OK }); From dc48f40f09c130b979435acf53792fec5c594042 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:55:36 -0700 Subject: [PATCH 09/15] prettier --- .../src/instrumentation.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 27985bc0c5..5b655988c4 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -154,11 +154,9 @@ function getUrlAttributes( * @param requestBody The request body used as a unique key for this request * @returns URL attributes object */ -function getStoredUrlAttributes( - requestBody: unknown, -): Record { +function getStoredUrlAttributes(requestBody: unknown): Record { try { - if (requestBody && typeof requestBody === 'object') { + if (requestBody && typeof requestBody === "object") { const urlInfo = requestUrlMap.get(requestBody as object); if (urlInfo) { return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); @@ -369,7 +367,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { const fullUrl = new URL(path, baseUrl).toString(); // Use the request body as a unique key for this specific request // This avoids concurrent requests overwriting each other - if (body && typeof body === 'object') { + if (body && typeof body === "object") { requestUrlMap.set(body, { url: fullUrl, baseUrl }); } } From 9620b9fd8f4bccd5c7e9a8ed036516fdbd1c1ba0 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 12 Aug 2025 14:00:53 -0700 Subject: [PATCH 10/15] wip --- .../src/instrumentation.ts | 1 - js/pnpm-lock.yaml | 19 ------------------- 2 files changed, 20 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 5b655988c4..30905b5b99 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -103,7 +103,6 @@ function getExecContext(span: Span) { /** * Extracts URL path for debugging purposes (especially useful for Azure) - * Uses getAbsoluteUrl for proper redaction of sensitive information * @param fullUrl The complete URL of the request * @param baseUrl The base URL of the client * @returns Object containing URL path for debugging diff --git a/js/pnpm-lock.yaml b/js/pnpm-lock.yaml index 669a32a7ad..7dae76f708 100644 --- a/js/pnpm-lock.yaml +++ b/js/pnpm-lock.yaml @@ -396,9 +396,6 @@ importers: '@opentelemetry/instrumentation': specifier: ^0.46.0 version: 0.46.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-http': - specifier: ^0.46.0 - version: 0.46.0(@opentelemetry/api@1.9.0) devDependencies: '@opentelemetry/exporter-trace-otlp-proto': specifier: ^0.50.0 @@ -1732,12 +1729,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.0.0 - '@opentelemetry/instrumentation-http@0.46.0': - resolution: {integrity: sha512-t5cxgqfV9AcxVP00/OL1ggkOSZM57VXDpvlWaOidYyyfLKcUJ9e2fGbNwoVsGFboRDeH0iFo7gLA3EEvX13wCA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation@0.46.0': resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==} engines: {node: '>=14'} @@ -7644,16 +7635,6 @@ snapshots: '@opentelemetry/sdk-trace-base': 1.30.1(@opentelemetry/api@1.9.0) '@opentelemetry/semantic-conventions': 1.28.0 - '@opentelemetry/instrumentation-http@0.46.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.19.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.46.0(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.19.0 - semver: 7.7.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 From f1c73749dd69e82c91de92d99b890f544d384591 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:36:23 -0700 Subject: [PATCH 11/15] wip --- .../src/instrumentation.ts | 87 ++++++++++++++----- 1 file changed, 64 insertions(+), 23 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 30905b5b99..2aea823d42 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -71,10 +71,10 @@ const INSTRUMENTATION_NAME = "@arizeai/openinference-instrumentation-openai"; let _isOpenInferencePatched = false; /** - * WeakMap to store URL information for each request context - * Uses the actual request arguments as the key to avoid concurrent request overwrites + * Map to store URL information for each request using trace context + * Uses trace ID + span ID as the key to avoid concurrent request overwrites */ -const requestUrlMap = new WeakMap(); +const requestUrlMap = new Map(); /** * function to check if instrumentation is enabled / disabled @@ -121,12 +121,29 @@ function getUrlAttributes( // Extract the path (URL - baseURL) as requested: path = full - base_url if (baseUrl) { try { - const path = - fullUrl.replace(baseUrl.replace(/\/$/, ""), "") || url.pathname; - // Use a simple custom attribute for the path (useful for Azure debugging) - attributes["url.path"] = path; + const baseUrlObj = new URL(baseUrl); + const fullUrlObj = new URL(fullUrl); + + // If the hosts match, calculate the path difference + if (baseUrlObj.hostname === fullUrlObj.hostname) { + // Calculate the relative path by removing the base path from the full path + const basePath = baseUrlObj.pathname.replace(/\/$/, ""); + const fullPath = fullUrlObj.pathname; + + if (fullPath.startsWith(basePath)) { + // Remove base path to get the relative path + const relativePath = fullPath.substring(basePath.length) || "/"; + attributes["url.path"] = relativePath; + } else { + // If paths don't align, use the full path + attributes["url.path"] = fullPath; + } + } else { + // Different hosts, use pathname + attributes["url.path"] = url.pathname; + } } catch { - // If baseURL parsing fails, use the pathname + // If URL parsing fails, use the pathname attributes["url.path"] = url.pathname; } } else { @@ -150,16 +167,26 @@ function getUrlAttributes( /** * Gets URL attributes for a request from stored request information - * @param requestBody The request body used as a unique key for this request + * @param span The span to get URL attributes for * @returns URL attributes object */ -function getStoredUrlAttributes(requestBody: unknown): Record { +function getStoredUrlAttributes(span: Span): Record { try { - if (requestBody && typeof requestBody === "object") { - const urlInfo = requestUrlMap.get(requestBody as object); - if (urlInfo) { - return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); - } + const spanContext = span.spanContext(); + const contextKey = `${spanContext.traceId}-${spanContext.spanId}`; + const urlInfo = requestUrlMap.get(contextKey); + if (urlInfo) { + diag.debug("Retrieved URL info from requestUrlMap", { + urlInfo, + contextKey, + }); + // Clean up after use to prevent memory leaks + requestUrlMap.delete(contextKey); + return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); + } else { + diag.debug("No URL info found in requestUrlMap for this span", { + contextKey, + }); } } catch (error) { diag.debug("Failed to get stored URL attributes", error); @@ -364,10 +391,24 @@ export class OpenAIInstrumentation extends InstrumentationBase { if (baseUrl) { const fullUrl = new URL(path, baseUrl).toString(); - // Use the request body as a unique key for this specific request - // This avoids concurrent requests overwriting each other - if (body && typeof body === "object") { - requestUrlMap.set(body, { url: fullUrl, baseUrl }); + // Store URL info using the current active span context + const activeSpan = trace.getActiveSpan(); + if (activeSpan) { + const spanContext = activeSpan.spanContext(); + const contextKey = `${spanContext.traceId}-${spanContext.spanId}`; + requestUrlMap.set(contextKey, { url: fullUrl, baseUrl }); + diag.debug("Stored URL info for request", { + fullUrl, + baseUrl, + contextKey, + }); + // Clean up old entries to prevent memory leaks + if (requestUrlMap.size > 1000) { + const oldestKey = requestUrlMap.keys().next().value; + if (oldestKey) { + requestUrlMap.delete(oldestKey); + } + } } } } catch (error) { @@ -451,7 +492,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { ...getChatCompletionLLMOutputMessagesAttributes(result), ...getUsageAttributes(result), // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(body), + ...getStoredUrlAttributes(span), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -539,7 +580,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { ...getCompletionOutputValueAndMimeType(result), ...getUsageAttributes(result), // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(body), + ...getStoredUrlAttributes(span), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -612,7 +653,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { // Do not record the output data as it can be large ...getEmbeddingEmbeddingsAttributes(result), // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(body), + ...getStoredUrlAttributes(span), }); } span.setStatus({ code: SpanStatusCode.OK }); @@ -699,7 +740,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { ...getResponsesOutputMessagesAttributes(result), ...getResponsesUsageAttributes(result), // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(this), + ...getStoredUrlAttributes(span), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); From 145a9003d0384cfa53bf05f5e91a6d4e4872b747 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Thu, 28 Aug 2025 16:18:33 -0700 Subject: [PATCH 12/15] wip --- .../src/instrumentation.ts | 76 ++++++++++++++----- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 2aea823d42..20df662287 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -102,10 +102,10 @@ function getExecContext(span: Span) { } /** - * Extracts URL path for debugging purposes (especially useful for Azure) + * Extracts URL attributes for debugging purposes (especially useful for Azure) * @param fullUrl The complete URL of the request * @param baseUrl The base URL of the client - * @returns Object containing URL path for debugging + * @returns Object containing URL attributes for debugging */ function getUrlAttributes( fullUrl: string, @@ -116,9 +116,10 @@ function getUrlAttributes( try { const url = new URL(fullUrl); - // Extract URL components for debugging (path and api_version only) + // Always include the full URL for complete debugging context + attributes["url.full"] = fullUrl; - // Extract the path (URL - baseURL) as requested: path = full - base_url + // Extract the path component if (baseUrl) { try { const baseUrlObj = new URL(baseUrl); @@ -126,28 +127,56 @@ function getUrlAttributes( // If the hosts match, calculate the path difference if (baseUrlObj.hostname === fullUrlObj.hostname) { - // Calculate the relative path by removing the base path from the full path - const basePath = baseUrlObj.pathname.replace(/\/$/, ""); + // For Azure OpenAI, we want to reconstruct the deployment path + // baseUrl example: "https://example.openai.azure.com/openai/deployments/gpt-4" + // fullUrl example: "https://example.openai.azure.com/chat/completions" + // We want to extract the deployment info from baseUrl and combine with the endpoint + + const basePath = baseUrlObj.pathname; const fullPath = fullUrlObj.pathname; - if (fullPath.startsWith(basePath)) { - // Remove base path to get the relative path - const relativePath = fullPath.substring(basePath.length) || "/"; - attributes["url.path"] = relativePath; + // Extract deployment information from the base URL + if (basePath.includes("/deployments/")) { + // Extract the deployment part: "deployments/model-name" + const deploymentMatch = basePath.match(/\/deployments\/([^/]+)/); + if (deploymentMatch) { + const deploymentName = deploymentMatch[1]; + const endpoint = fullPath.startsWith("/") + ? fullPath.substring(1) + : fullPath; + attributes["url.path"] = + `deployments/${deploymentName}/${endpoint}`; + } else { + // Fallback to just the endpoint + attributes["url.path"] = fullPath.startsWith("/") + ? fullPath.substring(1) + : fullPath; + } } else { - // If paths don't align, use the full path - attributes["url.path"] = fullPath; + // Not a deployment URL, use the full path + attributes["url.path"] = fullPath.startsWith("/") + ? fullPath.substring(1) + : fullPath; } } else { - // Different hosts, use pathname - attributes["url.path"] = url.pathname; + // Different hosts, use pathname without leading slash + const pathname = url.pathname.startsWith("/") + ? url.pathname.substring(1) + : url.pathname; + attributes["url.path"] = pathname || "/"; } } catch { // If URL parsing fails, use the pathname - attributes["url.path"] = url.pathname; + const pathname = url.pathname.startsWith("/") + ? url.pathname.substring(1) + : url.pathname; + attributes["url.path"] = pathname || "/"; } } else { - attributes["url.path"] = url.pathname; + const pathname = url.pathname.startsWith("/") + ? url.pathname.substring(1) + : url.pathname; + attributes["url.path"] = pathname || "/"; } // Safely extract api_version query parameter for Azure @@ -390,7 +419,20 @@ export class OpenAIInstrumentation extends InstrumentationBase { } if (baseUrl) { - const fullUrl = new URL(path, baseUrl).toString(); + // Construct the full URL with query parameters if available + let fullUrl = new URL(path, baseUrl).toString(); + + // Add query parameters if they exist in options + if (options?.query && typeof options.query === "object") { + const url = new URL(fullUrl); + Object.entries(options.query).forEach(([key, value]) => { + if (value !== undefined && value !== null) { + url.searchParams.set(key, String(value)); + } + }); + fullUrl = url.toString(); + } + // Store URL info using the current active span context const activeSpan = trace.getActiveSpan(); if (activeSpan) { From b8eb0053e73340185b955c07eb62d0802d232c43 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Fri, 5 Sep 2025 10:09:39 -0700 Subject: [PATCH 13/15] Add URL extraction for Azure OpenAI debugging - Extract url.full with complete URL - Extract url.path with deployment info (deployments/model-name/endpoint) - Improve Azure provider detection - Add unit test for URL extraction functionality --- .../test/openai.test.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/js/packages/openinference-instrumentation-openai/test/openai.test.ts b/js/packages/openinference-instrumentation-openai/test/openai.test.ts index 3d31305f56..fc2ae4deb9 100644 --- a/js/packages/openinference-instrumentation-openai/test/openai.test.ts +++ b/js/packages/openinference-instrumentation-openai/test/openai.test.ts @@ -1650,4 +1650,16 @@ describe("OpenAIInstrumentation with a custom tracer provider", () => { expect(span.attributes["llm.model_name"]).toBe("gpt-3.5-turbo-0613"); }); }); + + describe("URL extraction", () => { + it("should detect Azure provider correctly", () => { + const azureClient = new OpenAI({ + apiKey: "test-key", + baseURL: "https://test-resource.openai.azure.com/openai/deployments/gpt-4", + }); + + // Just verify the client was created with Azure base URL + expect(azureClient.baseURL).toContain("openai.azure.com"); + }); + }); }); From d4f3297e6e8655736875b8d44ddec58d1135a5b2 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Fri, 5 Sep 2025 10:15:10 -0700 Subject: [PATCH 14/15] wip --- .../test/openai.test.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/js/packages/openinference-instrumentation-openai/test/openai.test.ts b/js/packages/openinference-instrumentation-openai/test/openai.test.ts index fc2ae4deb9..aa4ace03ed 100644 --- a/js/packages/openinference-instrumentation-openai/test/openai.test.ts +++ b/js/packages/openinference-instrumentation-openai/test/openai.test.ts @@ -1654,10 +1654,11 @@ describe("OpenAIInstrumentation with a custom tracer provider", () => { describe("URL extraction", () => { it("should detect Azure provider correctly", () => { const azureClient = new OpenAI({ - apiKey: "test-key", - baseURL: "https://test-resource.openai.azure.com/openai/deployments/gpt-4", + apiKey: "test-key", + baseURL: + "https://test-resource.openai.azure.com/openai/deployments/gpt-4", }); - + // Just verify the client was created with Azure base URL expect(azureClient.baseURL).toContain("openai.azure.com"); }); From 3410a19aa59c79301fcdb530485a06aa9b487a7c Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 2 Oct 2025 09:27:48 -0700 Subject: [PATCH 15/15] wip --- js/.changeset/lemon-geese-hear.md | 5 + .../src/httpUtils.ts | 91 ++++++ .../src/instrumentation.ts | 264 +++++------------- .../test/httpUtils.test.ts | 37 +++ .../test/openai.test.ts | 7 + 5 files changed, 211 insertions(+), 193 deletions(-) create mode 100644 js/.changeset/lemon-geese-hear.md create mode 100644 js/packages/openinference-instrumentation-openai/src/httpUtils.ts create mode 100644 js/packages/openinference-instrumentation-openai/test/httpUtils.test.ts diff --git a/js/.changeset/lemon-geese-hear.md b/js/.changeset/lemon-geese-hear.md new file mode 100644 index 0000000000..24870d616e --- /dev/null +++ b/js/.changeset/lemon-geese-hear.md @@ -0,0 +1,5 @@ +--- +"@arizeai/openinference-instrumentation-openai": minor +--- + +extract url for span attributes diff --git a/js/packages/openinference-instrumentation-openai/src/httpUtils.ts b/js/packages/openinference-instrumentation-openai/src/httpUtils.ts new file mode 100644 index 0000000000..fd56fd0867 --- /dev/null +++ b/js/packages/openinference-instrumentation-openai/src/httpUtils.ts @@ -0,0 +1,91 @@ +/** + * HTTP utilities for OpenTelemetry instrumentation + * JavaScript equivalent of opentelemetry-python-contrib/util/opentelemetry-util-http + * Minimal version containing only URL redaction functionality + */ + +/** + * List of query parameters that should be redacted for security + */ +const PARAMS_TO_REDACT = [ + "AWSAccessKeyId", + "Signature", + "sig", + "X-Goog-Signature", +]; + +/** + * Replaces the username and password with the keyword `REDACTED` in a URL + * Only modifies the URL if it is valid and contains credentials + * @param url The URL string to process + * @returns The URL with credentials redacted, or original URL if invalid + */ +export function removeUrlCredentials(url: string): string { + try { + const parsed = new URL(url); + + // Check if URL has credentials + if (parsed.username || parsed.password) { + // Create new URL with redacted credentials + const newUrl = new URL(url); + newUrl.username = "REDACTED"; + newUrl.password = "REDACTED"; + return newUrl.toString(); + } + + return url; + } catch (error) { + // If URL parsing fails, return original URL + return url; + } +} + +/** + * Redacts sensitive query parameter values in a URL + * @param url The URL string to process + * @returns The URL with sensitive query parameters redacted, or original URL if no changes needed + */ +export function redactQueryParameters(url: string): string { + try { + const parsed = new URL(url); + + if (!parsed.search) { + // No query parameters to redact + return url; + } + + const searchParams = new URLSearchParams(parsed.search); + let hasRedactedParams = false; + + // Check if any parameters need redaction + for (const param of PARAMS_TO_REDACT) { + if (searchParams.has(param)) { + searchParams.set(param, "REDACTED"); + hasRedactedParams = true; + } + } + + if (!hasRedactedParams) { + return url; + } + + // Reconstruct URL with redacted parameters + const newUrl = new URL(url); + newUrl.search = searchParams.toString(); + return newUrl.toString(); + } catch (error) { + // If URL parsing fails, return original URL + return url; + } +} + +/** + * Redacts sensitive data from the URL, including credentials and query parameters + * @param url The URL string to process + * @returns The URL with all sensitive data redacted + */ +export function redactUrl(url: string): string { + let redactedUrl = removeUrlCredentials(url); + redactedUrl = redactQueryParameters(redactedUrl); + return redactedUrl; +} diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts index 20df662287..53a36d851e 100644 --- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts +++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts @@ -41,6 +41,7 @@ import { } from "openai/resources"; import { assertUnreachable, isString } from "./typeUtils"; import { isTracingSuppressed } from "@opentelemetry/core"; +import { redactUrl } from "./httpUtils"; import { OITracer, @@ -70,12 +71,6 @@ const INSTRUMENTATION_NAME = "@arizeai/openinference-instrumentation-openai"; */ let _isOpenInferencePatched = false; -/** - * Map to store URL information for each request using trace context - * Uses trace ID + span ID as the key to avoid concurrent request overwrites - */ -const requestUrlMap = new Map(); - /** * function to check if instrumentation is enabled / disabled */ @@ -102,126 +97,71 @@ function getExecContext(span: Span) { } /** - * Extracts URL attributes for debugging purposes (especially useful for Azure) - * @param fullUrl The complete URL of the request - * @param baseUrl The base URL of the client - * @returns Object containing URL attributes for debugging + * Extracts the base URL from an OpenAI client instance + * @param instance The OpenAI client instance (may be nested) + * @returns The base URL string, or undefined if not found */ -function getUrlAttributes( - fullUrl: string, - baseUrl?: string, -): Record { - const attributes: Record = {}; +function getBaseUrl(instance: unknown): string | undefined { + const client = instance as { + baseURL?: string; + _client?: { baseURL?: string }; + }; + return client.baseURL || client._client?.baseURL; +} +/** + * Extracts the relative path from a full URL given a base URL + * @param fullUrl The complete URL to extract the path from + * @param baseUrl The base URL to remove from the full URL + * @returns The relative path, or null if extraction fails + */ +const extractRelativePath = ( + fullUrl: string, + baseUrl: string, +): string | null => { try { - const url = new URL(fullUrl); - - // Always include the full URL for complete debugging context - attributes["url.full"] = fullUrl; - - // Extract the path component - if (baseUrl) { - try { - const baseUrlObj = new URL(baseUrl); - const fullUrlObj = new URL(fullUrl); - - // If the hosts match, calculate the path difference - if (baseUrlObj.hostname === fullUrlObj.hostname) { - // For Azure OpenAI, we want to reconstruct the deployment path - // baseUrl example: "https://example.openai.azure.com/openai/deployments/gpt-4" - // fullUrl example: "https://example.openai.azure.com/chat/completions" - // We want to extract the deployment info from baseUrl and combine with the endpoint - - const basePath = baseUrlObj.pathname; - const fullPath = fullUrlObj.pathname; - - // Extract deployment information from the base URL - if (basePath.includes("/deployments/")) { - // Extract the deployment part: "deployments/model-name" - const deploymentMatch = basePath.match(/\/deployments\/([^/]+)/); - if (deploymentMatch) { - const deploymentName = deploymentMatch[1]; - const endpoint = fullPath.startsWith("/") - ? fullPath.substring(1) - : fullPath; - attributes["url.path"] = - `deployments/${deploymentName}/${endpoint}`; - } else { - // Fallback to just the endpoint - attributes["url.path"] = fullPath.startsWith("/") - ? fullPath.substring(1) - : fullPath; - } - } else { - // Not a deployment URL, use the full path - attributes["url.path"] = fullPath.startsWith("/") - ? fullPath.substring(1) - : fullPath; - } - } else { - // Different hosts, use pathname without leading slash - const pathname = url.pathname.startsWith("/") - ? url.pathname.substring(1) - : url.pathname; - attributes["url.path"] = pathname || "/"; - } - } catch { - // If URL parsing fails, use the pathname - const pathname = url.pathname.startsWith("/") - ? url.pathname.substring(1) - : url.pathname; - attributes["url.path"] = pathname || "/"; - } - } else { - const pathname = url.pathname.startsWith("/") - ? url.pathname.substring(1) - : url.pathname; - attributes["url.path"] = pathname || "/"; - } - - // Safely extract api_version query parameter for Azure - if (url.search) { - const queryParams = new URLSearchParams(url.search); - const apiVersion = queryParams.get("api-version"); - if (apiVersion) { - attributes["url.query.api_version"] = apiVersion; - } - } - } catch (error) { - diag.debug("Failed to extract URL attributes", error); + const [basePath, fullPath] = [ + new URL(baseUrl).pathname, + new URL(fullUrl).pathname, + ]; + const path = fullPath.startsWith(basePath) + ? fullPath.slice(basePath.length) + : fullPath; + return path.startsWith("/") ? path : `/${path}`; + } catch { + return null; } - - return attributes; -} +}; /** - * Gets URL attributes for a request from stored request information - * @param span The span to get URL attributes for - * @returns URL attributes object + * Adds back non-sensitive query parameters to a redacted URL + * @param redactedUrl The URL that has been redacted + * @param originalUrl The original URL containing parameters + * @param paramsToRestore Array of parameter names to restore (defaults to ["api-version"]) + * @returns The redacted URL with specified parameters restored */ -function getStoredUrlAttributes(span: Span): Record { +const addBackNonSensitiveParams = ( + redactedUrl: string, + originalUrl: string, + paramsToRestore: string[] = ["api-version"], +): string => { try { - const spanContext = span.spanContext(); - const contextKey = `${spanContext.traceId}-${spanContext.spanId}`; - const urlInfo = requestUrlMap.get(contextKey); - if (urlInfo) { - diag.debug("Retrieved URL info from requestUrlMap", { - urlInfo, - contextKey, - }); - // Clean up after use to prevent memory leaks - requestUrlMap.delete(contextKey); - return getUrlAttributes(urlInfo.url, urlInfo.baseUrl); - } else { - diag.debug("No URL info found in requestUrlMap for this span", { - contextKey, - }); + const [original, redacted] = [new URL(originalUrl), new URL(redactedUrl)]; + let hasChanges = false; + + for (const param of paramsToRestore) { + const value = original.searchParams.get(param); + if (value) { + redacted.searchParams.set(param, value); + hasChanges = true; + } } - } catch (error) { - diag.debug("Failed to get stored URL attributes", error); + + return hasChanges ? redacted.toString() : redactedUrl; + } catch { + return redactedUrl; } - return {}; -} +}; /** * Gets the appropriate LLM provider based on the OpenAI client instance @@ -384,83 +324,29 @@ export class OpenAIInstrumentation extends InstrumentationBase { // eslint-disable-next-line @typescript-eslint/no-this-alias const instrumentation: OpenAIInstrumentation = this; - // Patch the post method to capture URL information + // Patch the buildURL method to capture URL information this._wrap( module.OpenAI.prototype, - "post", + "buildURL", // eslint-disable-next-line @typescript-eslint/no-explicit-any (original: any): any => { - return function patchedPost( - this: unknown, - path: string, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - body?: any, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - options?: any, - ) { - // Store URL information for this specific request - try { - const clientInstance = this as { - baseURL?: string; - _client?: { baseURL?: string }; - }; - - let baseUrl: string | undefined; - if ( - clientInstance.baseURL && - typeof clientInstance.baseURL === "string" - ) { - baseUrl = clientInstance.baseURL; - } else if ( - clientInstance._client?.baseURL && - typeof clientInstance._client.baseURL === "string" - ) { - baseUrl = clientInstance._client.baseURL; - } + return function patchedBuildURL(this: unknown, ...args: unknown[]) { + const urlFull = original.apply(this, args) as string; + const activeSpan = trace.getActiveSpan(); - if (baseUrl) { - // Construct the full URL with query parameters if available - let fullUrl = new URL(path, baseUrl).toString(); + if (!activeSpan) return urlFull; - // Add query parameters if they exist in options - if (options?.query && typeof options.query === "object") { - const url = new URL(fullUrl); - Object.entries(options.query).forEach(([key, value]) => { - if (value !== undefined && value !== null) { - url.searchParams.set(key, String(value)); - } - }); - fullUrl = url.toString(); - } + const redactedUrl = redactUrl(urlFull); + const finalUrl = addBackNonSensitiveParams(redactedUrl, urlFull); + activeSpan.setAttribute("url.full", finalUrl); - // Store URL info using the current active span context - const activeSpan = trace.getActiveSpan(); - if (activeSpan) { - const spanContext = activeSpan.spanContext(); - const contextKey = `${spanContext.traceId}-${spanContext.spanId}`; - requestUrlMap.set(contextKey, { url: fullUrl, baseUrl }); - diag.debug("Stored URL info for request", { - fullUrl, - baseUrl, - contextKey, - }); - // Clean up old entries to prevent memory leaks - if (requestUrlMap.size > 1000) { - const oldestKey = requestUrlMap.keys().next().value; - if (oldestKey) { - requestUrlMap.delete(oldestKey); - } - } - } - } - } catch (error) { - diag.debug( - "Failed to capture URL information in post method", - error, - ); + const baseUrl = getBaseUrl(this); + const urlPath = baseUrl && extractRelativePath(urlFull, baseUrl); + if (urlPath) { + activeSpan.setAttribute("url.path", urlPath); } - return original.apply(this, [path, body, options]); + return urlFull; }; }, ); @@ -533,8 +419,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_MODEL_NAME]: result.model, ...getChatCompletionLLMOutputMessagesAttributes(result), ...getUsageAttributes(result), - // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(span), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -621,8 +505,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_MODEL_NAME]: result.model, ...getCompletionOutputValueAndMimeType(result), ...getUsageAttributes(result), - // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(span), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -694,8 +576,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { span.setAttributes({ // Do not record the output data as it can be large ...getEmbeddingEmbeddingsAttributes(result), - // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(span), }); } span.setStatus({ code: SpanStatusCode.OK }); @@ -781,8 +661,6 @@ export class OpenAIInstrumentation extends InstrumentationBase { [SemanticConventions.LLM_MODEL_NAME]: result.model, ...getResponsesOutputMessagesAttributes(result), ...getResponsesUsageAttributes(result), - // Add URL attributes now that the request has completed - ...getStoredUrlAttributes(span), }); span.setStatus({ code: SpanStatusCode.OK }); span.end(); @@ -831,7 +709,7 @@ export class OpenAIInstrumentation extends InstrumentationBase { moduleVersion?: string, ) { diag.debug(`Removing patch for ${MODULE_NAME}@${moduleVersion}`); - this._unwrap(moduleExports.OpenAI.prototype, "post"); + this._unwrap(moduleExports.OpenAI.prototype, "buildURL"); this._unwrap(moduleExports.OpenAI.Chat.Completions.prototype, "create"); this._unwrap(moduleExports.OpenAI.Completions.prototype, "create"); this._unwrap(moduleExports.OpenAI.Embeddings.prototype, "create"); diff --git a/js/packages/openinference-instrumentation-openai/test/httpUtils.test.ts b/js/packages/openinference-instrumentation-openai/test/httpUtils.test.ts new file mode 100644 index 0000000000..7ae0c620c7 --- /dev/null +++ b/js/packages/openinference-instrumentation-openai/test/httpUtils.test.ts @@ -0,0 +1,37 @@ +import { redactUrl } from "../src/httpUtils"; + +describe("httpUtils", () => { + describe("redactUrl", () => { + it("should redact credentials from URLs", () => { + const url = "https://user:pass@api.openai.com/v1/chat/completions"; + const result = redactUrl(url); + expect(result).toBe( + "https://REDACTED:REDACTED@api.openai.com/v1/chat/completions", + ); + }); + + it("should redact sensitive query parameters", () => { + const url = + "https://api.example.com/chat?AWSAccessKeyId=secret&Signature=secret&model=gpt-4"; + const result = redactUrl(url); + expect(result).toBe( + "https://api.example.com/chat?AWSAccessKeyId=REDACTED&Signature=REDACTED&model=gpt-4", + ); + }); + + it("should redact both credentials and query parameters", () => { + const url = + "https://user:pass@api.example.com/chat?AWSAccessKeyId=secret&model=gpt-4"; + const result = redactUrl(url); + expect(result).toBe( + "https://REDACTED:REDACTED@api.example.com/chat?AWSAccessKeyId=REDACTED&model=gpt-4", + ); + }); + + it("should handle malformed URLs gracefully", () => { + const malformedUrl = "not-a-valid-url"; + const result = redactUrl(malformedUrl); + expect(result).toBe(malformedUrl); + }); + }); +}); diff --git a/js/packages/openinference-instrumentation-openai/test/openai.test.ts b/js/packages/openinference-instrumentation-openai/test/openai.test.ts index aa4ace03ed..c8c219fbde 100644 --- a/js/packages/openinference-instrumentation-openai/test/openai.test.ts +++ b/js/packages/openinference-instrumentation-openai/test/openai.test.ts @@ -938,6 +938,12 @@ describe("OpenAIInstrumentation", () => { `); }); + it("should ensure buildURL method exists on OpenAI prototype", () => { + // This test will fail if OpenAI SDK removes buildURL method + // If this fails, update the instrumentation to use a different approach + expect(typeof OpenAI.prototype.buildURL).toBe("function"); + }); + it("should capture context attributes and add them to spans", async () => { const response = { id: "cmpl-8fZu1H3VijJUWev9asnxaYyQvJTC9", @@ -1002,6 +1008,7 @@ describe("OpenAIInstrumentation", () => { } `); }); + it("creates a span for chat completions parse", async () => { const response = { id: "chatcmpl-parseTest",