// ---------------------------------------------------------------------------
// src/httpUtils.ts
// ---------------------------------------------------------------------------

/**
 * HTTP utilities for OpenTelemetry instrumentation
 * JavaScript equivalent of opentelemetry-python-contrib/util/opentelemetry-util-http
 * Minimal version containing only URL redaction functionality
 */

/**
 * Placeholder written in place of every redacted value
 */
const REDACTED = "REDACTED";

/**
 * List of query parameters whose values should be redacted for security.
 * Names are matched exactly (case-sensitive).
 */
const PARAMS_TO_REDACT: readonly string[] = [
  "AWSAccessKeyId",
  "Signature",
  "sig",
  "X-Goog-Signature",
];

/**
 * Parses a URL string without throwing
 * @param url The URL string to parse
 * @returns The parsed URL, or undefined if the string is not a valid absolute URL
 */
function tryParseUrl(url: string): URL | undefined {
  try {
    return new URL(url);
  } catch {
    return undefined;
  }
}

/**
 * Replaces the username and password with the keyword `REDACTED` in a URL
 * Only modifies the URL if it is valid and contains credentials; when either
 * a username or a password is present, both are overwritten
 * @param url The URL string to process
 * @returns The URL with credentials redacted, or the original string if it is
 * invalid or carries no credentials
 */
export function removeUrlCredentials(url: string): string {
  const parsed = tryParseUrl(url);
  if (!parsed || (!parsed.username && !parsed.password)) {
    // Return the caller's string untouched so it is not re-serialized
    return url;
  }

  parsed.username = REDACTED;
  parsed.password = REDACTED;
  return parsed.toString();
}

/**
 * Redacts sensitive query parameter values in a URL
 * @param url The URL string to process
 * @returns The URL with the values of the parameters in PARAMS_TO_REDACT
 * replaced by `REDACTED`, or the original string if it is invalid or contains
 * none of those parameters
 */
export function redactQueryParameters(url: string): string {
  const parsed = tryParseUrl(url);
  if (!parsed) {
    return url;
  }

  let hasRedactedParams = false;
  for (const param of PARAMS_TO_REDACT) {
    if (parsed.searchParams.has(param)) {
      // `set` also collapses repeated occurrences into a single entry
      parsed.searchParams.set(param, REDACTED);
      hasRedactedParams = true;
    }
  }

  // Only re-serialize when something was actually redacted
  return hasRedactedParams ? parsed.toString() : url;
}

/**
 * Redacts sensitive data from the URL, including credentials and query parameters
 * @param url The URL string to process
 * @returns The URL with all sensitive data redacted
 */
export function redactUrl(url: string): string {
  return redactQueryParameters(removeUrlCredentials(url));
}

// ---------------------------------------------------------------------------
// src/instrumentation.ts — URL helpers used by the patched
// `OpenAI.prototype.buildURL` to populate `url.full` and `url.path`
// ---------------------------------------------------------------------------

/**
 * Extracts the base URL from an OpenAI client instance
 * @param instance The OpenAI client instance (may be nested under `_client`)
 * @returns The base URL string, or undefined if not found or if `instance`
 * is not an object
 */
function getBaseUrl(instance: unknown): string | undefined {
  if (typeof instance !== "object" || instance === null) {
    return undefined;
  }
  const client = instance as {
    baseURL?: string;
    _client?: { baseURL?: string };
  };
  return client.baseURL || client._client?.baseURL;
}

/**
 * Extracts the relative path from a full URL given a base URL
 *
 * The base path is only removed when it matches on a path-segment boundary,
 * so a base of `/v1` is stripped from `/v1/chat` but not from `/v10/chat`.
 * A trailing slash on the base path is ignored.
 * @param fullUrl The complete URL to extract the path from
 * @param baseUrl The base URL to remove from the full URL
 * @returns The relative path (always starting with `/`), or null if either
 * URL cannot be parsed
 */
const extractRelativePath = (
  fullUrl: string,
  baseUrl: string,
): string | null => {
  try {
    const fullPath = new URL(fullUrl).pathname;
    // Treat "/v1" and "/v1/" as the same base
    const basePath = new URL(baseUrl).pathname.replace(/\/+$/, "");

    let path = fullPath;
    if (fullPath === basePath) {
      path = "/";
    } else if (fullPath.startsWith(`${basePath}/`)) {
      path = fullPath.slice(basePath.length);
    }

    return path.startsWith("/") ? path : `/${path}`;
  } catch {
    return null;
  }
};

/**
 * Adds back non-sensitive query parameters to a redacted URL
 *
 * Each listed parameter that has a non-empty value in the original URL is
 * set to that value on the redacted URL.
 * @param redactedUrl The URL that has been redacted
 * @param originalUrl The original URL containing parameters
 * @param paramsToRestore Array of parameter names to restore (defaults to ["api-version"])
 * @returns The redacted URL with specified parameters restored, or
 * `redactedUrl` unchanged if nothing was restored or either URL is invalid
 */
const addBackNonSensitiveParams = (
  redactedUrl: string,
  originalUrl: string,
  paramsToRestore: string[] = ["api-version"],
): string => {
  try {
    const original = new URL(originalUrl);
    const redacted = new URL(redactedUrl);
    let hasChanges = false;

    for (const param of paramsToRestore) {
      const value = original.searchParams.get(param);
      if (value) {
        redacted.searchParams.set(param, value);
        hasChanges = true;
      }
    }

    return hasChanges ? redacted.toString() : redactedUrl;
  } catch {
    return redactedUrl;
  }
};
// test/httpUtils.test.ts
import { redactUrl } from "../src/httpUtils";

describe("httpUtils", () => {
  describe("redactUrl", () => {
    it("should redact credentials from URLs", () => {
      const url = "https://user:pass@api.openai.com/v1/chat/completions";
      const result = redactUrl(url);
      expect(result).toBe(
        "https://REDACTED:REDACTED@api.openai.com/v1/chat/completions",
      );
    });

    it("should redact both fields when only a username is present", () => {
      const url = "https://token@api.example.com/v1/models";
      const result = redactUrl(url);
      expect(result).toBe(
        "https://REDACTED:REDACTED@api.example.com/v1/models",
      );
    });

    it("should redact sensitive query parameters", () => {
      const url =
        "https://api.example.com/chat?AWSAccessKeyId=secret&Signature=secret&model=gpt-4";
      const result = redactUrl(url);
      expect(result).toBe(
        "https://api.example.com/chat?AWSAccessKeyId=REDACTED&Signature=REDACTED&model=gpt-4",
      );
    });

    it("should redact sig and X-Goog-Signature parameters", () => {
      const url =
        "https://storage.example.com/file?sig=abc&X-Goog-Signature=def&name=report";
      const result = redactUrl(url);
      expect(result).toBe(
        "https://storage.example.com/file?sig=REDACTED&X-Goog-Signature=REDACTED&name=report",
      );
    });

    it("should redact both credentials and query parameters", () => {
      const url =
        "https://user:pass@api.example.com/chat?AWSAccessKeyId=secret&model=gpt-4";
      const result = redactUrl(url);
      expect(result).toBe(
        "https://REDACTED:REDACTED@api.example.com/chat?AWSAccessKeyId=REDACTED&model=gpt-4",
      );
    });

    it("should return URLs without sensitive data unchanged", () => {
      // Neither credentials nor a listed parameter: the input string is returned as-is
      const url =
        "https://api.openai.com/v1/chat/completions?api-version=2024-02-01";
      const result = redactUrl(url);
      expect(result).toBe(url);
    });

    it("should handle malformed URLs gracefully", () => {
      const malformedUrl = "not-a-valid-url";
      const result = redactUrl(malformedUrl);
      expect(result).toBe(malformedUrl);
    });
  });
});
OpenAI.prototype.buildURL).toBe("function"); + }); + it("should capture context attributes and add them to spans", async () => { const response = { id: "cmpl-8fZu1H3VijJUWev9asnxaYyQvJTC9", @@ -1002,6 +1008,7 @@ describe("OpenAIInstrumentation", () => { } `); }); + it("creates a span for chat completions parse", async () => { const response = { id: "chatcmpl-parseTest", @@ -1650,4 +1657,17 @@ describe("OpenAIInstrumentation with a custom tracer provider", () => { expect(span.attributes["llm.model_name"]).toBe("gpt-3.5-turbo-0613"); }); }); + + describe("URL extraction", () => { + it("should detect Azure provider correctly", () => { + const azureClient = new OpenAI({ + apiKey: "test-key", + baseURL: + "https://test-resource.openai.azure.com/openai/deployments/gpt-4", + }); + + // Just verify the client was created with Azure base URL + expect(azureClient.baseURL).toContain("openai.azure.com"); + }); + }); });