Skip to content

Commit d478351

Browse files
feat: add plugin execution tracing to OpenTelemetry plugin
This commit adds plugin execution tracing capability to the OpenTelemetry plugin, allowing users to trace individual plugin phases (rewrite, access, header_filter, body_filter, log) as child spans of the main request trace.

Changes:
- Added the trace_plugins configuration object (trace_plugins.enabled defaults to false, so the feature is opt-in)
- Added the trace_plugins.plugin_span_kind option for observability provider compatibility
- Added the trace_plugins.excluded_plugins option to keep selected plugins out of the trace
- Enhanced plugin execution with OpenTelemetry span creation and finishing
- Added comprehensive request context attributes to plugin spans
- Updated documentation with examples and usage instructions
- Added comprehensive test suite for the new functionality

Features:
- Plugin Phase Tracing: Creates child spans for each plugin phase execution
- Rich Context: Includes HTTP method, URI, hostname, user agent, route info, and service info
- Configurable: Can be enabled/disabled via the trace_plugins.enabled setting
- Span Kind Control: Supports internal (default) and server span kinds for observability provider compatibility
- Proper Hierarchy: Plugin spans are correctly nested under main request spans
- Performance: Minimal overhead when disabled (default behavior)

Configuration (all options live under the trace_plugins object):
- trace_plugins.enabled: boolean (default: false) - Enable/disable plugin tracing
- trace_plugins.plugin_span_kind: string (default: 'internal') - Span kind for plugin spans
  - 'internal': Standard internal operation (may be excluded from metrics)
  - 'server': Server-side operation (typically included in service-level metrics)
- trace_plugins.excluded_plugins: array of strings (default: ['opentelemetry', 'prometheus']) - Plugins that are never traced

This addresses GitHub issue #12510 and provides end-to-end tracing visibility for APISIX plugin execution phases.
1 parent 2c041a3 commit d478351

File tree

5 files changed

+954
-4
lines changed

5 files changed

+954
-4
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ A/B testing, canary release, blue-green deployment, limit rate, defense against
123123
- **OPS friendly**
124124

125125
- Zipkin tracing: [Zipkin](docs/en/latest/plugins/zipkin.md)
126+
- OpenTelemetry tracing: [OpenTelemetry](docs/en/latest/plugins/opentelemetry.md) with plugin execution tracing
126127
- Open source APM: support [Apache SkyWalking](docs/en/latest/plugins/skywalking.md)
127128
- Works with external service discovery: In addition to the built-in etcd, it also supports [Consul](docs/en/latest/discovery/consul.md), [Consul_kv](docs/en/latest/discovery/consul_kv.md), [Nacos](docs/en/latest/discovery/nacos.md), [Eureka](docs/en/latest/discovery/eureka.md) and [Zookeeper (CP)](https://github.com/api7/apisix-seed/blob/main/docs/en/latest/zookeeper.md).
128129
- Monitoring And Metrics: [Prometheus](docs/en/latest/plugins/prometheus.md)

apisix/plugin.lua

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,6 +1169,9 @@ function _M.run_plugin(phase, plugins, api_ctx)
11691169
return api_ctx
11701170
end
11711171

1172+
-- Get OpenTelemetry plugin for tracing
1173+
local otel_plugin = _M.get("opentelemetry")
1174+
11721175
if phase ~= "log"
11731176
and phase ~= "header_filter"
11741177
and phase ~= "body_filter"
@@ -1188,11 +1191,26 @@ function _M.run_plugin(phase, plugins, api_ctx)
11881191
goto CONTINUE
11891192
end
11901193

1194+
-- Start OpenTelemetry plugin span
1195+
if otel_plugin and otel_plugin.start_plugin_span then
1196+
otel_plugin.start_plugin_span(api_ctx, plugins[i]["name"], phase)
1197+
end
1198+
11911199
run_meta_pre_function(conf, api_ctx, plugins[i]["name"])
11921200
plugin_run = true
11931201
api_ctx._plugin_name = plugins[i]["name"]
11941202
local code, body = phase_func(conf, api_ctx)
11951203
api_ctx._plugin_name = nil
1204+
1205+
-- Finish OpenTelemetry plugin span
1206+
if otel_plugin and otel_plugin.finish_plugin_span then
1207+
local error_msg = nil
1208+
if code and code >= 400 then
1209+
error_msg = "plugin returned error code: " .. tostring(code)
1210+
end
1211+
otel_plugin.finish_plugin_span(api_ctx, plugins[i]["name"], phase, error_msg)
1212+
end
1213+
11961214
if code or body then
11971215
if is_http then
11981216
if code >= 400 then
@@ -1216,7 +1234,6 @@ function _M.run_plugin(phase, plugins, api_ctx)
12161234
end
12171235
end
12181236
end
1219-
12201237
::CONTINUE::
12211238
end
12221239
return api_ctx, plugin_run
@@ -1226,11 +1243,26 @@ function _M.run_plugin(phase, plugins, api_ctx)
12261243
local phase_func = plugins[i][phase]
12271244
local conf = plugins[i + 1]
12281245
if phase_func and meta_filter(api_ctx, plugins[i]["name"], conf) then
1246+
-- Start OpenTelemetry plugin span
1247+
if otel_plugin and otel_plugin.start_plugin_span then
1248+
otel_plugin.start_plugin_span(api_ctx, plugins[i]["name"], phase)
1249+
end
1250+
12291251
plugin_run = true
12301252
run_meta_pre_function(conf, api_ctx, plugins[i]["name"])
12311253
api_ctx._plugin_name = plugins[i]["name"]
1232-
phase_func(conf, api_ctx)
1254+
1255+
local code = phase_func(conf, api_ctx)
12331256
api_ctx._plugin_name = nil
1257+
1258+
-- Finish OpenTelemetry plugin span
1259+
if otel_plugin and otel_plugin.finish_plugin_span then
1260+
local error_msg = nil
1261+
if code and code >= 400 then
1262+
error_msg = "plugin returned error code: " .. tostring(code)
1263+
end
1264+
otel_plugin.finish_plugin_span(api_ctx, plugins[i]["name"], phase, error_msg)
1265+
end
12341266
end
12351267
end
12361268

apisix/plugins/opentelemetry.lua

Lines changed: 273 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,42 @@ local schema = {
182182
type = "string",
183183
minLength = 1,
184184
}
185+
},
186+
trace_plugins = {
187+
type = "object",
188+
description = "configuration for plugin execution tracing",
189+
properties = {
190+
enabled = {
191+
type = "boolean",
192+
description = "whether to trace individual plugin execution",
193+
default = false
194+
},
195+
plugin_span_kind = {
196+
type = "string",
197+
enum = {"internal", "server"},
198+
description = "span kind for plugin execution spans. "
199+
.. "Some observability providers may exclude internal "
200+
.. "spans from metrics and dashboards. Use 'server' "
201+
.. "if you need plugin spans included in "
202+
.. "service-level metrics.",
203+
default = "internal"
204+
},
205+
excluded_plugins = {
206+
type = "array",
207+
description = "plugins to exclude from tracing "
208+
.. "(e.g., opentelemetry, prometheus)",
209+
items = {
210+
type = "string",
211+
minLength = 1,
212+
},
213+
default = {"opentelemetry", "prometheus"}
214+
}
215+
},
216+
default = {
217+
enabled = false,
218+
plugin_span_kind = "internal",
219+
excluded_plugins = {"opentelemetry", "prometheus"}
220+
}
185221
}
186222
}
187223
}
@@ -306,6 +342,174 @@ local function inject_attributes(attributes, wanted_attributes, source, with_pre
306342
end
307343

308344

345+
-- Plugin span management functions
346+
-- =================================
347+
348+
-- Create phase span
349+
--- Start (or reuse) the span covering one plugin phase execution.
-- Spans are cached in api_ctx.otel_plugin_spans under the key
-- "<plugin_name>:<phase>", so a second call for the same plugin and phase
-- returns the cached span context instead of starting a new span.
-- @param api_ctx     request context; nothing is done unless it carries `otel`
-- @param plugin_name name of the plugin whose phase is about to run
-- @param phase       name of the phase being executed
-- @return the span context for this plugin phase, or nil when none is available
local function create_phase_span(api_ctx, plugin_name, phase)
    local otel = api_ctx.otel
    if not otel then
        return nil
    end

    local spans = api_ctx.otel_plugin_spans
    if not spans then
        spans = {}
        api_ctx.otel_plugin_spans = spans
    end

    -- unique key for the plugin+phase combination
    local span_key = plugin_name .. ":" .. phase
    local phase_span_ctx = spans[span_key]
    if not phase_span_ctx then
        -- span is named "<plugin_name> <phase>"
        phase_span_ctx = otel.start_span({
            name = plugin_name .. " " .. phase,
            kind = api_ctx.otel_plugin_span_kind,
            attributes = {
                attr.string("apisix.plugin_name", plugin_name),
                attr.string("apisix.plugin_phase", phase),
            }
        })
        spans[span_key] = phase_span_ctx
    end

    -- Only advertise this phase as "current" when a span really exists;
    -- otherwise a stale key would be left behind that is never cleared,
    -- because finish_phase_span only resets it after stopping a span.
    if phase_span_ctx then
        api_ctx._current_plugin_phase = span_key
    end

    return phase_span_ctx
end
378+
379+
-- Finish phase span
380+
--- Stop the span recorded for one plugin phase and drop it from the cache.
-- Does nothing when no spans were ever recorded on this request or when no
-- span is stored for this plugin+phase.
-- @param api_ctx     request context holding `otel` and `otel_plugin_spans`
-- @param plugin_name name of the plugin whose phase just ran
-- @param phase       name of the phase that just ran
-- @param error_msg   optional message; passed through to `otel.stop_span`
local function finish_phase_span(api_ctx, plugin_name, phase, error_msg)
    local recorded = api_ctx.otel_plugin_spans
    if not recorded then
        return
    end

    local key = plugin_name .. ":" .. phase
    local ctx = recorded[key]
    if not ctx then
        return
    end

    api_ctx.otel.stop_span(ctx, error_msg)
    api_ctx.otel_plugin_spans[key] = nil

    -- the finished phase can no longer act as parent for child spans
    if api_ctx._current_plugin_phase == key then
        api_ctx._current_plugin_phase = nil
    end
end
398+
399+
-- Cleanup all plugin spans
400+
--- Stop every plugin phase span that is still recorded on the request.
-- Each leftover span is stopped with an error message so that it shows up
-- as abnormal in the trace; afterwards the bookkeeping fields are reset.
-- @param api_ctx request context holding `otel` and `otel_plugin_spans`
local function cleanup_plugin_spans(api_ctx)
    local spans = api_ctx.otel_plugin_spans
    if not spans then
        return
    end

    local stop_span = api_ctx.otel.stop_span
    -- the key is irrelevant here; pairs never yields nil values
    for _, phase_span_ctx in pairs(spans) do
        stop_span(phase_span_ctx,
                  "plugin span cleanup: check logs for details")
    end

    api_ctx.otel_plugin_spans = nil
    api_ctx._current_plugin_phase = nil
end
414+
415+
416+
-- OpenTelemetry API for plugins
417+
-- =============================
418+
419+
-- No-op API when tracing is disabled
420+
-- Stand-in for the plugin-facing tracing API: it exposes the same four
-- entry points as the table built by create_otel_api, but every call does
-- nothing and yields nil. Parameters are prefixed with "_" because they are
-- accepted only to mirror the real signatures.
local noop_api = {
    start_span = function(_span_info)
        return nil
    end,

    stop_span = function(_span_ctx, _error_msg)
        -- no-op
    end,

    current_span = function()
        return nil
    end,

    -- takes (plugin_name, phase) like the real implementation
    get_plugin_context = function(_plugin_name, _phase)
        return nil
    end
}
437+
438+
-- Create simple OpenTelemetry API for plugins
439+
--- Build the per-request tracing API that is exposed to plugins.
-- The returned table closes over `api_ctx`, `tracer` and `main_context` and
-- keeps a stack of started spans in api_ctx._otel_span_stack.
-- @param api_ctx      request context used for span bookkeeping
-- @param tracer       tracer object; its `start` method is used to open spans
-- @param main_context context used as parent when no better parent is known
-- @return table with start_span, stop_span, current_span, get_plugin_context
local function create_otel_api(api_ctx, tracer, main_context)
    -- stack of span contexts started through this API (most recent last)
    if not api_ctx._otel_span_stack then
        api_ctx._otel_span_stack = {}
    end

    return {
        -- Start a span described by `span_info` ({name, kind, attributes,
        -- parent}). Returns the new span context, or nil without a name.
        start_span = function(span_info)
            if not (span_info and span_info.name) then
                return nil
            end

            -- parent priority: explicit parent, then the span of the plugin
            -- phase currently executing, then the main request context
            local plugin_spans = api_ctx.otel_plugin_spans
            local current_key = api_ctx._current_plugin_phase
            local current_phase_span = current_key and plugin_spans
                                       and plugin_spans[current_key]

            local parent_context = span_info.parent or current_phase_span
                                   or main_context

            -- callers are expected to pass span_kind constants directly
            local span_ctx = tracer:start(parent_context, span_info.name, {
                kind = span_info.kind or span_kind.internal,
                attributes = span_info.attributes or {},
            })

            core.table.insert(api_ctx._otel_span_stack, span_ctx)

            return span_ctx
        end,

        -- Finish the span behind `span_ctx`; a non-nil `error_msg` marks the
        -- span with ERROR status first.
        stop_span = function(span_ctx, error_msg)
            if not span_ctx then
                return
            end

            local span = span_ctx:span()
            if not span then
                return
            end

            if error_msg then
                span:set_status(span_status.ERROR, error_msg)
            end

            span:finish()

            -- Remove the span from the stack wherever it sits. Checking only
            -- the top entry would leave a span stopped out of order on the
            -- stack, and current_span() could later return a finished span.
            local stack = api_ctx._otel_span_stack
            if stack then
                for i = #stack, 1, -1 do
                    if stack[i] == span_ctx then
                        core.table.remove(stack, i)
                        break
                    end
                end
            end
        end,

        -- Return the most recently started span that is still on the stack.
        current_span = function()
            local stack = api_ctx._otel_span_stack
            if stack and #stack > 0 then
                return stack[#stack]
            end
            return nil
        end,

        -- Return the span context recorded for `plugin_name` in `phase`,
        -- or nil when there is none (or `phase` is not given).
        get_plugin_context = function(plugin_name, phase)
            if not (api_ctx.otel_plugin_spans and phase) then
                return nil
            end
            return api_ctx.otel_plugin_spans[plugin_name .. ":" .. phase]
        end
    }
end
512+
309513
function _M.rewrite(conf, api_ctx)
310514
local metadata = plugin.plugin_metadata(plugin_name)
311515
if metadata == nil then
@@ -323,7 +527,7 @@ function _M.rewrite(conf, api_ctx)
323527
return
324528
end
325529

326-
local span_name = vars.method
530+
local span_name = string_format("http.%s", vars.method)
327531

328532
local attributes = {
329533
attr.string("net.host.name", vars.host),
@@ -337,7 +541,7 @@ function _M.rewrite(conf, api_ctx)
337541
table.insert(attributes, attr.string("apisix.route_id", api_ctx.route_id))
338542
table.insert(attributes, attr.string("apisix.route_name", api_ctx.route_name))
339543
table.insert(attributes, attr.string("http.route", api_ctx.curr_req_matched._path))
340-
span_name = span_name .. " " .. api_ctx.curr_req_matched._path
544+
span_name = string_format("http.%s %s", vars.method, api_ctx.curr_req_matched._path)
341545
end
342546

343547
if api_ctx.service_id then
@@ -378,6 +582,30 @@ function _M.rewrite(conf, api_ctx)
378582

379583
api_ctx.otel_context_token = ctx:attach()
380584

585+
-- Store tracer and configuration for plugin tracing
586+
if conf.trace_plugins.enabled then
587+
-- Map string span kind to span_kind constant
588+
local kind_mapping = {
589+
internal = span_kind.internal,
590+
server = span_kind.server,
591+
}
592+
api_ctx.otel_plugin_span_kind = kind_mapping[conf.trace_plugins.plugin_span_kind]
593+
594+
-- Store excluded plugins configuration
595+
api_ctx.otel_excluded_plugins = {}
596+
if conf.trace_plugins.excluded_plugins then
597+
for _, plugin_name in ipairs(conf.trace_plugins.excluded_plugins) do
598+
api_ctx.otel_excluded_plugins[plugin_name] = true
599+
end
600+
end
601+
602+
-- Create OpenTelemetry API for plugins
603+
api_ctx.otel = create_otel_api(api_ctx, tracer, ctx)
604+
else
605+
-- Always provide API - no-op when tracing disabled
606+
api_ctx.otel = noop_api
607+
end
608+
381609
-- inject trace context into the headers of upstream HTTP request
382610
trace_context_propagator:inject(ctx, ngx.req)
383611
end
@@ -419,7 +647,50 @@ function _M.log(conf, api_ctx)
419647
end
420648

421649
span:finish()
650+
-- Clear the context token to prevent double finishing
651+
api_ctx.otel_context_token = nil
652+
653+
-- Cleanup plugin spans (guaranteed cleanup on request end)
654+
cleanup_plugin_spans(api_ctx)
655+
end
656+
end
657+
658+
659+
-- Public functions for plugin tracing integration
660+
-- ===============================================
661+
662+
-- Start plugin phase span
663+
-- Safe to call even if OpenTelemetry plugin is not enabled (will be no-op)
664+
--- Open the tracing span for one plugin phase, if plugin tracing applies.
-- @param api_ctx     request context
-- @param plugin_name name of the plugin about to run
-- @param phase       name of the phase about to run
-- @return the span context from create_phase_span, or nil when skipped
function _M.start_plugin_span(api_ctx, plugin_name, phase)
    -- otel_plugin_span_kind doubles as the "plugin tracing is on" flag
    local tracing_on = api_ctx.otel_plugin_span_kind
    local excluded = api_ctx.otel_excluded_plugins

    -- skip when tracing is off or this plugin is on the exclusion list
    if not tracing_on or (excluded and excluded[plugin_name]) then
        return nil
    end

    return create_phase_span(api_ctx, plugin_name, phase)
end
678+
679+
680+
-- Finish plugin phase span
681+
-- Safe to call even if OpenTelemetry plugin is not enabled (will be no-op)
682+
--- Close the tracing span for one plugin phase, if one may have been opened.
-- @param api_ctx     request context
-- @param plugin_name name of the plugin that just ran
-- @param phase       name of the phase that just ran
-- @param error_msg   optional message handed on to finish_phase_span
function _M.finish_plugin_span(api_ctx, plugin_name, phase, error_msg)
    -- without a span table no plugin span was ever recorded on this request
    local has_spans = api_ctx.otel_plugin_spans
    local excluded = api_ctx.otel_excluded_plugins

    -- proceed only if spans exist and the plugin is not on the exclusion list
    if has_spans and not (excluded and excluded[plugin_name]) then
        finish_phase_span(api_ctx, plugin_name, phase, error_msg)
    end
end
424695

425696

0 commit comments

Comments
 (0)