Skip to content

Commit 61ca49f

Browse files
authored
Merge pull request #4232 from DataDog/ivoanjo/prof-11045-fix-profiling-logging
[PROF-11045] Fix profiling warnings being really hard to silence
2 parents be1c39a + 12ca619 commit 61ca49f

File tree

5 files changed

+137
-122
lines changed

5 files changed

+137
-122
lines changed

Diff for: lib/datadog/core/configuration/components.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def initialize(settings)
105105
@profiler, profiler_logger_extra = Datadog::Profiling::Component.build_profiler_component(
106106
settings: settings,
107107
agent_settings: agent_settings,
108-
optional_tracer: @tracer
108+
optional_tracer: @tracer,
109+
logger: @logger,
109110
)
110111
@environment_logger_extra.merge!(profiler_logger_extra) if profiler_logger_extra
111112

Diff for: lib/datadog/profiling/component.rb

+55-53
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@ module Datadog
44
module Profiling
55
# Responsible for wiring up the Profiler for execution
66
module Component
7+
ALLOCATION_WITH_RACTORS_ONLY_ONCE = Datadog::Core::Utils::OnlyOnce.new
8+
private_constant :ALLOCATION_WITH_RACTORS_ONLY_ONCE
9+
710
# Passing in a `nil` tracer is supported and will disable the following profiling features:
8-
# * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span
11+
# * Profiling in the trace viewer, as well as scoping a profile down to a span
912
# * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call)
10-
def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength
13+
def self.build_profiler_component(settings:, agent_settings:, optional_tracer:, logger:) # rubocop:disable Metrics/MethodLength
1114
return [nil, {profiling_enabled: false}] unless settings.profiling.enabled
1215

1316
# Workaround for weird dependency direction: the Core::Configuration::Components class currently has a
@@ -36,14 +39,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
3639

3740
# NOTE: Please update the Initialization section of ProfilingDevelopment.md with any changes to this method
3841

39-
no_signals_workaround_enabled = no_signals_workaround_enabled?(settings)
42+
no_signals_workaround_enabled = no_signals_workaround_enabled?(settings, logger)
4043
timeline_enabled = settings.profiling.advanced.timeline_enabled
41-
allocation_profiling_enabled = enable_allocation_profiling?(settings)
44+
allocation_profiling_enabled = enable_allocation_profiling?(settings, logger)
4245
heap_sample_every = get_heap_sample_every(settings)
43-
heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_every)
44-
heap_size_profiling_enabled = enable_heap_size_profiling?(settings, heap_profiling_enabled)
46+
heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_every, logger)
47+
heap_size_profiling_enabled = enable_heap_size_profiling?(settings, heap_profiling_enabled, logger)
4548

46-
overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage)
49+
overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage, logger)
4750
upload_period_seconds = [60, settings.profiling.advanced.upload_period_seconds].max
4851

4952
recorder = Datadog::Profiling::StackRecorder.new(
@@ -57,13 +60,13 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
5760
)
5861
thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled)
5962
worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new(
60-
gc_profiling_enabled: enable_gc_profiling?(settings),
63+
gc_profiling_enabled: enable_gc_profiling?(settings, logger),
6164
no_signals_workaround_enabled: no_signals_workaround_enabled,
6265
thread_context_collector: thread_context_collector,
6366
dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage,
6467
allocation_profiling_enabled: allocation_profiling_enabled,
6568
allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled,
66-
gvl_profiling_enabled: enable_gvl_profiling?(settings),
69+
gvl_profiling_enabled: enable_gvl_profiling?(settings, logger),
6770
)
6871

6972
internal_metadata = {
@@ -120,7 +123,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
120123
)
121124
end
122125

123-
private_class_method def self.enable_gc_profiling?(settings)
126+
private_class_method def self.enable_gc_profiling?(settings, logger)
124127
return false unless settings.profiling.advanced.gc_enabled
125128

126129
# SEVERE - Only with Ractors
@@ -131,14 +134,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
131134
if RUBY_VERSION.start_with?("3.0.") ||
132135
(RUBY_VERSION.start_with?("3.1.") && RUBY_VERSION < "3.1.4") ||
133136
(RUBY_VERSION.start_with?("3.2.") && RUBY_VERSION < "3.2.3")
134-
Datadog.logger.warn(
137+
logger.warn(
135138
"Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling GC profiling would cause " \
136139
"crashes (https://bugs.ruby-lang.org/issues/18464). GC profiling has been disabled."
137140
)
138141
return false
139142
elsif RUBY_VERSION.start_with?("3.")
140-
Datadog.logger.debug(
141-
"In all known versions of Ruby 3.x, using Ractors may result in GC profiling unexpectedly " \
143+
logger.debug(
144+
"Using Ractors may result in GC profiling unexpectedly " \
142145
"stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your " \
143146
"application stability or performance. This does not happen if Ractors are not used."
144147
)
@@ -155,7 +158,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
155158
heap_sample_rate
156159
end
157160

158-
private_class_method def self.enable_allocation_profiling?(settings)
161+
private_class_method def self.enable_allocation_profiling?(settings, logger)
159162
return false unless settings.profiling.allocation_enabled
160163

161164
# Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x.
@@ -165,7 +168,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
165168
# https://github.com/ruby/ruby/pull/7464) that makes this crash in any configuration. This bug is
166169
# fixed on Ruby versions 3.2.3 and 3.3.0.
167170
if RUBY_VERSION.start_with?("3.2.") && RUBY_VERSION < "3.2.3"
168-
Datadog.logger.warn(
171+
logger.warn(
169172
"Allocation profiling is not supported in Ruby versions 3.2.0, 3.2.1 and 3.2.2 and will be forcibly " \
170173
"disabled. This is due to a VM bug that can lead to crashes (https://bugs.ruby-lang.org/issues/19482). " \
171174
"Other Ruby versions do not suffer from this issue."
@@ -181,7 +184,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
181184
if RUBY_VERSION.start_with?("3.0.") ||
182185
(RUBY_VERSION.start_with?("3.1.") && RUBY_VERSION < "3.1.4") ||
183186
(RUBY_VERSION.start_with?("3.2.") && RUBY_VERSION < "3.2.3")
184-
Datadog.logger.warn(
187+
logger.warn(
185188
"Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling allocation profiling while using " \
186189
"Ractors may cause unexpected issues, including crashes (https://bugs.ruby-lang.org/issues/18464). " \
187190
"This does not happen if Ractors are not used."
@@ -190,25 +193,27 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
190193
# On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets
191194
# garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on.
192195
elsif RUBY_VERSION.start_with?("3.")
193-
Datadog.logger.warn(
194-
"In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly " \
195-
"stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your " \
196-
"application stability or performance. This does not happen if Ractors are not used."
197-
)
196+
ALLOCATION_WITH_RACTORS_ONLY_ONCE.run do
197+
logger.info(
198+
"Using Ractors may result in allocation profiling " \
199+
"stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your " \
200+
"application stability or performance. This does not happen if Ractors are not used."
201+
)
202+
end
198203
end
199204

200-
Datadog.logger.debug("Enabled allocation profiling")
205+
logger.debug("Enabled allocation profiling")
201206

202207
true
203208
end
204209

205-
private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_rate)
210+
private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_rate, logger)
206211
heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled
207212

208213
return false unless heap_profiling_enabled
209214

210215
if RUBY_VERSION < "3.1"
211-
Datadog.logger.warn(
216+
logger.warn(
212217
"Current Ruby version (#{RUBY_VERSION}) cannot support heap profiling due to VM limitations. " \
213218
"Please upgrade to Ruby >= 3.1 in order to use this feature. Heap profiling has been disabled."
214219
)
@@ -219,33 +224,31 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
219224
raise ArgumentError, "Heap profiling requires allocation profiling to be enabled"
220225
end
221226

222-
Datadog.logger.warn(
227+
logger.warn(
223228
"Enabled experimental heap profiling: heap_sample_rate=#{heap_sample_rate}. This is experimental, not " \
224229
"recommended, and will increase overhead!"
225230
)
226231

227232
true
228233
end
229234

230-
private_class_method def self.enable_heap_size_profiling?(settings, heap_profiling_enabled)
235+
private_class_method def self.enable_heap_size_profiling?(settings, heap_profiling_enabled, logger)
231236
heap_size_profiling_enabled = settings.profiling.advanced.experimental_heap_size_enabled
232237

233238
return false unless heap_profiling_enabled && heap_size_profiling_enabled
234239

235-
Datadog.logger.warn(
240+
logger.warn(
236241
"Enabled experimental heap size profiling. This is experimental, not recommended, and will increase overhead!"
237242
)
238243

239244
true
240245
end
241246

242-
private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength
247+
private_class_method def self.no_signals_workaround_enabled?(settings, logger) # rubocop:disable Metrics/MethodLength
243248
setting_value = settings.profiling.advanced.no_signals_workaround_enabled
244-
legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?("2.5.")
245249

246250
unless [true, false, :auto].include?(setting_value)
247-
# TODO: Replace with a warning instead.
248-
Datadog.logger.error(
251+
logger.warn(
249252
"Ignoring invalid value for profiling no_signals_workaround_enabled setting: #{setting_value.inspect}. " \
250253
"Valid options are `true`, `false` or (default) `:auto`."
251254
)
@@ -254,23 +257,23 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
254257
end
255258

256259
if setting_value == false
257-
if legacy_ruby_that_should_use_workaround
258-
Datadog.logger.warn(
259-
'The profiling "no signals" workaround has been disabled via configuration on a legacy Ruby version ' \
260-
"(< 2.6). This is not recommended " \
261-
"in production environments, as due to limitations in Ruby APIs, we suspect it may lead to crashes " \
262-
"in very rare situations. Please report any issues you run into to Datadog support or " \
260+
if RUBY_VERSION.start_with?("2.5.")
261+
logger.warn(
262+
'The profiling "no signals" workaround has been disabled via configuration on Ruby 2.5. ' \
263+
"This is not recommended " \
264+
"in production environments, as due to limitations in Ruby APIs, we suspect it may lead to rare crashes " \
265+
"Please report any issues you run into to Datadog support or " \
263266
"via <https://github.com/datadog/dd-trace-rb/issues/new>!"
264267
)
265268
else
266-
Datadog.logger.warn('Profiling "no signals" workaround disabled via configuration')
269+
logger.warn('Profiling "no signals" workaround disabled via configuration')
267270
end
268271

269272
return false
270273
end
271274

272275
if setting_value == true
273-
Datadog.logger.warn(
276+
logger.warn(
274277
'Profiling "no signals" workaround enabled via configuration. Profiling data will have lower quality.'
275278
)
276279

@@ -280,10 +283,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
280283
# Setting is in auto mode. Let's probe to see if we should enable it:
281284

282285
# We don't warn users in this situation because "upgrade your Ruby" is not a great warning
283-
return true if legacy_ruby_that_should_use_workaround
286+
return true if RUBY_VERSION.start_with?("2.5.")
284287

285-
if Gem.loaded_specs["mysql2"] && incompatible_libmysqlclient_version?(settings)
286-
Datadog.logger.warn(
288+
if Gem.loaded_specs["mysql2"] && incompatible_libmysqlclient_version?(settings, logger)
289+
logger.warn(
287290
'Enabling the profiling "no signals" workaround because an incompatible version of the mysql2 gem is ' \
288291
"installed. Profiling data will have lower quality. " \
289292
"To fix this, upgrade the libmysqlclient in your OS image to version 8.0.0 or above."
@@ -292,7 +295,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
292295
end
293296

294297
if Gem.loaded_specs["rugged"]
295-
Datadog.logger.warn(
298+
logger.warn(
296299
'Enabling the profiling "no signals" workaround because the rugged gem is installed. ' \
297300
"This is needed because some operations on this gem are currently incompatible with the normal working mode " \
298301
"of the profiler, as detailed in <https://github.com/datadog/dd-trace-rb/issues/2721>. " \
@@ -302,7 +305,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
302305
end
303306

304307
if (defined?(::PhusionPassenger) || Gem.loaded_specs["passenger"]) && incompatible_passenger_version?
305-
Datadog.logger.warn(
308+
logger.warn(
306309
'Enabling the profiling "no signals" workaround because an incompatible version of the passenger gem is ' \
307310
"installed. Profiling data will have lower quality." \
308311
"To fix this, upgrade the passenger gem to version 6.0.19 or above."
@@ -322,10 +325,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
322325
#
323326
# The `mysql2` gem's `info` method can be used to determine which `libmysqlclient` version is in use, and thus to
324327
# detect if it's safe for the profiler to use signals or if we need to employ a fallback.
325-
private_class_method def self.incompatible_libmysqlclient_version?(settings)
328+
private_class_method def self.incompatible_libmysqlclient_version?(settings, logger)
326329
return true if settings.profiling.advanced.skip_mysql2_check
327330

328-
Datadog.logger.debug(
331+
logger.debug(
329332
"Requiring `mysql2` to check if the `libmysqlclient` version it uses is compatible with profiling"
330333
)
331334

@@ -354,14 +357,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
354357
libmysqlclient_version >= Gem::Version.new("8.0.0") ||
355358
looks_like_mariadb?(info, libmysqlclient_version)
356359

357-
Datadog.logger.debug(
360+
logger.debug(
358361
"The `mysql2` gem is using #{compatible ? "a compatible" : "an incompatible"} version of " \
359362
"the `libmysqlclient` library (#{libmysqlclient_version})"
360363
)
361364

362365
!compatible
363366
rescue StandardError, LoadError => e
364-
Datadog.logger.warn(
367+
logger.warn(
365368
"Failed to probe `mysql2` gem information. " \
366369
"Cause: #{e.class.name} #{e.message} Location: #{Array(e.backtrace).first}"
367370
)
@@ -383,12 +386,11 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
383386
end
384387
end
385388

386-
private_class_method def self.valid_overhead_target(overhead_target_percentage)
389+
private_class_method def self.valid_overhead_target(overhead_target_percentage, logger)
387390
if overhead_target_percentage > 0 && overhead_target_percentage <= 20
388391
overhead_target_percentage
389392
else
390-
# TODO: Replace with a warning instead.
391-
Datadog.logger.error(
393+
logger.warn(
392394
"Ignoring invalid value for profiling overhead_target_percentage setting: " \
393395
"#{overhead_target_percentage.inspect}. Falling back to default value."
394396
)
@@ -432,10 +434,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
432434
settings.profiling.advanced.dir_interruption_workaround_enabled
433435
end
434436

435-
private_class_method def self.enable_gvl_profiling?(settings)
437+
private_class_method def self.enable_gvl_profiling?(settings, logger)
436438
if RUBY_VERSION < "3.2"
437439
if settings.profiling.advanced.preview_gvl_enabled
438-
Datadog.logger.warn("GVL profiling is currently not supported in Ruby < 3.2 and will not be enabled.")
440+
logger.warn("GVL profiling is currently not supported in Ruby < 3.2 and will not be enabled.")
439441
end
440442

441443
return false

0 commit comments

Comments
 (0)