diff --git a/benchmarks/profiling_sample_gvl.rb b/benchmarks/profiling_sample_gvl.rb index 4557bb5b4d3..d33251dade5 100644 --- a/benchmarks/profiling_sample_gvl.rb +++ b/benchmarks/profiling_sample_gvl.rb @@ -61,7 +61,7 @@ def run_benchmark x.report("gvl benchmark samples") do Datadog::Profiling::Collectors::ThreadContext::Testing._native_on_gvl_waiting(@target_thread) Datadog::Profiling::Collectors::ThreadContext::Testing._native_on_gvl_running(@target_thread) - Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample_after_gvl_running(@collector, @target_thread) + Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample_after_gvl_running(@collector, @target_thread, false) end x.save! "#{File.basename(__FILE__)}-results.json" unless VALIDATE_BENCHMARK_MODE diff --git a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c index be033110f73..db7f2eb44bd 100644 --- a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +++ b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c @@ -11,6 +11,7 @@ #include "clock_id.h" #include "helpers.h" #include "private_vm_api_access.h" +#include "ruby_helpers.h" #include "time_helpers.h" // Validate that our home-cooked pthread_id_for() matches pthread_self() for the current thread @@ -18,7 +19,7 @@ void self_test_clock_id(void) { rb_nativethread_id_t expected_pthread_id = pthread_self(); rb_nativethread_id_t actual_pthread_id = pthread_id_for(rb_thread_current()); - if (expected_pthread_id != actual_pthread_id) rb_raise(rb_eRuntimeError, "pthread_id_for() self-test failed"); + if (expected_pthread_id != actual_pthread_id) raise_error(rb_eRuntimeError, "pthread_id_for() self-test failed"); } // Safety: This function is assumed never to raise exceptions by callers diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 022c4588705..2ac64ecafb6 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -289,7 +289,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gvl_running_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ) { - rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); + raise_error(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); } #else gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround(); @@ -472,10 +472,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) { cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state; if (old_state != NULL) { if (is_thread_alive(old_state->owner_thread)) { - rb_raise( - rb_eRuntimeError, - "Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread" - ); + raise_error(rb_eRuntimeError, "Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread"); } else { // The previously active thread seems to have died without cleaning up after itself. // In this case, we can still go ahead and start the profiler BUT we make sure to disable any existing tracepoint @@ -821,7 +818,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { NULL ); #else - rb_raise(rb_eArgError, "GVL profiling is not supported in this Ruby version"); + raise_error(rb_eArgError, "GVL profiling is not supported in this Ruby version"); #endif } diff --git a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c index 931adf85cb5..26ad9611f38 100644 --- a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +++ b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c @@ -51,7 +51,7 @@ void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) { if (target_overhead <= 0 || target_overhead > 100) { - rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead); + raise_error(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead); } sampler->target_overhead = target_overhead; return discrete_dynamic_sampler_reset(sampler, now_ns); @@ -369,7 +369,7 @@ static VALUE _native_new(VALUE klass) { long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); if (now_ns == 0) { - rb_raise(rb_eRuntimeError, "failed to get clock time"); + raise_error(rb_eRuntimeError, "failed to get clock time"); } discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns); diff --git a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c index e9dd6c727d9..7afc62998ae 100644 --- a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c @@ -2,6 +2,7 @@ #include #include "collectors_gc_profiling_helper.h" +#include "ruby_helpers.h" // This helper is used by the Datadog::Profiling::Collectors::ThreadContext to profile garbage collection. // It's tested through that class' interfaces. @@ -71,7 +72,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { 1; // gc type if (max_label_count > labels_length) { - rb_raise(rb_eArgError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count); + raise_error(rb_eArgError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count); } uint8_t label_pos = 0; @@ -119,7 +120,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { }; if (label_pos > max_label_count) { - rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + raise_error(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } return label_pos; diff --git a/ext/datadog_profiling_native_extension/collectors_stack.c b/ext/datadog_profiling_native_extension/collectors_stack.c index 94ca42cbdca..2df56796222 100644 --- a/ext/datadog_profiling_native_extension/collectors_stack.c +++ b/ext/datadog_profiling_native_extension/collectors_stack.c @@ -17,6 +17,7 @@ #include "datadog_ruby_common.h" #include "private_vm_api_access.h" +#include "ruby_helpers.h" #include "stack_recorder.h" #include "collectors_stack.h" @@ -284,11 +285,11 @@ void sample_thread( // here, but >= 0 makes this easier to understand/debug. bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns >= 0; - if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label"); + if (cpu_or_wall_sample && state_label == NULL) raise_error(rb_eRuntimeError, "BUG: Unexpected missing state_label"); if (has_cpu_time) { state_label->str = DDOG_CHARSLICE_C("had cpu"); - if (labels.is_gvl_waiting_state) rb_raise(rb_eRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting"); + if (labels.is_gvl_waiting_state) raise_error(rb_eRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting"); } int top_of_stack_position = captured_frames - 1; @@ -600,8 +601,8 @@ bool prepare_sample_thread(VALUE thread, sampling_buffer *buffer) { } uint16_t sampling_buffer_check_max_frames(int max_frames) { - if (max_frames < 5) rb_raise(rb_eArgError, "Invalid max_frames: value must be >= 5"); - if (max_frames > MAX_FRAMES_LIMIT) rb_raise(rb_eArgError, "Invalid max_frames: value must be <= " MAX_FRAMES_LIMIT_AS_STRING); + if (max_frames < 5) raise_error(rb_eArgError, "Invalid max_frames: value must be >= 5"); + if (max_frames > MAX_FRAMES_LIMIT) raise_error(rb_eArgError, "Invalid max_frames: value must be <= " MAX_FRAMES_LIMIT_AS_STRING); return max_frames; } @@ -618,7 +619,7 @@ void sampling_buffer_initialize(sampling_buffer *buffer, uint16_t max_frames, dd void sampling_buffer_free(sampling_buffer *buffer) { if (buffer->max_frames == 0 || buffer->locations == NULL || buffer->stack_buffer == NULL) { - rb_raise(rb_eArgError, "sampling_buffer_free called with invalid buffer"); + raise_error(rb_eArgError, "sampling_buffer_free called with invalid buffer"); } ruby_xfree(buffer->stack_buffer); diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c index f2cc76c6021..cf495e1f9dd 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.c +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c @@ -8,6 +8,7 @@ #include "helpers.h" #include "libdatadog_helpers.h" #include "private_vm_api_access.h" +#include "ruby_helpers.h" #include "stack_recorder.h" #include "time_helpers.h" #include "unsafe_api_calls_check.h" @@ -292,7 +293,7 @@ static bool handle_gvl_waiting( static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread); static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread); static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread); -static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread); +static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE allow_exception); static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns); static void otel_without_ddtrace_trace_identifiers_for( thread_context_collector_state *state, @@ -342,7 +343,7 @@ void collectors_thread_context_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_on_gvl_waiting", _native_on_gvl_waiting, 1); rb_define_singleton_method(testing_module, "_native_gvl_waiting_at_for", _native_gvl_waiting_at_for, 1); rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 1); - rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 2); + rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 3); rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 3); #endif @@ -518,7 +519,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel } else if (otel_context_enabled == ID2SYM(rb_intern("both"))) { state->otel_context_enabled = OTEL_CONTEXT_ENABLED_BOTH; } else { - rb_raise(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled); + raise_error(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled); } global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns); @@ -539,7 +540,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE profiler_overhead_stack_thread, VALUE allow_exception) { ENFORCE_BOOLEAN(allow_exception); - if (!is_thread_alive(profiler_overhead_stack_thread)) rb_raise(rb_eArgError, "Unexpected: profiler_overhead_stack_thread is not alive"); + if (!is_thread_alive(profiler_overhead_stack_thread)) raise_error(rb_eArgError, "Unexpected: profiler_overhead_stack_thread is not alive"); if (allow_exception == Qfalse) debug_enter_unsafe_context(); @@ -831,7 +832,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) { - rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available"); + raise_error(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available"); } int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata @@ -998,7 +999,7 @@ static void trigger_sample_for_thread( // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone // changes the code erroneously and remove it entirely? if (label_pos > max_label_count) { - rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + raise_error(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos}; @@ -1295,7 +1296,7 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, elapsed_time_ns = 0; } else { // We don't expect non-wall time to go backwards, so let's flag this as a bug - rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples"); + raise_error(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples"); } } @@ -1961,7 +1962,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { thread_context_collector_state *state; TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); - if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled"); + if (!state->timeline_enabled) raise_error(rb_eRuntimeError, "GVL profiling requires timeline to be enabled"); intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread); @@ -2131,10 +2132,13 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { return result; } - static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) { + static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE allow_exception) { ENFORCE_THREAD(thread); + ENFORCE_BOOLEAN(allow_exception); + - debug_enter_unsafe_context(); + + if (allow_exception == Qfalse) debug_enter_unsafe_context(); VALUE result = thread_context_collector_sample_after_gvl_running( collector_instance, @@ -2142,7 +2146,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { monotonic_wall_time_now_ns(RAISE_ON_FAILURE) ); - debug_leave_unsafe_context(); + if (allow_exception == Qfalse) debug_leave_unsafe_context(); return result; } @@ -2154,7 +2158,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); per_thread_context *thread_context = get_context_for(thread, state); - if (thread_context == NULL) rb_raise(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists"); + if (thread_context == NULL) raise_error(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists"); thread_context->cpu_time_at_previous_sample_ns += NUM2LONG(delta_ns); diff --git a/ext/datadog_profiling_native_extension/datadog_ruby_common.c b/ext/datadog_profiling_native_extension/datadog_ruby_common.c index ca402dca264..1327454e84f 100644 --- a/ext/datadog_profiling_native_extension/datadog_ruby_common.c +++ b/ext/datadog_profiling_native_extension/datadog_ruby_common.c @@ -1,8 +1,11 @@ #include "datadog_ruby_common.h" +#include // IMPORTANT: Currently this file is copy-pasted between extensions. Make sure to update all versions when doing any change! -void raise_unexpected_type(VALUE value, const char *value_name, const char *type_name, const char *file, int line, const char* function_name) { + + +void raise_unexpected_type(VALUE value, const char *value_name, const char *type_name, const char *file, int line, const char *function_name) { rb_exc_raise( rb_exc_new_str( rb_eTypeError, @@ -18,6 +21,14 @@ void raise_unexpected_type(VALUE value, const char *value_name, const char *type ); } +void raise_error(VALUE error_class, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + VALUE message = rb_vsprintf(fmt, args); + va_end(args); + rb_raise(error_class, "%"PRIsVALUE, message); +} + VALUE datadog_gem_version(void) { VALUE ddtrace_module = rb_const_get(rb_cObject, rb_intern("Datadog")); ENFORCE_TYPE(ddtrace_module, T_MODULE); @@ -78,3 +89,8 @@ ddog_Vec_Tag convert_tags(VALUE tags_as_array) { return tags; } + +void datadog_ruby_common_init(VALUE datadog_module) { + // No longer needed - using Ruby's built-in exception classes + (void)datadog_module; +} diff --git a/ext/datadog_profiling_native_extension/datadog_ruby_common.h b/ext/datadog_profiling_native_extension/datadog_ruby_common.h index d55c2bb479f..b5f86ae7584 100644 --- a/ext/datadog_profiling_native_extension/datadog_ruby_common.h +++ b/ext/datadog_profiling_native_extension/datadog_ruby_common.h @@ -5,6 +5,9 @@ #include #include +// Must be called once during initialization +void datadog_ruby_common_init(VALUE datadog_module); + // Used to mark symbols to be exported to the outside of the extension. // Consider very carefully before tagging a function with this. #define DDTRACE_EXPORT __attribute__ ((visibility ("default"))) @@ -32,6 +35,11 @@ NORETURN(void raise_unexpected_type(VALUE value, const char *value_name, const char *type_name, const char *file, int line, const char* function_name)); +// Helper to raise errors with formatted messages +NORETURN(void raise_error(VALUE error_class, const char *fmt, ...)) __attribute__ ((format (gnu_printf, 2, 3))); + + + // Helper to retrieve Datadog::VERSION::STRING VALUE datadog_gem_version(void); diff --git a/ext/datadog_profiling_native_extension/encoded_profile.c b/ext/datadog_profiling_native_extension/encoded_profile.c index cc28b9e40c5..1475ca82a95 100644 --- a/ext/datadog_profiling_native_extension/encoded_profile.c +++ b/ext/datadog_profiling_native_extension/encoded_profile.c @@ -1,6 +1,7 @@ #include "encoded_profile.h" #include "datadog_ruby_common.h" #include "libdatadog_helpers.h" +#include "ruby_helpers.h" // This class exists to wrap a ddog_prof_EncodedProfile into a Ruby object // This file implements the native bits of the Datadog::Profiling::EncodedProfile class @@ -41,7 +42,7 @@ VALUE from_ddog_prof_EncodedProfile(ddog_prof_EncodedProfile profile) { static ddog_ByteSlice get_bytes(ddog_prof_EncodedProfile *state) { ddog_prof_Result_ByteSlice raw_bytes = ddog_prof_EncodedProfile_bytes(state); if (raw_bytes.tag == DDOG_PROF_RESULT_BYTE_SLICE_ERR_BYTE_SLICE) { - rb_raise(rb_eRuntimeError, "Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err)); + raise_error(rb_eRuntimeError, "Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err)); } return raw_bytes.ok; } diff --git a/ext/datadog_profiling_native_extension/heap_recorder.c b/ext/datadog_profiling_native_extension/heap_recorder.c index f869d24c6d3..47030463073 100644 --- a/ext/datadog_profiling_native_extension/heap_recorder.c +++ b/ext/datadog_profiling_native_extension/heap_recorder.c @@ -259,7 +259,7 @@ void heap_recorder_set_sample_rate(heap_recorder *heap_recorder, int sample_rate } if (sample_rate <= 0) { - rb_raise(rb_eArgError, "Heap sample rate must be a positive integer value but was %d", sample_rate); + raise_error(rb_eArgError, "Heap sample rate must be a positive integer value but was %d", sample_rate); } heap_recorder->sample_rate = sample_rate; @@ -300,7 +300,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj } if (heap_recorder->active_recording != NULL) { - rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end."); + raise_error(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end."); } if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate || @@ -323,7 +323,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj VALUE ruby_obj_id = rb_obj_id(new_obj); if (!FIXNUM_P(ruby_obj_id)) { - rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling."); + raise_error(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling."); } heap_recorder->active_recording = object_record_new( @@ -371,7 +371,7 @@ static VALUE end_heap_allocation_recording(VALUE protect_args) { if (active_recording == NULL) { // Recording ended without having been started? - rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); + raise_error(rb_eRuntimeError, "Ended a heap recording that was not started"); } // From now on, mark the global active recording as invalid so we can short-circuit at any point // and not end up with a still active recording. the local active_recording still holds the @@ -487,14 +487,14 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot != NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished."); + raise_error(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished."); } heap_recorder_update(heap_recorder, /* full_update: */ true); heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records); if (heap_recorder->object_records_snapshot == NULL) { - rb_raise(rb_eRuntimeError, "Failed to create heap snapshot."); + raise_error(rb_eRuntimeError, "Failed to create heap snapshot."); } } @@ -505,7 +505,7 @@ void heap_recorder_finish_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot == NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(rb_eRuntimeError, "Heap recorder iteration finished without having been prepared."); + raise_error(rb_eRuntimeError, "Heap recorder iteration finished without having been prepared."); } st_free_table(heap_recorder->object_records_snapshot); @@ -581,7 +581,7 @@ typedef struct { VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) { if (heap_recorder == NULL) { - rb_raise(rb_eArgError, "heap_recorder is NULL"); + raise_error(rb_eArgError, "heap_recorder is NULL"); } VALUE debug_str = rb_str_new2("object records:\n"); @@ -733,7 +733,7 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec // needed to fully build the object_record. active_recording->heap_record = heap_record; if (heap_record->num_tracked_objects == UINT32_MAX) { - rb_raise(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record"); + raise_error(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record"); } heap_record->num_tracked_objects++; @@ -741,11 +741,11 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec if (existing_error) { object_record *existing_record = NULL; st_lookup(heap_recorder->object_records, active_recording->obj_id, (st_data_t *) &existing_record); - if (existing_record == NULL) rb_raise(rb_eRuntimeError, "Unexpected NULL when reading existing record"); + if (existing_record == NULL) raise_error(rb_eRuntimeError, "Unexpected NULL when reading existing record"); VALUE existing_inspect = object_record_inspect(heap_recorder, existing_record); VALUE new_inspect = object_record_inspect(heap_recorder, active_recording); - rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " + raise_error(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " "the same id. previous={%"PRIsVALUE"} new={%"PRIsVALUE"}", existing_inspect, new_inspect); } } @@ -781,7 +781,7 @@ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_rec } if (!st_delete(heap_recorder->heap_records, (st_data_t*) &heap_record, NULL)) { - rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record"); + raise_error(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record"); }; heap_record_free(heap_recorder, heap_record); } @@ -791,14 +791,14 @@ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, obj // (See PROF-10656 Datadog-internal for details). Just in case, I've sprinkled a bunch of NULL tests in this function for now. // Once we figure out the issue we can get rid of them again. - if (heap_recorder == NULL) rb_raise(rb_eRuntimeError, "heap_recorder was NULL in on_committed_object_record_cleanup"); - if (heap_recorder->heap_records == NULL) rb_raise(rb_eRuntimeError, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); - if (record == NULL) rb_raise(rb_eRuntimeError, "record was NULL in on_committed_object_record_cleanup"); + if (heap_recorder == NULL) raise_error(rb_eRuntimeError, "heap_recorder was NULL in on_committed_object_record_cleanup"); + if (heap_recorder->heap_records == NULL) raise_error(rb_eRuntimeError, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); + if (record == NULL) raise_error(rb_eRuntimeError, "record was NULL in on_committed_object_record_cleanup"); // Starting with the associated heap record. There will now be one less tracked object pointing to it heap_record *heap_record = record->heap_record; - if (heap_record == NULL) rb_raise(rb_eRuntimeError, "heap_record was NULL in on_committed_object_record_cleanup"); + if (heap_record == NULL) raise_error(rb_eRuntimeError, "heap_record was NULL in on_committed_object_record_cleanup"); heap_record->num_tracked_objects--; @@ -862,7 +862,7 @@ heap_record* heap_record_new(heap_recorder *recorder, ddog_prof_Slice_Location l uint16_t frames_len = locations.len; if (frames_len > MAX_FRAMES_LIMIT) { // This is not expected as MAX_FRAMES_LIMIT is shared with the stacktrace construction mechanism - rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); + raise_error(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); } heap_record *stack = calloc(1, sizeof(heap_record) + frames_len * sizeof(heap_frame)); // See "note on calloc vs ruby_xcalloc use" above stack->num_tracked_objects = 0; @@ -933,21 +933,21 @@ static void unintern_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern(recorder->string_storage, id); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + raise_error(rb_eRuntimeError, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids) { ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern_all(recorder->string_storage, ids); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + raise_error(rb_eRuntimeError, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static VALUE get_ruby_string_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) { ddog_StringWrapperResult get_string_result = ddog_prof_ManagedStringStorage_get_string(recorder->string_storage, id); if (get_string_result.tag == DDOG_STRING_WRAPPER_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); + raise_error(rb_eRuntimeError, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); } VALUE ruby_string = ruby_string_from_vec_u8(get_string_result.ok.message); ddog_StringWrapper_drop((ddog_StringWrapper *) &get_string_result.ok); @@ -962,7 +962,7 @@ static inline double ewma_stat(double previous, double current) { VALUE heap_recorder_testonly_is_object_recorded(heap_recorder *heap_recorder, VALUE obj_id) { if (heap_recorder == NULL) { - rb_raise(rb_eArgError, "heap_recorder is NULL"); + raise_error(rb_eArgError, "heap_recorder is NULL"); } // Check if object records contains an object with this object_id @@ -971,15 +971,15 @@ VALUE heap_recorder_testonly_is_object_recorded(heap_recorder *heap_recorder, VA void heap_recorder_testonly_reset_last_update(heap_recorder *heap_recorder) { if (heap_recorder == NULL) { - rb_raise(rb_eArgError, "heap_recorder is NULL"); + raise_error(rb_eArgError, "heap_recorder is NULL"); } heap_recorder->last_update_ns = 0; } void heap_recorder_testonly_benchmark_intern(heap_recorder *heap_recorder, ddog_CharSlice string, int times, bool use_all) { - if (heap_recorder == NULL) rb_raise(rb_eArgError, "heap profiling must be enabled"); - if (times > REUSABLE_FRAME_DETAILS_SIZE) rb_raise(rb_eArgError, "times cannot be > than REUSABLE_FRAME_DETAILS_SIZE"); + if (heap_recorder == NULL) raise_error(rb_eArgError, "heap profiling must be enabled"); + if (times > REUSABLE_FRAME_DETAILS_SIZE) raise_error(rb_eArgError, "times cannot be > than REUSABLE_FRAME_DETAILS_SIZE"); if (use_all) { ddog_CharSlice *strings = heap_recorder->reusable_char_slices; diff --git a/ext/datadog_profiling_native_extension/http_transport.c b/ext/datadog_profiling_native_extension/http_transport.c index 52e4db67c5d..28586c67b3d 100644 --- a/ext/datadog_profiling_native_extension/http_transport.c +++ b/ext/datadog_profiling_native_extension/http_transport.c @@ -85,7 +85,7 @@ static ddog_prof_Endpoint endpoint_from(VALUE exporter_configuration) { return ddog_prof_Endpoint_agent(char_slice_from_ruby_string(base_url)); } else { - rb_raise(rb_eArgError, "Failed to initialize transport: Unexpected working mode, expected :agentless or :agent"); + raise_error(rb_eArgError, "Failed to initialize transport: Unexpected working mode, expected :agentless or :agent"); } } @@ -112,7 +112,7 @@ static ddog_prof_ProfileExporter_Result create_exporter(VALUE exporter_configura } static void validate_token(ddog_CancellationToken token, const char *file, int line) { - if (token.inner == NULL) rb_raise(rb_eRuntimeError, "Unexpected: Validation token was empty at %s:%d", file, line); + if (token.inner == NULL) raise_error(rb_eRuntimeError, "Unexpected: Validation token was empty at %s:%d", file, line); } static VALUE handle_exporter_failure(ddog_prof_ProfileExporter_Result exporter_result) { diff --git a/ext/datadog_profiling_native_extension/libdatadog_helpers.c b/ext/datadog_profiling_native_extension/libdatadog_helpers.c index 16185b8080b..ac5f5c5cd5e 100644 --- a/ext/datadog_profiling_native_extension/libdatadog_helpers.c +++ b/ext/datadog_profiling_native_extension/libdatadog_helpers.c @@ -1,6 +1,7 @@ #include "libdatadog_helpers.h" #include +#include "ruby_helpers.h" const char *ruby_value_type_to_string(enum ruby_value_type type) { return ruby_value_type_to_char_slice(type).ptr; @@ -66,7 +67,7 @@ ddog_prof_ManagedStringId intern_or_raise(ddog_prof_ManagedStringStorage string_ ddog_prof_ManagedStringStorageInternResult intern_result = ddog_prof_ManagedStringStorage_intern(string_storage, string); if (intern_result.tag == DDOG_PROF_MANAGED_STRING_STORAGE_INTERN_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to intern string: %"PRIsVALUE, get_error_details_and_drop(&intern_result.err)); + raise_error(rb_eRuntimeError, "Failed to intern string: %"PRIsVALUE, get_error_details_and_drop(&intern_result.err)); } return intern_result.ok; } @@ -79,6 +80,6 @@ void intern_all_or_raise( ) { ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_intern_all(string_storage, strings, output_ids, output_ids_size); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to intern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + raise_error(rb_eRuntimeError, "Failed to intern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index bcf97709c14..c1527badc6c 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -8,6 +8,7 @@ #include "clock_id.h" #include "helpers.h" #include "private_vm_api_access.h" +#include "datadog_ruby_common.h" #include "ruby_helpers.h" #include "setup_signal_handler.h" #include "time_helpers.h" @@ -49,6 +50,10 @@ void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { #endif VALUE datadog_module = rb_define_module("Datadog"); + + // MUST be called before all other initialization + datadog_ruby_common_init(datadog_module); + VALUE profiling_module = rb_define_module_under(datadog_module, "Profiling"); VALUE native_extension_module = rb_define_module_under(profiling_module, "NativeExtension"); @@ -97,7 +102,7 @@ static VALUE native_working_p(DDTRACE_UNUSED VALUE _self) { typedef struct { VALUE exception_class; char *test_message; - int test_message_arg; + char *test_message_arg; } trigger_grab_gvl_and_raise_arguments; static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE exception_class, VALUE test_message, VALUE test_message_arg, VALUE release_gvl) { @@ -107,24 +112,24 @@ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE except args.exception_class = exception_class; args.test_message = StringValueCStr(test_message); - args.test_message_arg = test_message_arg != Qnil ? NUM2INT(test_message_arg) : -1; + args.test_message_arg = test_message_arg != Qnil ? StringValueCStr(test_message_arg) : NULL; if (RTEST(release_gvl)) { rb_thread_call_without_gvl(trigger_grab_gvl_and_raise, &args, NULL, NULL); } else { - grab_gvl_and_raise(args.exception_class, "%s", args.test_message); + private_grab_gvl_and_raise(args.exception_class, 0, args.test_message, args.test_message_arg); } - rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); + raise_error(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); } static void *trigger_grab_gvl_and_raise(void *trigger_args) { trigger_grab_gvl_and_raise_arguments *args = (trigger_grab_gvl_and_raise_arguments *) trigger_args; - if (args->test_message_arg >= 0) { - grab_gvl_and_raise(args->exception_class, "%s%d", args->test_message, args->test_message_arg); + if (args->test_message_arg != NULL) { + private_grab_gvl_and_raise(args->exception_class, 0, args->test_message, args->test_message_arg); } else { - grab_gvl_and_raise(args->exception_class, "%s", args->test_message); + private_grab_gvl_and_raise(args->exception_class, 0, args->test_message); } return NULL; @@ -133,7 +138,7 @@ static void *trigger_grab_gvl_and_raise(void *trigger_args) { typedef struct { int syserr_errno; char *test_message; - int test_message_arg; + char *test_message_arg; } trigger_grab_gvl_and_raise_syserr_arguments; static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE test_message, VALUE test_message_arg, VALUE release_gvl) { @@ -143,24 +148,24 @@ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE args.syserr_errno = NUM2INT(syserr_errno); args.test_message = StringValueCStr(test_message); - args.test_message_arg = test_message_arg != Qnil ? NUM2INT(test_message_arg) : -1; + args.test_message_arg = test_message_arg != Qnil ? StringValueCStr(test_message_arg) : NULL; if (RTEST(release_gvl)) { rb_thread_call_without_gvl(trigger_grab_gvl_and_raise_syserr, &args, NULL, NULL); } else { - grab_gvl_and_raise_syserr(args.syserr_errno, "%s", args.test_message); + private_grab_gvl_and_raise(Qnil, args.syserr_errno, args.test_message, args.test_message_arg); } - rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); + raise_error(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); } static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args) { trigger_grab_gvl_and_raise_syserr_arguments *args = (trigger_grab_gvl_and_raise_syserr_arguments *) trigger_args; - if (args->test_message_arg >= 0) { - grab_gvl_and_raise_syserr(args->syserr_errno, "%s%d", args->test_message, args->test_message_arg); + if (args->test_message_arg != NULL) { + private_grab_gvl_and_raise(Qnil, args->syserr_errno, args->test_message, args->test_message_arg); } else { - grab_gvl_and_raise_syserr(args->syserr_errno, "%s", args->test_message); + private_grab_gvl_and_raise(Qnil, args->syserr_errno, args->test_message); } return NULL; @@ -246,7 +251,7 @@ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VA replace_sigprof_signal_handler_with_empty_handler(holding_the_gvl_signal_handler); - if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(rb_eRuntimeError, "Could not signal background_thread"); + if (holding_the_gvl_signal_handler_result[0] == Qfalse) raise_error(rb_eRuntimeError, "Could not signal background_thread"); VALUE result = rb_hash_new(); rb_hash_aset(result, ID2SYM(rb_intern("ruby_thread_has_gvl_p")), holding_the_gvl_signal_handler_result[1]); diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.c b/ext/datadog_profiling_native_extension/ruby_helpers.c index 61b9db1af4f..b411564050e 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.c +++ b/ext/datadog_profiling_native_extension/ruby_helpers.c @@ -12,6 +12,8 @@ static ID _id2ref_id = Qnil; static ID inspect_id = Qnil; static ID to_s_id = Qnil; +static ID telemetry_message_id = Qnil; + void ruby_helpers_init(void) { rb_global_variable(&module_object_space); @@ -19,76 +21,121 @@ void ruby_helpers_init(void) { _id2ref_id = rb_intern("_id2ref"); inspect_id = rb_intern("inspect"); to_s_id = rb_intern("to_s"); + + telemetry_message_id = rb_intern("@telemetry_message"); } #define MAX_RAISE_MESSAGE_SIZE 256 -typedef struct { - VALUE exception_class; - char exception_message[MAX_RAISE_MESSAGE_SIZE]; -} raise_args; +#define FORMAT_VA_ERROR_MESSAGE(buf, fmt) \ + char buf[MAX_RAISE_MESSAGE_SIZE]; \ + va_list buf##_args; \ + va_start(buf##_args, fmt); \ + vsnprintf(buf, MAX_RAISE_MESSAGE_SIZE, fmt, buf##_args); \ + va_end(buf##_args); -static void *trigger_raise(void *raise_arguments) { - raise_args *args = (raise_args *) raise_arguments; - rb_raise(args->exception_class, "%s", args->exception_message); -} -void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) { - raise_args args; - args.exception_class = exception_class; +// Raises an exception with separate telemetry-safe and detailed messages. +// Make sure to *not* invoke Ruby code as this function can run in unsafe contexts. +// NOTE: Raising the exception acquires the GVL (unsafe), but it also aborts the current execution flow. +// @see debug_enter_unsafe_context +static NORETURN(void private_raise_exception(VALUE exception, const char *static_message)) { + rb_ivar_set(exception, telemetry_message_id, rb_str_new_cstr(static_message)); + rb_exc_raise(exception); +} - va_list format_string_arguments; - va_start(format_string_arguments, format_string); - vsnprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, format_string, format_string_arguments); +// Internal helper for raising pre-formatted exceptions +static NORETURN(void private_raise_error_formatted(VALUE exception_class, const char *detailed_message, const char *static_message)) { + VALUE exception = rb_exc_new_cstr(exception_class, detailed_message); + private_raise_exception(exception, static_message); +} - if (is_current_thread_holding_the_gvl()) { - rb_raise( - rb_eRuntimeError, - "grab_gvl_and_raise called by thread holding the global VM lock. exception_message: '%s'", - args.exception_message - ); - } +// Internal helper for raising pre-formatted syserr exceptions +static NORETURN(void private_raise_syserr_formatted(int syserr_errno, const char *detailed_message, const char *static_message)) { + VALUE exception = rb_syserr_new(syserr_errno, detailed_message); + private_raise_exception(exception, static_message); +} - rb_thread_call_with_gvl(trigger_raise, &args); +// Use `raise_error` the macro instead, as it provides additional argument checks. +void private_raise_error(VALUE exception_class, const char *fmt, ...) { + FORMAT_VA_ERROR_MESSAGE(detailed_message, fmt); + private_raise_error_formatted(exception_class, detailed_message, fmt); +} - rb_bug("[ddtrace] Unexpected: Reached the end of grab_gvl_and_raise while raising '%s'\n", args.exception_message); +// Use `raise_syserr` the macro instead, as it provides additional argument checks. +void private_raise_syserr(int syserr_errno, const char *fmt, ...) { + FORMAT_VA_ERROR_MESSAGE(detailed_message, fmt); + private_raise_syserr_formatted(syserr_errno, detailed_message, fmt); } typedef struct { + VALUE exception_class; int syserr_errno; char exception_message[MAX_RAISE_MESSAGE_SIZE]; -} syserr_raise_args; + char telemetry_message[MAX_RAISE_MESSAGE_SIZE]; +} raise_args; + +static void *trigger_raise(void *raise_arguments) { + raise_args *args = (raise_args *) raise_arguments; + + if (args->syserr_errno) { + private_raise_syserr_formatted( + args->syserr_errno, + args->exception_message, + args->telemetry_message + ); + } else { + private_raise_error_formatted( + args->exception_class, + args->exception_message, + args->telemetry_message + ); + } -static void *trigger_syserr_raise(void *syserr_raise_arguments) { - syserr_raise_args *args = (syserr_raise_args *) syserr_raise_arguments; - rb_syserr_fail(args->syserr_errno, args->exception_message); + return NULL; } -void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...) { - syserr_raise_args args; +void private_grab_gvl_and_raise(VALUE exception_class, int syserr_errno, const char *format_string, ...) { + raise_args args; - args.syserr_errno = syserr_errno; + if (syserr_errno != 0) { + args.exception_class = Qnil; + args.syserr_errno = syserr_errno; + } else { + args.exception_class = exception_class; + args.syserr_errno = 0; + } - va_list format_string_arguments; - va_start(format_string_arguments, format_string); - vsnprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, format_string, format_string_arguments); + FORMAT_VA_ERROR_MESSAGE(formatted_exception_message, format_string); + snprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, "%s", formatted_exception_message); + snprintf(args.telemetry_message, MAX_RAISE_MESSAGE_SIZE, "%s", format_string); if (is_current_thread_holding_the_gvl()) { - rb_raise( - rb_eRuntimeError, - "grab_gvl_and_raise_syserr called by thread holding the global VM lock. syserr_errno: %d, exception_message: '%s'", - syserr_errno, + char telemetry_message[MAX_RAISE_MESSAGE_SIZE]; + snprintf( + telemetry_message, + MAX_RAISE_MESSAGE_SIZE, + "grab_gvl_and_raise called by thread holding the global VM lock: %s", + format_string + ); + char exception_message[MAX_RAISE_MESSAGE_SIZE]; + snprintf( + exception_message, + MAX_RAISE_MESSAGE_SIZE, + "grab_gvl_and_raise called by thread holding the global VM lock: %s", args.exception_message ); + VALUE exception = rb_exc_new_cstr(rb_eRuntimeError, exception_message); + private_raise_exception(exception, telemetry_message); } - rb_thread_call_with_gvl(trigger_syserr_raise, &args); + rb_thread_call_with_gvl(trigger_raise, &args); - rb_bug("[ddtrace] Unexpected: Reached the end of grab_gvl_and_raise_syserr while raising '%s'\n", args.exception_message); + rb_bug("[ddtrace] Unexpected: Reached the end of grab_gvl_and_raise while raising '%s'\n", args.exception_message); } -void raise_syserr( +void raise_enforce_syserr( int syserr_errno, bool have_gvl, const char *expression, @@ -99,7 +146,7 @@ void raise_syserr( if (have_gvl) { rb_exc_raise(rb_syserr_new_str(syserr_errno, rb_sprintf("Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name))); } else { - grab_gvl_and_raise_syserr(syserr_errno, "Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name); + private_grab_gvl_and_raise(Qnil, syserr_errno, "Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name); } } diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.h b/ext/datadog_profiling_native_extension/ruby_helpers.h index 5c135a21d14..cbb52e39482 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.h +++ b/ext/datadog_profiling_native_extension/ruby_helpers.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "datadog_ruby_common.h" // Initialize internal data needed by some ruby helpers. Should be called during start, before any actual @@ -39,26 +40,49 @@ static inline int check_if_pending_exception(void) { #define VALUE_COUNT(array) (sizeof(array) / sizeof(VALUE)) +// Raises an exception of the specified class with the formatted string as its message. +// This macro ensures that the literal string is sent for telemetry, while the formatted +// message is the default `Exception#message`. +// *Ruby exceptions not raised through this function will not be reported via telemetry.* +#define raise_error(exception_class, fmt, ...) \ + private_raise_error(exception_class, "" fmt, ##__VA_ARGS__) + +// Raises a SysErr exception with the formatted string as its message. +// See `raise_error` for details about telemetry messages. +#define raise_syserr(syserr_errno, fmt, ...) \ + private_raise_syserr(syserr_errno, "" fmt, ##__VA_ARGS__) + +#define grab_gvl_and_raise(exception_class, fmt, ...) \ + private_grab_gvl_and_raise(exception_class, 0, "" fmt, ##__VA_ARGS__) + NORETURN( - void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) + void private_raise_error(VALUE exception_class, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); ); + NORETURN( - void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...) + void private_raise_syserr(int syserr_errno, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); ); +NORETURN( + void private_grab_gvl_and_raise(VALUE exception_class, int syserr_errno, const char *format_string, ...) + __attribute__ ((format (printf, 3, 4))); +); + + + #define ENFORCE_SUCCESS_GVL(expression) ENFORCE_SUCCESS_HELPER(expression, true) #define ENFORCE_SUCCESS_NO_GVL(expression) ENFORCE_SUCCESS_HELPER(expression, false) #define ENFORCE_SUCCESS_HELPER(expression, have_gvl) \ - { int result_syserr_errno = expression; if (RB_UNLIKELY(result_syserr_errno)) raise_syserr(result_syserr_errno, have_gvl, ADD_QUOTES(expression), __FILE__, __LINE__, __func__); } + { int result_syserr_errno = expression; if (RB_UNLIKELY(result_syserr_errno)) raise_enforce_syserr(result_syserr_errno, have_gvl, ADD_QUOTES(expression), __FILE__, __LINE__, __func__); } #define RUBY_NUM_OR_NIL(val, condition, conv) ((val condition) ? conv(val) : Qnil) #define RUBY_AVG_OR_NIL(total, count) ((count == 0) ? Qnil : DBL2NUM(((double) total) / count)) // Called by ENFORCE_SUCCESS_HELPER; should not be used directly -NORETURN(void raise_syserr( +NORETURN(void raise_enforce_syserr( int syserr_errno, bool have_gvl, const char *expression, diff --git a/ext/datadog_profiling_native_extension/setup_signal_handler.c b/ext/datadog_profiling_native_extension/setup_signal_handler.c index fa4bb27e95c..610933a91b4 100644 --- a/ext/datadog_profiling_native_extension/setup_signal_handler.c +++ b/ext/datadog_profiling_native_extension/setup_signal_handler.c @@ -95,8 +95,14 @@ static inline void toggle_sigprof_signal_handler_for_current_thread(int action) sigset_t signals_to_toggle; sigemptyset(&signals_to_toggle); sigaddset(&signals_to_toggle, SIGPROF); + int error = pthread_sigmask(action, &signals_to_toggle, NULL); - if (error) rb_exc_raise(rb_syserr_new_str(error, rb_sprintf("Unexpected failure in pthread_sigmask, action=%d", action))); + if (error) { + const char *message = (action == SIG_BLOCK) ? + "Unexpected failure in pthread_sigmask: action SIG_BLOCK" : + "Unexpected failure in pthread_sigmask: action SIG_UNBLOCK"; + private_raise_syserr(error, message, message); + } } void block_sigprof_signal_handler_from_running_in_current_thread(void) { diff --git a/ext/datadog_profiling_native_extension/setup_signal_handler.h b/ext/datadog_profiling_native_extension/setup_signal_handler.h index 3c969986c4d..22503af1aaa 100644 --- a/ext/datadog_profiling_native_extension/setup_signal_handler.h +++ b/ext/datadog_profiling_native_extension/setup_signal_handler.h @@ -3,7 +3,10 @@ #include #include "datadog_ruby_common.h" + + void empty_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext); + void install_sigprof_signal_handler(void (*signal_handler_function)(int, siginfo_t *, void *), const char *handler_pretty_name); void replace_sigprof_signal_handler_with_empty_handler(void (*expected_existing_handler)(int, siginfo_t *, void *)); void remove_sigprof_signal_handler(void); diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c index bc3b3439500..28d74da0b35 100644 --- a/ext/datadog_profiling_native_extension/stack_recorder.c +++ b/ext/datadog_profiling_native_extension/stack_recorder.c @@ -349,7 +349,7 @@ static VALUE _native_new(VALUE klass) { ddog_prof_ManagedStringStorageNewResult string_storage = ddog_prof_ManagedStringStorage_new(); if (string_storage.tag == DDOG_PROF_MANAGED_STRING_STORAGE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); + raise_error(rb_eRuntimeError, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); } state->string_storage = string_storage.ok; @@ -383,7 +383,7 @@ static void initialize_profiles(stack_recorder_state *state, ddog_prof_Slice_Val ddog_prof_Profile_with_string_storage(sample_types, NULL /* period is optional */, state->string_storage); if (slot_one_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize slot one profile: %"PRIsVALUE, get_error_details_and_drop(&slot_one_profile_result.err)); + raise_error(rb_eRuntimeError, "Failed to initialize slot one profile: %"PRIsVALUE, get_error_details_and_drop(&slot_one_profile_result.err)); } state->profile_slot_one = (profile_slot) { .profile = slot_one_profile_result.ok, .start_timestamp = start_timestamp }; @@ -393,7 +393,7 @@ static void initialize_profiles(stack_recorder_state *state, ddog_prof_Slice_Val if (slot_two_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { // Note: No need to take any special care of slot one, it'll get cleaned up by stack_recorder_typed_data_free - rb_raise(rb_eRuntimeError, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err)); + raise_error(rb_eRuntimeError, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err)); } state->profile_slot_two = (profile_slot) { .profile = slot_two_profile_result.ok, .start_timestamp = start_timestamp }; @@ -591,7 +591,7 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan ddog_prof_MaybeError result = args.advance_gen_result; if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + raise_error(rb_eRuntimeError, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } VALUE start = ruby_time_from(args.slot->start_timestamp); @@ -658,7 +658,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sampler_unlock_active_profile(active_slot); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); + raise_error(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); } } @@ -682,7 +682,7 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_ sampler_unlock_active_profile(active_slot); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(rb_eArgError, "Failed to record endpoint: %"PRIsVALUE, get_error_details_and_drop(&result.err)); + raise_error(rb_eArgError, "Failed to record endpoint: %"PRIsVALUE, get_error_details_and_drop(&result.err)); } } @@ -824,7 +824,7 @@ static locked_profile_slot sampler_lock_active_profile(stack_recorder_state *sta } // We already tried both multiple times, and we did not succeed. This is not expected to happen. Let's stop sampling. - rb_raise(rb_eRuntimeError, "Failed to grab either mutex in sampler_lock_active_profile"); + raise_error(rb_eRuntimeError, "Failed to grab either mutex in sampler_lock_active_profile"); } static void sampler_unlock_active_profile(locked_profile_slot active_slot) { @@ -889,7 +889,7 @@ static VALUE test_slot_mutex_state(VALUE recorder_instance, int slot) { return Qtrue; } else { ENFORCE_SUCCESS_GVL(error); - rb_raise(rb_eRuntimeError, "Failed to raise exception in test_slot_mutex_state; this should never happen"); + raise_error(rb_eRuntimeError, "Failed to raise exception in test_slot_mutex_state; this should never happen"); } } @@ -941,7 +941,7 @@ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_ins static void reset_profile_slot(profile_slot *slot, ddog_Timespec start_timestamp) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&slot->profile); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err)); + raise_error(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err)); } slot->start_timestamp = start_timestamp; slot->stats = (stats_slot) {}; @@ -1056,14 +1056,14 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ddog_prof_ManagedStringStorageNewResult string_storage = ddog_prof_ManagedStringStorage_new(); if (string_storage.tag == DDOG_PROF_MANAGED_STRING_STORAGE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); + raise_error(rb_eRuntimeError, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); } ddog_prof_Slice_ValueType sample_types = {.ptr = all_value_types, .len = ALL_VALUE_TYPES_COUNT}; ddog_prof_Profile_NewResult profile = ddog_prof_Profile_with_string_storage(sample_types, NULL, string_storage.ok); if (profile.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize profile: %"PRIsVALUE, get_error_details_and_drop(&profile.err)); + raise_error(rb_eRuntimeError, "Failed to initialize profile: %"PRIsVALUE, get_error_details_and_drop(&profile.err)); } ddog_prof_ManagedStringId hello = intern_or_raise(string_storage.ok, DDOG_CHARSLICE_C("hello")); @@ -1097,7 +1097,7 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); + raise_error(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); } ddog_Timespec finish_timestamp = system_epoch_now_timespec(); @@ -1105,13 +1105,13 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ddog_prof_Profile_SerializeResult serialize_result = ddog_prof_Profile_serialize(&profile.ok, &start_timestamp, &finish_timestamp); if (serialize_result.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); + raise_error(rb_eRuntimeError, "Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); } ddog_prof_MaybeError advance_gen_result = ddog_prof_ManagedStringStorage_advance_gen(string_storage.ok); if (advance_gen_result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&advance_gen_result.some)); + raise_error(rb_eRuntimeError, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&advance_gen_result.some)); } VALUE encoded_pprof_1 = from_ddog_prof_EncodedProfile(serialize_result.ok); @@ -1127,13 +1127,13 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); + raise_error(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); } serialize_result = ddog_prof_Profile_serialize(&profile.ok, &start_timestamp, &finish_timestamp); if (serialize_result.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) { - rb_raise(rb_eArgError, "Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); + raise_error(rb_eArgError, "Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); } VALUE encoded_pprof_2 = from_ddog_prof_EncodedProfile(serialize_result.ok); diff --git a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c index c3c23b7f1c7..d18441f977e 100644 --- a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c +++ b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c @@ -3,6 +3,7 @@ #include #include "datadog_ruby_common.h" +#include "ruby_helpers.h" #include "unsafe_api_calls_check.h" #include "extconf.h" @@ -21,7 +22,7 @@ void unsafe_api_calls_check_init(void) { check_for_unsafe_api_calls_handle = rb_postponed_job_preregister(unused_flags, check_for_unsafe_api_calls, NULL); if (check_for_unsafe_api_calls_handle == POSTPONED_JOB_HANDLE_INVALID) { - rb_raise(rb_eRuntimeError, "Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); + raise_error(rb_eRuntimeError, "Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); } #endif } diff --git a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h index c6c8dc56cf5..9a74f36d30d 100644 --- a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h +++ b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h @@ -5,8 +5,11 @@ // Specifically, when the profiler is sampling, we're never supposed to call into Ruby code (e.g. methods // implemented using Ruby code) or allocate Ruby objects. // That's because those events introduce thread switch points, and really we don't the VM switching between threads -// in the middle of the profiler sampling. -// This includes raising exceptions, unless we're trying to stop the profiler, and even then we must be careful. +// in the middle of the profiler sampling. This includes raising exceptions. +// +// Raising exceptions as the very last operation, to stop the profiler is ok, but comes a caveat: raising exceptions +// will fail the unsafe check. When testing exception paths, you must disable unsafe checking for that test execution. +// See `allow_exception` usage for how to we disable it for testing today. // // The above is especially true in situations such as GC profiling or allocation/heap profiling, as in those situations // we can even crash the Ruby VM if we switch away at the wrong time. diff --git a/ext/libdatadog_api/crashtracker.c b/ext/libdatadog_api/crashtracker.c index bd63e657304..b3c7e766f11 100644 --- a/ext/libdatadog_api/crashtracker.c +++ b/ext/libdatadog_api/crashtracker.c @@ -2,6 +2,7 @@ #include #include "datadog_ruby_common.h" +#include "helpers.h" static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self); static VALUE _native_stop(DDTRACE_UNUSED VALUE _self); @@ -41,7 +42,7 @@ static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUS ENFORCE_TYPE(action, T_SYMBOL); ENFORCE_TYPE(upload_timeout_seconds, T_FIXNUM); - if (action != start_action && action != update_on_fork_action) rb_raise(rb_eArgError, "Unexpected action: %+"PRIsVALUE, action); + if (action != start_action && action != update_on_fork_action) raise_error(rb_eArgError, "Unexpected action: %+"PRIsVALUE, action); VALUE version = datadog_gem_version(); @@ -49,7 +50,7 @@ static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUS // Start of exception-free zone to prevent leaks {{ ddog_Endpoint *endpoint = ddog_endpoint_from_url(char_slice_from_ruby_string(agent_base_url)); if (endpoint == NULL) { - rb_raise(rb_eRuntimeError, "Failed to create endpoint from agent_base_url: %"PRIsVALUE, agent_base_url); + raise_error(rb_eRuntimeError, "Failed to create endpoint from agent_base_url: %"PRIsVALUE, agent_base_url); } ddog_Vec_Tag tags = convert_tags(tags_as_array); @@ -107,9 +108,7 @@ static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUS ddog_endpoint_drop(endpoint); // }} End of exception-free zone to prevent leaks - if (result.tag == DDOG_VOID_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to start/update the crash tracker: %"PRIsVALUE, get_error_details_and_drop(&result.err)); - } + CHECK_VOID_RESULT("Failed to start/update the crash tracker", result); return Qtrue; } @@ -117,9 +116,7 @@ static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUS static VALUE _native_stop(DDTRACE_UNUSED VALUE _self) { ddog_VoidResult result = ddog_crasht_disable(); - if (result.tag == DDOG_VOID_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to stop the crash tracker: %"PRIsVALUE, get_error_details_and_drop(&result.err)); - } + CHECK_VOID_RESULT("Failed to stop the crash tracker", result); return Qtrue; } diff --git a/ext/libdatadog_api/datadog_ruby_common.c b/ext/libdatadog_api/datadog_ruby_common.c index ca402dca264..1327454e84f 100644 --- a/ext/libdatadog_api/datadog_ruby_common.c +++ b/ext/libdatadog_api/datadog_ruby_common.c @@ -1,8 +1,11 @@ #include "datadog_ruby_common.h" +#include // IMPORTANT: Currently this file is copy-pasted between extensions. Make sure to update all versions when doing any change! -void raise_unexpected_type(VALUE value, const char *value_name, const char *type_name, const char *file, int line, const char* function_name) { + + +void raise_unexpected_type(VALUE value, const char *value_name, const char *type_name, const char *file, int line, const char *function_name) { rb_exc_raise( rb_exc_new_str( rb_eTypeError, @@ -18,6 +21,14 @@ void raise_unexpected_type(VALUE value, const char *value_name, const char *type ); } +void raise_error(VALUE error_class, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + VALUE message = rb_vsprintf(fmt, args); + va_end(args); + rb_raise(error_class, "%"PRIsVALUE, message); +} + VALUE datadog_gem_version(void) { VALUE ddtrace_module = rb_const_get(rb_cObject, rb_intern("Datadog")); ENFORCE_TYPE(ddtrace_module, T_MODULE); @@ -78,3 +89,8 @@ ddog_Vec_Tag convert_tags(VALUE tags_as_array) { return tags; } + +void datadog_ruby_common_init(VALUE datadog_module) { + // No longer needed - using Ruby's built-in exception classes + (void)datadog_module; +} diff --git a/ext/libdatadog_api/datadog_ruby_common.h b/ext/libdatadog_api/datadog_ruby_common.h index d55c2bb479f..b5f86ae7584 100644 --- a/ext/libdatadog_api/datadog_ruby_common.h +++ b/ext/libdatadog_api/datadog_ruby_common.h @@ -5,6 +5,9 @@ #include #include +// Must be called once during initialization +void datadog_ruby_common_init(VALUE datadog_module); + // Used to mark symbols to be exported to the outside of the extension. // Consider very carefully before tagging a function with this. #define DDTRACE_EXPORT __attribute__ ((visibility ("default"))) @@ -32,6 +35,11 @@ NORETURN(void raise_unexpected_type(VALUE value, const char *value_name, const char *type_name, const char *file, int line, const char* function_name)); +// Helper to raise errors with formatted messages +NORETURN(void raise_error(VALUE error_class, const char *fmt, ...)) __attribute__ ((format (gnu_printf, 2, 3))); + + + // Helper to retrieve Datadog::VERSION::STRING VALUE datadog_gem_version(void); diff --git a/ext/libdatadog_api/ddsketch.c b/ext/libdatadog_api/ddsketch.c index d0deab44955..77f3c9847af 100644 --- a/ext/libdatadog_api/ddsketch.c +++ b/ext/libdatadog_api/ddsketch.c @@ -2,6 +2,7 @@ #include #include "datadog_ruby_common.h" +#include "helpers.h" static VALUE _native_new(VALUE klass); static void ddsketch_free(void *ptr); @@ -9,7 +10,6 @@ static VALUE native_add(VALUE self, VALUE point); static VALUE native_add_with_count(VALUE self, VALUE point, VALUE count); static VALUE native_count(VALUE self); static VALUE native_encode(VALUE self); -NORETURN(static void raise_ddsketch_error(const char *message, ddog_VoidResult result)); void ddsketch_init(VALUE core_module) { VALUE ddsketch_class = rb_define_class_under(core_module, "DDSketch", rb_cObject); @@ -48,17 +48,13 @@ static void ddsketch_free(void *ptr) { ruby_xfree(ptr); } -static void raise_ddsketch_error(const char *message, ddog_VoidResult result) { - rb_raise(rb_eRuntimeError, "%s: %"PRIsVALUE, message, get_error_details_and_drop(&result.err)); -} - static VALUE native_add(VALUE self, VALUE point) { ddsketch_Handle_DDSketch *state; TypedData_Get_Struct(self, ddsketch_Handle_DDSketch, &ddsketch_typed_data, state); ddog_VoidResult result = ddog_ddsketch_add(state, NUM2DBL(point)); - if (result.tag == DDOG_VOID_RESULT_ERR) raise_ddsketch_error("DDSketch add failed", result); + CHECK_VOID_RESULT("DDSketch add failed", result); return self; } @@ -69,7 +65,7 @@ static VALUE native_add_with_count(VALUE self, VALUE point, VALUE count) { ddog_VoidResult result = ddog_ddsketch_add_with_count(state, NUM2DBL(point), NUM2DBL(count)); - if (result.tag == DDOG_VOID_RESULT_ERR) raise_ddsketch_error("DDSketch add_with_count failed", result); + CHECK_VOID_RESULT("DDSketch add_with_count failed", result); return self; } @@ -81,7 +77,7 @@ static VALUE native_count(VALUE self) { double count_out; ddog_VoidResult result = ddog_ddsketch_count(state, &count_out); - if (result.tag == DDOG_VOID_RESULT_ERR) raise_ddsketch_error("DDSketch count failed", result); + CHECK_VOID_RESULT("DDSketch count failed", result); return DBL2NUM(count_out); } diff --git a/ext/libdatadog_api/feature_flags.c b/ext/libdatadog_api/feature_flags.c index b908fa54461..940801f7721 100644 --- a/ext/libdatadog_api/feature_flags.c +++ b/ext/libdatadog_api/feature_flags.c @@ -130,7 +130,7 @@ void feature_flags_init(VALUE core_module) { static VALUE configuration_new(VALUE klass, VALUE json_str) { struct ddog_ffe_Result_HandleConfiguration result = ddog_ffe_configuration_new(borrow_str(json_str)); if (result.tag == DDOG_FFE_RESULT_HANDLE_CONFIGURATION_ERR_HANDLE_CONFIGURATION) { - rb_raise(feature_flags_error_class, "Failed to create configuration from JSON: %"PRIsVALUE, get_error_details_and_drop(&result.err)); + raise_error(feature_flags_error_class, "Failed to create configuration from JSON: %"PRIsVALUE, get_error_details_and_drop(&result.err)); } return TypedData_Wrap_Struct(klass, &configuration_data_type, result.ok); } @@ -159,7 +159,7 @@ static ddog_ffe_ExpectedFlagType expected_type_from_value(VALUE expected_type) { } else if (id == id_float) { return DDOG_FFE_EXPECTED_FLAG_TYPE_FLOAT; } else { - rb_raise(feature_flags_error_class, "Internal: Unexpected flag type: %"PRIsVALUE, expected_type); + raise_error(feature_flags_error_class, "Internal: Unexpected flag type: %"PRIsVALUE, expected_type); } } @@ -199,7 +199,7 @@ static int evaluation_context_foreach_callback(VALUE key, VALUE value, VALUE arg if (builder->attr_count >= builder->attr_capacity) { // This should never happen because evaluation_context_from_hash() // pre-allocates attr_capacity equal to iterated Hash size. - rb_raise(feature_flags_error_class, "Internal: Attribute count exceeded capacity"); + raise_error(feature_flags_error_class, "Internal: Attribute count exceeded capacity"); } ddog_ffe_AttributePair *attr = &builder->attrs[builder->attr_count]; @@ -354,7 +354,7 @@ static VALUE resolution_details_get_raw_value(VALUE self) { return Qnil; default: // This should never happen as we checked for all possible tag values. - rb_raise(feature_flags_error_class, "Internal: Unexpected ResolutionDetails value tag"); + raise_error(feature_flags_error_class, "Internal: Unexpected ResolutionDetails value tag"); } } @@ -387,7 +387,7 @@ static VALUE resolution_details_get_flag_type(VALUE self) { return Qnil; default: // This should never happen as we checked for all possible tag values. - rb_raise(feature_flags_error_class, "Internal: Unexpected ResolutionDetails value tag"); + raise_error(feature_flags_error_class, "Internal: Unexpected ResolutionDetails value tag"); } } diff --git a/ext/libdatadog_api/helpers.h b/ext/libdatadog_api/helpers.h new file mode 100644 index 00000000000..98659a065c4 --- /dev/null +++ b/ext/libdatadog_api/helpers.h @@ -0,0 +1,25 @@ +#pragma once + +#include "datadog_ruby_common.h" + +// Raises a Ruby error if the `ddog_VoidResult` indicates an error. +// The error message in `result.err` is appended to the provided `message`. +// +// @param[in] message (const char *) The error message +// @param[in] result (ddog_VoidResult) A result to check +#define CHECK_VOID_RESULT(message, result) \ + do { \ + if (result.tag == DDOG_VOID_RESULT_ERR) { \ + raise_lib_error(message, result); \ + } \ + } while (0) + +// Raises a Ruby error for the error result. +// The error message in `result.err` is appended to the provided `message`. +// +// @param[in] message (const char *) The error message +// @param[in] result (struct { ddog_Error res; ... }) Any type of result +#define raise_lib_error(message, result) \ + do { \ + raise_error(rb_eRuntimeError, message ": %" PRIsVALUE, get_error_details_and_drop(&result.err)); \ + } while (0) diff --git a/ext/libdatadog_api/init.c b/ext/libdatadog_api/init.c index 511115555fc..b7ca8218274 100644 --- a/ext/libdatadog_api/init.c +++ b/ext/libdatadog_api/init.c @@ -10,6 +10,10 @@ void ddsketch_init(VALUE core_module); void DDTRACE_EXPORT Init_libdatadog_api(void) { VALUE datadog_module = rb_define_module("Datadog"); + + // MUST be called before all other initialization + datadog_ruby_common_init(datadog_module); + VALUE core_module = rb_define_module_under(datadog_module, "Core"); crashtracker_init(core_module); diff --git a/gemfiles/ruby_3.4_core_old.gemfile.lock b/gemfiles/ruby_3.4_core_old.gemfile.lock index 4c3259e2f69..53e113fb108 100644 --- a/gemfiles/ruby_3.4_core_old.gemfile.lock +++ b/gemfiles/ruby_3.4_core_old.gemfile.lock @@ -36,6 +36,7 @@ GEM docile (1.4.1) dogstatsd-ruby (4.8.3) ffi (1.17.2-aarch64-linux-gnu) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) google-protobuf (3.25.3) hashdiff (1.1.0) @@ -46,10 +47,13 @@ GEM reline (>= 0.4.2) json-schema (2.8.1) addressable (>= 2.4) + libdatadog (24.0.1.1.0) libdatadog (24.0.1.1.0-aarch64-linux) libdatadog (24.0.1.1.0-x86_64-linux) libddwaf (1.30.0.0.0-aarch64-linux) ffi (~> 1.0) + libddwaf (1.30.0.0.0-arm64-darwin) + ffi (~> 1.0) libddwaf (1.30.0.0.0-x86_64-linux) ffi (~> 1.0) logger (1.7.0) @@ -115,6 +119,7 @@ GEM PLATFORMS aarch64-linux + arm64-darwin-23 x86_64-linux DEPENDENCIES diff --git a/gemfiles/ruby_3.4_relational_db.gemfile.lock b/gemfiles/ruby_3.4_relational_db.gemfile.lock index 5d9d0ae350c..5448aa7628d 100644 --- a/gemfiles/ruby_3.4_relational_db.gemfile.lock +++ b/gemfiles/ruby_3.4_relational_db.gemfile.lock @@ -51,6 +51,7 @@ GEM docile (1.4.1) dogstatsd-ruby (5.6.3) ffi (1.17.2-aarch64-linux-gnu) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) google-protobuf (3.25.5) hashdiff (1.1.2) @@ -63,10 +64,13 @@ GEM reline (>= 0.4.2) json-schema (2.8.1) addressable (>= 2.4) + libdatadog (24.0.1.1.0) libdatadog (24.0.1.1.0-aarch64-linux) libdatadog (24.0.1.1.0-x86_64-linux) libddwaf (1.30.0.0.0-aarch64-linux) ffi (~> 1.0) + libddwaf (1.30.0.0.0-arm64-darwin) + ffi (~> 1.0) libddwaf (1.30.0.0.0-x86_64-linux) ffi (~> 1.0) logger (1.7.0) @@ -143,6 +147,7 @@ GEM PLATFORMS aarch64-linux + arm64-darwin-23 x86_64-linux DEPENDENCIES diff --git a/lib/datadog/core/telemetry/logger.rb b/lib/datadog/core/telemetry/logger.rb index b89aedfadca..7b1ca17a337 100644 --- a/lib/datadog/core/telemetry/logger.rb +++ b/lib/datadog/core/telemetry/logger.rb @@ -14,10 +14,12 @@ module Telemetry # read: lib/datadog/core/telemetry/logging.rb module Logger class << self + # (see Datadog::Core::Telemetry::Logging#report) def report(exception, level: :error, description: nil) instance&.report(exception, level: level, description: description) end + # (see Datadog::Core::Telemetry::Logging#error) def error(description) instance&.error(description) end diff --git a/lib/datadog/core/telemetry/logging.rb b/lib/datadog/core/telemetry/logging.rb index 036bb78d8f4..0438b3031b5 100644 --- a/lib/datadog/core/telemetry/logging.rb +++ b/lib/datadog/core/telemetry/logging.rb @@ -45,11 +45,20 @@ def self.from(exception) end end + # @param exception [Exception] The exception to report + # @param level [Symbol] The log level (:error, :warn, :info, :debug) + # @param description [String, nil] A low cardinality description, without dynamic data def report(exception, level: :error, description: nil) # Anonymous exceptions to be logged as - message = +"#{exception.class.name || exception.class.inspect}" # standard:disable Style/RedundantInterpolation + message = +(exception.class.name || exception.class.inspect) - message << ": #{description}" if description + telemetry_message = message_for_telemetry(exception) + + if description || telemetry_message + message << ':' + message << " #{description}" if description + message << " (#{telemetry_message})" if telemetry_message + end event = Event::Log.new( message: message, @@ -65,6 +74,15 @@ def error(description) log!(event) end + + private + + # A constant string representing the exception + def message_for_telemetry(exception) + return unless exception.instance_variable_defined?(:@telemetry_message) + + exception.instance_variable_get(:@telemetry_message) + end end end end diff --git a/sig/datadog/core/telemetry/logging.rbs b/sig/datadog/core/telemetry/logging.rbs index f64aa023f3c..3df2bb5c14b 100644 --- a/sig/datadog/core/telemetry/logging.rbs +++ b/sig/datadog/core/telemetry/logging.rbs @@ -15,6 +15,10 @@ module Datadog def report: (Exception exception, ?level: Symbol, ?description: String?) -> void def error: (String description) -> void + + private + + def message_for_telemetry: (Exception exception) -> String? end end end diff --git a/spec/datadog/core/ddsketch_spec.rb b/spec/datadog/core/ddsketch_spec.rb index edb928cf7b5..5135245b7fa 100644 --- a/spec/datadog/core/ddsketch_spec.rb +++ b/spec/datadog/core/ddsketch_spec.rb @@ -27,7 +27,9 @@ context 'when the point is a negative number' do it 'raises an error' do - expect { sketch.add(-1.0) }.to raise_error(RuntimeError, 'DDSketch add failed: point is invalid') + expect { sketch.add(-1.0) }.to raise_error(::RuntimeError) do |error| + expect(error.message).to eq('DDSketch add failed: point is invalid') + end end end end @@ -43,7 +45,9 @@ context 'when the point is a negative number' do it 'raises an error' do - expect { sketch.add_with_count(-1.0, 1.0) }.to raise_error(RuntimeError, 'DDSketch add_with_count failed: point is invalid') + expect { sketch.add_with_count(-1.0, 1.0) }.to raise_error(::RuntimeError) do |error| + expect(error.message).to eq('DDSketch add_with_count failed: point is invalid') + end end end end diff --git a/spec/datadog/core/telemetry/logging_spec.rb b/spec/datadog/core/telemetry/logging_spec.rb index 363bc1b380f..2a82404d7e3 100644 --- a/spec/datadog/core/telemetry/logging_spec.rb +++ b/spec/datadog/core/telemetry/logging_spec.rb @@ -82,6 +82,115 @@ def log!(_event) end end end + context 'with an Errno error carrying telemetry message' do + subject(:report) do + raise Errno::ENOENT, 'Dynamic message' + rescue SystemCallError => error + # This is normally done by native extensions, when raising Errno errors + error.instance_variable_set(:@telemetry_message, 'Safe message') + + component.report(error, level: :error, description: description) + end + + let(:description) { nil } + + it 'includes the telemetry message but not the dynamic exception message' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'Errno::ENOENT: (Safe message)', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include('Dynamic message') + end + + report + end + + context 'with description' do + let(:description) { 'Operation failed' } + + it 'includes both description and telemetry message' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'Errno::ENOENT: Operation failed (Safe message)', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include('Dynamic message') + end + + report + end + end + end + context 'with telemetry_message instance variable' do + it 'includes the telemetry-safe message in telemetry' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'RuntimeError: (Static message)', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include('Dynamic message') + end + begin + error = ::RuntimeError.new('Dynamic message') + error.instance_variable_set(:@telemetry_message, 'Static message') + raise error + rescue => e + component.report(e, level: :error) + end + end + context 'with description' do + it 'includes both description and telemetry message' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'RuntimeError: Static description (Static message)', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include('Dynamic message') + end + begin + error = ::RuntimeError.new('Dynamic message') + error.instance_variable_set(:@telemetry_message, 'Static message') + raise error + rescue => e + component.report(e, level: :error, description: 'Static description') + end + end + end + context 'without telemetry message' do + it 'omits the dynamic exception message from telemetry' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'RuntimeError', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include('Dynamic message') + end + begin + raise 'Dynamic message' + rescue => e + component.report(e, level: :error) + end + end + end + + context 'with description and dynamic content' do + it 'includes the description but not the dynamic exception message' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'RuntimeError: Static description', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include(/memory address/) + end + begin + raise 'Dynamic message' + rescue => e + component.report(e, level: :error, description: 'Static description') + end + end + end + end end describe '.error' do diff --git a/spec/datadog/profiling/collectors/code_provenance_spec.rb b/spec/datadog/profiling/collectors/code_provenance_spec.rb index ce111a7de88..f6dec6dced0 100644 --- a/spec/datadog/profiling/collectors/code_provenance_spec.rb +++ b/spec/datadog/profiling/collectors/code_provenance_spec.rb @@ -15,6 +15,11 @@ JSON.parse(code_provenance.generate_json, symbolize_names: true).fetch(:v1) end + let(:expected_platform_fragment) do + platform_fragment = RUBY_PLATFORM + platform_fragment.sub(/darwin(\d+)/, 'darwin-\1') + end + describe "#refresh" do subject(:refresh) { code_provenance.refresh } @@ -34,7 +39,9 @@ version: Datadog::VERSION::STRING, paths: contain_exactly( start_with("/"), - include("extensions").and(include(RUBY_PLATFORM)), + satisfy do |path| + path.include?("extensions") && path.include?(expected_platform_fragment) + end, "#{Gem.bindir}/ddprofrb", "#{Bundler.bin_path}/ddprofrb", ), @@ -58,7 +65,9 @@ version: MessagePack::VERSION, paths: contain_exactly( satisfy { |it| it.start_with?(Gem.dir) && !it.include?("extensions") }, - include("extensions").and(include(RUBY_PLATFORM)), + satisfy do |path| + path.include?("extensions") && path.include?(expected_platform_fragment) + end, ), } ) diff --git a/spec/datadog/profiling/collectors/stack_spec.rb b/spec/datadog/profiling/collectors/stack_spec.rb index 292a596b09e..a438d69f17e 100644 --- a/spec/datadog/profiling/collectors/stack_spec.rb +++ b/spec/datadog/profiling/collectors/stack_spec.rb @@ -270,7 +270,7 @@ def call_sleep expect(gathered_stack).to eq reference_stack end - context "when native filenames are enabled" do + context "when native filenames are enabled", if: PlatformHelpers.linux? do let(:native_filenames_enabled) { true } before do @@ -320,7 +320,7 @@ def save_rounding_mode # rubocop:disable Lint/UselessMethodDefinition expect(gathered_stack).to eq reference_stack end - context "when native filenames are enabled" do + context "when native filenames are enabled", if: PlatformHelpers.linux? do let(:native_filenames_enabled) { true } before do @@ -358,7 +358,7 @@ def save_rounding_mode # rubocop:disable Lint/UselessMethodDefinition it do expect { sample_and_decode(background_thread, :labels, is_gvl_waiting_state: true) - }.to raise_error(RuntimeError, /BUG: .* is_gvl_waiting/) + }.to raise_error(::RuntimeError, /BUG: .* is_gvl_waiting/) end end @@ -394,7 +394,7 @@ def save_rounding_mode # rubocop:disable Lint/UselessMethodDefinition let(:metric_values) { {"cpu-samples" => 1} } it "raises an exception" do - expect { gathered_stack }.to raise_error(RuntimeError, /BUG: Unexpected missing state_label/) + expect { gathered_stack }.to raise_error(::RuntimeError, /BUG: Unexpected missing state_label/) end end @@ -853,30 +853,18 @@ def dummy_template.#{method_name}(ready_queue) end describe "_native_filenames_available?" do - context "on linux", if: PlatformHelpers.linux? do - it "returns true" do - expect(described_class._native_filenames_available?).to be true - end - end - - context "on non-linux", if: !PlatformHelpers.linux? do - it "returns false" do - expect(described_class._native_filenames_available?).to be false - end + it "returns true on linux and macOS" do + expect(described_class._native_filenames_available?).to be true end end describe "_native_ruby_native_filename" do - context "on linux", if: PlatformHelpers.linux? do - it "returns the correct filename" do - expect(described_class._native_ruby_native_filename).to end_with("/ruby").or(include("libruby.so")) - end + it "returns the correct filename", if: PlatformHelpers.linux? do + expect(described_class._native_ruby_native_filename).to end_with("/ruby").or(include("libruby.so")) end - context "on non-linux", if: !PlatformHelpers.linux? do - it "returns nil" do - expect(described_class._native_ruby_native_filename).to be nil - end + it "returns the correct filename on Mac", if: PlatformHelpers.mac? do + expect(described_class._native_ruby_native_filename).to match(/libruby[^\/]+dylib$/) end end diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index dedf7f9b90c..32c904473bb 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -119,8 +119,8 @@ def on_gvl_running(thread) described_class::Testing._native_on_gvl_running(thread) end - def sample_after_gvl_running(thread) - described_class::Testing._native_sample_after_gvl_running(thread_context_collector, thread) + def sample_after_gvl_running(thread, allow_exception: false) + described_class::Testing._native_sample_after_gvl_running(thread_context_collector, thread, allow_exception) end def thread_list @@ -1582,7 +1582,7 @@ def sample_and_check(expected_state:) context "when called before on_gc_start/on_gc_finish" do it do - expect { sample_after_gc(allow_exception: true) }.to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) + expect { sample_after_gc(allow_exception: true) }.to raise_error(::RuntimeError, /Unexpected call to sample_after_gc/) end end @@ -1601,7 +1601,7 @@ def sample_and_check(expected_state:) sample_after_gc expect { sample_after_gc(allow_exception: true) } - .to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) + .to raise_error(::RuntimeError, /Unexpected call to sample_after_gc/) end end @@ -1968,6 +1968,16 @@ def sample_and_check(expected_state:) describe "#sample_after_gvl_running" do before { skip_if_gvl_profiling_not_supported(self) } + context "when timeline is disabled" do + let(:timeline_enabled) { false } + it "raises a telemetry-instrumented error" do + expect { sample_after_gvl_running(t1, allow_exception: true) }.to raise_error( + ::RuntimeError, + "GVL profiling requires timeline to be enabled" + ) + end + end + let(:timeline_enabled) { true } context "when thread is not at the end of a Waiting for GVL period" do diff --git a/spec/datadog/profiling/http_transport_spec.rb b/spec/datadog/profiling/http_transport_spec.rb index 8709c9759b0..054e71d9c7b 100644 --- a/spec/datadog/profiling/http_transport_spec.rb +++ b/spec/datadog/profiling/http_transport_spec.rb @@ -14,7 +14,10 @@ # We also have "integration" specs, where we exercise the Ruby code together with the C code and libdatadog to ensure # that things come out of libdatadog as we expected. RSpec.describe Datadog::Profiling::HttpTransport do - before { skip_if_profiling_not_supported(self) } + before do + skip "Profiling HTTP transport integration tests require Linux networking helpers" if PlatformHelpers.mac? + skip_if_profiling_not_supported(self) + end subject(:http_transport) do described_class.new( diff --git a/spec/datadog/profiling/libdatadog_extconf_helpers_spec.rb b/spec/datadog/profiling/libdatadog_extconf_helpers_spec.rb index 3ee75ff2758..bab12a18a92 100644 --- a/spec/datadog/profiling/libdatadog_extconf_helpers_spec.rb +++ b/spec/datadog/profiling/libdatadog_extconf_helpers_spec.rb @@ -11,7 +11,7 @@ before do skip_if_profiling_not_supported(self) if PlatformHelpers.mac? && Libdatadog.pkgconfig_folder.nil? && ENV["LIBDATADOG_VENDOR_OVERRIDE"].nil? - raise "You have a libdatadog setup without macOS support. Did you forget to set LIBDATADOG_VENDOR_OVERRIDE?" + skip "Needs LIBDATADOG_VENDOR_OVERRIDE pointing at a valid libdatadog build on macOS" end end @@ -46,7 +46,7 @@ before do skip_if_profiling_not_supported(self) if PlatformHelpers.mac? && Libdatadog.pkgconfig_folder.nil? && ENV["LIBDATADOG_VENDOR_OVERRIDE"].nil? - raise "You have a libdatadog setup without macOS support. Did you forget to set LIBDATADOG_VENDOR_OVERRIDE?" + skip "Needs LIBDATADOG_VENDOR_OVERRIDE pointing at a valid libdatadog build on macOS" end end diff --git a/spec/datadog/profiling/native_extension_spec.rb b/spec/datadog/profiling/native_extension_spec.rb index 6ab079de6a5..b79d6cb6587 100644 --- a/spec/datadog/profiling/native_extension_spec.rb +++ b/spec/datadog/profiling/native_extension_spec.rb @@ -12,26 +12,90 @@ describe "grab_gvl_and_raise" do it "raises the requested exception with the passed in message" do - expect { described_class::Testing._native_grab_gvl_and_raise(ZeroDivisionError, "this is a test", nil, true) } - .to raise_exception(ZeroDivisionError, "this is a test") + expect { described_class::Testing._native_grab_gvl_and_raise(::RuntimeError, "this is a test", nil, true) } + .to raise_error(::RuntimeError) do |error| + expect(error.message).to eq("this is a test") + expect(error.instance_variable_get(:@telemetry_message)).to eq("this is a test") + end end - it "accepts printf-style string formatting" do - expect { described_class::Testing._native_grab_gvl_and_raise(ZeroDivisionError, "divided zero by ", 42, true) } - .to raise_exception(ZeroDivisionError, "divided zero by 42") + it "on printf-style, only report the fixed string for telemetry" do + expect { described_class::Testing._native_grab_gvl_and_raise(::RuntimeError, "message %s", "oops", true) } + .to raise_error(::RuntimeError) do |error| + expect(error.message).to eq("message oops") + expect(error.instance_variable_get(:@telemetry_message)).to eq("message %s") + end end it "limits the exception message to 255 characters" do big_message = "a" * 500 - expect { described_class::Testing._native_grab_gvl_and_raise(ZeroDivisionError, big_message, nil, true) } - .to raise_exception(ZeroDivisionError, /a{255}\z/) + expect { described_class::Testing._native_grab_gvl_and_raise(::RuntimeError, big_message, nil, true) } + .to raise_error(::RuntimeError) do |error| + expect(error.message).to match(/a{255}\z/) + expect(error.instance_variable_get(:@telemetry_message)).to match(/a{255}\z/) + end end context "when called without releasing the gvl" do + it "raises a RuntimeError with appropriate error handling when called without GVL" do + expect do + described_class::Testing._native_grab_gvl_and_raise(ZeroDivisionError, "message %s", 'oops', false) + end.to raise_error(::RuntimeError) do |error| + expect(error.message).to include('grab_gvl_and_raise called by thread holding the global VM lock: message oops') + expect(error.instance_variable_get(:@telemetry_message)).to include('grab_gvl_and_raise called by thread holding the global VM lock: message %s') + end + end + end + + context "when raising RuntimeError" do + subject(:raise_native_runtime_error) do + described_class::Testing._native_grab_gvl_and_raise(::RuntimeError, "runtime error test", nil, true) + end + it "raises a RuntimeError" do - expect { described_class::Testing._native_grab_gvl_and_raise(ZeroDivisionError, "this is a test", nil, false) } - .to raise_exception(RuntimeError, /called by thread holding the global VM lock/) + expect { raise_native_runtime_error }.to raise_error(::RuntimeError) do |error| + expect(error.message).to eq("runtime error test") + expect(error.instance_variable_get(:@telemetry_message)).to eq("runtime error test") + end + end + + it "is an instance of RuntimeError" do + expect { raise_native_runtime_error }.to raise_exception(RuntimeError) + end + end + + context "when raising ArgumentError" do + subject(:raise_native_argument_error) do + described_class::Testing._native_grab_gvl_and_raise(::ArgumentError, "argument error test", nil, true) + end + + it "raises an ArgumentError" do + expect { raise_native_argument_error }.to raise_error(::ArgumentError) do |error| + expect(error.message).to eq("argument error test") + expect(error.instance_variable_get(:@telemetry_message)).to eq("argument error test") + end + end + + it "is an instance of ArgumentError" do + expect { raise_native_argument_error }.to raise_exception(ArgumentError) + end + end + + context "when raising TypeError" do + subject(:raise_native_type_error) do + described_class::Testing._native_grab_gvl_and_raise(::TypeError, "type error test", nil, true) + end + + it "raises a TypeError" do + expect { raise_native_type_error }.to raise_error(::TypeError) do |error| + expect(error.message).to eq("type error test") + expect(error.instance_variable_get(:@telemetry_message)).to eq("type error test") + end + end + + it "is an instance of TypeError" do + expect { raise_native_type_error }.to raise_exception(TypeError) end end end @@ -45,8 +109,17 @@ it "accepts printf-style string formatting" do expect do - described_class::Testing._native_grab_gvl_and_raise_syserr(Errno::EINTR::Errno, "divided zero by ", 42, true) - end.to raise_exception(Errno::EINTR, "#{Errno::EINTR.exception.message} - divided zero by 42") + described_class::Testing._native_grab_gvl_and_raise_syserr(Errno::EINTR::Errno, "message %s", "oops", true) + end.to raise_exception(Errno::EINTR, "#{Errno::EINTR.exception.message} - message oops") + end + + it "keeps telemetry-safe message unformatted" do + expect do + described_class::Testing._native_grab_gvl_and_raise_syserr(Errno::EINTR::Errno, "message %s", "oops", true) + end.to raise_error(Errno::EINTR) do |error| + expect(error.message).to include("message oops") + expect(error.instance_variable_get(:@telemetry_message)).to eq("message %s") + end end it "limits the caller-provided exception message to 255 characters" do @@ -58,10 +131,13 @@ end context "when called without releasing the gvl" do - it "raises a RuntimeError" do + it "raises a RuntimeError with appropriate error handling" do expect do - described_class::Testing._native_grab_gvl_and_raise_syserr(Errno::EINTR::Errno, "this is a test", nil, false) - end.to raise_exception(RuntimeError, /called by thread holding the global VM lock/) + described_class::Testing._native_grab_gvl_and_raise_syserr(Errno::EINTR::Errno, "message %s", "oops", false) + end.to raise_error(::RuntimeError) do |error| + expect(error.message).to include("grab_gvl_and_raise called by thread holding the global VM lock: message oops") + expect(error.instance_variable_get(:@telemetry_message)).to include("grab_gvl_and_raise called by thread holding the global VM lock: message %s") + end end end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 3a75be9a6c9..176888af2e2 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -679,7 +679,7 @@ def introduce_distinct_stacktraces(i, obj) expect do Datadog::Profiling::Collectors::Stack::Testing ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels) - end.to raise_error(RuntimeError, /Ended a heap recording/) + end.to raise_error(::RuntimeError, include("Ended a heap recording")) end it "does not keep the active slot mutex locked" do