From 19fbaae2d03b32aac357df47659ee996260b32fb Mon Sep 17 00:00:00 2001 From: Christophe Nasarre Date: Tue, 21 Oct 2025 10:12:56 +0200 Subject: [PATCH 1/4] Add heap snapshot configuration --- .../Datadog.Profiler.Native/Configuration.cpp | 31 ++++++++++++++++++- .../Datadog.Profiler.Native/Configuration.h | 9 ++++++ .../EnvironmentVariables.h | 3 ++ .../Datadog.Profiler.Native/IConfiguration.h | 6 +++- .../ConfigurationTest.cpp | 20 ++++++++++++ .../ProfilerMockedInterface.h | 3 ++ 6 files changed, 70 insertions(+), 2 deletions(-) diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp index 110ef24b3043..0e7a9f6a68ee 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp @@ -118,6 +118,9 @@ Configuration::Configuration() _forceHttpSampling = GetEnvironmentValue(EnvironmentVariables::ForceHttpSampling, false); _cpuProfilerType = GetEnvironmentValue(EnvironmentVariables::CpuProfilerType, DefaultCpuProfilerType); _isWaitHandleProfilingEnabled = GetEnvironmentValue(EnvironmentVariables::WaitHandleProfilingEnabled, false); + _isHeapSnapshotEnabled = GetEnvironmentValue(EnvironmentVariables::HeapSnapshotEnabled, false); + _heapSnapshotInterval = ExtractHeapSnapshotInterval(); + _heapSnapshotUsedMemoryThreshold = GetEnvironmentValue(EnvironmentVariables::HeapSnapshotUsedMemoryThreshold, 85); } fs::path Configuration::ExtractLogDirectory() @@ -797,7 +800,6 @@ void Configuration::SetEnablementStatus(EnablementStatus status) _enablementStatus = status; } - std::chrono::milliseconds Configuration::ExtractHttpRequestDurationThreshold() const { auto const defaultValue = 50ms; @@ -812,3 +814,30 @@ std::chrono::milliseconds Configuration::GetHttpRequestDurationThreshold() const { return _httpRequestDurationThreshold; } + +bool Configuration::IsHeapSnapshotEnabled() const +{ + 
return _isHeapSnapshotEnabled; +} + +std::chrono::minutes Configuration::ExtractHeapSnapshotInterval() const +{ + auto r = shared::GetEnvironmentValue(EnvironmentVariables::HeapSnapshotInterval); + int32_t interval; + if (TryParse(r, interval)) + { + return std::chrono::minutes(interval); + } + + return 5min; +} + +std::chrono::minutes Configuration::GetHeapSnapshotInterval() const +{ + return _heapSnapshotInterval; +} + +int32_t Configuration::GetHeapSnapshotUsedMemoryThreshold() const +{ + return _heapSnapshotUsedMemoryThreshold; +} diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h index 835d6e58b082..151c32910672 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h @@ -85,6 +85,10 @@ class Configuration final : public IConfiguration bool IsWaitHandleProfilingEnabled() const override; bool IsManagedActivationEnabled() const override; void SetEnablementStatus(EnablementStatus status) override; + bool IsHeapSnapshotEnabled() const override; + std::chrono::minutes GetHeapSnapshotInterval() const override; + int32_t GetHeapSnapshotUsedMemoryThreshold() const override; + private: static tags ExtractUserTags(); @@ -110,6 +114,7 @@ class Configuration final : public IConfiguration EnablementStatus ExtractEnablementStatus(); std::chrono::milliseconds ExtractSsiLongLivedThreshold() const; std::chrono::milliseconds ExtractHttpRequestDurationThreshold() const; + std::chrono::minutes ExtractHeapSnapshotInterval() const; private: static std::string const DefaultProdSite; @@ -187,4 +192,8 @@ class Configuration final : public IConfiguration CpuProfilerType _cpuProfilerType; std::chrono::milliseconds _cpuProfilingInterval; bool _isWaitHandleProfilingEnabled; + + bool _isHeapSnapshotEnabled; + std::chrono::minutes _heapSnapshotInterval; + int32_t _heapSnapshotUsedMemoryThreshold; 
// in % of used memory }; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h index b81fb1188cb3..7f01325f6edd 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h @@ -73,6 +73,9 @@ class EnvironmentVariables final inline static const shared::WSTRING HttpProfilingInternalEnabled = WStr("DD_INTERNAL_PROFILING_HTTP_ENABLED"); inline static const shared::WSTRING HttpProfilingEnabled = WStr("DD_PROFILING_HTTP_ENABLED"); inline static const shared::WSTRING HttpRequestDurationThreshold = WStr("DD_INTERNAL_PROFILING_HTTP_REQUEST_DURATION_THRESHOLD"); + inline static const shared::WSTRING HeapSnapshotEnabled = WStr("DD_PROFILING_HEAPSNAPSHOT_ENABLED"); + inline static const shared::WSTRING HeapSnapshotInterval = WStr("DD_INTERNAL_PROFILING_HEAPSNAPSHOT_INTERVAL"); + inline static const shared::WSTRING HeapSnapshotUsedMemoryThreshold = WStr("DD_INTERNAL_PROFILING_HEAPSNAPSHOT_USED_MEMORY_THRESHOLD"); // used for tests only inline static const shared::WSTRING ForceHttpSampling = WStr("DD_INTERNAL_PROFILING_FORCE_HTTP_SAMPLING"); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h index 95529692cd04..87a0fd0b1e9e 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h @@ -81,6 +81,10 @@ class IConfiguration virtual bool IsWaitHandleProfilingEnabled() const = 0; virtual bool IsManagedActivationEnabled() const = 0; - // these setter functions are needed for Stable Configuration support + // this setter function is needed for Stable Configuration support virtual void SetEnablementStatus(EnablementStatus status) = 0; + + virtual bool IsHeapSnapshotEnabled() 
const = 0; + virtual std::chrono::minutes GetHeapSnapshotInterval() const = 0; + virtual int32_t GetHeapSnapshotUsedMemoryThreshold() const = 0; }; diff --git a/profiler/test/Datadog.Profiler.Native.Tests/ConfigurationTest.cpp b/profiler/test/Datadog.Profiler.Native.Tests/ConfigurationTest.cpp index 21be3de96da9..b2c97473b918 100644 --- a/profiler/test/Datadog.Profiler.Native.Tests/ConfigurationTest.cpp +++ b/profiler/test/Datadog.Profiler.Native.Tests/ConfigurationTest.cpp @@ -1418,3 +1418,23 @@ TEST_F(ConfigurationTest, CheckWaitHandleProfilingIsDisabledIfEnvVarSetToFalse) auto configuration = Configuration{}; ASSERT_THAT(configuration.IsWaitHandleProfilingEnabled(), false); } + +TEST_F(ConfigurationTest, CheckHeapSnapshotIsDisabledByDefault) +{ + auto configuration = Configuration{}; + ASSERT_THAT(configuration.IsHeapSnapshotEnabled(), false); +} + +TEST_F(ConfigurationTest, CheckHeapSnapshotIsEnabledIfEnvVarSetToTrue) +{ + EnvironmentHelper::EnvironmentVariable ar(EnvironmentVariables::HeapSnapshotEnabled, WStr("1")); + auto configuration = Configuration{}; + ASSERT_THAT(configuration.IsHeapSnapshotEnabled(), true); +} + +TEST_F(ConfigurationTest, CheckHeapSnapshotIsDisabledIfEnvVarSetToFalse) +{ + EnvironmentHelper::EnvironmentVariable ar(EnvironmentVariables::HeapSnapshotEnabled, WStr("0")); + auto configuration = Configuration{}; + ASSERT_THAT(configuration.IsHeapSnapshotEnabled(), false); +} diff --git a/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h b/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h index e2c2396e95a6..f8723f522a8b 100644 --- a/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h +++ b/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h @@ -94,6 +94,9 @@ class MockConfiguration : public IConfiguration MOCK_METHOD(bool, IsWaitHandleProfilingEnabled, (), (const override)); MOCK_METHOD(bool, IsManagedActivationEnabled, (), (const override)); MOCK_METHOD(void, 
SetEnablementStatus, (EnablementStatus status), (override)); + MOCK_METHOD(bool, IsHeapSnapshotEnabled, (), (const override)); + MOCK_METHOD(std::chrono::minutes, GetHeapSnapshotInterval, (), (const override)); + MOCK_METHOD(int32_t, GetHeapSnapshotUsedMemoryThreshold, (), (const override)); }; class MockExporter : public IExporter From 5a41bdc3f4f25f083632f13651c0858a58ee6d46 Mon Sep 17 00:00:00 2001 From: Christophe Nasarre Date: Thu, 23 Oct 2025 17:49:47 +0200 Subject: [PATCH 2/4] Integrate HeapSnapshotManager into the profiler --- .../ClrEventsParser.cpp | 18 ++- .../Datadog.Profiler.Native/ClrEventsParser.h | 6 +- .../CorProfilerCallback.cpp | 28 +++- .../CorProfilerCallback.h | 3 + .../Datadog.Profiler.Native.vcxproj | 3 + .../Datadog.Profiler.Native.vcxproj.filters | 18 ++- .../EnabledProfilers.cpp | 5 + .../GarbageCollectionProvider.cpp | 3 +- .../GarbageCollectionProvider.h | 3 +- .../HeapSnapshotManager.cpp | 149 ++++++++++++++++++ .../HeapSnapshotManager.h | 106 +++++++++++++ .../IEnabledProfilers.h | 1 + .../IGarbageCollectionsListener.h | 3 +- .../IHeapSnapshotManager.h | 14 ++ .../LiveObjectsProvider.cpp | 3 +- .../LiveObjectsProvider.h | 3 +- .../ProfileExporter.cpp | 49 +++++- .../Datadog.Profiler.Native/ProfileExporter.h | 7 +- .../EnabledProfilersTest.cpp | 5 + 19 files changed, 400 insertions(+), 27 deletions(-) create mode 100644 profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp create mode 100644 profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h create mode 100644 profiler/src/ProfilerEngine/Datadog.Profiler.Native/IHeapSnapshotManager.h diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp index 701f4d0729ca..97119bd0a96d 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp @@ 
-480,11 +480,12 @@ void ClrEventsParser::NotifyGarbageCollectionEnd( std::chrono::nanoseconds endTimestamp, uint64_t gen2Size, uint64_t lohSize, - uint64_t pohSize) + uint64_t pohSize, + uint32_t memPressure) { for (auto& pGarbageCollectionsListener : _pGarbageCollectionsListeners) { - LogGcEvent("OnGarbageCollectionEnd: ", number, " ", generation, " ", reason, " ", type); + LogGcEvent("OnGarbageCollectionEnd: #", number, " gen", generation, " ", reason, " ", type, " ", memPressure, "%"); pGarbageCollectionsListener->OnGarbageCollectionEnd( number, @@ -497,7 +498,8 @@ void ClrEventsParser::NotifyGarbageCollectionEnd( endTimestamp, gen2Size, lohSize, - pohSize); + pohSize, + memPressure); } } @@ -623,7 +625,8 @@ void ClrEventsParser::OnGCRestartEEEnd(std::chrono::nanoseconds timestamp) timestamp, gc.gen2Size, gc.lohSize, - gc.pohSize); + gc.pohSize, + gc.memPressure); ResetGC(gc); } } @@ -641,7 +644,6 @@ void ClrEventsParser::OnGCHeapStats(std::chrono::nanoseconds timestamp, uint64_t gc.gen2Size = gen2Size; gc.lohSize = lohSize; gc.pohSize = pohSize; - if (gc.HasGlobalHeapHistoryBeenReceived && (gc.Generation == 2) && (gc.Type == GCType::BackgroundGC)) { auto duration = std::chrono::duration_cast(timestamp - gc.StartTimestamp).count(); @@ -658,7 +660,8 @@ void ClrEventsParser::OnGCHeapStats(std::chrono::nanoseconds timestamp, uint64_t timestamp, gc.gen2Size, gc.lohSize, - gc.pohSize); + gc.pohSize, + gc.memPressure); ResetGC(gc); } } @@ -694,7 +697,8 @@ void ClrEventsParser::OnGCGlobalHeapHistory(std::chrono::nanoseconds timestamp, timestamp, gc.gen2Size, gc.lohSize, - gc.pohSize); + gc.pohSize, + payload.MemPressure); ResetGC(gc); } } diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h index 0e5cdae0832b..5cd5fa5b6404 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h +++ 
b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h @@ -233,6 +233,8 @@ struct GCGlobalHeapPayload uint32_t Gen0ReductionCount; uint32_t Reason; uint32_t GlobalMechanisms; + uint32_t PauseMode; + uint32_t MemPressure; }; struct WaitHandleWaitStartPayload // for .NET 9+ @@ -264,6 +266,7 @@ struct GCDetails uint64_t gen2Size; uint64_t lohSize; uint64_t pohSize; + uint32_t memPressure; // GlobalHeapHistory and HeapStats events are not received in the same order // between Framework and CoreCLR. So we need to keep track of what has been received @@ -335,7 +338,8 @@ class ClrEventsParser std::chrono::nanoseconds endTimestamp, uint64_t gen2Size, uint64_t lohSize, - uint64_t pohSize + uint64_t pohSize, + uint32_t memPressure ); GCDetails& GetCurrentGC(); void InitializeGC(std::chrono::nanoseconds timestamp, GCDetails& gc, GCStartPayload& payload); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp index ea0ffc193ab4..d046819fc419 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp @@ -342,7 +342,17 @@ void CorProfilerCallback::InitializeServices() ); } - if (_pConfiguration->IsGarbageCollectionProfilingEnabled()) + if (_pConfiguration->IsHeapSnapshotEnabled()) + { + _pHeapSnapshotManager = RegisterService( + _pConfiguration.get(), + _pCorProfilerInfoEvents, + _pFrameStore.get() + ); + } + + // GC profiling is needed for both GC provider and heap snapshots + if ((_pHeapSnapshotManager != nullptr) || _pConfiguration->IsGarbageCollectionProfilingEnabled()) { _pStopTheWorldProvider = RegisterService( valueTypeProvider, @@ -566,7 +576,8 @@ void CorProfilerCallback::InitializeServices() _metricsRegistry, _pMetadataProvider.get(), _pSsiManager.get(), - _pAllocationsRecorder.get() + _pAllocationsRecorder.get(), + 
_pHeapSnapshotManager ); if (_pConfiguration->IsGcThreadsCpuTimeEnabled() && @@ -1380,7 +1391,8 @@ HRESULT STDMETHODCALLTYPE CorProfilerCallback::Initialize(IUnknown* corProfilerI _pConfiguration->IsContentionProfilingEnabled() || _pConfiguration->IsGarbageCollectionProfilingEnabled() || _pConfiguration->IsHttpProfilingEnabled() || - _pConfiguration->IsWaitHandleProfilingEnabled() + _pConfiguration->IsWaitHandleProfilingEnabled() || + _pConfiguration->IsHeapSnapshotEnabled() ; if ((major >= 5) && AreEventBasedProfilersEnabled) @@ -1477,6 +1489,8 @@ HRESULT STDMETHODCALLTYPE CorProfilerCallback::Initialize(IUnknown* corProfilerI // - GC related events // - WaitHandle events for .NET 9+ // - AllocationSampled events for .NET+ 10 (AllocationTick will not be received) + // - HTTP events via System.Net.Http provider + // - Bulkxxx events for heap snapshots // UINT64 activatedKeywords = 0; uint32_t verbosity = InformationalVerbosity; @@ -1523,7 +1537,6 @@ HRESULT STDMETHODCALLTYPE CorProfilerCallback::Initialize(IUnknown* corProfilerI // if (_pConfiguration->IsHttpProfilingEnabled()) { - providerCount = 6; providers = { @@ -1579,6 +1592,13 @@ HRESULT STDMETHODCALLTYPE CorProfilerCallback::Initialize(IUnknown* corProfilerI }; } + // TODO: generating a heap snapshot requires the creation of another EventPipe session + // with the same Microsoft-Windows-DotNETRuntime provider and other keywords and, + // more importantly, possibly a different verbosity (i.e. verbose). + // CHECK if we need to keep track of the current verbosity level after the heap snapshot + // probably by creating another EventPipe session just to reset the verbosity level. + // Hoping that it is not required to also reset the keywords... 
+ hr = _pCorProfilerInfoEvents->EventPipeStartSession( providerCount, providers.data(), false, &_session ); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h index a8cf9d82db26..23e8e542d6a7 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h @@ -42,6 +42,7 @@ #endif #include "IEtwEventsManager.h" #include "ISsiLifetime.h" +#include "HeapSnapshotManager.h" #include "PInvoke.h" #include "shared/src/native-src/dd_memory_resource.hpp" @@ -259,6 +260,8 @@ private : ThreadLifetimeProvider* _pThreadLifetimeProvider = nullptr; NetworkProvider* _pNetworkProvider = nullptr; RuntimeIdStore* _pRuntimeIdStore = nullptr; + HeapSnapshotManager* _pHeapSnapshotManager = nullptr; + #ifdef LINUX SystemCallsShield* _systemCallsShield = nullptr; std::unique_ptr _pCpuProfiler = nullptr; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj index eda6d292116b..3e19231a5d22 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj @@ -254,6 +254,8 @@ + + @@ -408,6 +410,7 @@ + diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj.filters b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj.filters index 43ab530af74f..db5f6279e392 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj.filters +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj.filters @@ -54,6 +54,9 @@ {6c3f1e16-c431-45e0-bfef-43a6c925f1f4} + + {98823685-380d-47c7-ab99-ce5bd24a3b00} + @@ 
-446,9 +449,6 @@ Utils - - libdatadog - Profiler-Driver @@ -495,6 +495,12 @@ Utils + + HeapSnapshot + + + HeapSnapshot + @@ -725,9 +731,6 @@ Utils - - libdatadog - Profiler-Driver @@ -761,5 +764,8 @@ Utils + + HeapSnapshot + \ No newline at end of file diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnabledProfilers.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnabledProfilers.cpp index 27ac2a8f45bd..7f113b88148d 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnabledProfilers.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnabledProfilers.cpp @@ -60,6 +60,11 @@ EnabledProfilers::EnabledProfilers(IConfiguration* pConfiguration, bool isListen _enabledProfilers |= RuntimeProfiler::Network; } + if (pConfiguration->IsHeapSnapshotEnabled()) + { + _enabledProfilers |= RuntimeProfiler::HeapSnapshot; + } + // TODO: add new CLR event driven profilers } } diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.cpp index 8019d973cdf8..84f5c02340d8 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.cpp @@ -52,7 +52,8 @@ void GarbageCollectionProvider::OnGarbageCollectionEnd( std::chrono::nanoseconds endTimestamp, // end of GC uint64_t gen2Size, uint64_t lohSize, - uint64_t pohSize) + uint64_t pohSize, + uint32_t memPressure) { _suspensionDurationMetric->Add((double_t)pauseDuration.count()); if (generation == 0) diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.h index 3934ed34b63b..f3d931ae675f 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.h +++ 
b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/GarbageCollectionProvider.h @@ -48,7 +48,8 @@ class GarbageCollectionProvider std::chrono::nanoseconds endTimestamp, // end of GC uint64_t gen2Size, uint64_t lohSize, - uint64_t pohSize) override; + uint64_t pohSize, + uint32_t memPressure) override; private: std::shared_ptr _gen0CountMetric; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp new file mode 100644 index 000000000000..7acced180447 --- /dev/null +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp @@ -0,0 +1,149 @@ +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2022 Datadog, Inc. + +#include "HeapSnapshotManager.h" +#include "Log.h" + +HeapSnapshotManager::HeapSnapshotManager( + IConfiguration* pConfiguration, + ICorProfilerInfo12* pCorProfilerInfo, + IFrameStore* pFrameStore) : + ServiceBase(), + _inducedGCNumber(-1), + _session(0), + _gen2Size(0), + _lohSize(0), + _pohSize(0), + _memPressure(0), + _isHeapDumpInProgress(false), + _pCorProfilerInfo{pCorProfilerInfo}, + _pFrameStore{pFrameStore} +{ + _heapDumpInterval = pConfiguration->GetHeapSnapshotInterval(); + _memPressureThreshold = pConfiguration->GetHeapSnapshotUsedMemoryThreshold(); +} + +bool HeapSnapshotManager::StartImpl() +{ + // TODO: decide how to trigger a heap snapshot (timer + memory pressure threshold + growing gen2+loh) + return true; +} + +bool HeapSnapshotManager::StopImpl() +{ + // don't forget to close the current EventPipe session if any + if (_session != 0) + { + + } + + return true; +} + +std::string HeapSnapshotManager::GetHeapSnapshotText() +{ + return std::string{}; +} + +void HeapSnapshotManager::OnGarbageCollectionStart( + std::chrono::nanoseconds timestamp, + int32_t 
number, + uint32_t generation, + GCReason reason, + GCType type) +{ + if ((_session != 0) && (_inducedGCNumber != -1)) + { + // waiting for the first induced foreground gen2 collection + if ((reason == GCReason::Induced) && (generation == 2) && (type == GCType::NonConcurrentGC)) + { + _inducedGCNumber = number; + } + } +} + +void HeapSnapshotManager::OnGarbageCollectionEnd( + int32_t number, + uint32_t generation, + GCReason reason, + GCType type, + bool isCompacting, + std::chrono::nanoseconds pauseDuration, + std::chrono::nanoseconds totalDuration, // from start to end (includes pauses) + std::chrono::nanoseconds endTimestamp, // end of GC + uint64_t gen2Size, + uint64_t lohSize, + uint64_t pohSize, + uint32_t memPressure) +{ + if (_session != 0) + { + if (number == _inducedGCNumber) + { + // the induced GC triggered to generate the heap snapshot has ended + _inducedGCNumber = -1; + + StopGCDump(); + + // TODO: restart the timer before the next heap snapshot + } + } + + // store sizes for next heap snapshot + _gen2Size = gen2Size; + _lohSize = lohSize; + _pohSize = pohSize; + _memPressure = memPressure; +} + +void HeapSnapshotManager::StartGCDump() +{ + if (_session != 0) + { + // TODO: log a message and probably stop the current session + return; + } + + // reset the class histogram + _classHistogram.clear(); + + // creating an EventPipe session with the right keywords/verbosity on the .NET provider triggers a GC heap dump + // i.e. 
an induced GC will be started and specific BulkXXX events will be emitted while dumping the surviving objects in the managed heap + // Read https://chnasarre.medium.com/net-gcdump-internals-fcce5d327be7?source=friends_link&sk=3225ff119458adafc0e6935951fcc323 for more details + // + // no need to add TypeKeyword or GCHeapAndTypeNamesKeyword because ICorProfilerInfo allows us + // to directly get the name of the types + UINT64 activatedKeywords = 0x900000; // GCHeapDumpKeyword and ManagedHeapCollectKeyword + + uint32_t verbosity = 5; // verbose verbosity + COR_PRF_EVENTPIPE_PROVIDER_CONFIG providers[1] = + { + COR_PRF_EVENTPIPE_PROVIDER_CONFIG{ + WStr("Microsoft-Windows-DotNETRuntime"), + activatedKeywords, + verbosity, + nullptr}, + }; + + // TODO: maybe this is sort of synchronous so we won't get the session before some events might be received + _isHeapDumpInProgress = true; + auto hr =_pCorProfilerInfo->EventPipeStartSession(1, providers, false, &_session); + if (FAILED(hr)) + { + _session = 0; + _isHeapDumpInProgress = false; + Log::Error("Failed to start event pipe session with hr=0x", std::hex, hr, std::dec, " for heap snapshot."); + } +} + +void HeapSnapshotManager::StopGCDump() +{ + if (_session == 0) + { + return; + } + + _pCorProfilerInfo->EventPipeStopSession(_session); + _isHeapDumpInProgress = false; + _session = 0; +} diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h new file mode 100644 index 000000000000..ab63d42c3af0 --- /dev/null +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h @@ -0,0 +1,106 @@ +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2022 Datadog, Inc. 
+ +#pragma once + +#include "IHeapSnapshotManager.h" +#include "IConfiguration.h" +#include "IFrameStore.h" +#include "IGarbageCollectionsListener.h" +#include "ServiceBase.h" + +#include "corprof.h" + +#include + +class ClassHistogramEntry +{ +public: + ClassHistogramEntry(std::string& className) + : + InstanceCount(0), + TotalSize(0), + ClassName(className) + { + } + +public: + std::string ClassName; + uint64_t InstanceCount; + uint64_t TotalSize; +}; + +class HeapSnapshotManager + : + public IHeapSnapshotManager, + public IGarbageCollectionsListener, + public ServiceBase +{ +public: + HeapSnapshotManager( + IConfiguration* pConfiguration, + ICorProfilerInfo12* pProfilerInfo, + IFrameStore* pFrameStore); + ~HeapSnapshotManager() override = default; + +protected: + // inherited via IService + const char* GetName() override + { + return "HeapSnapshotManager"; + } + + // Inherited via IHeapSnapshotManager + std::string GetHeapSnapshotText() override; + + // Inherited via IGarbageCollectionsListener + void OnGarbageCollectionStart( + std::chrono::nanoseconds timestamp, + int32_t number, + uint32_t generation, + GCReason reason, + GCType type) override; + void OnGarbageCollectionEnd( + int32_t number, + uint32_t generation, + GCReason reason, + GCType type, + bool isCompacting, + std::chrono::nanoseconds pauseDuration, + std::chrono::nanoseconds totalDuration, // from start to end (includes pauses) + std::chrono::nanoseconds endTimestamp, // end of GC + uint64_t gen2Size, + uint64_t lohSize, + uint64_t pohSize, + uint32_t memPressure) override; + + // Inherited via ServiceBase + bool StartImpl() override; + bool StopImpl() override; + +private: + void StartGCDump(); + void StopGCDump(); + +private: + std::chrono::minutes _heapDumpInterval; + int32_t _memPressureThreshold; + uint64_t _gen2Size; + uint64_t _lohSize; + uint64_t _pohSize; + uint32_t _memPressure; + + ICorProfilerInfo12* _pCorProfilerInfo; + IFrameStore* _pFrameStore; + + // session used to trigger a heap 
dump + // TODO: check if we need to synchronize the update of this field from different threads + EVENTPIPE_SESSION _session; + bool _isHeapDumpInProgress; + + // id of the induced GC triggering a heap dump + int32_t _inducedGCNumber; + + // keep track of each type instances count and size during heap snapshot + std::unordered_map _classHistogram; +}; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IEnabledProfilers.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IEnabledProfilers.h index a02c6d6a756a..51368f6489da 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IEnabledProfilers.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IEnabledProfilers.h @@ -19,6 +19,7 @@ ENUM_FLAGS(RuntimeProfiler, size_t) Network = 128, // TODO: should it be renamed "Http"? CpuGc = 256, ThreadsLifetime = 512, + HeapSnapshot = 1024, }; class IEnabledProfilers diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGarbageCollectionsListener.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGarbageCollectionsListener.h index 9dc01d535fdf..977d155024de 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGarbageCollectionsListener.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGarbageCollectionsListener.h @@ -30,7 +30,8 @@ class IGarbageCollectionsListener std::chrono::nanoseconds endTimestamp, // end of GC uint64_t gen2Size, uint64_t lohSize, - uint64_t pohSize) = 0; + uint64_t pohSize, + uint32_t memPressure) = 0; virtual ~IGarbageCollectionsListener() = default; }; \ No newline at end of file diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IHeapSnapshotManager.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IHeapSnapshotManager.h new file mode 100644 index 000000000000..4868bed3bb83 --- /dev/null +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IHeapSnapshotManager.h @@ -0,0 +1,14 @@ +// Unless explicitly stated otherwise all files in this repository are 
licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2022 Datadog, Inc. + +#pragma once + +#include + +class IHeapSnapshotManager +{ +public: + virtual std::string GetHeapSnapshotText() = 0; + + virtual ~IHeapSnapshotManager() = default; +}; \ No newline at end of file diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.cpp index ff6632525f77..31c987dff713 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.cpp @@ -67,7 +67,8 @@ void LiveObjectsProvider::OnGarbageCollectionEnd( std::chrono::nanoseconds endTimestamp, uint64_t gen2Size, uint64_t lohSize, - uint64_t pohSize) + uint64_t pohSize, + uint32_t memPressure) { std::lock_guard lock(_liveObjectsLock); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.h index 61a02206bc08..c7439c754f0b 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/LiveObjectsProvider.h @@ -64,7 +64,8 @@ class LiveObjectsProvider : public ServiceBase, std::chrono::nanoseconds endTimestamp, uint64_t gen2Size, uint64_t lohSize, - uint64_t pohSize) override; + uint64_t pohSize, + uint32_t memPressure) override; private: ObjectHandleID CreateWeakHandle(uintptr_t address) const; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.cpp index 2a8da8fccf23..2e7741a18165 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.cpp @@ -25,6 +25,7 @@ 
#include "SamplesEnumerator.h" #include "ScopeFinalizer.h" #include "dd_profiler_version.h" +#include "IHeapSnapshotManager.h" #include #include @@ -69,6 +70,8 @@ std::string const ProfileExporter::MetricsFilename = "metrics.json"; std::string const ProfileExporter::AllocationsExtension = ".balloc"; +std::string const ProfileExporter::ClassHistogramFilename = "classhistogram.json"; + ProfileExporter::ProfileExporter( std::vector sampleTypeDefinitions, IConfiguration* configuration, @@ -78,7 +81,8 @@ ProfileExporter::ProfileExporter( MetricsRegistry& metricsRegistry, IMetadataProvider* metadataProvider, ISsiManager* ssiManager, - IAllocationsRecorder* allocationsRecorder) : + IAllocationsRecorder* allocationsRecorder, + IHeapSnapshotManager* heapSnapshotManager) : _sampleTypeDefinitions{std::move(sampleTypeDefinitions)}, _applicationStore{applicationStore}, _metricsRegistry{metricsRegistry}, @@ -86,7 +90,8 @@ ProfileExporter::ProfileExporter( _metadataProvider{metadataProvider}, _configuration{configuration}, _runtimeInfo{runtimeInfo}, - _ssiManager{ssiManager} + _ssiManager{ssiManager}, + _heapSnapshotManager{heapSnapshotManager} { _exporter = CreateExporter(_configuration, CreateFixedTags(_configuration, runtimeInfo, enabledProfilers)); _outputPath = CreatePprofOutputPath(_configuration); @@ -306,6 +311,16 @@ std::string ProfileExporter::GetEnabledProfilersTag(IEnabledProfilers* enabledPr emptyList = false; } + if (enabledProfilers->IsEnabled(RuntimeProfiler::HeapSnapshot)) + { + if (!emptyList) + { + buffer << separator; + } + buffer << "heapsnapshot"; + emptyList = false; + } + return buffer.str(); } @@ -577,6 +592,10 @@ bool ProfileExporter::Export(bool lastCall) // Process-level samples auto processSamples = GetProcessSamples(); + // additional content to be sent along the .pprof + auto metricsFileContent = CreateMetricsFileContent(); + auto classHistogramContent = CreateClassHistogramContent(); + for (auto& runtimeId : keys) { std::unique_ptr profile; @@ 
-648,12 +667,18 @@ bool ProfileExporter::Export(bool lastCall)
 
         auto filesToSend = std::vector<std::pair<std::string, std::string>>{};
 
-        auto metricsFileContent = CreateMetricsFileContent();
+        // metricsFileContent and classHistogramContent are built once before the loop over the
+        // runtime ids: copy them (no std::move) so that they are still available for the next profile
         if (!metricsFileContent.empty())
         {
-            filesToSend.emplace_back(MetricsFilename, std::move(metricsFileContent));
+            filesToSend.emplace_back(MetricsFilename, metricsFileContent);
         }
 
+        if (!classHistogramContent.empty())
+        {
+            filesToSend.emplace_back(ClassHistogramFilename, classHistogramContent);
+        }
+
         std::string metadataJson = GetMetadata();
         std::string infoJson = GetInfo();
 
@@ -700,6 +725,26 @@ std::string ProfileExporter::CreateMetricsFileContent() const
     return builder.str();
 }
 
+std::string ProfileExporter::CreateClassHistogramContent() const
+{
+    if (_heapSnapshotManager == nullptr)
+    {
+        return "";
+    }
+
+    // prepare class histogram to be sent
+    std::stringstream builder;
+
+    // TODO: just for tests, the heap snapshot manager returns a string instead of a reference to an unorderedmap
+    auto heapSnapshot = _heapSnapshotManager->GetHeapSnapshotText();
+    if (!heapSnapshot.empty())
+    {
+        builder << heapSnapshot;
+    }
+
+    return builder.str();
+}
+
 std::string ProfileExporter::GetMetadata() const
 {
     // in tests, the metadata provider might be null
diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.h
index a78c4b8a7f5e..116bf1d04881 100644
--- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.h
+++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ProfileExporter.h
@@ -28,6 +28,7 @@ class IMetadataProvider;
 class IConfiguration;
 class IRuntimeInfo;
 class ISsiManager;
+class IHeapSnapshotManager;
 
 namespace libdatadog {
     class Exporter;
@@ -47,7 +48,8 @@ class ProfileExporter : public IExporter
         MetricsRegistry& metricsRegistry,
         IMetadataProvider* metadataProvider,
         ISsiManager* ssiManager,
-        IAllocationsRecorder* allocationsRecorder);
+        IAllocationsRecorder* allocationsRecorder,
+        IHeapSnapshotManager* heapSnapshotManager);
     ~ProfileExporter() override;
bool Export(bool lastCall = false) override; @@ -105,6 +107,7 @@ class ProfileExporter : public IExporter static fs::path CreatePprofOutputPath(IConfiguration* configuration); std::string CreateMetricsFileContent() const; + std::string CreateClassHistogramContent() const; std::vector GetUpscalingInfos(); std::vector GetUpscalingPoissonInfos(); std::list> GetProcessSamples(); @@ -118,6 +121,7 @@ class ProfileExporter : public IExporter static int const RequestTimeOutMs; static std::string const MetricsFilename; static std::string const AllocationsExtension; + static std::string const ClassHistogramFilename; // TODO: this should be passed in the constructor to avoid overwriting // the .pprof generated by the managed side @@ -145,6 +149,7 @@ class ProfileExporter : public IExporter IConfiguration* _configuration; IRuntimeInfo* _runtimeInfo; ISsiManager* _ssiManager; + IHeapSnapshotManager* _heapSnapshotManager; public: // for tests static std::string GetEnabledProfilersTag(IEnabledProfilers* enabledProfilers); diff --git a/profiler/test/Datadog.Profiler.Native.Tests/EnabledProfilersTest.cpp b/profiler/test/Datadog.Profiler.Native.Tests/EnabledProfilersTest.cpp index d977d6f3f062..754b8cbda25a 100644 --- a/profiler/test/Datadog.Profiler.Native.Tests/EnabledProfilersTest.cpp +++ b/profiler/test/Datadog.Profiler.Native.Tests/EnabledProfilersTest.cpp @@ -27,6 +27,7 @@ TEST(EnabledProfilersTest, CheckWhenNothingIsEnabled) EXPECT_CALL(mockConfiguration, IsHttpProfilingEnabled()).WillRepeatedly(Return(false)); EXPECT_CALL(mockConfiguration, IsGcThreadsCpuTimeEnabled()).WillRepeatedly(Return(false)); EXPECT_CALL(mockConfiguration, IsThreadLifetimeEnabled()).WillRepeatedly(Return(false)); + EXPECT_CALL(mockConfiguration, IsHeapSnapshotEnabled()).WillRepeatedly(Return(false)); EnabledProfilers enabledProfilers(configuration.get(), false, false); @@ -40,6 +41,7 @@ TEST(EnabledProfilersTest, CheckWhenNothingIsEnabled) 
ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::Network)); ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::CpuGc)); ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::ThreadsLifetime)); + ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::HeapSnapshot)); } TEST(EnabledProfilersTest, CheckWhenEverythingIsEnabled) @@ -56,6 +58,7 @@ TEST(EnabledProfilersTest, CheckWhenEverythingIsEnabled) EXPECT_CALL(mockConfiguration, IsHttpProfilingEnabled()).WillRepeatedly(Return(true)); EXPECT_CALL(mockConfiguration, IsGcThreadsCpuTimeEnabled()).WillRepeatedly(Return(true)); EXPECT_CALL(mockConfiguration, IsThreadLifetimeEnabled()).WillRepeatedly(Return(true)); + EXPECT_CALL(mockConfiguration, IsHeapSnapshotEnabled()).WillRepeatedly(Return(true)); EnabledProfilers enabledProfilers(configuration.get(), true, true); @@ -69,6 +72,7 @@ TEST(EnabledProfilersTest, CheckWhenEverythingIsEnabled) ASSERT_TRUE(enabledProfilers.IsEnabled(RuntimeProfiler::Network)); ASSERT_TRUE(enabledProfilers.IsEnabled(RuntimeProfiler::CpuGc)); ASSERT_TRUE(enabledProfilers.IsEnabled(RuntimeProfiler::ThreadsLifetime)); + ASSERT_TRUE(enabledProfilers.IsEnabled(RuntimeProfiler::HeapSnapshot)); } TEST(EnabledProfilersTest, CheckWhenSomeAreDisabled) @@ -92,6 +96,7 @@ TEST(EnabledProfilersTest, CheckWhenSomeAreDisabled) ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::Exceptions)); ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::CpuGc)); ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::ThreadsLifetime)); + ASSERT_FALSE(enabledProfilers.IsEnabled(RuntimeProfiler::HeapSnapshot)); ASSERT_TRUE(enabledProfilers.IsEnabled(RuntimeProfiler::WallTime)); ASSERT_TRUE(enabledProfilers.IsEnabled(RuntimeProfiler::Allocations)); ASSERT_TRUE(enabledProfilers.IsEnabled(RuntimeProfiler::LockContention)); From b16d56f57f2d072a427c2b1a2356fc93c59bc586 Mon Sep 17 00:00:00 2001 From: Christophe Nasarre Date: Fri, 24 Oct 2025 13:41:38 +0200 Subject: [PATCH 3/4] Parse BulkXXX events 
received during heap snapshot generation --- .../EtwEventsManager.cpp | 4 +- .../ClrEventsParser.cpp | 49 ++++++++++++++++- .../Datadog.Profiler.Native/ClrEventsParser.h | 41 +++++++++++++- .../CorProfilerCallback.cpp | 3 +- .../Datadog.Profiler.Native.vcxproj | 1 + .../EventPipeEventsManager.cpp | 7 ++- .../EventPipeEventsManager.h | 3 +- .../HeapSnapshotManager.cpp | 20 ++++++- .../HeapSnapshotManager.h | 12 ++++ .../Datadog.Profiler.Native/IGCDumpListener.h | 55 +++++++++++++++++++ 10 files changed, 183 insertions(+), 12 deletions(-) create mode 100644 profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGCDumpListener.h diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/EtwEventsManager.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/EtwEventsManager.cpp index 3b95eb05bd00..21a73c382673 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/EtwEventsManager.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/EtwEventsManager.cpp @@ -39,7 +39,9 @@ EtwEventsManager::EtwEventsManager( _parser = std::make_unique( nullptr, // to avoid duplicates with what is done in EtwEventsHandler nullptr, // to avoid duplicates with what is done in EtwEventsHandler - pGCSuspensionsListener); + pGCSuspensionsListener, + nullptr // no GC dump for .NET Framework (TODO: how to trigger it from ETW?) 
+ ); _logger = std::make_unique(); _IpcClient = nullptr; _IpcServer = nullptr; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp index 97119bd0a96d..fdadc180072a 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.cpp @@ -41,11 +41,13 @@ void ClrEventsParser::LogGcEvent( ClrEventsParser::ClrEventsParser( IAllocationsListener* pAllocationListener, IContentionListener* pContentionListener, - IGCSuspensionsListener* pGCSuspensionsListener) + IGCSuspensionsListener* pGCSuspensionsListener, + IGCDumpListener* pGCDumpListener) : _pAllocationListener{pAllocationListener}, _pContentionListener{pContentionListener}, - _pGCSuspensionsListener{pGCSuspensionsListener} + _pGCSuspensionsListener{pGCSuspensionsListener}, + _pGCDumpListener{pGCDumpListener} { ResetGC(_gcInProgress); ResetGC(_currentBGC); @@ -242,6 +244,49 @@ ClrEventsParser::ParseGcEvent(std::chrono::nanoseconds timestamp, DWORD id, DWOR return; } + // GC dump related events + if (id == EVENT_GC_BULK_NODE) + { + // TODO: get the list of objects in the GC heap dump + LogGcEvent("OnGCBulkNode"); + + if (_pGCDumpListener != nullptr) + { + GCBulkNodePayload payload{0}; + ULONG offset = 0; + if (!EventsParserHelper::Read(payload, pEventData, cbEventData, offset)) + { + // TODO: log and stop the dump? 
+                return;
+            }
+
+            // TODO: sanity check that Count entries fit in the remaining payload
+            _pGCDumpListener->OnBulkNodes(
+                payload.Index,
+                payload.Count,
+                (GCBulkNodeValue*)(pEventData + offset));
+        }
+    }
+    else if (id == EVENT_GC_BULK_EDGE)
+    {
+        // TODO: get the list of references between objects in the GC heap dump
+        LogGcEvent("OnGCBulkEdge");
+
+        if (_pGCDumpListener != nullptr)
+        {
+            // forward the edges only when the payload header has been successfully read
+            GCBulkEdgePayload payload{0};
+            ULONG offset = 0;
+            if (EventsParserHelper::Read(payload, pEventData, cbEventData, offset))
+            {
+                _pGCDumpListener->OnBulkEdges(
+                    payload.Index,
+                    payload.Count,
+                    (GCBulkEdgeValue*)(pEventData + offset));
+            }
+        }
+    }
+
     // the rest of events are related to garbage collections lifetime
     // read https://medium.com/criteo-engineering/spying-on-net-garbage-collector-with-net-core-eventpipes-9f2a986d5705?source=friends_link&sk=baf9a7766fb5c7899b781f016803597f
     // for more details about the state machine
diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h
index 5cd5fa5b6404..832e0f625ffc 100644
--- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h
+++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ClrEventsParser.h
@@ -19,6 +19,7 @@
 #include "IAllocationsListener.h"
 #include "IGarbageCollectionsListener.h"
 #include "IGCSuspensionsListener.h"
+#include "IGCDumpListener.h"
 #include "../../../../shared/src/native-src/string.h"
 #include "assert.h"
 
@@ -50,6 +51,9 @@ const int EVENT_GC_PINOBJECTATGCTIME = 33;
 
 const int EVENT_SW_STACK = 82;
 
+// events sent during heap dumps
+const int EVENT_GC_BULK_NODE = 18;
+const int EVENT_GC_BULK_EDGE = 19;
 
 #define LONG_LENGTH 1024
 
@@ -248,8 +252,40 @@ struct WaitHandleWaitStopPayload // for .NET 9+
 {
     uint16_t ClrInstanceId; // Unique ID for the instance of CLR.
}; + +//struct GCBulkNodeValue +//{ +// uintptr_t Address; +// uint64_t Size; +// uint64_t TypeID; +// uint64_t EdgeCount; +//}; +//struct GCBulkNodePayload +//{ +// uint32_t Index; +// uint32_t Count; +// uint16_t ClrInstanceID; +// +// // this is followed by an array of Count GCBulkNodeValue structures +//}; +// +//struct GCBulkEdgeValue +//{ +// uintptr_t Value; +// uint32_t ReferencingFieldID; +//}; +//struct GCBulkEdgePayload +//{ +// uint32_t Index; +// uint32_t Count; +// uint16_t ClrInstanceID; +// +// // this is followed by an array of Count GCBulkEdgeValue structures +//}; + #pragma pack() + class IContentionListener; @@ -286,10 +322,10 @@ class ClrEventsParser ClrEventsParser( IAllocationsListener* pAllocationListener, IContentionListener* pContentionListener, - IGCSuspensionsListener* pGCSuspensionsListener + IGCSuspensionsListener* pGCSuspensionsListener, + IGCDumpListener* pGCDumpListener ); - // the parser is used both for synchronous (ICorProfilerCallback) and // asynchronous (.NET Framework via the Agent) cases. The timestamp parameter // is only valid (different from 0) in the asynchronous scenario. @@ -350,6 +386,7 @@ class ClrEventsParser IContentionListener* _pContentionListener = nullptr; IGCSuspensionsListener* _pGCSuspensionsListener = nullptr; std::vector _pGarbageCollectionsListeners; + IGCDumpListener* _pGCDumpListener = nullptr; template void LogGcEvent(Args const&... 
args); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp index d046819fc419..8b4261a2cd07 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp @@ -400,7 +400,8 @@ void CorProfilerCallback::InitializeServices() _pAllocationsProvider, _pContentionProvider, _pStopTheWorldProvider, - _pNetworkProvider + _pNetworkProvider, + _pHeapSnapshotManager ); if (_pGarbageCollectionProvider != nullptr) diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj index 3e19231a5d22..701b155236bf 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Datadog.Profiler.Native.vcxproj @@ -255,6 +255,7 @@ + diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.cpp index f420107fda7c..daf0af88ae74 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.cpp @@ -6,6 +6,7 @@ #include "EventsParserHelper.h" #include "IAllocationsListener.h" #include "IContentionListener.h" +#include "IGCDumpListener.h" #include "IGCSuspensionsListener.h" #include "INetworkListener.h" #include "OpSysTools.h" @@ -16,14 +17,16 @@ EventPipeEventsManager::EventPipeEventsManager( IAllocationsListener* pAllocationListener, IContentionListener* pContentionListener, IGCSuspensionsListener* pGCSuspensionsListener, - INetworkListener* pNetworkListener) + INetworkListener* pNetworkListener, + IGCDumpListener* pGCDumpListener) : 
_pCorProfilerInfo{pCorProfilerInfo} { _clrParser = std::make_unique( pAllocationListener, pContentionListener, - pGCSuspensionsListener); + pGCSuspensionsListener, + pGCDumpListener); _bclParser = std::make_unique(pNetworkListener); } diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.h index d63f936724c5..9faf499373ef 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EventPipeEventsManager.h @@ -27,7 +27,8 @@ class EventPipeEventsManager IAllocationsListener* pAllocationListener, IContentionListener* pContentionListener, IGCSuspensionsListener* pGCSuspensionsListener, - INetworkListener* pNetworkListener); + INetworkListener* pNetworkListener, + IGCDumpListener* pGCDumpListener); void Register(IGarbageCollectionsListener* pGarbageCollectionsListener); void ParseEvent(EVENTPIPE_PROVIDER provider, DWORD eventId, diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp index 7acced180447..9f10d5783a9e 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp @@ -45,6 +45,20 @@ std::string HeapSnapshotManager::GetHeapSnapshotText() return std::string{}; } +void HeapSnapshotManager::OnBulkNodes( + uint32_t Index, + uint32_t Count, + GCBulkNodeValue* pNodes) +{ +} + +void HeapSnapshotManager::OnBulkEdges( + uint32_t Index, + uint32_t Count, + GCBulkEdgeValue* pEdges) +{ +} + void HeapSnapshotManager::OnGarbageCollectionStart( std::chrono::nanoseconds timestamp, int32_t number, @@ -52,9 +66,9 @@ void HeapSnapshotManager::OnGarbageCollectionStart( GCReason reason, GCType type) { - if ((_session != 0) && (_inducedGCNumber != -1)) + // waiting 
for the first induced foregrouned gen2 collection + if (_isHeapDumpInProgress && (_inducedGCNumber == -1)) { - // waiting for the first induced foregrouned gen2 collection if ((reason == GCReason::Induced) && (generation == 2) && (type == GCType::NonConcurrentGC)) { _inducedGCNumber = number; @@ -76,7 +90,7 @@ void HeapSnapshotManager::OnGarbageCollectionEnd( uint64_t pohSize, uint32_t memPressure) { - if (_session != 0) + if (_isHeapDumpInProgress) { if (number == _inducedGCNumber) { diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h index ab63d42c3af0..e173a0508990 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h @@ -7,6 +7,7 @@ #include "IConfiguration.h" #include "IFrameStore.h" #include "IGarbageCollectionsListener.h" +#include "IGCDumpListener.h" #include "ServiceBase.h" #include "corprof.h" @@ -34,6 +35,7 @@ class HeapSnapshotManager : public IHeapSnapshotManager, public IGarbageCollectionsListener, + public IGCDumpListener, public ServiceBase { public: @@ -74,6 +76,16 @@ class HeapSnapshotManager uint64_t pohSize, uint32_t memPressure) override; + // inherited via IGCDumpListener + void OnBulkNodes( + uint32_t Index, + uint32_t Count, + GCBulkNodeValue* pNodes) override; + void OnBulkEdges( + uint32_t Index, + uint32_t Count, + GCBulkEdgeValue* pEdges) override; + // Inherited via ServiceBase bool StartImpl() override; bool StopImpl() override; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGCDumpListener.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGCDumpListener.h new file mode 100644 index 000000000000..4a52740279c8 --- /dev/null +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IGCDumpListener.h @@ -0,0 +1,55 @@ +// Unless explicitly stated otherwise all files in this repository are 
licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2022 Datadog, Inc. + +#pragma once + +#pragma pack(1) + +struct GCBulkNodeValue +{ + uintptr_t Address; + uint64_t Size; + uint64_t TypeID; + uint64_t EdgeCount; +}; +struct GCBulkNodePayload +{ + uint32_t Index; + uint32_t Count; + uint16_t ClrInstanceID; + + // this is followed by an array of Count GCBulkNodeValue structures +}; + +struct GCBulkEdgeValue +{ + uintptr_t Value; + uint32_t ReferencingFieldID; +}; +struct GCBulkEdgePayload +{ + uint32_t Index; + uint32_t Count; + uint16_t ClrInstanceID; + + // this is followed by an array of Count GCBulkEdgeValue structures +}; + +#pragma pack() + + +class IGCDumpListener +{ +public: + virtual void OnBulkNodes( + uint32_t Index, + uint32_t Count, + GCBulkNodeValue* pNodes) = 0; + + virtual void OnBulkEdges( + uint32_t Index, + uint32_t Count, + GCBulkEdgeValue* pEdges) = 0; + + virtual ~IGCDumpListener() = default; +}; \ No newline at end of file From b708f3f58f4eef39d51f20df1f8e8e2dd4df00bf Mon Sep 17 00:00:00 2001 From: Christophe Nasarre Date: Fri, 31 Oct 2025 23:36:26 +0100 Subject: [PATCH 4/4] Just for tests with dotnet-gcdump, allow remote trigger --- .../CorProfilerCallback.cpp | 4 + .../HeapSnapshotManager.cpp | 84 +++++++++++++++---- .../HeapSnapshotManager.h | 6 +- 3 files changed, 77 insertions(+), 17 deletions(-) diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp index 726722f59df9..2d1c1681bcb3 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp @@ -414,6 +414,10 @@ void CorProfilerCallback::InitializeServices() { _pEventPipeEventsManager->Register(_pLiveObjectsProvider); } + if (_pHeapSnapshotManager != nullptr) + { + 
_pEventPipeEventsManager->Register(_pHeapSnapshotManager);
+        }
         // TODO: register any provider that needs to get notified when GCs start and end
     }
     else if ((_pRuntimeInfo->IsDotnetFramework()) && (_pConfiguration->IsEtwEnabled()))
diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp
index 9f10d5783a9e..1772aa48bf16 100644
--- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp
+++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.cpp
@@ -42,14 +42,60 @@ bool HeapSnapshotManager::StopImpl()
 
 std::string HeapSnapshotManager::GetHeapSnapshotText()
 {
-    return std::string{};
+    std::stringstream ss;
+    ss << "[" << std::endl;
+    int current = 1;
+    int last = static_cast<int>(_classHistogram.size());
+    for (const auto& [classID, entry] : _classHistogram)
+    {
+        ss << "[\"" << entry.ClassName << "\","
+           << entry.InstanceCount << ","
+           << entry.TotalSize << "]";
+        // no separator after the last entry: a trailing comma would make the array invalid
+        if (current < last)
+        {
+            ss << "," << std::endl;
+        }
+        else
+        {
+            ss << std::endl;
+        }
+        current++;
+    }
+    ss << "]" << std::endl;
+
+    return ss.str();
 }
 
 void HeapSnapshotManager::OnBulkNodes(
-    uint32_t Index,
-    uint32_t Count,
+    uint32_t index,
+    uint32_t count,
     GCBulkNodeValue* pNodes)
 {
+    for (size_t i = 0; i < count; i++)
+    {
+        // TODO: we should not be called from different threads so no need to lock
+        auto entry = _classHistogram.find(pNodes[i].TypeID);
+        if (entry == _classHistogram.end())
+        {
+            std::string className;
+            if (_pFrameStore->GetTypeName(static_cast<ClassID>(pNodes[i].TypeID), className))
+            {
+                ClassHistogramEntry histogramEntry(className);
+                histogramEntry.InstanceCount = 1;
+                histogramEntry.TotalSize = pNodes[i].Size;
+                _classHistogram.emplace(pNodes[i].TypeID, histogramEntry);
+            }
+            else // should never happen :^(
+            {
+            }
+        }
+        else
+        {
+            entry->second.InstanceCount++;
+            entry->second.TotalSize += pNodes[i].Size;
+        }
+    }
 }
 
 void
HeapSnapshotManager::OnBulkEdges( @@ -57,6 +103,7 @@ void HeapSnapshotManager::OnBulkEdges( uint32_t Count, GCBulkEdgeValue* pEdges) { + // TODO: use to rebuild the reference chain } void HeapSnapshotManager::OnGarbageCollectionStart( @@ -67,13 +114,13 @@ void HeapSnapshotManager::OnGarbageCollectionStart( GCType type) { // waiting for the first induced foregrouned gen2 collection - if (_isHeapDumpInProgress && (_inducedGCNumber == -1)) - { + //if (_isHeapDumpInProgress && (_inducedGCNumber == -1)) + //{ if ((reason == GCReason::Induced) && (generation == 2) && (type == GCType::NonConcurrentGC)) { _inducedGCNumber = number; } - } + //} } void HeapSnapshotManager::OnGarbageCollectionEnd( @@ -90,8 +137,9 @@ void HeapSnapshotManager::OnGarbageCollectionEnd( uint64_t pohSize, uint32_t memPressure) { - if (_isHeapDumpInProgress) - { + + //if (_isHeapDumpInProgress) + //{ if (number == _inducedGCNumber) { // the induced GC triggered to generate the heap snapshot has ended @@ -101,7 +149,7 @@ void HeapSnapshotManager::OnGarbageCollectionEnd( // TODO: restart the timer before the next heap snapshot } - } + //} // store sizes for next heap snapshot _gen2Size = gen2Size; @@ -130,7 +178,7 @@ void HeapSnapshotManager::StartGCDump() UINT64 activatedKeywords = 0x900000; // GCHeapDumpKeyword and ManagedHeapCollectKeyword uint32_t verbosity = 5; // verbose verbosity - COR_PRF_EVENTPIPE_PROVIDER_CONFIG providers[1] = + COR_PRF_EVENTPIPE_PROVIDER_CONFIG providers[1] = { COR_PRF_EVENTPIPE_PROVIDER_CONFIG{ WStr("Microsoft-Windows-DotNETRuntime"), @@ -152,10 +200,18 @@ void HeapSnapshotManager::StartGCDump() void HeapSnapshotManager::StopGCDump() { - if (_session == 0) - { - return; - } + //if (_session == 0) + //{ + // return; + //} + +#ifdef NDEBUG + // for debugging purpose only +#else + // dump each entry in _classHistogram + auto content = GetHeapSnapshotText(); + std::cout << content << std::endl; +#endif _pCorProfilerInfo->EventPipeStopSession(_session); _isHeapDumpInProgress = 
false; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h index e173a0508990..d481a67f8f3f 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/HeapSnapshotManager.h @@ -18,7 +18,7 @@ class ClassHistogramEntry { public: ClassHistogramEntry(std::string& className) - : + : InstanceCount(0), TotalSize(0), ClassName(className) @@ -31,8 +31,8 @@ class ClassHistogramEntry uint64_t TotalSize; }; -class HeapSnapshotManager - : +class HeapSnapshotManager + : public IHeapSnapshotManager, public IGarbageCollectionsListener, public IGCDumpListener,