diff --git a/Configuration/StandardSequences/python/Services_cff.py b/Configuration/StandardSequences/python/Services_cff.py index 0949c7b8d4972..f68e2e399e9e9 100644 --- a/Configuration/StandardSequences/python/Services_cff.py +++ b/Configuration/StandardSequences/python/Services_cff.py @@ -10,7 +10,9 @@ # load CUDA services when the "gpu" or "pixelNtupletFit" modifiers are enabled def _addCUDAServices(process): - process.load("HeterogeneousCore.CUDAServices.CUDAService_cfi") + process.load("HeterogeneousCore.CUDAServices.CUDAService_cfi") + process.load("FWCore.MessageService.MessageLogger_cfi") + process.MessageLogger.CUDAService = cms.untracked.PSet() from Configuration.ProcessModifiers.gpu_cff import gpu from Configuration.ProcessModifiers.pixelNtupletFit_cff import pixelNtupletFit diff --git a/HLTrigger/Configuration/python/customizeHLTforPatatrack.py b/HLTrigger/Configuration/python/customizeHLTforPatatrack.py index 89987dd8320f9..d133b2978f46b 100644 --- a/HLTrigger/Configuration/python/customizeHLTforPatatrack.py +++ b/HLTrigger/Configuration/python/customizeHLTforPatatrack.py @@ -24,8 +24,10 @@ def customiseCommon(process): # Services process.load("HeterogeneousCore.CUDAServices.CUDAService_cfi") + if 'MessageLogger' in process.__dict__: + process.MessageLogger.CUDAService = cms.untracked.PSet() - # NVProfilerService is broken in CMSSW 12.0,x and later + # NVProfilerService is broken in CMSSW 12.0.x and later #process.load("HeterogeneousCore.CUDAServices.NVProfilerService_cfi") diff --git a/HeterogeneousCore/CUDAServices/BuildFile.xml b/HeterogeneousCore/CUDAServices/BuildFile.xml index 5fcaf5e5527b0..a48e1c639eaf3 100644 --- a/HeterogeneousCore/CUDAServices/BuildFile.xml +++ b/HeterogeneousCore/CUDAServices/BuildFile.xml @@ -5,6 +5,7 @@ + diff --git a/HeterogeneousCore/CUDAServices/interface/CUDAService.h b/HeterogeneousCore/CUDAServices/interface/CUDAService.h index 5295af75513b0..d24571b8e48ce 100644 --- a/HeterogeneousCore/CUDAServices/interface/CUDAService.h +++ b/HeterogeneousCore/CUDAServices/interface/CUDAService.h @@ -33,6 +33,7 @@ class CUDAService { int numberOfDevices_ = 0; std::vector> computeCapabilities_; bool enabled_ = false; + bool verbose_ = false; }; #endif diff --git a/HeterogeneousCore/CUDAServices/src/CUDAService.cc b/HeterogeneousCore/CUDAServices/src/CUDAService.cc index d7f194829d159..346c81267ec49 100644 --- a/HeterogeneousCore/CUDAServices/src/CUDAService.cc +++ b/HeterogeneousCore/CUDAServices/src/CUDAService.cc @@ -3,6 +3,8 @@ #include #include +#include +#include #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" @@ -10,13 +12,14 @@ #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/Utilities/interface/ReusableObjectHolder.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h" #include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cachingAllocators.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cachingAllocators.h" +#include "HeterogeneousCore/CUDAUtilities/interface/nvmlCheck.h" void setCudaLimit(cudaLimit limit, const char* name, size_t request) { // read the current device @@ -30,7 +33,7 @@ void setCudaLimit(cudaLimit limit, const char* name, size_t request) { } // read back the limit value size_t value; - cudaCheck(cudaDeviceGetLimit(&value, limit)); + result = cudaDeviceGetLimit(&value, limit); if (cudaSuccess != result) { edm::LogWarning("CUDAService") << "CUDA device " << device << ": failed to set limit \"" << name << "\" to " << request << ", current value is " << value; @@ -77,12 +80,22 @@ constexpr unsigned int getCudaCoresPerSM(unsigned int major, unsigned int minor) case 75: // SM 7.5: TU10x class return 64; + // Ampere architecture + case 80: // SM 8.0: GA100 class + return 64; + case 86: // SM 8.6: GA10x class + return 128; + // unknown architecture, return a default value default: return 64; } } +std::string decodeVersion(int version) { + return std::to_string(version / 1000) + '.' + std::to_string(version % 1000 / 10); +} + namespace { template