Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export CUDA_DEPS := $(CUDA_LIBDIR)/libcudart.so
# Space-separated list of architecture numbers.
# NOTE(review): presumably the list handed to CUFLAGS_template as $(1) to build
# the -gencode arch=compute_NN,code=[sm_NN,compute_NN] flags -- confirm at the call site.
export CUDA_ARCH := 35 50 60 70
# Compiler flags: add the include directory found under CUDA_BASE.
export CUDA_CXXFLAGS := -I$(CUDA_BASE)/include
# Extra compiler flags for tests: define the GPU_DEBUG preprocessor macro.
export CUDA_TEST_CXXFLAGS := -DGPU_DEBUG
# Linker flags: search CUDA_LIBDIR and link against cudart and cudadevrt.
# NOTE(review): the assignment appears twice with identical text in this view;
# it looks like the before/after rows of a whitespace-only diff -- confirm and
# keep a single copy in the real Makefile.
export CUDA_LDFLAGS := -L$(CUDA_LIBDIR) -lcudart -lcudadevrt
export CUDA_LDFLAGS := -L$(CUDA_LIBDIR) -lcudart -lcudadevrt
# Path of the nvcc executable under CUDA_BASE.
export CUDA_NVCC := $(CUDA_BASE)/bin/nvcc
define CUFLAGS_template
$(2)NVCC_FLAGS := $$(foreach ARCH,$(1),-gencode arch=compute_$$(ARCH),code=[sm_$$(ARCH),compute_$$(ARCH)]) -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --expt-relaxed-constexpr --expt-extended-lambda --generate-line-info --source-in-ptx --display-error-number --threads $$(words $(1)) --cudart=shared
Expand Down
58 changes: 58 additions & 0 deletions src/cudaautotune/CUDACore/AtomicPairCounter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#ifndef HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
#define HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h

#include <cstdint>

#include "CUDACore/cudaCompat.h"

namespace cms {
namespace cuda {

/**
 * Two 32-bit counters packed into a single 64-bit word so that both can be
 * advanced together with one 64-bit addition (an atomicAdd in device code).
 *
 * The word is viewed either as `c_type ac` or as `Counters {n, m}` through a
 * union.  `add(i)` adds `i` to the low 32 bits of the word and 1 to the high
 * 32 bits.  Because `n` is declared first, on a little-endian target `n` is
 * the low half and `m` the high half.
 * NOTE(review): the n/m mapping below assumes little-endian layout -- confirm
 * for any other target.  Reading a union member other than the one last
 * written relies on the compiler supporting this form of type punning.
 */
class AtomicPairCounter {
public:
  using c_type = unsigned long long int;

  // Leaves the packed word uninitialised: assign a value (e.g. `apc = 0`)
  // before the first add()/get().
  AtomicPairCounter() {}
  // Sets the whole 64-bit word to `i`.
  AtomicPairCounter(c_type i) { counter.ac = i; }

  // Overwrites the whole 64-bit word with `i` (plain store, not atomic).
  __device__ __host__ AtomicPairCounter& operator=(c_type i) {
    counter.ac = i;
    return *this;
  }

  struct Counters {
    uint32_t n;  // running sum of the `i` passed to add(): in a "One to Many" association, the total number of associations
    uint32_t m;  // number of add() calls: in a "One to Many" association, the number of "One"
  };

  union Atomic2 {
    Counters counters;
    c_type ac;
  };

  // Both views of the union must cover exactly the same bytes.
  static_assert(sizeof(Counters) == sizeof(c_type), "Counters must pack into exactly one c_type word");

  // One unit in the high 32 bits of the packed word.  The literal is
  // `unsigned long long` so the shift is well defined even where
  // `unsigned long` is only 32 bits wide.
  static constexpr c_type incr = 1ULL << 32;

  // Returns a copy of the current counter pair (plain load, not atomic).
  __device__ __host__ Counters get() const { return counter.counters; }

  // Increment n by i and m by 1 in a single 64-bit addition; return the
  // previous value of the pair.
  // Device code uses atomicAdd; the host path is a plain read followed by a
  // plain add and is therefore not atomic.
  // If the running sum in the low 32 bits overflows, the carry propagates
  // into the high 32 bits.
  __host__ __device__ __forceinline__ Counters add(uint32_t i) {
    c_type c = i;
    c += incr;
    Atomic2 ret;
#ifdef __CUDA_ARCH__
    ret.ac = atomicAdd(&counter.ac, c);
#else
    ret.ac = counter.ac;
    counter.ac += c;
#endif
    return ret.counters;
  }

private:
  Atomic2 counter;
};

} // namespace cuda
} // namespace cms

#endif // HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
Loading