Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export CUDA_DEPS := $(CUDA_LIBDIR)/libcudart.so
export CUDA_ARCH := 35 50 60 70
export CUDA_CXXFLAGS := -I$(CUDA_BASE)/include
export CUDA_TEST_CXXFLAGS := -DGPU_DEBUG
export CUDA_LDFLAGS := -L$(CUDA_LIBDIR) -lcudart -lcudadevrt
export CUDA_LDFLAGS := -L$(CUDA_LIBDIR) -lcudart -lcudadevrt -lcudaautotunert
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this library be added only in src/cudaautotune/Makefile?

export CUDA_NVCC := $(CUDA_BASE)/bin/nvcc
define CUFLAGS_template
$(2)NVCC_FLAGS := $$(foreach ARCH,$(1),-gencode arch=compute_$$(ARCH),code=[sm_$$(ARCH),compute_$$(ARCH)]) -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --expt-relaxed-constexpr --expt-extended-lambda --generate-line-info --source-in-ptx --display-error-number --threads $$(words $(1)) --cudart=shared
Expand Down
58 changes: 58 additions & 0 deletions src/cudaautotune/CUDACore/AtomicPairCounter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#ifndef HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
#define HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h

#include <cstdint>

#include "CUDACore/cudaCompat.h"

namespace cms {
namespace cuda {

// Two 32-bit counters packed into a single 64-bit word, so that both can be
// advanced together by one 64-bit addition (an atomicAdd when compiled for
// the device, a plain read-then-add when compiled for the host).
class AtomicPairCounter {
public:
  // Raw 64-bit representation of the packed pair.
  using c_type = unsigned long long int;

  // Deliberately empty: the packed word is left uninitialized, so a value
  // must be assigned before the counter is read.
  __host__ __device__ AtomicPairCounter() {}

  // Initializes the packed 64-bit word to `i` (both counters at once).
  __host__ __device__ AtomicPairCounter(c_type i) { counter.ac = i; }

  // Overwrites the packed 64-bit word with `i`; a plain store, not an atomic one.
  __device__ __host__ AtomicPairCounter& operator=(c_type i) {
    counter.ac = i;
    return *this;
  }

  // The two 32-bit halves of the packed word. The roles below follow from the
  // arithmetic in add() on a little-endian target, where `n` (declared first)
  // overlays the low 32 bits of `ac` and `m` the high 32 bits.
  // NOTE(review): the previous comments described n and m the other way
  // around; confirm against callers that they read the fields as documented here.
  struct Counters {
    uint32_t n;  // running sum of the `i` values passed to add() (total number of associations)
    uint32_t m;  // number of add() calls so far (number of "One" in a "One to Many" association)
  };

  // Overlay giving access either to the two halves or to the whole 64-bit word.
  union Atomic2 {
    Counters counters;
    c_type ac;
  };

  // add() relies on both views of the union covering exactly the same bytes.
  static_assert(sizeof(Counters) == sizeof(c_type), "Counters must exactly overlay the 64-bit word");

  // Value that adds 1 to the upper 32-bit half. Built from c_type so the shift
  // is always performed on a 64-bit operand (unsigned long may be only 32 bits wide).
  static constexpr c_type incr = c_type{1} << 32;

  // Returns the current pair of counters (plain, non-atomic read).
  __device__ __host__ Counters get() const { return counter.counters; }

  // Adds `i` to the low half (n) and 1 to the high half (m) with a single
  // 64-bit addition, and returns the pair as it was BEFORE the addition.
  // Because it is one 64-bit add, an overflow of the low half carries into
  // the high half.
  __host__ __device__ __forceinline__ Counters add(uint32_t i) {
    c_type c = i;
    c += incr;
    Atomic2 ret;
#ifdef __CUDA_ARCH__
    // Device compilation: a single atomic read-modify-write on the packed word.
    ret.ac = atomicAdd(&counter.ac, c);
#else
    // Host compilation: plain read followed by add; this path is not atomic.
    ret.ac = counter.ac;
    counter.ac += c;
#endif
    return ret.counters;
  }

private:
  Atomic2 counter;
};

} // namespace cuda
} // namespace cms

#endif // HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
Loading