Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions DataFormats/GeometrySurface/interface/SOARotation.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,34 @@ class SOAFrame {
uz += pz;
}

constexpr inline void toGlobal(T cxx, T cxy, T cyy, T *gl) const {
auto const &r = rot;
gl[0] = r.xx() * (r.xx() * cxx + r.yx() * cxy) + r.yx() * (r.xx() * cxy + r.yx() * cyy);
gl[1] = r.xx() * (r.xy() * cxx + r.yy() * cxy) + r.yx() * (r.xy() * cxy + r.yy() * cyy);
gl[2] = r.xy() * (r.xy() * cxx + r.yy() * cxy) + r.yy() * (r.xy() * cxy + r.yy() * cyy);
gl[3] = r.xx() * (r.xz() * cxx + r.yz() * cxy) + r.yx() * (r.xz() * cxy + r.yz() * cyy);
gl[4] = r.xy() * (r.xz() * cxx + r.yz() * cxy) + r.yy() * (r.xz() * cxy + r.yz() * cyy);
gl[5] = r.xz() * (r.xz() * cxx + r.yz() * cxy) + r.yz() * (r.xz() * cxy + r.yz() * cyy);
}

constexpr inline void toLocal(T const *ge, T &lxx, T &lxy, T &lyy) const {
auto const &r = rot;

T cxx = ge[0];
T cyx = ge[1];
T cyy = ge[2];
T czx = ge[3];
T czy = ge[4];
T czz = ge[5];

lxx = r.xx() * (r.xx() * cxx + r.xy() * cyx + r.xz() * czx) +
r.xy() * (r.xx() * cyx + r.xy() * cyy + r.xz() * czy) + r.xz() * (r.xx() * czx + r.xy() * czy + r.xz() * czz);
lxy = r.yx() * (r.xx() * cxx + r.xy() * cyx + r.xz() * czx) +
r.yy() * (r.xx() * cyx + r.xy() * cyy + r.xz() * czy) + r.yz() * (r.xx() * czx + r.xy() * czy + r.xz() * czz);
lyy = r.yx() * (r.yx() * cxx + r.yy() * cyx + r.yz() * czx) +
r.yy() * (r.yx() * cyx + r.yy() * cyy + r.yz() * czy) + r.yz() * (r.yx() * czx + r.yy() * czy + r.yz() * czz);
}

constexpr inline T x() const { return px; }
constexpr inline T y() const { return py; }
constexpr inline T z() const { return pz; }
Expand Down
13 changes: 13 additions & 0 deletions DataFormats/GeometrySurface/test/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,16 @@
</bin>
<bin file="Bounds_t.cpp">
</bin>

<bin file="gpuFrameTransformKernel.cu, gpuFrameTransformTest.cpp" name="gpuFrameTransformTest">
<use name="cuda"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<flags CXXFLAGS="-g"/>
</bin>

<bin file="gpuFrameTransformKernel.cu, gpuFrameTransformTest.cpp" name="gpuFrameTransformTestRep">
<use name="cuda"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<flags CXXFLAGS="-ffp-contract=off"/>
<flags CUDA_FLAGS="-fmad=false -ftz=false -prec-div=true -prec-sqrt=true"/>
</bin>
40 changes: 40 additions & 0 deletions DataFormats/GeometrySurface/test/gpuFrameTransformKernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <cstdint>
#include <iostream>
#include <iomanip>

#include "DataFormats/GeometrySurface/interface/SOARotation.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"

__global__ void toGlobal(SOAFrame<float> const* frame,
float const* xl,
float const* yl,
float* x,
float* y,
float* z,
float const* le,
float* ge,
uint32_t n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= n)
return;

frame[0].toGlobal(xl[i], yl[i], x[i], y[i], z[i]);
frame[0].toGlobal(le[3 * i], le[3 * i + 1], le[3 * i + 2], ge + 6 * i);
}

void toGlobalWrapper(SOAFrame<float> const* frame,
float const* xl,
float const* yl,
float* x,
float* y,
float* z,
float const* le,
float* ge,
uint32_t n) {
int threadsPerBlock = 256;
int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA toGlobal kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads"
<< std::endl;

cms::cuda::launch(toGlobal, {blocksPerGrid, threadsPerBlock}, frame, xl, yl, x, y, z, le, ge, n);
}
114 changes: 114 additions & 0 deletions DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <memory>
#include <numeric>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "DataFormats/GeometrySurface/interface/GloballyPositioned.h"
#include "DataFormats/GeometrySurface/interface/SOARotation.h"
#include "DataFormats/GeometrySurface/interface/TkRotation.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"

void toGlobalWrapper(SOAFrame<float> const *frame,
float const *xl,
float const *yl,
float *x,
float *y,
float *z,
float const *le,
float *ge,
uint32_t n);

int main(void) {
cms::cudatest::requireDevices();

typedef float T;
typedef TkRotation<T> Rotation;
typedef SOARotation<T> SRotation;
typedef GloballyPositioned<T> Frame;
typedef SOAFrame<T> SFrame;
typedef typename Frame::PositionType Position;
typedef typename Frame::GlobalVector GlobalVector;
typedef typename Frame::GlobalPoint GlobalPoint;
typedef typename Frame::LocalVector LocalVector;
typedef typename Frame::LocalPoint LocalPoint;

constexpr uint32_t size = 10000;
constexpr uint32_t size32 = size * sizeof(float);

float xl[size], yl[size];
float x[size], y[size], z[size];

// errors
float le[3 * size];
float ge[6 * size];

auto d_xl = cms::cuda::make_device_unique<float[]>(size, nullptr);
auto d_yl = cms::cuda::make_device_unique<float[]>(size, nullptr);

auto d_x = cms::cuda::make_device_unique<float[]>(size, nullptr);
auto d_y = cms::cuda::make_device_unique<float[]>(size, nullptr);
auto d_z = cms::cuda::make_device_unique<float[]>(size, nullptr);

auto d_le = cms::cuda::make_device_unique<float[]>(3 * size, nullptr);
auto d_ge = cms::cuda::make_device_unique<float[]>(6 * size, nullptr);

double a = 0.01;
double ca = std::cos(a);
double sa = std::sin(a);

Rotation r1(ca, sa, 0, -sa, ca, 0, 0, 0, 1);
Frame f1(Position(2, 3, 4), r1);
std::cout << "f1.position() " << f1.position() << std::endl;
std::cout << "f1.rotation() " << '\n' << f1.rotation() << std::endl;

SFrame sf1(f1.position().x(), f1.position().y(), f1.position().z(), f1.rotation());

auto d_sf = cms::cuda::make_device_unique<char[]>(sizeof(SFrame), nullptr);
cudaCheck(cudaMemcpy(d_sf.get(), &sf1, sizeof(SFrame), cudaMemcpyHostToDevice));

for (auto i = 0U; i < size; ++i) {
xl[i] = yl[i] = 0.1f * float(i) - float(size / 2);
le[3 * i] = 0.01f;
le[3 * i + 2] = (i > size / 2) ? 1.f : 0.04f;
le[2 * i + 1] = 0.;
}
std::random_shuffle(xl, xl + size);
std::random_shuffle(yl, yl + size);

cudaCheck(cudaMemcpy(d_xl.get(), xl, size32, cudaMemcpyHostToDevice));
cudaCheck(cudaMemcpy(d_yl.get(), yl, size32, cudaMemcpyHostToDevice));
cudaCheck(cudaMemcpy(d_le.get(), le, 3 * size32, cudaMemcpyHostToDevice));

toGlobalWrapper((SFrame const *)(d_sf.get()),
d_xl.get(),
d_yl.get(),
d_x.get(),
d_y.get(),
d_z.get(),
d_le.get(),
d_ge.get(),
size);
cudaCheck(cudaMemcpy(x, d_x.get(), size32, cudaMemcpyDeviceToHost));
cudaCheck(cudaMemcpy(y, d_y.get(), size32, cudaMemcpyDeviceToHost));
cudaCheck(cudaMemcpy(z, d_z.get(), size32, cudaMemcpyDeviceToHost));
cudaCheck(cudaMemcpy(ge, d_ge.get(), 6 * size32, cudaMemcpyDeviceToHost));

float eps = 0.;
for (auto i = 0U; i < size; ++i) {
auto gp = f1.toGlobal(LocalPoint(xl[i], yl[i]));
eps = std::max(eps, std::abs(x[i] - gp.x()));
eps = std::max(eps, std::abs(y[i] - gp.y()));
eps = std::max(eps, std::abs(z[i] - gp.z()));
}

std::cout << "max eps " << eps << std::endl;

return 0;
}
7 changes: 4 additions & 3 deletions DataFormats/Math/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<use name="DataFormats/Common"/>
<use name="rootmath"/>
<use name="eigen"/>
<use name="rootmath"/>
<use name="DataFormats/Common"/>

<export>
<lib name="1"/>
<lib name="1"/>
</export>
Loading