35 changes: 35 additions & 0 deletions RecoTracker/LSTCore/interface/DenseLayer.h
@@ -0,0 +1,35 @@
#ifndef RecoTracker_LSTCore_interface_DenseLayer_h
#define RecoTracker_LSTCore_interface_DenseLayer_h

#include <array>
#include <cstddef>
#include <cstdint>

/**
* Represents a dense (fully connected) layer with fixed input and output sizes.
*
* IN: Number of input neurons
* OUT: Number of output neurons
*/
template <std::size_t IN, std::size_t OUT>
struct DenseLayer {
/**
* Biases: one float per output neuron.
*/
std::array<float, OUT> biases{};

/**
* Weights: stored as IN rows of OUT columns.
*/
std::array<std::array<float, OUT>, IN> weights{};

/**
* Returns the weight from input neuron index `in` to output neuron index `out`.
*/
float getWeight(std::size_t in, std::size_t out) const { return weights[in][out]; }

static constexpr std::size_t inputSize = IN;
static constexpr std::size_t outputSize = OUT;
};

#endif
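
For reference, an illustrative host-side sketch of how the DenseLayer template above is meant to be used (not part of this PR; the sizes and values are made up):

#include "RecoTracker/LSTCore/interface/DenseLayer.h"

// Storage convention from the struct above: weights[in][out], one bias per output neuron.
int main() {
  DenseLayer<3, 2> layer;        // 3 inputs, 2 outputs
  layer.biases = {0.1f, -0.2f};  // one float per output neuron
  layer.weights[0][1] = 0.5f;    // weight from input neuron 0 to output neuron 1
  static_assert(DenseLayer<3, 2>::inputSize == 3 && DenseLayer<3, 2>::outputSize == 2);
  return layer.getWeight(0, 1) == 0.5f ? 0 : 1;
}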
140 changes: 140 additions & 0 deletions RecoTracker/LSTCore/interface/Dnn.h
@@ -0,0 +1,140 @@
#ifndef RecoTracker_LSTCore_interface_Dnn_h
#define RecoTracker_LSTCore_interface_Dnn_h

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

/**
* A general Dnn class that holds a sequence (tuple) of DenseLayer types,
* each with compile-time fixed dimensions.
*
* Layers: A parameter pack of layer types (e.g. DenseLayer<23,32>, DenseLayer<32,1>, etc.)
*/
template <class... Layers>
class Dnn {
Review comment: perhaps keep everything in lst namespace for now

public:
Dnn() = default;
explicit Dnn(const std::string& filename) { load(filename); }

/**
* Loads biases and weights for each layer in the tuple from a binary file.
*/
void load(const std::string& filename) {
std::ifstream file(filename, std::ios::binary);
if (!file) {
throw std::runtime_error("Failed to open file: " + filename);
}

loadLayers<0>(file);

if (!file.good()) {
throw std::runtime_error("Error reading from file: " + filename);
}
file.close();
}

/**
* Prints the biases and weights of each layer to stdout.
*/
void print() const { printLayers<0>(); }

/**
* A const reference to the underlying tuple of layers.
*/
const std::tuple<Layers...>& getLayers() const { return layers_; }

/**
* A reference to the underlying tuple of layers.
*/
std::tuple<Layers...>& getLayers() { return layers_; }

private:
// Store all layers in a compile-time tuple
std::tuple<Layers...> layers_;

/**
* Internal compile-time recursion for loading each layer from file
*/
template <std::size_t I>
typename std::enable_if<I == sizeof...(Layers), void>::type loadLayers(std::ifstream&) {
// Base case: no more layers to load
}

template <std::size_t I>
typename std::enable_if < I<sizeof...(Layers), void>::type loadLayers(std::ifstream& file) {
auto& layer = std::get<I>(layers_);

// Read and verify header information
uint32_t layer_id, num_inputs, num_outputs;
file.read(reinterpret_cast<char*>(&layer_id), sizeof(layer_id));
file.read(reinterpret_cast<char*>(&num_inputs), sizeof(num_inputs));
file.read(reinterpret_cast<char*>(&num_outputs), sizeof(num_outputs));

// Verify the dimensions match our template parameters
if (num_inputs != layer.inputSize || num_outputs != layer.outputSize) {
throw std::runtime_error("Layer " + std::to_string(I) +
" dimension mismatch: "
"expected " +
std::to_string(layer.inputSize) + "x" + std::to_string(layer.outputSize) + ", got " +
std::to_string(num_inputs) + "x" + std::to_string(num_outputs));
}

// Verify layer index matches
if (layer_id != I + 1) { // Assumes 1-based layer IDs
throw std::runtime_error("Layer index mismatch: expected " + std::to_string(I + 1) + ", got " +
std::to_string(layer_id));
}

// Read biases
file.read(reinterpret_cast<char*>(layer.biases.data()), layer.biases.size() * sizeof(float));

// Read weights row by row
for (auto& row : layer.weights) {
file.read(reinterpret_cast<char*>(row.data()), row.size() * sizeof(float));
}

if (!file.good()) {
throw std::runtime_error("Failed to read parameters for layer " + std::to_string(I));
}

// Recurse to next layer
loadLayers<I + 1>(file);
}

/**
* Internal compile-time recursion for printing each layer
*/
template <std::size_t I>
typename std::enable_if<I == sizeof...(Layers), void>::type printLayers() const {
// Base case: no more layers to print
}

template <std::size_t I>
typename std::enable_if < I<sizeof...(Layers), void>::type printLayers() const {
const auto& layer = std::get<I>(layers_);
std::cout << "\n=== Layer " << I + 1 << " ===\nInputs=" << layer.inputSize << ", Outputs=" << layer.outputSize
<< "\n\nBiases:\n";

for (float b : layer.biases) {
std::cout << b << " ";
}
std::cout << "\n\nWeights:\n";

for (std::size_t in = 0; in < layer.inputSize; ++in) {
std::cout << " [ ";
for (std::size_t out = 0; out < layer.outputSize; ++out) {
std::cout << layer.getWeight(in, out) << " ";
}
std::cout << "]\n";
}

// Recurse to next layer
printLayers<I + 1>();
}
};

#endif
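
A sketch of the binary layout that load() expects, as read back by loadLayers(): per layer, three uint32 header words (a 1-based layer id, the number of inputs, the number of outputs), then OUT bias floats, then IN rows of OUT weight floats. A hypothetical writer (the function name is an assumption, not an existing utility) could look like:

#include <cstdint>
#include <fstream>

// Writes one layer in the layout that Dnn::loadLayers() reads back.
template <class Layer>
void writeLayer(std::ofstream& out, const Layer& layer, std::uint32_t id) {
  const std::uint32_t num_inputs = layer.inputSize;
  const std::uint32_t num_outputs = layer.outputSize;
  out.write(reinterpret_cast<const char*>(&id), sizeof(id));  // 1-based layer id
  out.write(reinterpret_cast<const char*>(&num_inputs), sizeof(num_inputs));
  out.write(reinterpret_cast<const char*>(&num_outputs), sizeof(num_outputs));
  out.write(reinterpret_cast<const char*>(layer.biases.data()), layer.biases.size() * sizeof(float));
  for (const auto& row : layer.weights)  // IN rows of OUT floats
    out.write(reinterpret_cast<const char*>(row.data()), row.size() * sizeof(float));
}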
19 changes: 19 additions & 0 deletions RecoTracker/LSTCore/interface/DnnWeightsDevSoA.h
@@ -0,0 +1,19 @@
#ifndef RecoTracker_LSTCore_interface_DnnWeightsDevSoA_h
#define RecoTracker_LSTCore_interface_DnnWeightsDevSoA_h

#include "RecoTracker/LSTCore/interface/DenseLayer.h"

namespace lst {

/**
* Data structure holding multiple dense layers for the DNN weights.
*/
struct DnnWeightsDevData {
DenseLayer<23, 32> layer1;
DenseLayer<32, 32> layer2;
DenseLayer<32, 1> layer3;
Review comment on lines +12 to +14: don't we have named constants for these 23 and 32? ... also, why DevData? This doesn't look like a device-specific type.

};

} // namespace lst

#endif // RecoTracker_LSTCore_interface_DnnWeightsDevSoA_h
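
On the review comment above: NeuralNetwork.h already refers to these sizes as kinputFeatures and khiddenFeatures, so a possible shape with named constants and without the DevData suffix might look like the sketch below; the constant names and their placement are assumptions, not existing LSTCore definitions.

#include <cstddef>
#include "RecoTracker/LSTCore/interface/DenseLayer.h"

namespace lst {
  inline constexpr std::size_t kT5InputFeatures = 23;   // hypothetical name
  inline constexpr std::size_t kT5HiddenFeatures = 32;  // hypothetical name

  struct DnnWeightsData {
    DenseLayer<kT5InputFeatures, kT5HiddenFeatures> layer1;
    DenseLayer<kT5HiddenFeatures, kT5HiddenFeatures> layer2;
    DenseLayer<kT5HiddenFeatures, 1> layer3;
  };

}  // namespace lst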
21 changes: 17 additions & 4 deletions RecoTracker/LSTCore/interface/LSTESData.h
@@ -6,6 +6,8 @@
#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h"
#include "RecoTracker/LSTCore/interface/PixelMap.h"

#include "RecoTracker/LSTCore/interface/DnnWeightsDevSoA.h"
#include "DataFormats/Portable/interface/PortableObject.h"
#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h"

#include <memory>
@@ -23,21 +25,25 @@ namespace lst {
std::shared_ptr<const PortableCollection<EndcapGeometryDevSoA, TDev>> endcapGeometry;
// Host-side object that is shared between the LSTESData<TDev> objects for different devices
std::shared_ptr<const PixelMap> pixelMapping;

// ==== New DNN weights pointer ====
std::shared_ptr<const PortableObject<lst::DnnWeightsDevData, TDev>> dnnWeights;
LSTESData(uint16_t const& nModulesIn,
uint16_t const& nLowerModulesIn,
unsigned int const& nPixelsIn,
unsigned int const& nEndCapMapIn,
std::shared_ptr<const PortableMultiCollection<TDev, ModulesSoA, ModulesPixelSoA>> modulesIn,
std::shared_ptr<const PortableCollection<EndcapGeometryDevSoA, TDev>> endcapGeometryIn,
std::shared_ptr<const PixelMap> const& pixelMappingIn)
std::shared_ptr<const PixelMap> const& pixelMappingIn,
// New constructor argument for DNN
std::shared_ptr<const PortableObject<lst::DnnWeightsDevData, TDev>> dnnWeightsIn)
: nModules(nModulesIn),
nLowerModules(nLowerModulesIn),
nPixels(nPixelsIn),
nEndCapMap(nEndCapMapIn),
modules(std::move(modulesIn)),
endcapGeometry(std::move(endcapGeometryIn)),
pixelMapping(pixelMappingIn) {}
pixelMapping(pixelMappingIn),
dnnWeights(std::move(dnnWeightsIn)) {}
};

std::unique_ptr<LSTESData<alpaka_common::DevHost>> loadAndFillESHost(std::string& ptCutLabel);
@@ -54,16 +60,22 @@ namespace cms::alpakatools {
using TDev = alpaka::Dev<TQueue>;
std::shared_ptr<const PortableMultiCollection<TDev, lst::ModulesSoA, lst::ModulesPixelSoA>> deviceModules;
std::shared_ptr<const PortableCollection<lst::EndcapGeometryDevSoA, TDev>> deviceEndcapGeometry;
// === New pointer for the copied DNN weights ===
std::shared_ptr<const PortableObject<lst::DnnWeightsDevData, TDev>> deviceDnnWeights;

if constexpr (std::is_same_v<TDev, alpaka_common::DevHost>) {
deviceModules = srcData.modules;
deviceEndcapGeometry = srcData.endcapGeometry;
deviceDnnWeights = srcData.dnnWeights;
} else {
deviceModules = std::make_shared<PortableMultiCollection<TDev, lst::ModulesSoA, lst::ModulesPixelSoA>>(
CopyToDevice<PortableHostMultiCollection<lst::ModulesSoA, lst::ModulesPixelSoA>>::copyAsync(
queue, *srcData.modules));
deviceEndcapGeometry = std::make_shared<PortableCollection<lst::EndcapGeometryDevSoA, TDev>>(
CopyToDevice<PortableHostCollection<lst::EndcapGeometryDevSoA>>::copyAsync(queue, *srcData.endcapGeometry));
// Copy the DNN weights to device
deviceDnnWeights = std::make_shared<PortableObject<lst::DnnWeightsDevData, TDev>>(
CopyToDevice<PortableHostObject<lst::DnnWeightsDevData>>::copyAsync(queue, *srcData.dnnWeights));
}

return lst::LSTESData<alpaka::Dev<TQueue>>(srcData.nModules,
@@ -72,7 +84,8 @@
srcData.nEndCapMap,
std::move(deviceModules),
std::move(deviceEndcapGeometry),
srcData.pixelMapping);
srcData.pixelMapping,
std::move(deviceDnnWeights));
}
};
} // namespace cms::alpakatools
24 changes: 22 additions & 2 deletions RecoTracker/LSTCore/src/LSTESData.cc
@@ -3,7 +3,9 @@
#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h"
#include "RecoTracker/LSTCore/interface/TiltedGeometry.h"
#include "RecoTracker/LSTCore/interface/PixelMap.h"

#include "RecoTracker/LSTCore/interface/Dnn.h"
#include "RecoTracker/LSTCore/interface/DenseLayer.h"
#include "RecoTracker/LSTCore/interface/DnnWeightsDevSoA.h"
#include "ModuleMethods.h"

#include <filesystem>
@@ -111,11 +113,29 @@ std::unique_ptr<lst::LSTESData<alpaka_common::DevHost>> lst::loadAndFillESHost(s
tiltedGeometry,
moduleConnectionMap);
auto pixelMappingPtr = std::make_shared<PixelMap>(std::move(pixelMapping));

// === Load from the DNN instance ===
auto model = Dnn<DenseLayer<23, 32>, DenseLayer<32, 32>, DenseLayer<32, 1>>("../standalone/analysis/DNN/network_weights.bin");

// Copy the loaded model into a host DnnWeightsDevData struct
lst::DnnWeightsDevData hostDnn;
{
auto const& layers = model.getLayers();
hostDnn.layer1 = std::get<0>(layers);
hostDnn.layer2 = std::get<1>(layers);
hostDnn.layer3 = std::get<2>(layers);
}

// Wrap it in a PortableHostObject so it can be copied to device
auto hostDnnWeights = std::make_shared<PortableHostObject<lst::DnnWeightsDevData>>(cms::alpakatools::host());
hostDnnWeights->value() = hostDnn;

return std::make_unique<LSTESData<alpaka_common::DevHost>>(nModules,
nLowerModules,
nPixels,
endcapGeometry.nEndCapMap,
std::move(modulesBuffers),
std::move(endcapGeometryDev),
pixelMappingPtr);
pixelMappingPtr,
hostDnnWeights);
}
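
A possible compile-time cross-check (a sketch, not in this PR) that the Dnn instantiation above stays in sync with the lst::DnnWeightsDevData members it is copied into:

#include <type_traits>

static_assert(std::is_same_v<decltype(lst::DnnWeightsDevData::layer1), DenseLayer<23, 32>>);
static_assert(std::is_same_v<decltype(lst::DnnWeightsDevData::layer2), DenseLayer<32, 32>>);
static_assert(std::is_same_v<decltype(lst::DnnWeightsDevData::layer3), DenseLayer<32, 1>>);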
1 change: 1 addition & 0 deletions RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc
@@ -914,6 +914,7 @@ void LSTEvent::createQuintuplets() {
quintupletsDC_->view<QuintupletsOccupancySoA>(),
rangesDC_->const_view(),
nEligibleT5Modules,
dnnWeights_.data(),
ptCut_);

Vec3D const threadsPerBlockDupQuint{1, 16, 16};
Expand Down
4 changes: 4 additions & 0 deletions RecoTracker/LSTCore/src/alpaka/LSTEvent.h
@@ -25,6 +25,8 @@
#include "RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h"
#include "RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h"
#include "RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h"
#include "RecoTracker/LSTCore/interface/DnnWeightsDevSoA.h"
#include "DataFormats/Portable/interface/PortableObject.h"

#include "Hit.h"
#include "Kernels.h"
@@ -78,6 +80,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
ModulesDeviceCollection const& modules_;
PixelMap const& pixelMapping_;
EndcapGeometryDevDeviceCollection const& endcapGeometry_;
PortableObject<lst::DnnWeightsDevData, Device> const& dnnWeights_;
bool addObjects_;

public:
@@ -92,6 +95,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
modules_(*deviceESData->modules),
pixelMapping_(*deviceESData->pixelMapping),
endcapGeometry_(*deviceESData->endcapGeometry),
dnnWeights_(*deviceESData->dnnWeights),
addObjects_(verbose) {
if (pt_cut < 0.6f) {
throw std::invalid_argument("Minimum pT cut must be at least 0.6 GeV. Provided value: " +
17 changes: 10 additions & 7 deletions RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h
@@ -7,6 +7,7 @@
#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h"

#include "NeuralNetworkWeights.h"
#include "RecoTracker/LSTCore/interface/DnnWeightsDevSoA.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE::lst::t5dnn {

@@ -24,10 +25,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst::t5dnn {
}

template <int IN_FEATURES, int OUT_FEATURES>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void linear_layer(const float (&input)[IN_FEATURES],
float (&output)[OUT_FEATURES],
const float (&weights)[IN_FEATURES][OUT_FEATURES],
const float (&biases)[OUT_FEATURES]) {
ALPAKA_FN_ACC ALPAKA_FN_INLINE void linear_layer(
const float (&input)[IN_FEATURES],
float (&output)[OUT_FEATURES],
const std::array<std::array<float, OUT_FEATURES>, IN_FEATURES>& weights,
Review comment (GNiendorf, Member / Author, Jan 7, 2025): Line 31 here, changing the weight arrays to these 2D std::arrays, is what causes most of the timing increase on GPU.

Without changing this line (but keeping all other changes, including the bias change): [screenshot, 2025-01-07 5:55 PM]

After changing this line (and passing in the 2D std::arrays from the dnnPtr below): [screenshot, 2025-01-07 4:51 PM]

const std::array<float, OUT_FEATURES>& biases) {
CMS_UNROLL_LOOP
for (unsigned int i = 0; i < OUT_FEATURES; ++i) {
output[i] = biases[i];
@@ -52,6 +54,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst::t5dnn {

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runInference(TAcc const& acc,
lst::DnnWeightsDevData const* dnnPtr,
MiniDoubletsConst mds,
const unsigned int mdIndex1,
const unsigned int mdIndex2,
@@ -126,15 +129,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst::t5dnn {
float x_3[1]; // Layer 3 linear output

// Layer 1: Linear + Relu
linear_layer<kinputFeatures, khiddenFeatures>(x, x_1, wgtT_layer1, bias_layer1);
linear_layer<kinputFeatures, khiddenFeatures>(x, x_1, dnnPtr->layer1.weights, dnnPtr->layer1.biases);
relu_activation<khiddenFeatures>(x_1);

// Layer 2: Linear + Relu
linear_layer<khiddenFeatures, khiddenFeatures>(x_1, x_2, wgtT_layer2, bias_layer2);
linear_layer<khiddenFeatures, khiddenFeatures>(x_1, x_2, dnnPtr->layer2.weights, dnnPtr->layer2.biases);
relu_activation<khiddenFeatures>(x_2);

// Layer 3: Linear + Sigmoid
linear_layer<khiddenFeatures, 1>(x_2, x_3, wgtT_output_layer, bias_output_layer);
linear_layer<khiddenFeatures, 1>(x_2, x_3, dnnPtr->layer3.weights, dnnPtr->layer3.biases);
float x_5 = sigmoid_activation(acc, x_3[0]);

// Get the bin index based on abs(eta) of first hit and t5_pt
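
For reference, a host-side sketch of the forward pass that runInference performs with these weights (linear + ReLU, linear + ReLU, linear + sigmoid). The input-feature construction and the eta/pt-binned working-point cut applied to the score are not reproduced, and the function names are assumptions:

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include "RecoTracker/LSTCore/interface/DnnWeightsDevSoA.h"

// One dense layer, y_j = b_j + sum_i x_i * weights[i][j], matching linear_layer above.
template <std::size_t IN, std::size_t OUT>
std::array<float, OUT> denseForward(const DenseLayer<IN, OUT>& layer, const std::array<float, IN>& x) {
  std::array<float, OUT> y = layer.biases;
  for (std::size_t i = 0; i < IN; ++i)
    for (std::size_t j = 0; j < OUT; ++j)
      y[j] += x[i] * layer.getWeight(i, j);
  return y;
}

float t5Score(const lst::DnnWeightsDevData& w, const std::array<float, 23>& x) {
  auto h1 = denseForward(w.layer1, x);
  for (float& v : h1) v = std::max(v, 0.f);  // ReLU
  auto h2 = denseForward(w.layer2, h1);
  for (float& v : h2) v = std::max(v, 0.f);  // ReLU
  const float z = denseForward(w.layer3, h2)[0];
  return 1.f / (1.f + std::exp(-z));  // sigmoid
}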