diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 65af9582..044e35c5 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -21,6 +21,7 @@ add_subdirectory(Example16) add_subdirectory(Example17) add_subdirectory(Example18) add_subdirectory(Example19) +add_subdirectory(Example20) add_subdirectory(TestEm3) add_subdirectory(TestEm3MT) add_subdirectory(TestEm3Compact) diff --git a/examples/Example20/CMakeLists.txt b/examples/Example20/CMakeLists.txt new file mode 100644 index 00000000..8db86260 --- /dev/null +++ b/examples/Example20/CMakeLists.txt @@ -0,0 +1,47 @@ +# SPDX-FileCopyrightText: 2022 CERN +# SPDX-License-Identifier: Apache-2.0 + +set(TargetName example20) + +if(NOT TARGET G4HepEm::g4HepEm) + message(STATUS "Disabling ${TargetName} (needs G4HepEm)") + return() +endif() + +if(Geant4_FOUND) + if(NOT Geant4_gdml_FOUND) + message(STATUS "Disabling ${TargetName} (needs Geant4 with GDML support)") + return() + endif() +else() + message(STATUS "Disabling ${TargetName} (needs Geant4)") + return() +endif() + +add_executable(${TargetName} + main.cpp + main.cu + electrons.cu + gammas.cu) +target_link_libraries(${TargetName} + PRIVATE + AdePT + CopCore::CopCore + VecGeom::vecgeom + VecGeom::vecgeomcuda_static + VecGeom::vgdml + ${Geant4_LIBRARIES} + G4HepEm::g4HepEmData + G4HepEm::g4HepEmInit + G4HepEm::g4HepEmRun + CUDA::cudart) + +set_target_properties(${TargetName} PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON) + +# NVTX +target_link_libraries(${TargetName} PRIVATE NVTX) + +# Tests +add_test(NAME ${TargetName} + COMMAND $ -gdml_file ${CMAKE_BINARY_DIR}/cms2018.gdml) + diff --git a/examples/Example20/README.md b/examples/Example20/README.md new file mode 100644 index 00000000..891d39b0 --- /dev/null +++ b/examples/Example20/README.md @@ -0,0 +1,61 @@ + + +## Example 20 + +Example based on Example 19, but smaller kernels are fed with queues instead of the `SOAData` structure. + + * arbitrary geometry via gdml file (tested with cms2018.gdml from VecGeom persistency/gdml/gdmls folder) and optionally a magnetic field with constant Bz, + * geometry read as Geant4 geometry, reading in regions and cuts, to initialize G4HepEm data + * geometry read then into VecGeom, and synchronized to GPU + * G4HepEm material-cuts couple indices mapped to VecGeom logical volume id's + * physics processes for e-/e+ (including MSC) and gammas using G4HepEm. + * scoring per placed volume, no sensitive detector feature + * configurable particle gun via command line arguments + * E.g., use `-gunpos -220,0,0 -gundir 1,0,0` for `testEm3.gdml`. For `cms2018.gdml`, the default gun is correct. + +Electrons, positrons, and gammas are stored in separate containers in device memory. +Free positions in the storage are handed out with monotonic slot numbers, slots are not reused. +Active tracks are passed via three queues per particle type (see `struct ParticleQueues`). +Results are reproducible using one RANLUX++ state per track. + +Additionally, the kernels score energy deposit and the charged track length per volume. + +### Kernels + +This example uses one stream per particle type to launch kernels asynchronously. +They are synchronized via a fourth stream using CUDA events. + +#### `TransportElectrons` + +1. Obtain safety unless the track is currently on a boundary. +2. Determine physics step limit, including conversion to geometric step length according to MSC. +3. Query geometry (or optionally magnetic field) to get geometry step length. +4. Convert geometry to true step length according to MSC, apply net direction change and displacement. +5. Apply continuous effects; kill track if stopped. +6. If the particle reaches a boundary, perform relocation. +7. If not, and if there is a discrete process, hand over to interaction kernel. + +#### `TransportGammas` + +1. Determine the physics step limit. +2. Query VecGeom to get geometry step length (no magnetic field for neutral particles!). +3. If the particle reaches a boundary, perform relocation. +4. If not, and if there is a discrete process, hand over to interaction kernel. + +#### Interaction kernels +In electrons.cu and gammas.cu, there is dedicated kernels for all discrete processes. These were +extracted from the main transport kernels in example18. +1. Sample the final state. +2. Update the primary and produce secondaries. + +#### `FinishIteration` + +Clear the queues and return the tracks in flight. +This kernel runs after all secondary particles were produced. + +#### `InitPrimaries` and `InitParticleQueues` + +Used to initialize multiple primary particles with separate seeds. diff --git a/examples/Example20/electrons.cu b/examples/Example20/electrons.cu new file mode 100644 index 00000000..6dd1a44b --- /dev/null +++ b/examples/Example20/electrons.cu @@ -0,0 +1,444 @@ +// SPDX-FileCopyrightText: 2022 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example.cuh" + +#include +#include + +#include + +#define NOFLUCTUATION + +#include +#include +#include +#include +#include +#include +// Pull in implementation. +#include +#include +#include +#include +#include +#include +#include + +// Compute the physics and geometry step limit, transport the electrons while +// applying the continuous effects and maybe a discrete process that could +// generate secondaries. +template +static __device__ __forceinline__ void TransportElectrons(Track *electrons, const adept::MParray *active, + Secondaries &secondaries, adept::MParray *activeQueue, + Interactions interactions, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ +#ifdef VECGEOM_FLOAT_PRECISION + const Precision kPush = 10 * vecgeom::kTolerance; +#else + const Precision kPush = 0.; +#endif + constexpr int Charge = IsElectron ? -1 : 1; + constexpr double Mass = copcore::units::kElectronMassC2; + fieldPropagatorConstBz fieldPropagatorBz(BzFieldValue); + + int activeSize = active->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int globalSlot = (*active)[i]; + Track ¤tTrack = electrons[globalSlot]; + auto energy = currentTrack.energy; + auto pos = currentTrack.pos; + auto dir = currentTrack.dir; + auto navState = currentTrack.navState; + const auto volume = navState.Top(); + const int volumeID = volume->id(); + // the MCC vector is indexed by the logical volume id + const int lvolID = volume->GetLogicalVolume()->id(); + const int theMCIndex = MCIndex[lvolID]; + + auto survive = [&](bool push = true) { + currentTrack.energy = energy; + currentTrack.pos = pos; + currentTrack.dir = dir; + currentTrack.navState = navState; + if (push) activeQueue->push_back(globalSlot); + }; + + // Init a track with the needed data to call into G4HepEm. + G4HepEmElectronTrack elTrack; + G4HepEmTrack *theTrack = elTrack.GetTrack(); + theTrack->SetEKin(energy); + theTrack->SetMCIndex(theMCIndex); + theTrack->SetOnBoundary(navState.IsOnBoundary()); + theTrack->SetCharge(Charge); + G4HepEmMSCTrackData *mscData = elTrack.GetMSCTrackData(); + mscData->fIsFirstStep = currentTrack.initialRange < 0; + mscData->fInitialRange = currentTrack.initialRange; + mscData->fDynamicRangeFactor = currentTrack.dynamicRangeFactor; + mscData->fTlimitMin = currentTrack.tlimitMin; + + // Prepare a branched RNG state while threads are synchronized. Even if not + // used, this provides a fresh round of random numbers and reduces thread + // divergence because the RNG state doesn't need to be advanced later. + RanluxppDouble newRNG(currentTrack.rngState.BranchNoAdvance()); + + // Compute safety, needed for MSC step limit. + double safety = 0; + if (!navState.IsOnBoundary()) { + safety = BVHNavigator::ComputeSafety(pos, navState); + } + theTrack->SetSafety(safety); + + G4HepEmRandomEngine rnge(¤tTrack.rngState); + + // Sample the `number-of-interaction-left` and put it into the track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = currentTrack.numIALeft[ip]; + if (numIALeft <= 0) { + numIALeft = -std::log(currentTrack.Uniform()); + } + theTrack->SetNumIALeft(numIALeft, ip); + } + + // Call G4HepEm to compute the physics step limit. + // G4HepEmElectronManager::HowFar(&g4HepEmData, &g4HepEmPars, &elTrack, &rnge); + G4HepEmElectronManager::HowFarToDiscreteInteraction(&g4HepEmData, &g4HepEmPars, &elTrack); + + bool restrictedPhysicalStepLength = false; + if (BzFieldValue != 0) { + const double momentumMag = sqrt(energy * (energy + 2.0 * Mass)); + // Distance along the track direction to reach the maximum allowed error + const double safeLength = fieldPropagatorBz.ComputeSafeLength(momentumMag, Charge, dir); + + constexpr int MaxSafeLength = 10; + double limit = MaxSafeLength * safeLength; + limit = safety > limit ? safety : limit; + + double physicalStepLength = elTrack.GetPStepLength(); + if (physicalStepLength > limit) { + physicalStepLength = limit; + restrictedPhysicalStepLength = true; + elTrack.SetPStepLength(physicalStepLength); + // Note: We are limiting the true step length, which is converted to + // a shorter geometry step length in HowFarToMSC. In that sense, the + // limit is an over-approximation, but that is fine for our purpose. + } + } + + G4HepEmElectronManager::HowFarToMSC(&g4HepEmData, &g4HepEmPars, &elTrack, &rnge); + + // Remember MSC values for the next step(s). + currentTrack.initialRange = mscData->fInitialRange; + currentTrack.dynamicRangeFactor = mscData->fDynamicRangeFactor; + currentTrack.tlimitMin = mscData->fTlimitMin; + + // Get result into variables. + double geometricalStepLengthFromPhysics = theTrack->GetGStepLength(); + // The phyiscal step length is the amount that the particle experiences + // which might be longer than the geometrical step length due to MSC. As + // long as we call PerformContinuous in the same kernel we don't need to + // care, but we need to make this available when splitting the operations. + // double physicalStepLength = elTrack.GetPStepLength(); + int winnerProcessIndex = theTrack->GetWinnerProcessIndex(); + // Leave the range and MFP inside the G4HepEmTrack. If we split kernels, we + // also need to carry them over! + + // Check if there's a volume boundary in between. + bool propagated = true; + double geometryStepLength; + vecgeom::NavStateIndex nextState; + if (BzFieldValue != 0) { + geometryStepLength = fieldPropagatorBz.ComputeStepAndNextVolume( + energy, Mass, Charge, geometricalStepLengthFromPhysics, pos, dir, navState, nextState, propagated, safety); + } else { + geometryStepLength = BVHNavigator::ComputeStepAndNextVolume(pos, dir, geometricalStepLengthFromPhysics, navState, + nextState, kPush); + pos += geometryStepLength * dir; + } + + // Set boundary state in navState so the next step and secondaries get the + // correct information (navState = nextState only if relocated + // in case of a boundary; see below) + navState.SetBoundaryState(nextState.IsOnBoundary()); + + // Propagate information from geometrical step to MSC. + theTrack->SetDirection(dir.x(), dir.y(), dir.z()); + theTrack->SetGStepLength(geometryStepLength); + theTrack->SetOnBoundary(nextState.IsOnBoundary()); + + // Apply continuous effects. + bool stopped = G4HepEmElectronManager::PerformContinuous(&g4HepEmData, &g4HepEmPars, &elTrack, &rnge); + + // Collect the direction change and displacement by MSC. + const double *direction = theTrack->GetDirection(); + dir.Set(direction[0], direction[1], direction[2]); + if (!nextState.IsOnBoundary()) { + const double *mscDisplacement = mscData->GetDisplacement(); + vecgeom::Vector3D displacement(mscDisplacement[0], mscDisplacement[1], mscDisplacement[2]); + const double dLength2 = displacement.Length2(); + constexpr double kGeomMinLength = 5 * copcore::units::nm; // 0.05 [nm] + constexpr double kGeomMinLength2 = kGeomMinLength * kGeomMinLength; // (0.05 [nm])^2 + if (dLength2 > kGeomMinLength2) { + const double dispR = std::sqrt(dLength2); + // Estimate safety by subtracting the geometrical step length. + safety -= geometryStepLength; + constexpr double sFact = 0.99; + double reducedSafety = sFact * safety; + + // Apply displacement, depending on how close we are to a boundary. + // 1a. Far away from geometry boundary: + if (reducedSafety > 0.0 && dispR <= reducedSafety) { + pos += displacement; + } else { + // Recompute safety. + safety = BVHNavigator::ComputeSafety(pos, navState); + reducedSafety = sFact * safety; + + // 1b. Far away from geometry boundary: + if (reducedSafety > 0.0 && dispR <= reducedSafety) { + pos += displacement; + // 2. Push to boundary: + } else if (reducedSafety > kGeomMinLength) { + pos += displacement * (reducedSafety / dispR); + } + // 3. Very small safety: do nothing. + } + } + } + + // Collect the charged step length (might be changed by MSC). + atomicAdd(&globalScoring->chargedSteps, 1); + atomicAdd(&scoringPerVolume->chargedTrackLength[volumeID], elTrack.GetPStepLength()); + + // Collect the changes in energy and deposit. + energy = theTrack->GetEKin(); + double energyDeposit = theTrack->GetEnergyDeposit(); + atomicAdd(&globalScoring->energyDeposit, energyDeposit); + atomicAdd(&scoringPerVolume->energyDeposit[volumeID], energyDeposit); + + // Save the `number-of-interaction-left` in our track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = theTrack->GetNumIALeft(ip); + currentTrack.numIALeft[ip] = numIALeft; + } + + if (stopped) { + if (!IsElectron) { + // Annihilate the stopped positron into two gammas heading to opposite + // directions (isotropic). + Track &gamma1 = secondaries.gammas.NextTrack(); + Track &gamma2 = secondaries.gammas.NextTrack(); + atomicAdd(&globalScoring->numGammas, 2); + + const double cost = 2 * currentTrack.Uniform() - 1; + const double sint = sqrt(1 - cost * cost); + const double phi = k2Pi * currentTrack.Uniform(); + double sinPhi, cosPhi; + sincos(phi, &sinPhi, &cosPhi); + + gamma1.InitAsSecondary(pos, navState); + newRNG.Advance(); + gamma1.rngState = newRNG; + gamma1.energy = copcore::units::kElectronMassC2; + gamma1.dir.Set(sint * cosPhi, sint * sinPhi, cost); + + gamma2.InitAsSecondary(pos, navState); + // Reuse the RNG state of the dying track. + gamma2.rngState = currentTrack.rngState; + gamma2.energy = copcore::units::kElectronMassC2; + gamma2.dir = -gamma1.dir; + } + // Particles are killed by not enqueuing them into the new activeQueue. + continue; + } + + if (nextState.IsOnBoundary()) { + // For now, just count that we hit something. + atomicAdd(&globalScoring->hits, 1); + + // Kill the particle if it left the world. + if (nextState.Top() != nullptr) { + BVHNavigator::RelocateToNextVolume(pos, dir, nextState); + + // Move to the next boundary. + navState = nextState; + survive(); + } + continue; + } else if (!propagated || restrictedPhysicalStepLength) { + // Did not yet reach the interaction point due to error in the magnetic + // field propagation. Try again next time. + survive(); + continue; + } else if (winnerProcessIndex < 0) { + // No discrete process, move on. + survive(); + continue; + } + + // Reset number of interaction left for the winner discrete process. + // (Will be resampled in the next iteration.) + currentTrack.numIALeft[winnerProcessIndex] = -1.0; + + // Check if a delta interaction happens instead of the real discrete process. + if (G4HepEmElectronManager::CheckDelta(&g4HepEmData, theTrack, currentTrack.Uniform())) { + // A delta interaction happened, move on. + survive(); + continue; + } + + interactions.queues[winnerProcessIndex]->push_back(globalSlot); + + survive(false); + } +} + +// Instantiate kernels for electrons and positrons. +__global__ void TransportElectrons(Track *electrons, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, Interactions interactions, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + TransportElectrons(electrons, active, secondaries, activeQueue, interactions, globalScoring, + scoringPerVolume); +} +__global__ void TransportPositrons(Track *positrons, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, Interactions interactions, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + TransportElectrons(positrons, active, secondaries, activeQueue, interactions, globalScoring, + scoringPerVolume); +} + +template +__device__ void ElectronInteraction(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + int activeSize = active->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int globalSlot = (*active)[i]; + Track ¤tTrack = particles[globalSlot]; + auto energy = currentTrack.energy; + const auto pos = currentTrack.pos; + auto dir = currentTrack.dir; + const auto navState = currentTrack.navState; + const auto volume = navState.Top(); + // the MCC vector is indexed by the logical volume id + const int lvolID = volume->GetLogicalVolume()->id(); + const int theMCIndex = MCIndex[lvolID]; + + auto survive = [&] { + currentTrack.dir = dir; + currentTrack.energy = energy; + activeQueue->push_back(globalSlot); + }; + + const double theElCut = g4HepEmData.fTheMatCutData->fMatCutData[theMCIndex].fSecElProdCutE; + + RanluxppDouble newRNG{currentTrack.rngState.Branch()}; + G4HepEmRandomEngine rnge{¤tTrack.rngState}; + + if constexpr (ProcessIndex == 0) { + // Invoke ionization (for e-/e+): + double deltaEkin = (IsElectron) ? G4HepEmElectronInteractionIoni::SampleETransferMoller(theElCut, energy, &rnge) + : G4HepEmElectronInteractionIoni::SampleETransferBhabha(theElCut, energy, &rnge); + + double dirPrimary[] = {dir.x(), dir.y(), dir.z()}; + double dirSecondary[3]; + G4HepEmElectronInteractionIoni::SampleDirections(energy, deltaEkin, dirSecondary, dirPrimary, &rnge); + + Track &secondary = secondaries.electrons.NextTrack(); + atomicAdd(&globalScoring->numElectrons, 1); + + secondary.InitAsSecondary(pos, navState); + secondary.rngState = newRNG; + secondary.energy = deltaEkin; + secondary.dir.Set(dirSecondary[0], dirSecondary[1], dirSecondary[2]); + + energy -= deltaEkin; + dir.Set(dirPrimary[0], dirPrimary[1], dirPrimary[2]); + survive(); + } else if constexpr (ProcessIndex == 1) { + // Invoke model for Bremsstrahlung: either SB- or Rel-Brem. + double logEnergy = std::log(energy); + double deltaEkin = energy < g4HepEmPars.fElectronBremModelLim + ? G4HepEmElectronInteractionBrem::SampleETransferSB(&g4HepEmData, energy, logEnergy, + theMCIndex, &rnge, IsElectron) + : G4HepEmElectronInteractionBrem::SampleETransferRB(&g4HepEmData, energy, logEnergy, + theMCIndex, &rnge, IsElectron); + + double dirPrimary[] = {dir.x(), dir.y(), dir.z()}; + double dirSecondary[3]; + G4HepEmElectronInteractionBrem::SampleDirections(energy, deltaEkin, dirSecondary, dirPrimary, &rnge); + + Track &gamma = secondaries.gammas.NextTrack(); + atomicAdd(&globalScoring->numGammas, 1); + + gamma.InitAsSecondary(pos, navState); + gamma.rngState = newRNG; + gamma.energy = deltaEkin; + gamma.dir.Set(dirSecondary[0], dirSecondary[1], dirSecondary[2]); + + energy -= deltaEkin; + dir.Set(dirPrimary[0], dirPrimary[1], dirPrimary[2]); + survive(); + } else if constexpr (ProcessIndex == 2) { + // Invoke annihilation (in-flight) for e+ + double dirPrimary[] = {dir.x(), dir.y(), dir.z()}; + double theGamma1Ekin, theGamma2Ekin; + double theGamma1Dir[3], theGamma2Dir[3]; + G4HepEmPositronInteractionAnnihilation::SampleEnergyAndDirectionsInFlight( + energy, dirPrimary, &theGamma1Ekin, theGamma1Dir, &theGamma2Ekin, theGamma2Dir, &rnge); + + Track &gamma1 = secondaries.gammas.NextTrack(); + Track &gamma2 = secondaries.gammas.NextTrack(); + atomicAdd(&globalScoring->numGammas, 2); + + gamma1.InitAsSecondary(pos, navState); + gamma1.rngState = newRNG; + gamma1.energy = theGamma1Ekin; + gamma1.dir.Set(theGamma1Dir[0], theGamma1Dir[1], theGamma1Dir[2]); + + gamma2.InitAsSecondary(pos, navState); + // Reuse the RNG state of the dying track. + gamma2.rngState = currentTrack.rngState; + gamma2.energy = theGamma2Ekin; + gamma2.dir.Set(theGamma2Dir[0], theGamma2Dir[1], theGamma2Dir[2]); + + // The current track is killed by not enqueuing into the next activeQueue. + } + } +} + +__global__ void IonizationEl(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + ElectronInteraction(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} +__global__ void BremsstrahlungEl(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + ElectronInteraction(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} + +__global__ void IonizationPos(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + ElectronInteraction(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} +__global__ void BremsstrahlungPos(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + ElectronInteraction(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} +__global__ void AnnihilationPos(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + ElectronInteraction(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} diff --git a/examples/Example20/example.cuh b/examples/Example20/example.cuh new file mode 100644 index 00000000..0f274325 --- /dev/null +++ b/examples/Example20/example.cuh @@ -0,0 +1,174 @@ +// SPDX-FileCopyrightText: 2022 CERN +// SPDX-License-Identifier: Apache-2.0 + +#ifndef EXAMPLE_CUH +#define EXAMPLE_CUH + +#include "example.h" + +#include +#include +#include + +#include +#include +#include + +#include +#include + +constexpr int ThreadsPerBlock = 256; + +// A data structure to represent a particle track. The particle type is implicit +// by the queue and not stored in memory. +struct Track { + using Precision = vecgeom::Precision; + RanluxppDouble rngState; + double energy; + double numIALeft[3]; + double initialRange; + double dynamicRangeFactor; + double tlimitMin; + + vecgeom::Vector3D pos; + vecgeom::Vector3D dir; + vecgeom::NavStateIndex navState; + + __host__ __device__ double Uniform() { return rngState.Rndm(); } + + __host__ __device__ void InitAsSecondary(const vecgeom::Vector3D &parentPos, + const vecgeom::NavStateIndex &parentNavState) + { + // The caller is responsible to branch a new RNG state and to set the energy. + this->numIALeft[0] = -1.0; + this->numIALeft[1] = -1.0; + this->numIALeft[2] = -1.0; + + this->initialRange = -1.0; + this->dynamicRangeFactor = -1.0; + this->tlimitMin = -1.0; + + // A secondary inherits the position of its parent; the caller is responsible + // to update the directions. + this->pos = parentPos; + this->navState = parentNavState; + } +}; + +#ifdef __CUDA_ARCH__ +// Define inline implementations of the RNG methods for the device. +// (nvcc ignores the __device__ attribute in definitions, so this is only to +// communicate the intent.) +inline __device__ double G4HepEmRandomEngine::flat() +{ + return ((RanluxppDouble *)fObject)->Rndm(); +} + +inline __device__ void G4HepEmRandomEngine::flatArray(const int size, double *vect) +{ + for (int i = 0; i < size; i++) { + vect[i] = ((RanluxppDouble *)fObject)->Rndm(); + } +} +#endif + +// A data structure to manage slots in the track storage. +class SlotManager { + adept::Atomic_t fNextSlot; + const int fMaxSlot; + +public: + __host__ __device__ SlotManager(int maxSlot) : fMaxSlot(maxSlot) { fNextSlot = 0; } + + __host__ __device__ int NextSlot() + { + int next = fNextSlot.fetch_add(1); + if (next >= fMaxSlot) return -1; + return next; + } +}; + +// A bundle of pointers to generate particles of an implicit type. +class ParticleGenerator { + Track *fTracks; + SlotManager *fSlotManager; + adept::MParray *fActiveQueue; + +public: + __host__ __device__ ParticleGenerator(Track *tracks, SlotManager *slotManager, adept::MParray *activeQueue) + : fTracks(tracks), fSlotManager(slotManager), fActiveQueue(activeQueue) + { + } + + __host__ __device__ Track &NextTrack() + { + int slot = fSlotManager->NextSlot(); + if (slot == -1) { + COPCORE_EXCEPTION("No slot available in ParticleGenerator::NextTrack"); + } + fActiveQueue->push_back(slot); + return fTracks[slot]; + } +}; + +// A bundle of generators for the three particle types. +struct Secondaries { + ParticleGenerator electrons; + ParticleGenerator positrons; + ParticleGenerator gammas; +}; + +struct Interactions { + adept::MParray *queues[3]; +}; + +// Kernels in different TUs. +__global__ void TransportElectrons(Track *electrons, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, Interactions interactions, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); +__global__ void TransportPositrons(Track *positrons, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, Interactions interactions, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); + +__global__ void TransportGammas(Track *gammas, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, Interactions interactions, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); + +__global__ void IonizationEl(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); +__global__ void BremsstrahlungEl(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); + +__global__ void IonizationPos(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); +__global__ void BremsstrahlungPos(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); +__global__ void AnnihilationPos(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); + +__global__ void PairCreation(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); +__global__ void ComptonScattering(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); +__global__ void PhotoelectricEffect(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume); + +// Constant data structures from G4HepEm accessed by the kernels. +// (defined in TestEm3.cu) +extern __constant__ __device__ struct G4HepEmParameters g4HepEmPars; +extern __constant__ __device__ struct G4HepEmData g4HepEmData; + +extern __constant__ __device__ int *MCIndex; + +// constexpr vecgeom::Precision BzFieldValue = 3.8 * copcore::units::tesla; +constexpr vecgeom::Precision BzFieldValue = 0; + +#endif diff --git a/examples/Example20/example.h b/examples/Example20/example.h new file mode 100644 index 00000000..743727a2 --- /dev/null +++ b/examples/Example20/example.h @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: 2021 CERN +// SPDX-License-Identifier: Apache-2.0 + +#ifndef EXAMPLE_H +#define EXAMPLE_H + +struct G4HepEmState; + +// Data structures for scoring. The accessors must make sure to use atomic operations if needed. +struct GlobalScoring { + double energyDeposit; + // Not int to avoid overflows for more than 100,000 events; unsigned long long + // is the only other data type available for atomicAdd(). + unsigned long long chargedSteps; + unsigned long long neutralSteps; + unsigned long long hits; + unsigned long long numGammas; + unsigned long long numElectrons; + unsigned long long numPositrons; + // Not used on the device, filled in by the host. + unsigned long long numKilled; +}; + +struct ScoringPerVolume { + double *energyDeposit; + double *chargedTrackLength; +}; + +struct GunConfig { + float position[3]; + float direction[3]; + bool movingGun; +}; + +// Interface between C++ and CUDA. +void runGPU(int numParticles, double energy, int batch, const int *MCCindex, ScoringPerVolume *scoringPerVolume, + GlobalScoring *globalScoring, int numVolumes, int numPlaced, G4HepEmState *state, GunConfig gunConfig); + +#endif diff --git a/examples/Example20/gammas.cu b/examples/Example20/gammas.cu new file mode 100644 index 00000000..a4607cd0 --- /dev/null +++ b/examples/Example20/gammas.cu @@ -0,0 +1,276 @@ +// SPDX-FileCopyrightText: 2022 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example.cuh" + +#include + +#include + +#include +#include +#include +#include +#include +#include +// Pull in implementation. +#include +#include +#include +#include + +__global__ void TransportGammas(Track *gammas, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, Interactions interactions, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ +#ifdef VECGEOM_FLOAT_PRECISION + const Precision kPush = 10 * vecgeom::kTolerance; +#else + const Precision kPush = 0.; +#endif + int activeSize = active->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int slot = (*active)[i]; + Track ¤tTrack = gammas[slot]; + const auto energy = currentTrack.energy; + auto pos = currentTrack.pos; + const auto dir = currentTrack.dir; + auto navState = currentTrack.navState; + const auto volume = navState.Top(); + const int volumeID = volume->id(); + // the MCC vector is indexed by the logical volume id + const int lvolID = volume->GetLogicalVolume()->id(); + const int theMCIndex = MCIndex[lvolID]; + + auto survive = [&](bool push = true) { + currentTrack.pos = pos; + currentTrack.navState = navState; + if (push) activeQueue->push_back(slot); + }; + + // Init a track with the needed data to call into G4HepEm. + G4HepEmGammaTrack gammaTrack; + G4HepEmTrack *theTrack = gammaTrack.GetTrack(); + theTrack->SetEKin(energy); + theTrack->SetMCIndex(theMCIndex); + + // Sample the `number-of-interaction-left` and put it into the track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = currentTrack.numIALeft[ip]; + if (numIALeft <= 0) { + numIALeft = -std::log(currentTrack.Uniform()); + } + theTrack->SetNumIALeft(numIALeft, ip); + } + + // Call G4HepEm to compute the physics step limit. + G4HepEmGammaManager::HowFar(&g4HepEmData, &g4HepEmPars, &gammaTrack); + + // Get result into variables. + double geometricalStepLengthFromPhysics = theTrack->GetGStepLength(); + int winnerProcessIndex = theTrack->GetWinnerProcessIndex(); + // Leave the range and MFP inside the G4HepEmTrack. If we split kernels, we + // also need to carry them over! + + // Check if there's a volume boundary in between. + vecgeom::NavStateIndex nextState; + double geometryStepLength = + BVHNavigator::ComputeStepAndNextVolume(pos, dir, geometricalStepLengthFromPhysics, navState, nextState, kPush); + pos += geometryStepLength * dir; + atomicAdd(&globalScoring->neutralSteps, 1); + + // Set boundary state in navState so the next step and secondaries get the + // correct information (navState = nextState only if relocated + // in case of a boundary; see below) + navState.SetBoundaryState(nextState.IsOnBoundary()); + + // Propagate information from geometrical step to G4HepEm. + theTrack->SetGStepLength(geometryStepLength); + theTrack->SetOnBoundary(nextState.IsOnBoundary()); + + G4HepEmGammaManager::UpdateNumIALeft(theTrack); + + // Save the `number-of-interaction-left` in our track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = theTrack->GetNumIALeft(ip); + currentTrack.numIALeft[ip] = numIALeft; + } + + if (nextState.IsOnBoundary()) { + // For now, just count that we hit something. + atomicAdd(&globalScoring->hits, 1); + + // Kill the particle if it left the world. + if (nextState.Top() != nullptr) { + BVHNavigator::RelocateToNextVolume(pos, dir, nextState); + + // Move to the next boundary. + navState = nextState; + survive(); + } + continue; + } else if (winnerProcessIndex < 0) { + // No discrete process, move on. + survive(); + continue; + } + + // Reset number of interaction left for the winner discrete process. + // (Will be resampled in the next iteration.) + currentTrack.numIALeft[winnerProcessIndex] = -1.0; + + interactions.queues[winnerProcessIndex]->push_back(slot); + survive(false); + } +} + +template +__device__ void GammaInteraction(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + int activeSize = active->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int globalSlot = (*active)[i]; + Track ¤tTrack = particles[globalSlot]; + const auto energy = currentTrack.energy; + const auto pos = currentTrack.pos; + const auto dir = currentTrack.dir; + const auto navState = currentTrack.navState; + const auto volume = navState.Top(); + const int volumeID = volume->id(); + // the MCC vector is indexed by the logical volume id + const int lvolID = volume->GetLogicalVolume()->id(); + const int theMCIndex = MCIndex[lvolID]; + + auto survive = [&] { activeQueue->push_back(globalSlot); }; + + RanluxppDouble newRNG{currentTrack.rngState.Branch()}; + G4HepEmRandomEngine rnge{¤tTrack.rngState}; + + if constexpr (ProcessIndex == 0) { + // Invoke gamma conversion to e-/e+ pairs, if the energy is above the threshold. + if (energy < 2 * copcore::units::kElectronMassC2) { + survive(); + return; + } + + double logEnergy = std::log(energy); + double elKinEnergy, posKinEnergy; + G4HepEmGammaInteractionConversion::SampleKinEnergies(&g4HepEmData, energy, logEnergy, theMCIndex, elKinEnergy, + posKinEnergy, &rnge); + + double dirPrimary[] = {dir.x(), dir.y(), dir.z()}; + double dirSecondaryEl[3], dirSecondaryPos[3]; + G4HepEmGammaInteractionConversion::SampleDirections(dirPrimary, dirSecondaryEl, dirSecondaryPos, elKinEnergy, + posKinEnergy, &rnge); + + Track &electron = secondaries.electrons.NextTrack(); + Track &positron = secondaries.positrons.NextTrack(); + atomicAdd(&globalScoring->numElectrons, 1); + atomicAdd(&globalScoring->numPositrons, 1); + + electron.InitAsSecondary(pos, navState); + electron.rngState = newRNG; + electron.energy = elKinEnergy; + electron.dir.Set(dirSecondaryEl[0], dirSecondaryEl[1], dirSecondaryEl[2]); + + positron.InitAsSecondary(pos, navState); + // Reuse the RNG state of the dying track. + positron.rngState = currentTrack.rngState; + positron.energy = posKinEnergy; + positron.dir.Set(dirSecondaryPos[0], dirSecondaryPos[1], dirSecondaryPos[2]); + + // The current track is killed by not enqueuing into the next activeQueue. + } else if constexpr (ProcessIndex == 1) { + // Invoke Compton scattering of gamma. + constexpr double LowEnergyThreshold = 100 * copcore::units::eV; + if (energy < LowEnergyThreshold) { + survive(); + return; + } + const double origDirPrimary[] = {dir.x(), dir.y(), dir.z()}; + double dirPrimary[3]; + const double newEnergyGamma = + G4HepEmGammaInteractionCompton::SamplePhotonEnergyAndDirection(energy, dirPrimary, origDirPrimary, &rnge); + vecgeom::Vector3D newDirGamma(dirPrimary[0], dirPrimary[1], dirPrimary[2]); + + const double energyEl = energy - newEnergyGamma; + if (energyEl > LowEnergyThreshold) { + // Create a secondary electron and sample/compute directions. + Track &electron = secondaries.electrons.NextTrack(); + atomicAdd(&globalScoring->numElectrons, 1); + + electron.InitAsSecondary(pos, navState); + electron.rngState = newRNG; + electron.energy = energyEl; + electron.dir = energy * dir - newEnergyGamma * newDirGamma; + electron.dir.Normalize(); + } else { + atomicAdd(&globalScoring->energyDeposit, energyEl); + atomicAdd(&scoringPerVolume->energyDeposit[volumeID], energyEl); + } + + // Check the new gamma energy and deposit if below threshold. + if (newEnergyGamma > LowEnergyThreshold) { + currentTrack.energy = newEnergyGamma; + currentTrack.dir = newDirGamma; + survive(); + } else { + atomicAdd(&globalScoring->energyDeposit, newEnergyGamma); + atomicAdd(&scoringPerVolume->energyDeposit[volumeID], newEnergyGamma); + // The current track is killed by not enqueuing into the next activeQueue. + } + } else if constexpr (ProcessIndex == 2) { + // Invoke photoelectric process. + const double theLowEnergyThreshold = 1 * copcore::units::eV; + + // (Re)compute total macroscopic cross section + const int theMatIndx = g4HepEmData.fTheMatCutData->fMatCutData[theMCIndex].fHepEmMatIndex; + double mxsec = G4HepEmGammaManager::GetMacXSecPE(&g4HepEmData, theMatIndx, energy); + + const double bindingEnergy = G4HepEmGammaInteractionPhotoelectric::SelectElementBindingEnergy( + &g4HepEmData, theMCIndex, mxsec, energy, &rnge); + + double edep = bindingEnergy; + const double photoElecE = energy - edep; + if (photoElecE > theLowEnergyThreshold) { + // Create a secondary electron and sample directions. + Track &electron = secondaries.electrons.NextTrack(); + atomicAdd(&globalScoring->numElectrons, 1); + + double dirGamma[] = {dir.x(), dir.y(), dir.z()}; + double dirPhotoElec[3]; + G4HepEmGammaInteractionPhotoelectric::SamplePhotoElectronDirection(photoElecE, dirGamma, dirPhotoElec, &rnge); + + electron.InitAsSecondary(pos, navState); + electron.rngState = newRNG; + electron.energy = photoElecE; + electron.dir.Set(dirPhotoElec[0], dirPhotoElec[1], dirPhotoElec[2]); + } else { + edep = energy; + } + atomicAdd(&globalScoring->energyDeposit, edep); + // The current track is killed by not enqueuing into the next activeQueue. + } + } +} + +__global__ void PairCreation(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + GammaInteraction<0>(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} +__global__ void ComptonScattering(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + GammaInteraction<1>(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} +__global__ void PhotoelectricEffect(Track *particles, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *globalScoring, + ScoringPerVolume *scoringPerVolume) +{ + GammaInteraction<2>(particles, active, secondaries, activeQueue, globalScoring, scoringPerVolume); +} diff --git a/examples/Example20/main.cpp b/examples/Example20/main.cpp new file mode 100644 index 00000000..09273665 --- /dev/null +++ b/examples/Example20/main.cpp @@ -0,0 +1,303 @@ +// SPDX-FileCopyrightText: 2022 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +static constexpr double DefaultCut = 0.7 * mm; + +const G4VPhysicalVolume *InitGeant4(const std::string &gdml_file) +{ + auto defaultRegion = new G4Region("DefaultRegionForTheWorld"); // deleted by store + auto pcuts = G4ProductionCutsTable::GetProductionCutsTable()->GetDefaultProductionCuts(); + pcuts->SetProductionCut(DefaultCut, "gamma"); + pcuts->SetProductionCut(DefaultCut, "e-"); + pcuts->SetProductionCut(DefaultCut, "e+"); + pcuts->SetProductionCut(DefaultCut, "proton"); + defaultRegion->SetProductionCuts(pcuts); + + // Read the geometry, regions and cuts from the GDML file + std::cout << "reading " << gdml_file << " transiently on CPU for Geant4 ...\n"; + G4GDMLParser parser; + parser.Read(gdml_file, false); // turn off schema checker + G4VPhysicalVolume *world = parser.GetWorldVolume(); + + if (world == nullptr) { + std::cerr << "example18: World volume not set properly check your setup selection criteria or GDML input!\n"; + return world; + } + + // - PHYSICS + // --- Create particles that have secondary production threshold. + G4Gamma::Gamma(); + G4Electron::Electron(); + G4Positron::Positron(); + G4Proton::Proton(); + G4ParticleTable *partTable = G4ParticleTable::GetParticleTable(); + partTable->SetReadiness(); + + // - REGIONS + if (world->GetLogicalVolume()->GetRegion() == nullptr) { + // Add default region if none available + defaultRegion->AddRootLogicalVolume(world->GetLogicalVolume()); + } + for (auto region : *G4RegionStore::GetInstance()) { + region->UsedInMassGeometry(true); // make sure all regions are marked as used + region->UpdateMaterialList(); + } + + // - UPDATE COUPLES + std::cout << "updating material-cut couples based on " << G4RegionStore::GetInstance()->size() << " regions ...\n"; + G4ProductionCutsTable *theCoupleTable = G4ProductionCutsTable::GetProductionCutsTable(); + theCoupleTable->UpdateCoupleTable(world); + + // --- Set the world volume to fix initialization of G4SafetyHelper (used by G4UrbanMscModel) + G4TransportationManager::GetTransportationManager()->SetWorldForTracking(world); + + // --- Set MSC range factor to match G4HepEm physics lists. + G4EmParameters *param = G4EmParameters::Instance(); + param->SetDefaults(); + param->SetMscRangeFactor(0.04); + + return world; +} + +const vecgeom::cxx::VPlacedVolume *InitVecGeom(const std::string &gdml_file, int cache_depth) +{ + // Import the gdml file into VecGeom + vecgeom::GeoManager::Instance().SetTransformationCacheDepth(cache_depth); + vgdml::Parser vgdmlParser; + auto middleWare = vgdmlParser.Load(gdml_file.c_str(), false, copcore::units::mm); + if (middleWare == nullptr) { + std::cerr << "Failed to read geometry from GDML file '" << gdml_file << "'" << std::endl; + return nullptr; + } + + const vecgeom::VPlacedVolume *world = vecgeom::GeoManager::Instance().GetWorld(); + if (world == nullptr) { + std::cerr << "GeoManager world volume is nullptr" << std::endl; + return nullptr; + } + return world; +} + +int *CreateMCCindex(const G4VPhysicalVolume *g4world, const vecgeom::VPlacedVolume *world, + const G4HepEmState &hepEmState) +{ + const int *g4tohepmcindex = hepEmState.fData->fTheMatCutData->fG4MCIndexToHepEmMCIndex; + + // - FIND vecgeom::LogicalVolume corresponding to each and every G4LogicalVolume + int nphysical = 0; + + int nvolumes = vecgeom::GeoManager::Instance().GetRegisteredVolumesCount(); + int *mcc_index = new int[nvolumes]; + memset(mcc_index, 0, nvolumes * sizeof(int)); + + // recursive geometry visitor lambda matching one by one Geant4 and VecGeom logical volumes + // (we need to make sure we set the right MCC index to the right volume) + typedef std::function func_t; + func_t visitAndSetMCindex = [&](G4LogicalVolume const *g4vol, vecgeom::LogicalVolume const *vol) { + int nd = g4vol->GetNoDaughters(); + auto daughters = vol->GetDaughters(); + if (nd != daughters.size()) throw std::runtime_error("Mismatch in number of daughters"); + // Check the couples + if (g4vol->GetMaterialCutsCouple() == nullptr) + throw std::runtime_error("G4LogicalVolume " + std::string(g4vol->GetName()) + + std::string(" has no material-cuts couple")); + int g4mcindex = g4vol->GetMaterialCutsCouple()->GetIndex(); + int hepemmcindex = g4tohepmcindex[g4mcindex]; + // Check consistency with G4HepEm data + if (hepEmState.fData->fTheMatCutData->fMatCutData[hepemmcindex].fG4MatCutIndex != g4mcindex) + throw std::runtime_error("Mismatch between Geant4 mcindex and corresponding G4HepEm index"); + if (vol->id() >= nvolumes) throw std::runtime_error("Volume id larger than number of volumes"); + + // All OK, now fill the index in the array + mcc_index[vol->id()] = hepemmcindex; + nphysical++; + + // Now do the daughters + for (int id = 0; id < nd; ++id) { + auto g4pvol = g4vol->GetDaughter(id); + auto pvol = daughters[id]; + // VecGeom does not strip pointers from logical volume names + if (std::string(pvol->GetLogicalVolume()->GetName()).rfind(g4pvol->GetLogicalVolume()->GetName(), 0) != 0) + throw std::runtime_error("Volume names " + std::string(pvol->GetLogicalVolume()->GetName()) + " and " + + std::string(g4pvol->GetLogicalVolume()->GetName()) + " mismatch"); + visitAndSetMCindex(g4pvol->GetLogicalVolume(), pvol->GetLogicalVolume()); + } + }; + + visitAndSetMCindex(g4world->GetLogicalVolume(), world->GetLogicalVolume()); + std::cout << "Visited " << nphysical << " matching physical volumes\n"; + return mcc_index; +} + +void FreeG4HepEm(G4HepEmState *state) +{ + FreeG4HepEmData(state->fData); +} + +void InitBVH() +{ + vecgeom::cxx::BVHManager::Init(); + vecgeom::cxx::BVHManager::DeviceInit(); +} + +void PrintScoringPerVolume(const vecgeom::VPlacedVolume *placed, const ScoringPerVolume *scoring, int level = 0) +{ + for (auto *daughter : placed->GetDaughters()) { + std::cout << std::setw(level * 2) << ""; + auto id = daughter->id(); + std::cout << " ID " << id << " Charged-TrakL " << scoring->chargedTrackLength[id] / copcore::units::mm + << " mm; Energy-Dep " << scoring->energyDeposit[id] / copcore::units::MeV << " MeV" << std::endl; + PrintScoringPerVolume(daughter, scoring, level + 1); + } +} + +int main(int argc, char *argv[]) +{ + // Only outputs are the data file(s) + // Separate for now, but will want to unify + OPTION_STRING(gdml_file, "cms2018.gdml"); + OPTION_INT(cache_depth, 0); // 0 = full depth + OPTION_INT(particles, 100); + OPTION_DOUBLE(energy, 10); // entered in GeV + energy *= copcore::units::GeV; + OPTION_INT(batch, 52); + OPTION_STRING(gunpos, "0,0,0"); + OPTION_STRING(gundir, "1,0,0"); + OPTION_BOOL(rotatingParticleGun, false); + + // parse gun config + GunConfig gunConfig{}; + std::sscanf(gunpos.c_str(), "%f,%f,%f", &gunConfig.position[0], &gunConfig.position[1], &gunConfig.position[2]); + std::sscanf(gundir.c_str(), "%f,%f,%f", &gunConfig.direction[0], &gunConfig.direction[1], &gunConfig.direction[2]); + gunConfig.movingGun = rotatingParticleGun; + + vecgeom::Stopwatch timer; + timer.Start(); + + NVTXTracer tracer("InitG4"); + + // Initialize Geant4 + auto g4world = InitGeant4(gdml_file); + if (!g4world) return 3; + + tracer.setTag("InitVecGeom"); + // Initialize VecGeom + std::cout << "reading " << gdml_file << " transiently on CPU for VecGeom ...\n"; + auto world = InitVecGeom(gdml_file, cache_depth); + if (!world) return 3; + + // Construct and initialize the G4HepEmState data/tables + tracer.setTag("InitG4HepEM"); + std::cout << "initializing G4HepEm state ...\n"; + G4HepEmState hepEmState; + InitG4HepEmState(&hepEmState); + + // Initialize G4HepEm material-cut couple array indexed by VecGeom volume id. + // (In future we should connect the index directly to the VecGeom logical volume) + tracer.setTag("InitMaterialCutCouple"); + std::cout << "initializing material-cut couple indices ...\n"; + int *MCCindex = nullptr; + + try { + MCCindex = CreateMCCindex(g4world, world, hepEmState); + } catch (const std::runtime_error &ex) { + std::cerr << "*** CreateMCCindex: " << ex.what() << "\n"; + return 1; + } + + // Load and synchronize the geometry on the GPU + tracer.setTag("SyncGeom"); + std::cout << "synchronizing VecGeom geometry to GPU ...\n"; + auto &cudaManager = vecgeom::cxx::CudaManager::Instance(); + cudaManager.LoadGeometry(world); + cudaManager.Synchronize(); + + tracer.setTag("InitBVH"); + InitBVH(); + + auto time_cpu = timer.Stop(); + std::cout << "Initialization took: " << time_cpu << " sec\n"; + + int NumVolumes = vecgeom::GeoManager::Instance().GetRegisteredVolumesCount(); + int NumPlaced = vecgeom::GeoManager::Instance().GetPlacedVolumesCount(); + + // Scoring is done per placed volume (for now...) + double *chargedTrackLength = new double[NumPlaced]; + double *energyDeposit = new double[NumPlaced]; + ScoringPerVolume scoringPerVolume; + scoringPerVolume.chargedTrackLength = chargedTrackLength; + scoringPerVolume.energyDeposit = energyDeposit; + GlobalScoring globalScoring; + + tracer.setTag("GPU function"); + runGPU(particles, energy, batch, MCCindex, &scoringPerVolume, &globalScoring, NumVolumes, NumPlaced, &hepEmState, + gunConfig); + + std::cout << std::endl; + std::cout << std::endl; + double meanEnergyDeposit = globalScoring.energyDeposit / particles; + std::cout << "Mean energy deposit " << (meanEnergyDeposit / copcore::units::GeV) << " GeV\n" + << "Mean number of gamma " << ((double)globalScoring.numGammas / particles) << "\n" + << "Mean number of e- " << ((double)globalScoring.numElectrons / particles) << "\n" + << "Mean number of e+ " << ((double)globalScoring.numPositrons / particles) << "\n" + << "Mean number of charged steps " << ((double)globalScoring.chargedSteps / particles) << "\n" + << "Mean number of neutral steps " << ((double)globalScoring.neutralSteps / particles) << "\n" + << "Mean number of hits " << ((double)globalScoring.hits / particles) << "\n"; + if (globalScoring.numKilled > 0) { + std::cout << "Total killed particles " << globalScoring.numKilled << "\n"; + } + std::cout << std::endl; + + // Average charged track length and energy deposit per particle. + for (int i = 0; i < NumPlaced; i++) { + chargedTrackLength[i] /= particles; + energyDeposit[i] /= particles; + } + + std::cout << std::scientific; +#ifdef VERBOSE + std::cout << std::endl; + const int id = world->id(); + std::cout << "World (ID " << world->id() << ") Charged-TrakL " << chargedTrackLength[id] / copcore::units::mm + << " Energy-Dep " << energyDeposit[id] / copcore::units::MeV << " MeV" << std::endl; + PrintScoringPerVolume(world, &scoringPerVolume); +#endif + FreeG4HepEm(&hepEmState); + delete[] MCCindex; + delete[] chargedTrackLength; + delete[] energyDeposit; + return 0; +} diff --git a/examples/Example20/main.cu b/examples/Example20/main.cu new file mode 100644 index 00000000..40145b70 --- /dev/null +++ b/examples/Example20/main.cu @@ -0,0 +1,519 @@ +// SPDX-FileCopyrightText: 2022 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example.h" +#include "example.cuh" + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#ifdef VECGEOM_ENABLE_CUDA +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +__constant__ __device__ struct G4HepEmParameters g4HepEmPars; +__constant__ __device__ struct G4HepEmData g4HepEmData; + +__constant__ __device__ int *MCIndex = nullptr; + +void InitG4HepEmGPU(G4HepEmState *state) +{ + // Copy to GPU. + CopyG4HepEmDataToGPU(state->fData); + COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(g4HepEmPars, state->fParameters, sizeof(G4HepEmParameters))); + + // Create G4HepEmData with the device pointers. + G4HepEmData dataOnDevice; + dataOnDevice.fTheMatCutData = state->fData->fTheMatCutData_gpu; + dataOnDevice.fTheMaterialData = state->fData->fTheMaterialData_gpu; + dataOnDevice.fTheElementData = state->fData->fTheElementData_gpu; + dataOnDevice.fTheElectronData = state->fData->fTheElectronData_gpu; + dataOnDevice.fThePositronData = state->fData->fThePositronData_gpu; + dataOnDevice.fTheSBTableData = state->fData->fTheSBTableData_gpu; + dataOnDevice.fTheGammaData = state->fData->fTheGammaData_gpu; + // The other pointers should never be used. + dataOnDevice.fTheMatCutData_gpu = nullptr; + dataOnDevice.fTheMaterialData_gpu = nullptr; + dataOnDevice.fTheElementData_gpu = nullptr; + dataOnDevice.fTheElectronData_gpu = nullptr; + dataOnDevice.fThePositronData_gpu = nullptr; + dataOnDevice.fTheSBTableData_gpu = nullptr; + dataOnDevice.fTheGammaData_gpu = nullptr; + + COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(g4HepEmData, &dataOnDevice, sizeof(G4HepEmData))); +} + +// A bundle of queues per particle type: +// * Two for active particles, one for the current iteration and the second for the next. +struct ParticleQueues { + adept::MParray *currentlyActive; + adept::MParray *nextActive; + + void SwapActive() { std::swap(currentlyActive, nextActive); } +}; + +struct ParticleType { + Track *tracks; + SlotManager *slotManager; + ParticleQueues queues; + Interactions interactions; + cudaStream_t stream; + cudaEvent_t event; + + enum { + Electron = 0, + Positron = 1, + Gamma = 2, + + NumParticleTypes, + }; +}; + +// A bundle of queues for the three particle types. +struct AllParticleQueues { + ParticleQueues queues[ParticleType::NumParticleTypes]; + Interactions interactions[ParticleType::NumParticleTypes]; +}; + +// Kernel to initialize the set of queues per particle type. +__global__ void InitParticleQueues(ParticleQueues queues, Interactions interactions, size_t Capacity) +{ + adept::MParray::MakeInstanceAt(Capacity, queues.currentlyActive); + adept::MParray::MakeInstanceAt(Capacity, queues.nextActive); + adept::MParray::MakeInstanceAt(Capacity, interactions.queues[0]); + adept::MParray::MakeInstanceAt(Capacity, interactions.queues[1]); + adept::MParray::MakeInstanceAt(Capacity, interactions.queues[2]); +} + +// Kernel function to initialize a set of primary particles. +__global__ void InitPrimaries(ParticleGenerator generator, int startEvent, int numEvents, double energy, + const vecgeom::VPlacedVolume *world, GlobalScoring *globalScoring, const GunConfig gun) +{ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < numEvents; i += blockDim.x * gridDim.x) { + Track &track = generator.NextTrack(); + + track.rngState.SetSeed(startEvent + i); + track.energy = energy; + track.numIALeft[0] = -1.0; + track.numIALeft[1] = -1.0; + track.numIALeft[2] = -1.0; + + track.initialRange = -1.0; + track.dynamicRangeFactor = -1.0; + track.tlimitMin = -1.0; + + track.pos = {gun.position[0], gun.position[1], gun.position[2]}; + if (gun.movingGun) { + // Generate particles flat in phi and in eta between -5 and 5. We'll lose the far forwards ones, so no need to + // simulate. + const double phi = 2. * M_PI * track.rngState.Rndm(); + const double eta = -5. + 10. * track.rngState.Rndm(); + track.dir.x() = static_cast(cos(phi) / cosh(eta)); + track.dir.y() = static_cast(sin(phi) / cosh(eta)); + track.dir.z() = static_cast(tanh(eta)); + } else { + track.dir = {gun.direction[0], gun.direction[1], gun.direction[2]}; + } + track.navState.Clear(); + BVHNavigator::LocatePointIn(world, track.pos, track.navState, true); + + atomicAdd(&globalScoring->numElectrons, 1); + } +} + +// A data structure to transfer statistics after each iteration. +struct Stats { + int inFlight[ParticleType::NumParticleTypes]; +}; + +// Finish iteration: clear queues and fill statistics. +__global__ void FinishIteration(AllParticleQueues all, Stats *stats) +{ + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + all.queues[i].currentlyActive->clear(); + all.interactions[i].queues[0]->clear(); + all.interactions[i].queues[1]->clear(); + all.interactions[i].queues[2]->clear(); + stats->inFlight[i] = all.queues[i].nextActive->size(); + } +} + +__global__ void ClearQueue(adept::MParray *queue) +{ + queue->clear(); +} + +void runGPU(int numParticles, double energy, int batch, const int *MCIndex_host, + ScoringPerVolume *scoringPerVolume_host, GlobalScoring *globalScoring_host, int numVolumes, int numPlaced, + G4HepEmState *state, GunConfig gunConfig) +{ + NVTXTracer tracer("InitG4HepEM"); + InitG4HepEmGPU(state); + + tracer.setTag("InitParticles/malloc/copy"); + // Transfer MC indices. + int *MCIndex_dev = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&MCIndex_dev, sizeof(int) * numVolumes)); + COPCORE_CUDA_CHECK(cudaMemcpy(MCIndex_dev, MCIndex_host, sizeof(int) * numVolumes, cudaMemcpyHostToDevice)); + COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(MCIndex, &MCIndex_dev, sizeof(int *))); + + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + + // Capacity of the different containers aka the maximum number of particles. + // Use 1/5 of GPU memory for each of e+/e-/gammas, leaving 2/5 for the rest. + const size_t Capacity = (deviceProp.totalGlobalMem / sizeof(Track) / 5); + + std::cout << "INFO: capacity of containers set to " << Capacity << std::endl; + if (batch == -1) { + // Rule of thumb: at most 2000 particles of one type per GeV primary. + batch = Capacity / ((int)energy / copcore::units::GeV) / 2000; + } else if (batch < 1) { + batch = 1; + } + std::cout << "INFO: batching " << batch << " particles for transport on the GPU" << std::endl; + if (BzFieldValue != 0) { + std::cout << "INFO: running with field Bz = " << BzFieldValue / copcore::units::tesla << " T" << std::endl; + } else { + std::cout << "INFO: running with magnetic field OFF" << std::endl; + } + + // Allocate structures to manage tracks of an implicit type: + // * memory to hold the actual Track elements, + // * objects to manage slots inside the memory, + // * queues of slots to remember active particle and those needing relocation, + // * a stream and an event for synchronization of kernels. + const size_t TracksSize = sizeof(Track) * Capacity; + const size_t ManagerSize = sizeof(SlotManager); + const size_t QueueSize = adept::MParray::SizeOfInstance(Capacity); + + ParticleType particles[ParticleType::NumParticleTypes]; + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].tracks, TracksSize)); + + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].slotManager, ManagerSize)); + + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].queues.currentlyActive, QueueSize)); + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].queues.nextActive, QueueSize)); + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].interactions.queues[0], QueueSize)); + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].interactions.queues[1], QueueSize)); + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].interactions.queues[2], QueueSize)); + InitParticleQueues<<<1, 1>>>(particles[i].queues, particles[i].interactions, Capacity); + + COPCORE_CUDA_CHECK(cudaStreamCreate(&particles[i].stream)); + COPCORE_CUDA_CHECK(cudaEventCreate(&particles[i].event)); + } + COPCORE_CUDA_CHECK(cudaDeviceSynchronize()); + + ParticleType &electrons = particles[ParticleType::Electron]; + ParticleType &positrons = particles[ParticleType::Positron]; + ParticleType &gammas = particles[ParticleType::Gamma]; + + // Create a stream to synchronize kernels of all particle types. + cudaStream_t stream; + COPCORE_CUDA_CHECK(cudaStreamCreate(&stream)); + + cudaStream_t interactionStreams[3]; + for (auto i = 0; i < 3; ++i) + COPCORE_CUDA_CHECK(cudaStreamCreate(&interactionStreams[i])); + + // Allocate memory to score charged track length and energy deposit per volume. + double *chargedTrackLength = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&chargedTrackLength, sizeof(double) * numPlaced)); + COPCORE_CUDA_CHECK(cudaMemset(chargedTrackLength, 0, sizeof(double) * numPlaced)); + double *energyDeposit = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&energyDeposit, sizeof(double) * numPlaced)); + COPCORE_CUDA_CHECK(cudaMemset(energyDeposit, 0, sizeof(double) * numPlaced)); + + // Allocate and initialize scoring and statistics. + GlobalScoring *globalScoring = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&globalScoring, sizeof(GlobalScoring))); + COPCORE_CUDA_CHECK(cudaMemset(globalScoring, 0, sizeof(GlobalScoring))); + + ScoringPerVolume *scoringPerVolume = nullptr; + ScoringPerVolume scoringPerVolume_devPtrs; + scoringPerVolume_devPtrs.chargedTrackLength = chargedTrackLength; + scoringPerVolume_devPtrs.energyDeposit = energyDeposit; + COPCORE_CUDA_CHECK(cudaMalloc(&scoringPerVolume, sizeof(ScoringPerVolume))); + COPCORE_CUDA_CHECK( + cudaMemcpy(scoringPerVolume, &scoringPerVolume_devPtrs, sizeof(ScoringPerVolume), cudaMemcpyHostToDevice)); + + Stats *stats_dev = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&stats_dev, sizeof(Stats))); + Stats *stats = nullptr; + COPCORE_CUDA_CHECK(cudaMallocHost(&stats, sizeof(Stats))); + + // Allocate memory to hold a "vanilla" SlotManager to initialize for each batch. + SlotManager slotManagerInit(Capacity); + SlotManager *slotManagerInit_dev = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&slotManagerInit_dev, sizeof(SlotManager))); + COPCORE_CUDA_CHECK(cudaMemcpy(slotManagerInit_dev, &slotManagerInit, sizeof(SlotManager), cudaMemcpyHostToDevice)); + + vecgeom::Stopwatch timer; + timer.Start(); + tracer.setTag("sim"); + + std::cout << std::endl << "Simulating particles "; + const bool detailed = (numParticles / batch) < 50; + if (!detailed) { + std::cout << "... " << std::flush; + } + + unsigned long long killed = 0; + tracer.setTag("start event loop"); + + for (int startEvent = 1; startEvent <= numParticles; startEvent += batch) { + if (detailed) { + std::cout << startEvent << " ... " << std::flush; + } + int left = numParticles - startEvent + 1; + int chunk = std::min(left, batch); + + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + COPCORE_CUDA_CHECK(cudaMemcpyAsync(particles[i].slotManager, slotManagerInit_dev, ManagerSize, + cudaMemcpyDeviceToDevice, stream)); + } + + // Initialize primary particles. + constexpr int InitThreads = ThreadsPerBlock; + int initBlocks = (chunk + ThreadsPerBlock - 1) / ThreadsPerBlock; + ParticleGenerator electronGenerator(electrons.tracks, electrons.slotManager, electrons.queues.currentlyActive); + auto world_dev = vecgeom::cxx::CudaManager::Instance().world_gpu(); + InitPrimaries<<>>(electronGenerator, startEvent, chunk, energy, world_dev, + globalScoring, gunConfig); + COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream)); + + stats->inFlight[ParticleType::Electron] = chunk; + stats->inFlight[ParticleType::Positron] = 0; + stats->inFlight[ParticleType::Gamma] = 0; + + constexpr int MaxBlocks = 8192; + int transportBlocks; + + int inFlight; + int loopingNo = 0; + int previousElectrons = -1, previousPositrons = -1; + + do { + Secondaries secondaries = { + .electrons = {electrons.tracks, electrons.slotManager, electrons.queues.nextActive}, + .positrons = {positrons.tracks, positrons.slotManager, positrons.queues.nextActive}, + .gammas = {gammas.tracks, gammas.slotManager, gammas.queues.nextActive}, + }; + + // *** ELECTRONS *** + int numElectrons = stats->inFlight[ParticleType::Electron]; + if (numElectrons > 0) { + transportBlocks = (numElectrons + ThreadsPerBlock - 1) / ThreadsPerBlock; + transportBlocks = std::min(transportBlocks, MaxBlocks); + + TransportElectrons<<>>( + electrons.tracks, electrons.queues.currentlyActive, secondaries, electrons.queues.nextActive, + electrons.interactions, globalScoring, scoringPerVolume); + + COPCORE_CUDA_CHECK(cudaEventRecord(electrons.event, electrons.stream)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(interactionStreams[0], electrons.event, 0)); + + IonizationEl<<>>( + electrons.tracks, electrons.interactions.queues[0], secondaries, electrons.queues.nextActive, globalScoring, + scoringPerVolume); + BremsstrahlungEl<<>>( + electrons.tracks, electrons.interactions.queues[1], secondaries, electrons.queues.nextActive, globalScoring, + scoringPerVolume); + + for (auto streamToWaitFor : {interactionStreams[0], electrons.stream}) { + COPCORE_CUDA_CHECK(cudaEventRecord(electrons.event, streamToWaitFor)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, electrons.event, 0)); + } + } + + // *** POSITRONS *** + int numPositrons = stats->inFlight[ParticleType::Positron]; + if (numPositrons > 0) { + transportBlocks = (numPositrons + ThreadsPerBlock - 1) / ThreadsPerBlock; + transportBlocks = std::min(transportBlocks, MaxBlocks); + + TransportPositrons<<>>( + positrons.tracks, positrons.queues.currentlyActive, secondaries, positrons.queues.nextActive, + positrons.interactions, globalScoring, scoringPerVolume); + + COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, positrons.stream)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(interactionStreams[1], positrons.event, 0)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(interactionStreams[2], positrons.event, 0)); + + IonizationPos<<>>( + positrons.tracks, positrons.interactions.queues[0], secondaries, positrons.queues.nextActive, globalScoring, + scoringPerVolume); + BremsstrahlungPos<<>>( + positrons.tracks, positrons.interactions.queues[1], secondaries, positrons.queues.nextActive, globalScoring, + scoringPerVolume); + AnnihilationPos<<>>( + positrons.tracks, positrons.interactions.queues[2], secondaries, positrons.queues.nextActive, globalScoring, + scoringPerVolume); + + for (auto streamToWaitFor : {interactionStreams[1], positrons.stream, interactionStreams[2]}) { + COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, streamToWaitFor)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, positrons.event, 0)); + } + } + + // *** GAMMAS *** + int numGammas = stats->inFlight[ParticleType::Gamma]; + if (numGammas > 0) { + transportBlocks = (numGammas + ThreadsPerBlock - 1) / ThreadsPerBlock; + transportBlocks = std::min(transportBlocks, MaxBlocks); + + TransportGammas<<>>( + gammas.tracks, gammas.queues.currentlyActive, secondaries, gammas.queues.nextActive, gammas.interactions, + globalScoring, scoringPerVolume); + + COPCORE_CUDA_CHECK(cudaEventRecord(gammas.event, gammas.stream)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, gammas.event, 0)); + + for (auto i = 0; i < 3; ++i) { + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(interactionStreams[i], gammas.event, 0)); + } + // About 2% of all gammas: + PairCreation<<>>( + gammas.tracks, gammas.interactions.queues[0], secondaries, gammas.queues.nextActive, globalScoring, + scoringPerVolume); + // About 10% of all gammas: + ComptonScattering<<>>( + gammas.tracks, gammas.interactions.queues[1], secondaries, gammas.queues.nextActive, globalScoring, + scoringPerVolume); + // About 15% of all gammas: + PhotoelectricEffect<<>>( + gammas.tracks, gammas.interactions.queues[2], secondaries, gammas.queues.nextActive, globalScoring, + scoringPerVolume); + for (auto i = 0; i < 3; ++i) { + COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, interactionStreams[i])); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, positrons.event, 0)); + } + COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, positrons.stream)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, positrons.event, 0)); + } + + // *** END OF TRANSPORT *** + + // The events ensure synchronization before finishing this iteration and + // copying the Stats back to the host. + AllParticleQueues queues = { + {electrons.queues, positrons.queues, gammas.queues}, + {electrons.interactions, positrons.interactions, gammas.interactions}, + }; + FinishIteration<<<1, 1, 0, stream>>>(queues, stats_dev); + COPCORE_CUDA_CHECK(cudaMemcpyAsync(stats, stats_dev, sizeof(Stats), cudaMemcpyDeviceToHost, stream)); + + // Finally synchronize all kernels. + COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream)); + + // Count the number of particles in flight. + inFlight = 0; + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + inFlight += stats->inFlight[i]; + } + + tracer.setOccupancy(inFlight); + + tracer.setOccupancy(inFlight); + + // Swap the queues for the next iteration. + electrons.queues.SwapActive(); + positrons.queues.SwapActive(); + gammas.queues.SwapActive(); + + // Check if only charged particles are left that are looping. + numElectrons = stats->inFlight[ParticleType::Electron]; + numPositrons = stats->inFlight[ParticleType::Positron]; + numGammas = stats->inFlight[ParticleType::Gamma]; + if (numElectrons == previousElectrons && numPositrons == previousPositrons && numGammas == 0) { + loopingNo++; + } else { + previousElectrons = numElectrons; + previousPositrons = numPositrons; + loopingNo = 0; + } + + } while (inFlight > 0 && loopingNo < 200); + + if (inFlight > 0) { + killed += inFlight; + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + ParticleType &pType = particles[i]; + int inFlightParticles = stats->inFlight[i]; + if (inFlightParticles == 0) { + continue; + } + + ClearQueue<<<1, 1, 0, stream>>>(pType.queues.currentlyActive); + } + COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream)); + } + } + std::cout << "done!" << std::endl; + + auto time = timer.Stop(); + std::cout << "Run time: " << time << "\n"; + + // Transfer back scoring. + COPCORE_CUDA_CHECK(cudaMemcpy(globalScoring_host, globalScoring, sizeof(GlobalScoring), cudaMemcpyDeviceToHost)); + globalScoring_host->numKilled = killed; + + // Transfer back the scoring per volume (charged track length and energy deposit). + COPCORE_CUDA_CHECK(cudaMemcpy(scoringPerVolume_host->chargedTrackLength, scoringPerVolume_devPtrs.chargedTrackLength, + sizeof(double) * numPlaced, cudaMemcpyDeviceToHost)); + COPCORE_CUDA_CHECK(cudaMemcpy(scoringPerVolume_host->energyDeposit, scoringPerVolume_devPtrs.energyDeposit, + sizeof(double) * numPlaced, cudaMemcpyDeviceToHost)); + + // Free resources. + COPCORE_CUDA_CHECK(cudaFree(MCIndex_dev)); + COPCORE_CUDA_CHECK(cudaFree(chargedTrackLength)); + COPCORE_CUDA_CHECK(cudaFree(energyDeposit)); + + COPCORE_CUDA_CHECK(cudaFree(globalScoring)); + COPCORE_CUDA_CHECK(cudaFree(scoringPerVolume)); + COPCORE_CUDA_CHECK(cudaFree(stats_dev)); + COPCORE_CUDA_CHECK(cudaFreeHost(stats)); + COPCORE_CUDA_CHECK(cudaFree(slotManagerInit_dev)); + + COPCORE_CUDA_CHECK(cudaStreamDestroy(stream)); + + for (auto i = 0; i < 3; ++i) + COPCORE_CUDA_CHECK(cudaStreamDestroy(interactionStreams[i])); + + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + COPCORE_CUDA_CHECK(cudaFree(particles[i].tracks)); + COPCORE_CUDA_CHECK(cudaFree(particles[i].slotManager)); + + COPCORE_CUDA_CHECK(cudaFree(particles[i].queues.currentlyActive)); + COPCORE_CUDA_CHECK(cudaFree(particles[i].queues.nextActive)); + COPCORE_CUDA_CHECK(cudaFree(particles[i].interactions.queues[0])); + COPCORE_CUDA_CHECK(cudaFree(particles[i].interactions.queues[1])); + COPCORE_CUDA_CHECK(cudaFree(particles[i].interactions.queues[2])); + + COPCORE_CUDA_CHECK(cudaStreamDestroy(particles[i].stream)); + COPCORE_CUDA_CHECK(cudaEventDestroy(particles[i].event)); + } +}