diff --git a/CoMD/src-threaded/CMakeLists.txt b/CoMD/src-threaded/CMakeLists.txt index 4455544..bad7eb7 100644 --- a/CoMD/src-threaded/CMakeLists.txt +++ b/CoMD/src-threaded/CMakeLists.txt @@ -2,9 +2,13 @@ add_definitions(-DDOUBLE) set (CoMD_depends RAJA) +#find_package(caliper) +#set(CoMD_depends ${CoMD_depends} caliper) + if (ENABLE_MPI) add_definitions(-DDO_MPI) set (CoMD_depends ${CoMD_depends} MPI) + #set(CoMD_depends ${CoMD_depends} caliper-mpi) endif() if (ENABLE_OPENMP) diff --git a/CoMD/src-threaded/CoMD.cpp b/CoMD/src-threaded/CoMD.cpp index 68d1405..d8eba1d 100644 --- a/CoMD/src-threaded/CoMD.cpp +++ b/CoMD/src-threaded/CoMD.cpp @@ -62,6 +62,7 @@ #include "mycommand.h" #include "timestep.h" #include "constants.h" +#include #define REDIRECT_OUTPUT 0 #define MIN(A,B) ((A) < (B) ? (A) : (B)) @@ -95,6 +96,7 @@ static void sanityChecks(Command cmd, double cutoff, double latticeConst, char l int main(int argc, char** argv) { + //CALI_CXX_MARK_FUNCTION; // Prolog initParallel(&argc, &argv); profileStart(totalTimer); @@ -105,6 +107,10 @@ int main(int argc, char** argv) yamlAppInfo(screenOut); Command cmd = parseCommandLine(argc, argv); + // if cmd.seed is not supplied as argument, make sure it is the same on all nodes + bcastParallel(&cmd.seed, 1, 0); + srand(cmd.seed); + printCmdYaml(yamlFile, &cmd); printCmdYaml(screenOut, &cmd); @@ -270,7 +276,6 @@ SimFlat* initSimulation(Command cmd) } } - /* Create Local IndexSet View */ sim->isLocal = sim->isTotal->createSlice(0, sim->boxes->nLocalBoxes); sim->isLocalSegment = new RAJA::RangeSegment(0, sim->boxes->nLocalBoxes); @@ -315,22 +320,19 @@ SimFlat* initSimulation(Command cmd) */ #endif + // remove some atoms to create load-imbalance + performCutout(sim, cmd.holeCount, cmd.holeRadius); + // Forces must be computed before we call the time stepper. startTimer(redistributeTimer); redistributeAtoms(sim); stopTimer(redistributeTimer); - startTimer(computeForceTimer); computeForce(sim); stopTimer(computeForceTimer); -printf("init Force complete\n"); -#ifdef DO_CUDA - cudaStreamSynchronize(0); -#endif kineticEnergy(sim); -printf("init Energy complete\n"); return sim; } diff --git a/CoMD/src-threaded/initAtoms.cpp b/CoMD/src-threaded/initAtoms.cpp index 031dcb2..250e43e 100644 --- a/CoMD/src-threaded/initAtoms.cpp +++ b/CoMD/src-threaded/initAtoms.cpp @@ -14,6 +14,9 @@ #include "timestep.h" #include "memUtils.h" #include "performanceTimers.h" +#include "yamlOutput.h" + +#define MIN(A,B) ((A) < (B) ? (A) : (B)) static void computeVcm(SimFlat* s, real_t vcm[3]); @@ -220,3 +223,102 @@ void computeVcm(SimFlat* s, real_t vcm[3]) vcm[2] = vcmSum[2]/totalMass; } +int is_within_spheres(struct SimFlatSt* s, real3 *sphere_pos, const int count, const double radius, const real_t *pos) +{ + for (int i = 0; i < count; i++){ + real3 d2; + for (int dim=0; dim<3; dim++) { + real_t d = pos[dim] - sphere_pos[i][dim]; + d2[dim] = d*d; + + if (pos[dim] <= radius + s->domain->globalMin[dim]) { + d = pos[dim] - (sphere_pos[i][dim] - s->domain->globalExtent[dim]); + d2[dim] = MIN(d2[dim], d*d); + } + if (s->domain->globalMax[dim] - radius <= pos[dim]) { + d = (sphere_pos[i][dim] + s->domain->globalMax[dim]) - pos[dim]; + d2[dim] = MIN(d2[dim], d*d); + } + } + + if ( d2[0] + d2[1] + d2[2] <= radius * radius ) + return 1; + } + return 0; +} + + +void performCutout(struct SimFlatSt* s, int count, const double radius) +{ + int total = s->atoms->nLocal * getNRanks(); + + real3 sphere_pos[count]; + + for(int i = 0; idomain->globalExtent[0] * rand()/RAND_MAX + s->domain->globalMin[0]; + sphere_pos[i][1] = s->domain->globalExtent[1] * rand()/RAND_MAX + s->domain->globalMin[1]; + sphere_pos[i][2] = s->domain->globalExtent[2] * rand()/RAND_MAX + s->domain->globalMin[2]; +// printf("Center Coordinates: %f, %f %f\n", sphere_pos[i][0], sphere_pos[i][1], sphere_pos[i][2]); + } + + int removed = 0; + + for (int iBox = 0; iBox < s->boxes->nLocalBoxes; iBox++) + { + int nIBox = s->boxes->nAtoms[iBox]; + int iOff=MAXATOMS*iBox; + int ii = 0; + // loop over atoms in iBox + while (ii < s->boxes->nAtoms[iBox]) + { + if (is_within_spheres(s, sphere_pos, count, radius, s->atoms->r[iOff+ii])) + { + removed++; + deleteAtom(s, ii, iBox); + } + else + { + ii++; + } + } + + } + + // sum + int total_removed = 0; + addIntParallelRoot(&removed, &total_removed, 1, rankOfPrintRank()); + + if(printRank()) { + int total_notRemoved = total - total_removed; + + + fprintf(screenOut, + "Cutout holes:\n" + " fraction removed: %3.1f%\n" + " atoms_removed: %9d\n" + " atoms_before: %9d\n" + " atoms_left: %9d\n" + "\n" + , + ((real_t) total_removed * 100) / ((real_t) total), + total_removed, + total, + total_notRemoved + ); + + fprintf(yamlFile, + "Cutout holes:\n" + " fraction removed: %3.1f%\n" + " atoms_removed: %9d\n" + " atoms_before: %9d\n" + " atoms_left: %9d\n" + "\n" + , + ((real_t) total_removed * 100) / ((real_t) total), + total_removed, + total, + total_notRemoved + ); + } +} diff --git a/CoMD/src-threaded/initAtoms.h b/CoMD/src-threaded/initAtoms.h index b8241b0..73407ff 100644 --- a/CoMD/src-threaded/initAtoms.h +++ b/CoMD/src-threaded/initAtoms.h @@ -29,10 +29,11 @@ typedef struct AtomsSt /// Allocates memory to store atom data. Atoms* initAtoms(struct LinkCellSt* boxes); void destroyAtoms(struct AtomsSt* atoms); - void createFccLattice(int nx, int ny, int nz, real_t lat, struct SimFlatSt* s); void setVcm(struct SimFlatSt* s, real_t vcm[3]); void setTemperature(struct SimFlatSt* s, real_t temperature); void randomDisplacements(struct SimFlatSt* s, real_t delta); +void performCutout(struct SimFlatSt* s, const int count, const double radius); +int is_within_spheres(struct SimFlatSt* s, real3 *sphere_pos, const int count, const double radius, const real_t *pos); #endif diff --git a/CoMD/src-threaded/linkCells.cpp b/CoMD/src-threaded/linkCells.cpp index 5b1fa50..0dd5a99 100644 --- a/CoMD/src-threaded/linkCells.cpp +++ b/CoMD/src-threaded/linkCells.cpp @@ -488,4 +488,18 @@ void getTuple(LinkCell* boxes, int iBox, int* ixp, int* iyp, int* izp) *izp = iz; } - +/// delete one atom from its box +// \param iId atom index +// \param iBox linkCell index +void deleteAtom(struct SimFlatSt* s, int iId, int iBox) +{ + int n = --s->boxes->nAtoms[iBox]; +//printf("atoms: %d, aID: %d, box id: %d\n",tt, iId, iBox); + s->atoms->nLocal--; + + // use the last atom to fill the hole caused by removing atom + if (n > 0 && n != iId) + { + copyAtom(s->boxes, s->atoms, n, iBox, iId, iBox); + } +} diff --git a/CoMD/src-threaded/linkCells.h b/CoMD/src-threaded/linkCells.h index 21047dc..96aca56 100644 --- a/CoMD/src-threaded/linkCells.h +++ b/CoMD/src-threaded/linkCells.h @@ -53,6 +53,6 @@ COMD_HOST_DEVICE void moveAtom(LinkCell* boxes, struct AtomsSt* atoms, int iId, void updateLinkCells(LinkCell* boxes, struct AtomsSt* atoms); int maxOccupancy(LinkCell* boxes); - +void deleteAtom(struct SimFlatSt* s, int iId, int iBox); #endif diff --git a/CoMD/src-threaded/mycommand.cpp b/CoMD/src-threaded/mycommand.cpp index 11cfc26..1b4c948 100644 --- a/CoMD/src-threaded/mycommand.cpp +++ b/CoMD/src-threaded/mycommand.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "cmdLineParser.h" #include "parallel.h" @@ -43,6 +44,9 @@ /// | \--lat | -l | -1 | lattice parameter (Angstroms) /// | \--temp | -T | 600 | initial temperature (K) /// | \--delta | -r | 0 | initial delta (Angstroms) +/// | \--seed | -S | N/A | seed for random generator +/// | \--holeRadius | -R | 10.0 | radius for cutout holes (Angstroms) +/// | \--holeCount | -C | 0 | number of cutout holes /// /// Notes: /// @@ -206,6 +210,9 @@ Command parseCommandLine(int argc, char** argv) cmd.lat = -1.0; cmd.temperature = 600.0; cmd.initialDelta = 0.0; + cmd.seed = (int) time(NULL); + cmd.holeRadius = 10.0; + cmd.holeCount = 0; int help=0; // add arguments for processing. Please update the html documentation too! @@ -226,6 +233,9 @@ Command parseCommandLine(int argc, char** argv) addArg("lat", 'l', 1, 'd', &(cmd.lat), 0, "lattice parameter (Angstroms)"); addArg("temp", 'T', 1, 'd', &(cmd.temperature), 0, "initial temperature (K)"); addArg("delta", 'r', 1, 'd', &(cmd.initialDelta), 0, "initial delta (Angstroms)"); + addArg("seed", 'S', 1, 'i', &(cmd.seed), 0, "seed for random generator"); + addArg("holeRadius", 'R', 1, 'd', &(cmd.holeRadius), 0, "radius for cutout holes (Angstroms)"); + addArg("holeCount", 'C', 1, 'i', &(cmd.holeCount), 0, "nuber of cutout holes"); processArgs(argc,argv); @@ -271,6 +281,9 @@ void printCmdYaml(FILE* file, Command* cmd) " Time step: %g fs\n" " Initial Temperature: %g K\n" " Initial Delta: %g Angstroms\n" + " seed: %d\n" + " holeRadius: %g # Angstroms\n" + " holeCount: %d\n" "\n", cmd->doeam, cmd->potDir, @@ -283,7 +296,10 @@ void printCmdYaml(FILE* file, Command* cmd) cmd->printRate, cmd->dt, cmd->temperature, - cmd->initialDelta + cmd->initialDelta, + cmd->seed, + cmd->holeRadius, + cmd->holeCount ); fflush(file); } diff --git a/CoMD/src-threaded/mycommand.h b/CoMD/src-threaded/mycommand.h index 68bb434..522a73a 100644 --- a/CoMD/src-threaded/mycommand.h +++ b/CoMD/src-threaded/mycommand.h @@ -26,6 +26,9 @@ typedef struct CommandSt double lat; //!< lattice constant (in Angstroms) double temperature; //!< simulation initial temperature (in Kelvin) double initialDelta; //!< magnitude of initial displacement from lattice (in Angstroms) + int seed; //!< seed for random generator + double holeRadius; //!< radius for cutout holes + int holeCount; //!< number of cutout holes } Command; /// Process command line arguments into an easy to handle structure. diff --git a/CoMD/src-threaded/parallel.cpp b/CoMD/src-threaded/parallel.cpp index 63f9f77..f60c1a2 100644 --- a/CoMD/src-threaded/parallel.cpp +++ b/CoMD/src-threaded/parallel.cpp @@ -28,6 +28,8 @@ static int nRanks = 1; #endif +#define PRINTRANK 0 + int getNRanks() { return nRanks; @@ -48,6 +50,11 @@ int printRank() return 0; } +int rankOfPrintRank() +{ + return PRINTRANK; +} + void timestampBarrier(const char* msg) { barrierParallel(); @@ -120,6 +127,16 @@ void addIntParallel(int* sendBuf, int* recvBuf, int count) #endif } +void addIntParallelRoot(int* sendBuf, int* recvBuf, int count, int root) +{ +#ifdef DO_MPI + MPI_Reduce(sendBuf, recvBuf, count, MPI_INT, MPI_SUM, root, MPI_COMM_WORLD); +#else + for (int ii=0; ii