From 86016edf2d6b98c23adc0c960820f07e2f56a707 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 31 Jul 2023 15:58:49 -0600 Subject: [PATCH 01/68] starting with a merge of work done to bring CGNS to compile with C++11 --- CMakeLists.txt | 26 ++++++++++++++--------- apf/apfCGNS.cc | 52 +++++++++++++++++++++++++++++----------------- mds/mdsCGNS.cc | 14 ++++++------- test/cgns.cc | 2 +- test/testing.cmake | 2 +- 5 files changed, 58 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7648e54d3..395bc43d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,9 +16,7 @@ option(USE_XSDK_DEFAULTS "enable the XDSK v0.3.0 default configuration" NO) #requre c++11 without extensions set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSION OFF) -if(NOT ENABLE_CGNS) - set(CMAKE_CXX_STANDARD 11) -endif() +set(CMAKE_CXX_STANDARD 11) xsdk_begin_package() bob_begin_package() @@ -27,8 +25,7 @@ if(USE_XSDK_DEFAULTS) xsdk_compiler_flags() endif() -# require c++14 -option(ENABLE_CGNS "Enable the CGNS reader: requires c++14 extensions" OFF) +option(ENABLE_CGNS "Enable the CGNS reader" OFF) message(STATUS "ENABLE_CGNS: ${ENABLE_CGNS}") # Set some default compiler flags that should always be used @@ -37,10 +34,7 @@ if(NOT USE_XSDK_DEFAULTS) bob_begin_cxx_flags() bob_end_cxx_flags() set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}") - if(ENABLE_CGNS) #takes precedence over SCOREC_ENABLE_CXX11 - message(STATUS "enabling cxx14") - bob_cxx14_flags() - elseif(SCOREC_ENABLE_CXX11) + if(SCOREC_ENABLE_CXX11) bob_cxx11_flags() endif() endif() @@ -60,6 +54,8 @@ message(STATUS "IS_TESTING: ${IS_TESTING}") set(MESHES "${CMAKE_SOURCE_DIR}/pumi-meshes" CACHE STRING "Directory of test meshes") message(STATUS "MESHES: ${MESHES}") +get_filename_component(MESHES ${MESHES} ABSOLUTE) +message(STATUS "Using absolute file path MESHES: ${MESHES}") option(BUILD_EXES "Build executables" ON) message(STATUS "BUILD_EXES: ${BUILD_EXES}") @@ -142,6 +138,14 @@ if(ENABLE_CGNS) set(SCOREC_USE_HDF5_DEFAULT ${ENABLE_CGNS}) bob_public_dep(HDF5) add_definitions(-DHAVE_CGNS) +else() + set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) + bob_public_dep(CGNS) + #CGNS does not provide cmake targets :( + include_directories(SYSTEM ${CGNS_INCLUDE_DIR}) + set(SCOREC_USE_HDF5_DEFAULT ${ENABLE_CGNS}) + bob_public_dep(HDF5) + add_definitions(-DHAVE_CGNS) endif() # Include the SCOREC project packages @@ -172,8 +176,10 @@ add_library(core INTERFACE) target_link_libraries(core INTERFACE ${SCOREC_EXPORTED_TARGETS}) if(ENABLE_CGNS) target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen - target_compile_features(core INTERFACE cxx_std_14) + # target_compile_features(core INTERFACE cxx_std_14) + target_compile_features(core INTERFACE cxx_std_11) else() + target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen target_compile_features(core INTERFACE cxx_std_11) endif() scorec_export_library(core) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index 4debbb58b..d76757be0 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -11,6 +11,7 @@ #include "apfNumberingClass.h" #include "apfShape.h" #include "apfFieldData.h" +#include #include #include // @@ -269,9 +270,16 @@ void WriteTags(const CGNS &cgns, const std::vector> &orderedEnts, const std::vector> &ranges, const std::vector &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) + +typedef std::vector VecMeshEntity_t; +typedef std::pair CGRange_t; + +void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const std::vector &ranges, const 
VecMeshEntity_t &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) { - const auto writeField = [&m, &cgns](apf::Field *f, const auto &orderedEnts, const int &solIndex, const auto &inner, const auto &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { + typedef std::function *fieldData, std::vector &ddata, const int &numComponents, const int &component)> innerLambda_t; + typedef std::function &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex)> postLambda_t; + + const auto writeField = [&m, &cgns](apf::Field *f, const VecMeshEntity_t &orderedEnt, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { std::vector data; cgsize_t rmin[3]; @@ -281,7 +289,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData = f->getData(); - for (const auto &e : orderedEnts) + for (const auto &e : orderedEnt) { if (fieldData->hasEntity(e) && m->isOwned(e)) { @@ -310,7 +318,7 @@ void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const std::vector &ranges) { for (int i = 0; i < m->countFields(); ++i) { apf::Field *f = m->getField(i); @@ -335,7 +343,7 @@ void WriteFields(const CGNS &cgns, const std::vectorcountFields(); ++i) { apf::Field *f = m->getField(i); @@ -352,12 +360,12 @@ void WriteFields(const CGNS &cgns, const std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { + const postLambda_t postLambda = [&cgns](const int &solIndex, std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { if (globalSize > 0) { if (cgp_field_write_data(cgns.index, cgns.base, cgns.zone, solIndex, fieldIndex, &rmin[0], &rmax[0], @@ -366,7 +374,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData, std::vector &ddata, const int &numComponents, const int &component) { + const innerLambda_t innerLambda = [](apf::MeshEntity *elem, apf::FieldDataOf *fieldData, std::vector &ddata, const int &numComponents, const int &component) { std::vector vals(numComponents, -12345); fieldData->get(elem, vals.data()); //std::cout << numComponents << " " << component << " " << vals[0] << std::endl; @@ -390,7 +398,7 @@ void WriteFields(const CGNS &cgns, const std::vector WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) { int Cx = -1; int Cy = -1; @@ -412,7 +420,7 @@ auto WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) cgp_error_exit(); } - std::vector orderedVertices; + VecMeshEntity_t orderedVertices; cgsize_t vertexMin[3]; cgsize_t vertexMax[3]; cgsize_t contigRange = -1; @@ -574,7 +582,10 @@ CellElementReturn WriteElements(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbe void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, const int &cellCount, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap, const std::map &apf2cgns, apf::GlobalNumbering *gvn) { - const auto EdgeLoop = [&m](const auto &lambda, apf::MeshTag *edgeTag) { + typedef std::function LambdaMeshEntity_t; + typedef std::vector VecCGNSInfo_t; + + const auto EdgeLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *edgeTag) { apf::MeshIterator *edgeIter = m->begin(1); apf::MeshEntity *edge = 
nullptr; int vals[1]; @@ -591,7 +602,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(edgeIter); }; - const auto FaceLoop = [&m](const auto &lambda, apf::MeshTag *faceTag) { + const auto FaceLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *faceTag) { apf::MeshIterator *faceIter = m->begin(2); apf::MeshEntity *face = nullptr; int vals[1]; @@ -608,7 +619,8 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(faceIter); }; - const auto BCEntityAdder = [&apf2cgns, &m, &cgns, &gvn](const auto &Looper, const auto &bcGroup, int &startingLocation) { + + const auto BCEntityAdder = [&apf2cgns, &m, &cgns, &gvn](const std::function &Looper, const apf::CGNSInfo &bcGroup, int &startingLocation) { std::map> bcEntTypes; for (const auto &r : apf2cgns) bcEntTypes.insert(std::make_pair(r.first, std::vector())); @@ -715,7 +727,9 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, PCU_Get_Comm()); }; - const auto doVertexBC = [&](const auto &iter) { + typedef std::map, std::vector>::const_iterator MapCGNSInfo_t; + + const auto doVertexBC = [&](const MapCGNSInfo_t &iter) { for (const auto &p : iter->second) { std::vector bcList; @@ -751,7 +765,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doEdgeBC = [&](const auto &iter, int &startingLocation) { + const auto doEdgeBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(EdgeLoop, p, startingLocation); @@ -774,7 +788,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doFaceBC = [&](const auto &iter, int &startingLocation) { + const auto doFaceBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(FaceLoop, p, startingLocation); @@ -797,7 +811,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doCellBC = [&](const auto &iter, const int &) { + const auto doCellBC = [&](const MapCGNSInfo_t &iter, const int &) { for (const auto &p : iter->second) { std::vector bcList; @@ -1051,11 +1065,11 @@ void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap auto communicator = PCU_Get_Comm(); cgp_mpi_comm(communicator); // - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); + cgp_pio_mode(CGP_INDEPENDENT); CGNS cgns; cgns.fname = std::string(prefix); - if (cgp_open(prefix, CGNS_ENUMV(CG_MODE_WRITE), &cgns.index)) + if (cgp_open(prefix, CG_MODE_WRITE, &cgns.index)) cgp_error_exit(); { diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index acdef4aae..0d24e67ec 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -177,19 +177,19 @@ struct MeshDataGroup if (components.size() == 1) { std::cout << "Scalar Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 3) { std::cout << "Vector Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 9) { std::cout << "Matrix Group has " << components.size() << " related 
componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else @@ -265,7 +265,7 @@ void Kill(const int fid) } } -auto ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) +std::map> ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) { // Read min required as defined by consecutive range // make one based as ReadElements makes zero based @@ -389,7 +389,7 @@ void SimpleElementPartition(std::vector &numberToReadPerProc, std::vec using Pair = std::pair; using LocalElementRanges = std::vector; // one based -auto ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) +std::tuple, cgsize_t> ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) { std::vector numberToReadPerProc; std::vector startingIndex; @@ -1056,8 +1056,8 @@ apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCM int cgid = -1; auto comm = PCU_Get_Comm(); cgp_mpi_comm(comm); - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); - cgp_open(fname.c_str(), CGNS_ENUMV(CG_MODE_READ), &cgid); + cgp_pio_mode(CGP_INDEPENDENT); + cgp_open(fname.c_str(), CG_MODE_READ, &cgid); int nbases = -1; cg_nbases(cgid, &nbases); diff --git a/test/cgns.cc b/test/cgns.cc index 6f853d2e0..a3c57d777 100644 --- a/test/cgns.cc +++ b/test/cgns.cc @@ -119,7 +119,7 @@ pMesh toPumi(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) return pm; } -auto additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) +std::function additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) { // seems essential to make pm first before calling balance or reorder... auto pm = toPumi(prefix, g, mesh); diff --git a/test/testing.cmake b/test/testing.cmake index 069006a26..ed5c92888 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -560,7 +560,7 @@ mpi_test(cgns_3d_2 ${numProcs} # # 3D BCS tests # -set(numProcs 5) +set(numProcs 4) # set(CGNSDIR ${MESHES}/cgns/withBCS/3D) # From 7aa1eb91273e247487b785eec913a42526f91ce6 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 5 Aug 2023 07:56:13 -0600 Subject: [PATCH 02/68] baby step of copying phGeomBC.cc to phCGNSgbc.cc and making the mods to compute a flat connectivity array transposed to CGNS needs and the same transpose plus reduction from volume connectivity to surface connectivity for boundary elements. Compiles but not tested as we still need to modify the actual writing function in this file to open and write a CGNS file. 
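In outline, each getter becomes the flattening sketched below (a minimal sketch with illustrative types, using a plain vector in place of Output's ragged ien arrays, not the committed code). The CGNS ordering runs element-major with the vertex index fastest, the transpose of the PHASTA layout, and for boundary blocks nvert is the face vertex count (nBoundaryFaceEdges) rather than the full element vertex count PHASTA wrote:

#include <cassert>
#include <vector>

// Flatten a zero-based [nelem][nvert] connectivity table into the single
// flat, one-based (Fortran-style), element-major array CGNS expects.
std::vector<int> flattenForCGNS(const std::vector<std::vector<int> > &ien,
                                int nelem, int nvert)
{
  std::vector<int> c(static_cast<size_t>(nelem) * nvert);
  size_t i = 0;
  for (int elem = 0; elem < nelem; ++elem)
    for (int vert = 0; vert < nvert; ++vert)
      c[i++] = ien[elem][vert] + 1; // 0-based index in, 1-based id out
  assert(i == c.size());
  return c;
}
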
Further, nothing done yet for parallel with regard to global numbering --- phasta/CMakeLists.txt | 1 + phasta/phCGNSgbc.cc | 173 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 phasta/phCGNSgbc.cc diff --git a/phasta/CMakeLists.txt b/phasta/CMakeLists.txt index 0a785e268..4e5ff54ab 100644 --- a/phasta/CMakeLists.txt +++ b/phasta/CMakeLists.txt @@ -6,6 +6,7 @@ set(SOURCES phOutput.cc phLinks.cc phGeomBC.cc + phCGNSgbc.cc phBlock.cc phAdapt.cc phRestart.cc diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc new file mode 100644 index 000000000..d455096d2 --- /dev/null +++ b/phasta/phCGNSgbc.cc @@ -0,0 +1,173 @@ +#include +#include "phOutput.h" +#include "phIO.h" +#include "phiotimer.h" +#include +#include +#include +#include + +namespace ph { + +// renamed, retained but not yet updated +static std::string buildCGNSgbcFileName(std::string timestep_or_dat) +{ + std::stringstream ss; + int rank = PCU_Comm_Self() + 1; + ss << "geombc." << timestep_or_dat << "." << rank; + return ss.str(); +} + +enum { + MAX_PARAMS = 12 +}; + +// renamed, update is only a transpose to match CNGS. Parallel will require mapping here or later to global numbering +void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +{ + int nelem = o.blocks.interior.nElements[block]; + int nvert = o.blocks.interior.keys[block].nElementVertices; + c.setSize(nelem * nvert); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ien[block][elem][vert] + 1; /* FORTRAN indexing */ + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +//renamed, update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHAST wanted full volume +void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +{ + int nelem = o.blocks.boundary.nElements[block]; +// CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; + int nvert = o.blocks.boundary.keys[block].nBoundaryFaceEdges; + c.setSize(nelem * nvert); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ienb[block][elem][vert] + 1; + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +void getInterfaceConnectivityCGNS // not extended yet other than transpose +( + Output& o, + int block, + apf::DynamicArray& c +) +{ + int nelem = o.blocks.interface.nElements[block]; + int nvert0 = o.blocks.interface.keys[block].nElementVertices; + int nvert1 = o.blocks.interface.keys[block].nElementVertices1; + c.setSize(nelem * (nvert0 + nvert1)); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert0; ++vert) + c[i++] = o.arrays.ienif0[block][elem][vert] + 1; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert1; ++vert) + c[i++] = o.arrays.ienif1[block][elem][vert] + 1; + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +// renamed but not updated yet +void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) +{ + int nelem = o.blocks.boundary.nElements[block]; + codes.setSize(nelem * 2); + size_t i = 0; + for (int j = 0; j < 2; ++j) + for (int elem = 0; elem < nelem; ++elem) + codes[i++] = o.arrays.ibcb[block][elem][j]; + PCU_ALWAYS_ASSERT(i == codes.getSize()); +} + +// renamed and calling the renamed functions above with output writes commented as they are PHASTA file style +void writeBlocksCGNS(FILE* f, Output& o) +{ + apf::DynamicArray 
c; + int params[MAX_PARAMS]; + for (int i = 0; i < o.blocks.interior.getSize(); ++i) { + BlockKey& k = o.blocks.interior.keys[i]; + std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); + params[0] = o.blocks.interior.nElements[i]; +// fillBlockKeyParams(params, k); + getInteriorConnectivityCGNS(o, i, c); +// ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 7, params); + } + for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { + BlockKey& k = o.blocks.boundary.keys[i]; + std::string phrase = getBlockKeyPhrase(k, "connectivity boundary "); + params[0] = o.blocks.boundary.nElements[i]; +// fillBlockKeyParams(params, k); + getBoundaryConnectivityCGNS(o, i, c); +// ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 8, params); +// this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on + phrase = getBlockKeyPhrase(k, "nbc codes "); + apf::DynamicArray codes; + getNaturalBCCodesCGNS(o, i, codes); +// ph_write_ints(f, phrase.c_str(), &codes[0], codes.getSize(), 8, params); + } + +} + + + +// retaining in case it is useful but only renamed at this point +void writeCGNSgbc(Output& o, std::string path, int timestep) +{ + double t0 = PCU_Time(); + apf::Mesh* m = o.mesh; + std::stringstream tss; + std::string timestep_or_dat; + if (! timestep) + timestep_or_dat = "dat"; + else { + tss << timestep; + timestep_or_dat = tss.str(); + } + path += buildCGNSgbcFileName(timestep_or_dat); + phastaio_setfile(GEOMBC_WRITE); + FILE* f = o.openfile_write(o, path.c_str()); + if (!f) { + lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); + abort(); + } + ph_write_preamble(f); + int params[MAX_PARAMS]; +/* all of these strings are looked for by the other programs + reading this format, so don't fix spelling errors or + other silliness, it has already been set in stone */ +/* + writeInt(f, "number of nodes", m->count(0)); + writeInt(f, "number of modes", o.nOverlapNodes); + writeInt(f, "number of shapefunctions soved on processor", 0); + writeInt(f, "number of global modes", 0); + writeInt(f, "number of interior elements", m->count(m->getDimension())); + writeInt(f, "number of boundary elements", o.nBoundaryElements); + writeInt(f, "maximum number of element nodes", o.nMaxElementNodes); + writeInt(f, "number of interior tpblocks", o.blocks.interior.getSize()); + writeInt(f, "number of boundary tpblocks", o.blocks.boundary.getSize()); + writeInt(f, "number of nodes with Dirichlet BCs", o.nEssentialBCNodes); + + params[0] = m->count(0); + params[1] = 3; + ph_write_doubles(f, "co-ordinates", o.arrays.coordinates, + params[0] * params[1], 2, params); + writeInt(f, "number of processors", PCU_Comm_Peers()); + writeInt(f, "size of ilwork array", o.nlwork); + params[0] = m->count(0); + writeInts(f, " mode number map from partition to global", + o.arrays.globalNodeNumbers, m->count(0)); + writeBlocksCGNS(f, o); + writeInts(f, "bc mapping array", o.arrays.nbc, m->count(0)); + writeInts(f, "bc codes array", o.arrays.ibc, o.nEssentialBCNodes); + apf::DynamicArray bc; + PHASTAIO_CLOSETIME(fclose(f);) + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"geombc file written in %f seconds\n", t1 - t0); +*/ +} + +} From 07e45c4e4fb04f1799a78aacb2ee045be27f1184 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sat, 5 Aug 2023 15:27:08 -0600 Subject: [PATCH 03/68] compiles with code to generate PETSc-style-global-node-number =ncorp[on-rank-node-number] --- phasta/phCGNSgbc.cc | 173 +++++++++++++++++++++++++++++++++++++++++++- phasta/phOutput.h | 2 + 2 files changed, 174 insertions(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index d455096d2..56b420ebe 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -6,9 +6,178 @@ #include #include #include +#include +#include +typedef int lcorp_t; +#define NCORP_MPI_T MPI_INTEGER +typedef long long int gcorp_t; namespace ph { + +static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); +static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); + + +void gen_ncorp(Output& o) +{ + apf::Mesh* m = o.mesh; + int part; + int num_parts; + int i; + lcorp_t nilwork = o.nlwork; + int num_nodes=m->count(0); + o.arrays.ncorp = new gcorp_t[num_nodes]; + lcorp_t owned; + lcorp_t local; + lcorp_t* owner_counts; + gcorp_t local_start_id; + gcorp_t gid; + + MPI_Comm_rank(MPI_COMM_WORLD, &part); + MPI_Comm_size(MPI_COMM_WORLD, &num_parts); + + memset(o.arrays.ncorp, 0, sizeof(gcorp_t)*(num_nodes)); + owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + // conpar.iownnodes = owned+local; +#ifdef PRINT_EVERYTHING + printf("%d: %d local only nodes\n", part, local); + printf("%d: %d owned nodes\n", part, owned); +#endif + assert( owned <= num_nodes ); + assert( owned+local <= num_nodes ); + + owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); + memset(owner_counts, 0, sizeof(lcorp_t)*num_parts); + owner_counts[part] = owned+local; +#ifdef PRINT_EVERYTHING + for(i=0;i=0); + for(i=0;i=0); + +// global so needs long long + gid++; + continue; + } + if(o.arrays.ncorp[i] == 0) + { + o.arrays.ncorp[i] = gid; + assert(o.arrays.ncorp[i]>=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == -1) + { + o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves + } + + } + //char code[] = "out"; + //int ione = 1; + +} + +static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +{ + int i; + lcorp_t num_local = 0; + for(i=0;i 1)); + } + return(num_local); +} +static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +{ + int numtask = ilwork[0]; + int itkbeg = 0; //task offset + int owned = 0; + int i,j,k; + for(i=0;i= 0 && iacc <= 1); + int iother = ilwork[itkbeg+3]-1; //other rank (see ctypes.f for off by one) + int numseg = ilwork[itkbeg+4]; //number of segments + for(j=0;j Date: Sat, 5 Aug 2023 15:40:15 -0600 Subject: [PATCH 04/68] possibly correct (compiles a least) of connectivity data to global numbering --- phasta/phCGNSgbc.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 56b420ebe..2bc6b8e25 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -200,7 +200,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ien[block][elem][vert] + 1; /* FORTRAN indexing */ + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // plus 1 built in + 1; /* FORTRAN indexing */ PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -214,7 +214,7 @@ void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& 
c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ienb[block][elem][vert] + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]]; // plus 1 built in + 1; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -232,10 +232,10 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert0; ++vert) - c[i++] = o.arrays.ienif0[block][elem][vert] + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]]; // plus 1 built in + 1; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert1; ++vert) - c[i++] = o.arrays.ienif1[block][elem][vert] + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]]; // plus 1 built in + 1; PCU_ALWAYS_ASSERT(i == c.getSize()); } From 81a707b8c64a339abe476c9a060f9928862a4f2a Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 5 Aug 2023 16:56:25 -0600 Subject: [PATCH 05/68] Cleaned up a few bugs, added useful numbers to the o data structure for doing the owned-node data condensation (coordinates and later solution), and wrote a potential coordinate condensation (though it might be better to change it to what the PETSc CGNS writer does...looked at that too late to copy in first pass --- phasta/phCGNSgbc.cc | 34 ++++++++++++++++++++++++++++------ phasta/phOutput.h | 5 ++++- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 2bc6b8e25..8ca313683 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -19,7 +19,7 @@ static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_n static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); -void gen_ncorp(Output& o) +void gen_ncorp(Output& o ) { apf::Mesh* m = o.mesh; int part; @@ -40,7 +40,7 @@ void gen_ncorp(Output& o) memset(o.arrays.ncorp, 0, sizeof(gcorp_t)*(num_nodes)); owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); - // conpar.iownnodes = owned+local; + o.iownnodes = owned+local; #ifdef PRINT_EVERYTHING printf("%d: %d local only nodes\n", part, local); printf("%d: %d owned nodes\n", part, owned); @@ -74,6 +74,7 @@ void gen_ncorp(Output& o) local_start_id += owner_counts[i]; } local_start_id++; //Fortran numbering + o.local_start_id = local_start_id; #ifdef PRINT_EVERYTHING printf("%d: %d\n", part, local_start_id); #endif @@ -200,7 +201,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // plus 1 built in + 1; /* FORTRAN indexing */ + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]-1]; // input is 0-based, out is 1-based do drop the +1 PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -214,7 +215,7 @@ void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]]; // plus 1 built in + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -232,10 +233,10 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 
0; vert < nvert0; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]]; // plus 1 built in + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]-1]; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert1; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]]; // plus 1 built in + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -291,6 +292,26 @@ void writeCGNSgbc(Output& o, std::string path, int timestep) std::string timestep_or_dat; // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); +// o carries +// o.arrays.ncorp[on-rank-node-number(0-based)] => PETSc global node number (1-based) +// o.iownnodes => nodes owned by this rank +// o.local_start_id => this rank's first node number (1-based and also which must be a long long int) + + +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. + int num_nodes=m->count(0); + int icount=0; + gcorp_t gnod; + double* x = new double[o.iownnodes * 3]; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write + for (int j = 0; j < 3; ++j) + x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + } + if (! timestep) timestep_or_dat = "dat"; @@ -307,6 +328,7 @@ void writeCGNSgbc(Output& o, std::string path, int timestep) } ph_write_preamble(f); int params[MAX_PARAMS]; + /* all of these strings are looked for by the other programs reading this format, so don't fix spelling errors or other silliness, it has already been set in stone */ diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 3a08ad666..ce95c8f07 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -136,7 +136,7 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 int* ifather; /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; -/* an array of integers of size nfather that has nsons in each entry */ +/* an array that maps on-rank-node-number (input) to PETSc global-node-number */ long long int* ncorp; }; @@ -155,6 +155,8 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; + long long int local_start_id; /* this rank's first global node number (1 based) */ + int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ int nlworkl; /* size of arrays.ilworkl */ @@ -170,6 +172,7 @@ struct Output void generateOutput(Input& in, BCs& bcs, apf::Mesh* mesh, Output& o); void writeGeomBC(Output& o, std::string path, int timestep_or_dat = 0); +void writeCGNSgbc(Output& o, std::string path, int timestep_or_dat = 0); } From b920292e20d7b0c3c3648ba7635c12a0bc97581b Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sat, 5 Aug 2023 23:04:52 -0600 Subject: [PATCH 06/68] Added input flag (writeCGNSFiles 1 in adapt.inp) to call writeCGNS though it does not yet actually write a CGNS file in the function writeCGNS --- phasta/phCGNSgbc.cc | 29 +++++++++++------------------ phasta/phCook.cc | 2 ++ phasta/phInput.cc | 2 ++ phasta/phInput.h | 2 ++ phasta/phOutput.h | 2 +- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 8ca313683..bd779e9ff 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -180,7 +180,7 @@ static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_n // renamed, retained but not yet updated -static std::string buildCGNSgbcFileName(std::string timestep_or_dat) +static std::string buildCGNSFileName(std::string timestep_or_dat) { std::stringstream ss; int rank = PCU_Comm_Self() + 1; @@ -283,13 +283,12 @@ void writeBlocksCGNS(FILE* f, Output& o) -// retaining in case it is useful but only renamed at this point -void writeCGNSgbc(Output& o, std::string path, int timestep) +// WIP +void writeCGNS(Output& o, std::string path) { double t0 = PCU_Time(); apf::Mesh* m = o.mesh; std::stringstream tss; - std::string timestep_or_dat; // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); // o carries @@ -313,20 +312,14 @@ void writeCGNSgbc(Output& o, std::string path, int timestep) } - if (! timestep) - timestep_or_dat = "dat"; - else { - tss << timestep; - timestep_or_dat = tss.str(); - } - path += buildCGNSgbcFileName(timestep_or_dat); - phastaio_setfile(GEOMBC_WRITE); - FILE* f = o.openfile_write(o, path.c_str()); - if (!f) { - lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); - abort(); - } - ph_write_preamble(f); +// path += buildCGNSFileName(timestep_or_dat); +// phastaio_setfile(GEOMBC_WRITE); +// FILE* f = o.openfile_write(o, path.c_str()); +// if (!f) { +// lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); +// abort(); +// } +// ph_write_preamble(f); int params[MAX_PARAMS]; /* all of these strings are looked for by the other programs diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 983570d43..5b67b8405 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -224,6 +224,8 @@ namespace ph { out.openfile_write = fn; } ph::writeGeomBC(out, subDirPath); //write geombc + if ( in.writeCGNSFiles ) + ph::writeCGNS(out, subDirPath); //write CGNS if(!PCU_Comm_Self()) ph::writeAuxiliaryFiles(path, in.timeStepNumber); m->verify(); diff --git a/phasta/phInput.cc b/phasta/phInput.cc index ffc9989c2..65118d677 100644 --- a/phasta/phInput.cc +++ b/phasta/phInput.cc @@ -60,6 +60,7 @@ static void setDefaults(Input& in) in.axisymmetry = 0; in.parmaLoops = 3; //a magical value in.parmaVerbosity = 1; //fairly quiet + in.writeCGNSFiles = 0; // write CGNS Files in.writeGeomBCFiles = 0; // write additional geombc file for vis in streaming in.writeRestartFiles = 0; // write additional restart file for vis in streaming in.writeVTK = 0; @@ -153,6 +154,7 @@ static void formMaps(Input& in, StringMap& stringMap, IntMap& intMap, DblMap& db intMap["parmaLoops"] = &in.parmaLoops; intMap["parmaVerbosity"] = &in.parmaVerbosity; intMap["writeVTK"] = &in.writeVTK; + intMap["writeCGNSFiles"] = &in.writeCGNSFiles; intMap["writeGeomBCFiles"] = &in.writeGeomBCFiles; intMap["writeRestartFiles"] = &in.writeRestartFiles; intMap["ramdisk"] = &in.ramdisk; diff --git a/phasta/phInput.h b/phasta/phInput.h index a6bf88c90..28123f805 100644 --- 
a/phasta/phInput.h +++ b/phasta/phInput.h @@ -147,6 +147,8 @@ class Input /** \brief write the geombc file during in-memory data transfer between phasta and chef. */ int writeGeomBCFiles; + /* \brief write CGNS files for pre-processing */ + int writeCGNSFiles; /** \brief write the restart file during in-memory data transfer between phasta and chef. */ int writeRestartFiles; diff --git a/phasta/phOutput.h b/phasta/phOutput.h index ce95c8f07..21120ee3d 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -172,7 +172,7 @@ struct Output void generateOutput(Input& in, BCs& bcs, apf::Mesh* mesh, Output& o); void writeGeomBC(Output& o, std::string path, int timestep_or_dat = 0); -void writeCGNSgbc(Output& o, std::string path, int timestep_or_dat = 0); +void writeCGNS(Output& o, std::string path); } From 885d5779b0f184fbf6cb84a37e756008ef9affe6 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 10:48:50 -0600 Subject: [PATCH 07/68] opened CGNS file and computed global counts it needs --- phasta/phCGNSgbc.cc | 41 +++++++++++++++++++++++++++++++++++++++-- phasta/phOutput.h | 2 ++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index bd779e9ff..d3130504b 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -8,6 +8,12 @@ #include #include #include +#ifdef HAVE_CGNS +// +#include +#include +// +#endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER typedef long long int gcorp_t; @@ -75,6 +81,12 @@ void gen_ncorp(Output& o ) } local_start_id++; //Fortran numbering o.local_start_id = local_start_id; + +// also get the global number of nodes + o.numGlobalNodes=0; + for(i=0;i 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); + // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); // o carries // o.arrays.ncorp[on-rank-node-number(0-based)] => PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) +// o.numGlobalNodes + int numel=m->count(m->getDimension()); + PCU_Add_Ints(&numel,1); + o.numGlobalVolumeElements = numel; // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 21120ee3d..6f72cc9c4 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -156,6 +156,8 @@ struct Output int nEssentialBCNodes; int nOverlapEdges; long long int local_start_id; /* this rank's first global node number (1 based) */ + long long int numGlobalNodes; + long long int numGlobalVolumeElements; int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From aa48cae12de5b9f85b7fbbc2a2eb865d21a566d4 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sun, 6 Aug 2023 11:56:06 -0600 Subject: [PATCH 08/68] writing coordinates compiles --- phasta/phCGNSgbc.cc | 56 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index d3130504b..284e18faa 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -301,7 +301,6 @@ void writeCGNS(Output& o, std::string path) { double t0 = PCU_Time(); apf::Mesh* m = o.mesh; - int cgid = -1; std::string timestep_or_dat; // if (! timestep) @@ -311,31 +310,67 @@ void writeCGNS(Output& o, std::string path) // timestep_or_dat = tss.str(); // } // cgp_mpi_comm(); -// cgp_open('chefOut.cgns', CG_MODE_WRITE, &cgid); +// cgp_open('chefOut.cgns', CG_MODE_WRITE, &F); //static std::string buildCGNSFileName(std::string timestep_or_dat) // path += buildCGNSFileName(timestep_or_dat); static char *outfile = "chefOut.cgns"; + int F, B, Z, E, S, Fs, A, Cx, Cy, Cz; + cgsize_t sizes[3],*e, start, end, ncells; +// ^^^^^^ need to be sure this is long since using PCU_Add_Long below even when not needed // if (!PCU_Comm_Self()) - cgp_mpi_comm(MPI_COMM_WORLD); - cgp_open(outfile, CG_MODE_READ, &cgid); -//FAILED cgp_open('chefO.cgns', CG_MODE_READ, &cgid); -// PetscCheck(cgid > 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); +//FAILED cgp_open('chefO.cgns', CG_MODE_READ, &F); +// PetscCheck(F > 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering - gen_ncorp( o ); + gen_ncorp( o ); // o carries // o.arrays.ncorp[on-rank-node-number(0-based)] => PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) // o.numGlobalNodes - int numel=m->count(m->getDimension()); - PCU_Add_Ints(&numel,1); - o.numGlobalVolumeElements = numel; + ncells=m->count(m->getDimension()); + ncells=PCU_Add_Long(ncells); +// may not need o.numGlobalVolumeElements = ncells; + + sizes[0]=o.numGlobalNodes; + sizes[1]=ncells; + sizes[0]; + cgp_mpi_comm(MPI_COMM_WORLD); + if ( cgp_open(outfile, CG_MODE_READ, &F) || + cg_base_write(F, "Base", 3, 3, &B) || + cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) + cgp_error_exit(); + /* create data nodes for coordinates */ + + if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) + cgp_error_exit(); // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. 
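// Worked example of the ownership window used just below (illustrative
// values, not from a real mesh): with ncorp = {7, 3, 8, 9},
// local_start_id = 7 and iownnodes = 3, the owned range is [7, 9], so
// local nodes 0, 2 and 3 are packed into x and written by this rank,
// while local node 1 (global node 3) is written by the rank that owns it.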
int num_nodes=m->count(0); //V2 + gcorp_t gnod; + start=o.local_start_id; + end=start+o.iownnodes-1; + double* x = new double[o.iownnodes]; + for (int j = 0; j < 3; ++j) { + int icount=0; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= start && gnod <= end) { // coordinate to write + x[icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + if(j==0) cgp_coord_write_data(F, B, Z, Cx, &start, &end, x); + if(j==1) cgp_coord_write_data(F, B, Z, Cy, &start, &end, x); + if(j==2) cgp_coord_write_data(F, B, Z, Cz, &start, &end, x); + } + } +//V1 that KEJ wrote mothballed for V2 that mimics PETSc +/* int icount=0; gcorp_t gnod; double* x = new double[o.iownnodes * 3]; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write for (int j = 0; j < 3; ++j) x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } } */ From ec7e650e854755bf2e972bc28cb27136ae3602f1 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 12:59:09 -0600 Subject: [PATCH 09/68] in pretty far over my C++ skill level at this point....the CGNS library seems to think cgsize_t is int which will never fly for our meshes with global numbering so probably need to find a way to tell Spack or other I want long long int there...I am also uncertain if SCOREC convention to call this gcorp_t is going to play nice with CGNS calling it cgsize_t but pushing this up to get help --- phasta/phCGNSgbc.cc | 97 +++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 57 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 284e18faa..bf08ad78e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -206,30 +206,30 @@ enum { }; // renamed, update is only a transpose to match CNGS. Parallel will require mapping here or later to global numbering -void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +void getInteriorConnectivityCGNS(Output& o, int block, gcorp_t* c) { int nelem = o.blocks.interior.nElements[block]; int nvert = o.blocks.interior.keys[block].nElementVertices; // c.setSize(nelem * nvert); size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]-1]; // input is 0-based, out is 1-based do drop the +1 PCU_ALWAYS_ASSERT(i == nelem*nvert); } //renamed, update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHAST wanted full volume -void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +void getBoundaryConnectivityCGNS(Output& o, int block, gcorp_t* c) { int nelem = o.blocks.boundary.nElements[block]; // CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; int nvert = o.blocks.boundary.keys[block].nBoundaryFaceEdges; //c.setSize(nelem * nvert); size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == nelem*nvert); } void getInterfaceConnectivityCGNS // not extended yet other than transpose ( Output& o, int block, apf::DynamicArray& c ) { int nelem = o.blocks.interface.nElements[block]; int nvert0 = o.blocks.interface.keys[block].nElementVertices; int nvert1 = o.blocks.interface.keys[block].nElementVertices1; c.setSize(nelem * (nvert0 + nvert1)); size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert0; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]-1]; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert1; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -266,32 +266,56 @@ void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) } // renamed and calling the renamed functions above with output writes commented as they are PHASTA file style -void 
writeBlocksCGNS(int F,int B,int Z, Output& o) { - apf::DynamicArray c; int params[MAX_PARAMS]; + + int E; + gcorp_t e_owned, e_start,e_end; + + /* create data node for elements */ + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + for (int i = 0; i < o.blocks.interior.getSize(); ++i) { + BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); params[0] = o.blocks.interior.nElements[i]; // fillBlockKeyParams(params, k); - getInteriorConnectivityCGNS(o, i, c); -// ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 7, params); + e_owned = o.blocks.interior.nElements[i]; + int nvert = o.blocks.interior.keys[i].nElementVertices; + gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, i, &e); + /* create data node for elements */ + // will start testing with single topology, all hex so allow hardcode for pass 1 + //nvert can case switch this or enumv like PETSc + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + e_end=e_start+e_owned -1; + /* write the element connectivity in parallel */ + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + free(e); } for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity boundary "); params[0] = o.blocks.boundary.nElements[i]; + e_owned = params[0]; + int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; + gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); // fillBlockKeyParams(params, k); - getBoundaryConnectivityCGNS(o, i, c); + getBoundaryConnectivityCGNS(o, i, &e); // ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 8, params); // this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on phrase = getBlockKeyPhrase(k, "nbc codes "); apf::DynamicArray codes; getNaturalBCCodesCGNS(o, i, codes); + free(e); // ph_write_ints(f, phrase.c_str(), &codes[0], codes.getSize(), 8, params); } - } @@ -331,7 +355,7 @@ void writeCGNS(Output& o, std::string path) // o.numGlobalNodes ncells=m->count(m->getDimension()); ncells=PCU_Add_Long(ncells); -// may not need o.numGlobalVolumeElements = ncells; + o.numGlobalVolumeElements = ncells; sizes[0]=o.numGlobalNodes; sizes[1]=ncells; @@ -384,49 +408,8 @@ void writeCGNS(Output& o, std::string path) } */ - -// path += buildCGNSFileName(timestep_or_dat); -// phastaio_setfile(GEOMBC_WRITE); -// FILE* f = o.openfile_write(o, path.c_str()); -// if (!f) { -// lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); -// abort(); -// } -// ph_write_preamble(f); - int params[MAX_PARAMS]; - -/* all of these strings are looked for by the other programs - reading this format, so don't fix spelling errors or - other silliness, it has already been set in stone */ -/* - writeInt(f, "number of nodes", m->count(0)); - writeInt(f, "number of modes", o.nOverlapNodes); - writeInt(f, "number of shapefunctions soved on processor", 0); - writeInt(f, "number of global modes", 0); - writeInt(f, "number of interior elements", m->count(m->getDimension())); - writeInt(f, "number of boundary elements", o.nBoundaryElements); - writeInt(f, "maximum number of element nodes", o.nMaxElementNodes); - writeInt(f, "number of interior tpblocks", 
o.blocks.interior.getSize()); - writeInt(f, "number of boundary tpblocks", o.blocks.boundary.getSize()); - writeInt(f, "number of nodes with Dirichlet BCs", o.nEssentialBCNodes); - - params[0] = m->count(0); - params[1] = 3; - ph_write_doubles(f, "co-ordinates", o.arrays.coordinates, - params[0] * params[1], 2, params); - writeInt(f, "number of processors", PCU_Comm_Peers()); - writeInt(f, "size of ilwork array", o.nlwork); - params[0] = m->count(0); - writeInts(f, " mode number map from partition to global", - o.arrays.globalNodeNumbers, m->count(0)); - writeBlocksCGNS(f, o); - writeInts(f, "bc mapping array", o.arrays.nbc, m->count(0)); - writeInts(f, "bc codes array", o.arrays.ibc, o.nEssentialBCNodes); - apf::DynamicArray bc; - PHASTAIO_CLOSETIME(fclose(f);) - double t1 = PCU_Time(); - if (!PCU_Comm_Self()) - lion_oprint(1,"geombc file written in %f seconds\n", t1 - t0); -*/ + writeBlocksCGNS(F,B,Z, o); +// if (!PCU_Comm_Self()) +// lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } From 3a934bb8006b153c6fa829e875dc6f5b27e4b0a5 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 14:07:49 -0600 Subject: [PATCH 10/68] compiling version that, if I made no errors, will write a CGNS coordinates and connectivity file for hexes (hard coded at this point --- apf/apfCGNS.cc | 2 +- mds/mdsCGNS.cc | 2 +- phasta/phCGNSgbc.cc | 35 +++++++++++++++++------------------ phasta/phOutput.h | 8 ++++---- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index d76757be0..93a85d12d 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -1023,7 +1023,7 @@ void Write2DEdges(CGNS cgns, apf::Mesh *m, const Count &edgeCount, const Count & // Todo split this out into a list of calls to local functions to show process/work flow void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); +// static_assert(std::is_same::value, "cgsize_t not compiled as int"); const auto myRank = PCU_Comm_Self(); const Count vertexCount = count(m, 0); diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index 0d24e67ec..2a591fd07 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -1051,7 +1051,7 @@ void ReadBCInfo(const int cgid, const int base, const int zone, const int nBocos apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCMap, const std::vector> &readMeshData) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); +// static_assert(std::is_same::value, "cgsize_t not compiled as int"); int cgid = -1; auto comm = PCU_Get_Comm(); diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index bf08ad78e..3b7aae558 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -16,13 +16,12 @@ #endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER -typedef long long int gcorp_t; namespace ph { -static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); -static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); +static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); void gen_ncorp(Output& o ) @@ -33,17 +32,17 @@ void gen_ncorp(Output& o ) int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = new gcorp_t[num_nodes]; + o.arrays.ncorp = new cgsize_t[num_nodes]; lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; - gcorp_t 
local_start_id; - gcorp_t gid; + cgsize_t local_start_id; + cgsize_t gid; MPI_Comm_rank(MPI_COMM_WORLD, &part); MPI_Comm_size(MPI_COMM_WORLD, &num_parts); - memset(o.arrays.ncorp, 0, sizeof(gcorp_t)*(num_nodes)); + memset(o.arrays.ncorp, 0, sizeof(cgsize_t)*(num_nodes)); owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); o.iownnodes = owned+local; @@ -127,7 +126,7 @@ void gen_ncorp(Output& o ) } -static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) { int i; lcorp_t num_local = 0; @@ -139,7 +138,7 @@ static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_n } return(num_local); } -static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) { int numtask = ilwork[0]; int itkbeg = 0; //task offset @@ -206,7 +205,7 @@ enum { }; // renamed, update is only a transpose to match CNGS. Parallel will require mapping here or later to global numbering -void getInteriorConnectivityCGNS(Output& o, int block, gcorp_t* c) +void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) { int nelem = o.blocks.interior.nElements[block]; int nvert = o.blocks.interior.keys[block].nElementVertices; @@ -219,7 +218,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, gcorp_t* c) } //renamed, update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHAST wanted full volume -void getBoundaryConnectivityCGNS(Output& o, int block, gcorp_t* c) +void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c) { int nelem = o.blocks.boundary.nElements[block]; // CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; @@ -271,7 +270,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) int params[MAX_PARAMS]; int E; - gcorp_t e_owned, e_start,e_end; + cgsize_t e_owned, e_start,e_end; /* create data node for elements */ if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) @@ -285,8 +284,8 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) // fillBlockKeyParams(params, k); e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; - gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getInteriorConnectivityCGNS(o, i, &e); + cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, i, e); /* create data node for elements */ // will start testing with single topology, all hex so allow hardcode for pass 1 //nvert can case switch this or enumv like PETSc @@ -305,9 +304,9 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) params[0] = o.blocks.boundary.nElements[i]; e_owned = params[0]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; - gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); + cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); // fillBlockKeyParams(params, k); - getBoundaryConnectivityCGNS(o, i, &e); + getBoundaryConnectivityCGNS(o, i, e); // ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 8, params); // this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on phrase = 
getBlockKeyPhrase(k, "nbc codes "); @@ -376,7 +375,7 @@ void writeCGNS(Output& o, std::string path) // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. int num_nodes=m->count(0); //V2 - gcorp_t gnod; + cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; double* x = new double[o.iownnodes]; @@ -396,7 +395,7 @@ void writeCGNS(Output& o, std::string path) //V1 that KEJ wrote mothballed for V2 that mimics PETSc /* int icount=0; - gcorp_t gnod; + cgsize_t gnod; double* x = new double[o.iownnodes * 3]; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 6f72cc9c4..444740891 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -137,7 +137,7 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; /* an array that maps on-rank-node-number (input) to PETSc global-node-number */ - long long int* ncorp; + long int* ncorp; }; @@ -155,9 +155,9 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; - long long int local_start_id; /* this rank's first global node number (1 based) */ - long long int numGlobalNodes; - long long int numGlobalVolumeElements; + long int local_start_id; /* this rank's first global node number (1 based) */ + long int numGlobalNodes; + long int numGlobalVolumeElements; int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From dac4caa6c5582a28c85b9cc1c70f2daa85575eab Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 18:05:35 -0600 Subject: [PATCH 11/68] runs through chef and produces a chefOut.cgns but it crashes paraview so probably more bugs to find. 
--- phasta/phCGNSgbc.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 3b7aae558..0b9a7d9f7 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -271,11 +271,9 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) int E; cgsize_t e_owned, e_start,e_end; +// int num_parts; +// MPI_Comm_size(MPI_COMM_WORLD, &num_parts); - /* create data node for elements */ - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) - cgp_error_exit(); - for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; @@ -291,10 +289,13 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) //nvert can case switch this or enumv like PETSc if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) cgp_error_exit(); + e_start=0; +// if(num_parts !=1) MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - e_end=e_start+e_owned -1; + + e_end=e_start+e_owned; /* write the element connectivity in parallel */ - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e)) cgp_error_exit(); free(e); } @@ -360,7 +361,7 @@ void writeCGNS(Output& o, std::string path) sizes[1]=ncells; sizes[0]; cgp_mpi_comm(MPI_COMM_WORLD); - if ( cgp_open(outfile, CG_MODE_READ, &F) || + if ( cgp_open(outfile, CG_MODE_WRITE, &F) || cg_base_write(F, "Base", 3, 3, &B) || cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) cgp_error_exit(); From 742b226b834120a92b3f14c8d486f0cb4afbde51 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 21:53:36 -0600 Subject: [PATCH 12/68] first big bug squashed but there are more --- phasta/phCGNSgbc.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 0b9a7d9f7..791bbd341 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -388,10 +388,10 @@ void writeCGNS(Output& o, std::string path) x[icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } - if(j==0) cgp_coord_write_data(F, B, Z, Cx, &start, &end, x); - if(j==1) cgp_coord_write_data(F, B, Z, Cy, &start, &end, x); - if(j==2) cgp_coord_write_data(F, B, Z, Cz, &start, &end, x); } + if(j==0) cgp_coord_write_data(F, B, Z, Cx, &start, &end, x); + if(j==1) cgp_coord_write_data(F, B, Z, Cy, &start, &end, x); + if(j==2) cgp_coord_write_data(F, B, Z, Cz, &start, &end, x); } //V1 that KEJ wrote mothballed for V2 that mimics PETSc /* From 3959fad7f0700ba252e950337e1ba952ca95761d Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Date: Sun, 6 Aug 2023 23:14:45 -0600
Subject: [PATCH 13/68] fixed the usual +/-1 C and Fortran indexing bug

---
 phasta/phCGNSgbc.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc
index 791bbd341..5b23d8601 100644
--- a/phasta/phCGNSgbc.cc
+++ b/phasta/phCGNSgbc.cc
@@ -213,7 +213,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c)
   size_t i = 0;
   for (int elem = 0; elem < nelem; ++elem)
     for (int vert = 0; vert < nvert; ++vert)
-      c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]-1]; // input is 0-based, out is 1-based do drop the +1
+      c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // input is 0-based, out is 1-based do drop the +1
   PCU_ALWAYS_ASSERT(i == nelem*nvert);
 }
@@ -227,7 +227,7 @@ void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c)
   size_t i = 0;
   for (int elem = 0; elem < nelem; ++elem)
     for (int vert = 0; vert < nvert; ++vert)
-      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]-1];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]];
   PCU_ALWAYS_ASSERT(i == nelem*nvert);
 }
@@ -359,7 +359,7 @@ void writeCGNS(Output& o, std::string path)
   sizes[0]=o.numGlobalNodes;
   sizes[1]=ncells;
-  sizes[0];
+  sizes[2]=0;
   cgp_mpi_comm(MPI_COMM_WORLD);
   if ( cgp_open(outfile, CG_MODE_WRITE, &F) ||
       cg_base_write(F, "Base", 3, 3, &B) ||
       cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z))
     cgp_error_exit();
@@ -409,6 +409,7 @@ void writeCGNS(Output& o, std::string path)
 */
   writeBlocksCGNS(F,B,Z, o);
+  cgp_close(F);
 // if (!PCU_Comm_Self())
 //   lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0);
 }

From 32fcbaa2861aac7362033e9e3ed1a7c225d6184e Mon Sep 17 00:00:00 2001
From: "Kenneth E. Jansen"
Date: Sun, 6 Aug 2023 23:46:23 -0600
Subject: [PATCH 14/68] fprintf confirms data for coordinates and connectivity look correct

---
 phasta/phCGNSgbc.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc
index 5b23d8601..e38b5b2b5 100644
--- a/phasta/phCGNSgbc.cc
+++ b/phasta/phCGNSgbc.cc
@@ -297,6 +297,12 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o)
   /* write the element connectivity in parallel */
   if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e))
     cgp_error_exit();
+  printf("%ld, %ld \n", e_start+1, e_end);
+  for (int ne=0; ne
Date: Mon, 7 Aug 2023 09:09:34 -0600
Subject: [PATCH 15/68] every cgpxx call is wrapped with cgp_error_exit() which I assume means none of those calls is returning an error as the code runs through

---
 phasta/phCGNSgbc.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc
index e38b5b2b5..f38c7c45b 100644
--- a/phasta/phCGNSgbc.cc
+++ b/phasta/phCGNSgbc.cc
@@ -366,7 +366,7 @@ void writeCGNS(Output& o, std::string path)
   sizes[0]=o.numGlobalNodes;
   sizes[1]=ncells;
   sizes[2]=0;
-  cgp_mpi_comm(MPI_COMM_WORLD);
+  if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit;
   if ( cgp_open(outfile, CG_MODE_WRITE, &F) ||
       cg_base_write(F, "Base", 3, 3, &B) ||
       cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z))
     cgp_error_exit();
@@ -398,9 +398,9 @@ void writeCGNS(Output& o, std::string path)
   printf("%ld, %ld \n", start, end);
   for (int ne=0; ne
Date: Mon, 7 Aug 2023 15:16:43 -0600
Subject: [PATCH 16/68] added switch/case statement to handle tets. I have not cross-checked whether it is set up for mixed meshes yet, but I am going to do that after I get boundary elements writing and parallel tested.
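One caveat on PATCH 15 above: the added line if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; ends without parentheses, so it names the handler but never calls it; the statement evaluates the function's address and discards it, and an error from cgp_mpi_comm() passes silently. The call form is presumably what was intended; a minimal sketch, assuming the parallel CGNS header pcgnslib.h and an illustrative wrapper name:

    #include <mpi.h>
    #include <pcgnslib.h>

    // Attach the communicator that all subsequent cgp_* calls will use.
    // `cgp_error_exit;` alone is a no-op expression statement; only the
    // parenthesized form below actually aborts on error.
    static void setCgnsComm(void)
    {
      if (cgp_mpi_comm(MPI_COMM_WORLD))
        cgp_error_exit();
    }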
--- phasta/phCGNSgbc.cc | 25 +++++++++++++++++++++++-- phasta/phOutput.cc | 6 ++++++ phasta/phOutput.h | 20 ++++++++++++++++---- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index f38c7c45b..e792d2193 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -287,8 +287,24 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* create data node for elements */ // will start testing with single topology, all hex so allow hardcode for pass 1 //nvert can case switch this or enumv like PETSc - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) - cgp_error_exit(); + switch(nvert){ + case 4: + if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + case 5: + if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + case 6: + if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + case 8: + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + } e_start=0; // if(num_parts !=1) MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); @@ -297,11 +313,13 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e)) cgp_error_exit(); +if(0==1){ printf("%ld, %ld \n", e_start+1, e_end); for (int ne=0; ne #include #include +#ifdef HAVE_CGNS +// +#include +#include +// +#endif namespace ph { diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 444740891..1507784cc 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -4,6 +4,13 @@ #include "phInput.h" #include "phBlock.h" #include "phBC.h" +#ifdef HAVE_CGNS +// +#include +#include +// +#endif + namespace apf { class Mesh; @@ -137,7 +144,10 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; /* an array that maps on-rank-node-number (input) to PETSc global-node-number */ - long int* ncorp; +// worked but long int* ncorp; +#ifdef HAVE_CGNS + cgsize_t* ncorp; +#endif }; @@ -155,9 +165,11 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; - long int local_start_id; /* this rank's first global node number (1 based) */ - long int numGlobalNodes; - long int numGlobalVolumeElements; +#ifdef HAVE_CGNS + cgsize_t local_start_id; /* this rank's first global node number (1 based) */ + cgsize_t numGlobalNodes; + cgsize_t numGlobalVolumeElements; +#endif int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From ebf92d8c255c6b574d60bfe7885b8a9c64363c86 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Mon, 7 Aug 2023 19:02:06 -0600 Subject: [PATCH 17/68] seems like boundary elements are running through except the mixed wedge tet case...not viewable in PV so far so others will have to test --- phasta/phCGNSgbc.cc | 52 ++++++++++++++++++++++++++++----------------- pumi-meshes | 2 +- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index e792d2193..3749932f7 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -268,51 +268,48 @@ void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) void writeBlocksCGNS(int F,int B,int Z, Output& o) { int params[MAX_PARAMS]; - int E; cgsize_t e_owned, e_start,e_end; -// int num_parts; -// MPI_Comm_size(MPI_COMM_WORLD, &num_parts); - + cgsize_t e_startg,e_endg; + cgsize_t e_written=0; for (int i = 0; i < o.blocks.interior.getSize(); ++i) { - BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); params[0] = o.blocks.interior.nElements[i]; -// fillBlockKeyParams(params, k); e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getInteriorConnectivityCGNS(o, i, e); /* create data node for elements */ - // will start testing with single topology, all hex so allow hardcode for pass 1 - //nvert can case switch this or enumv like PETSc + e_startg=1+e_written; // start for the elements of this topology + e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology switch(nvert){ case 4: - if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 5: - if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 6: - if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 8: - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) +// if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; } e_start=0; -// if(num_parts !=1) MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - - e_end=e_start+e_owned; + e_start+=1+e_written; // my ranks global element start 1-based + e_end=e_start+e_owned-1; // my ranks global element stop 1-based /* write the element connectivity in parallel */ - if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e)) + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); + e_written=e_endg; // update count of elements written if(0==1){ printf("%ld, %ld \n", e_start+1, e_end); for (int ne=0; ne codes; getNaturalBCCodesCGNS(o, i, codes); free(e); -// ph_write_ints(f, phrase.c_str(), &codes[0], codes.getSize(), 8, params); } } diff --git a/pumi-meshes b/pumi-meshes index c00ba9c16..0cd77590d 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit c00ba9c16cacbb361ee538c03a3ec694ddb989f2 +Subproject commit 0cd77590d748b9cb5e190ecd4a33126d9823bdbb From d5022925c4b0fbcdf487ebeddfa963091a489494 Mon Sep 17 00:00:00 2001 From: "Kenneth 
E. Jansen" Date: Mon, 7 Aug 2023 21:45:12 -0600 Subject: [PATCH 18/68] srfID extracted and currently being printed to the screen. Output looks reasonable. --- phasta/phCGNSgbc.cc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 3749932f7..16a4a480a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -253,15 +253,14 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose } // renamed but not updated yet -void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) +void getNaturalBCCodesCGNS(Output& o, int block, int* codes) { int nelem = o.blocks.boundary.nElements[block]; - codes.setSize(nelem * 2); size_t i = 0; - for (int j = 0; j < 2; ++j) - for (int elem = 0; elem < nelem; ++elem) - codes[i++] = o.arrays.ibcb[block][elem][j]; - PCU_ALWAYS_ASSERT(i == codes.getSize()); + for (int elem = 0; elem < nelem; ++elem) + codes[i++] = o.arrays.ibcb[block][elem][1]; //srfID is the second number so 1 +// if we wanted we could use PHASTA's bit in coding in the first number to us attributes to set +// arbitrary combinations of BCs but leaving that out for now } // renamed and calling the renamed functions above with output writes commented as they are PHASTA file style @@ -289,6 +288,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); break; case 5: + free(e); if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; @@ -321,7 +321,6 @@ if(0==1){ } for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; - std::string phrase = getBlockKeyPhrase(k, "connectivity boundary "); params[0] = o.blocks.boundary.nElements[i]; e_owned = params[0]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; @@ -346,11 +345,15 @@ if(0==1){ /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); -// this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on - phrase = getBlockKeyPhrase(k, "nbc codes "); - apf::DynamicArray codes; - getNaturalBCCodesCGNS(o, i, codes); free(e); + int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); + getNaturalBCCodesCGNS(o, i, srfID); + printf("%ld, %ld \n", e_start+1, e_end); + for (int ne=0; ne Date: Tue, 8 Aug 2023 09:20:04 -0600 Subject: [PATCH 19/68] fix: String constant must be char[], not char* - In C those are identical to each other. 
So I'm not sure what C++ is doing there --- phasta/phCGNSgbc.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 16a4a480a..f3e2048f3 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -376,7 +376,7 @@ void writeCGNS(Output& o, std::string path) // cgp_open('chefOut.cgns', CG_MODE_WRITE, &F); //static std::string buildCGNSFileName(std::string timestep_or_dat) // path += buildCGNSFileName(timestep_or_dat); - static char *outfile = "chefOut.cgns"; + static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, A, Cx, Cy, Cz; cgsize_t sizes[3],*e, start, end, ncells; // ^^^^^^ need to be sure this is long since using PCU_Add_Long below even when not needed From c49e7a5e502f1b081489b0769d320c1690066a85 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 8 Aug 2023 09:20:57 -0600 Subject: [PATCH 20/68] style: Misc formatting, trailing spaces --- phasta/phCGNSgbc.cc | 51 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index f3e2048f3..3a2053ece 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -83,7 +83,7 @@ void gen_ncorp(Output& o ) // also get the global number of nodes o.numGlobalNodes=0; - for(i=0;i 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); - + // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); // o carries @@ -395,7 +394,7 @@ void writeCGNS(Output& o, std::string path) ncells=m->count(m->getDimension()); ncells=PCU_Add_Long(ncells); o.numGlobalVolumeElements = ncells; - + sizes[0]=o.numGlobalNodes; sizes[1]=ncells; sizes[2]=0; @@ -413,10 +412,10 @@ void writeCGNS(Output& o, std::string path) cgp_error_exit(); -// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. 
int num_nodes=m->count(0); //V2 - cgsize_t gnod; + cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; double* x = new double[o.iownnodes]; @@ -424,7 +423,7 @@ void writeCGNS(Output& o, std::string path) int icount=0; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; - if(gnod >= start && gnod <= end) { // coordinate to write + if(gnod >= start && gnod <= end) { // coordinate to write x[icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } @@ -441,12 +440,12 @@ if(0==1) { //V1 that KEJ wrote mothballed for V2 that mimics PETSc /* int icount=0; - cgsize_t gnod; + cgsize_t gnod; double* x = new double[o.iownnodes * 3]; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; - if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write - for (int j = 0; j < 3; ++j) + if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write + for (int j = 0; j < 3; ++j) x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } @@ -454,7 +453,7 @@ if(0==1) { */ writeBlocksCGNS(F,B,Z, o); - if(cgp_close(F)) cgp_error_exit(); + if(cgp_close(F)) cgp_error_exit(); // if (!PCU_Comm_Self()) // lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } From e54436f1ae7a18b73966eed43869d5e7c6ec5524 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 9 Aug 2023 21:47:50 -0600 Subject: [PATCH 21/68] messy debug code that I save to potentially help debug later but next commit will clean up --- phasta/phCGNSgbc.cc | 149 ++++++++++++++++++++++++++++++++++++++++++-- phasta/phCook.cc | 3 +- phasta/phOutput.h | 1 + 3 files changed, 148 insertions(+), 5 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 3a2053ece..a4b2462dc 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -1,4 +1,5 @@ #include +#include "phInput.h" #include "phOutput.h" #include "phIO.h" #include "phiotimer.h" @@ -123,6 +124,97 @@ void gen_ncorp(Output& o ) } //char code[] = "out"; //int ione = 1; + int rank = PCU_Comm_Self() + 0; + for (int ipart=0; ipart 1) { +// translating a commuInt out from PHASTA to c + int numtask=o.arrays.ilwork[0]; + int itkbeg = 0; // 0-based arrays + int itag, iacc, iother, numseg, isgbeg; + MPI_Datatype sevsegtype[numtask]; +//first do what ctypes does for setup +//other stuff long int? + int maxseg=30; // set to 30,0000 for real problems + int isbegin[maxseg]; + int lenseg[maxseg]; + int ioffset[maxseg]; + MPI_Request req[numtask]; + MPI_Status stat[numtask]; + int maxfront=0; + int lfront; + for (int itask=0; itask 2) { for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -346,16 +447,23 @@ if(0==1){ /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - free(e); + printf("boundary cnn %d, %ld, %ld \n", rank, e_start, e_end); + for (int ne=0; ne 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) 
did not return a valid file ID", filename); // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering +if(0==1) { + int igo=0; + double work=9.0e33; + while (igo==0) { + work=work*0.9999999999; + if(work<=1) igo=1; + } +} + int num_nodes=m->count(0); +// debug prints:w +// for (int ipart=0; ipart PETSc global node number (1-based) @@ -413,7 +555,6 @@ void writeCGNS(Output& o, std::string path) // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. - int num_nodes=m->count(0); //V2 cgsize_t gnod; start=o.local_start_id; @@ -451,7 +592,7 @@ if(0==1) { } } */ - + if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); // if (!PCU_Comm_Self()) diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 5b67b8405..0c8b5ed6b 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -224,7 +224,8 @@ namespace ph { out.openfile_write = fn; } ph::writeGeomBC(out, subDirPath); //write geombc - if ( in.writeCGNSFiles ) + out.writeCGNSFiles=in.writeCGNSFiles; + if ( in.writeCGNSFiles > 0 ) ph::writeCGNS(out, subDirPath); //write CGNS if(!PCU_Comm_Self()) ph::writeAuxiliaryFiles(path, in.timeStepNumber); diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 1507784cc..d31d7182f 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -170,6 +170,7 @@ struct Output cgsize_t numGlobalNodes; cgsize_t numGlobalVolumeElements; #endif + int writeCGNSFiles; int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From 5457288853b843584463504c4e864aa06e13ce04 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 9 Aug 2023 22:47:02 -0600 Subject: [PATCH 22/68] cleaned up but failing AllHex 128 element case. --- phasta/phCGNSgbc.cc | 163 +++++--------------------------------------- 1 file changed, 17 insertions(+), 146 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index a4b2462dc..534e4f1c5 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -1,5 +1,4 @@ #include -#include "phInput.h" #include "phOutput.h" #include "phIO.h" #include "phiotimer.h" @@ -124,46 +123,23 @@ void gen_ncorp(Output& o ) } //char code[] = "out"; //int ione = 1; - int rank = PCU_Comm_Self() + 0; - for (int ipart=0; ipart 1) { // translating a commuInt out from PHASTA to c int numtask=o.arrays.ilwork[0]; - int itkbeg = 0; // 0-based arrays - int itag, iacc, iother, numseg, isgbeg; + int itkbeg=0; + int maxseg=1; + int numseg; + for (int itask=0; itask 2) { @@ -447,18 +396,9 @@ if(0==1){ /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - printf("boundary cnn %d, %ld, %ld \n", rank, e_start, e_end); - for (int ne=0; ne 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) 
did not return a valid file ID", filename); - -// copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering -if(0==1) { - int igo=0; - double work=9.0e33; - while (igo==0) { - work=work*0.9999999999; - if(work<=1) igo=1; - } -} int num_nodes=m->count(0); -// debug prints:w -// for (int ipart=0; ipart PETSc global node number (1-based) @@ -553,9 +447,7 @@ if(0==1) { cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) cgp_error_exit(); - // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. -//V2 cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; @@ -569,33 +461,12 @@ if(0==1) { icount++; } } -if(0==1) { - printf("%ld, %ld \n", start, end); - for (int ne=0; ne= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write - for (int j = 0; j < 3; ++j) - x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; - icount++; - } - } -*/ if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); -// if (!PCU_Comm_Self()) -// lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); -} } +} // namespace From d724b5477b3f93ef4e60d0e4f2c4b73fb0bc7a04 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 9 Aug 2023 23:01:01 -0600 Subject: [PATCH 23/68] AllHex working for 2 and 4 processes --- phasta/phCGNSgbc.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 534e4f1c5..e6f65a2ee 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -147,6 +147,7 @@ void gen_ncorp(Output& o ) MPI_Status stat[numtask]; int maxfront=0; int lfront; + itkbeg=0; for (int itask=0; itask Date: Wed, 9 Aug 2023 23:10:02 -0600 Subject: [PATCH 24/68] spurious paste of a free command from who knows where --- phasta/phCGNSgbc.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index e6f65a2ee..c5ee015b0 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -347,7 +347,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); break; case 5: - free(e); if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; From 08cf07666fd3f03d6ccbac3999ab51957673723c Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Thu, 10 Aug 2023 09:07:09 -0600 Subject: [PATCH 25/68] now writing the rank of writer as a cell centered value as a test field --- phasta/phCGNSgbc.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index c5ee015b0..1a7d4975d 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -326,10 +326,11 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) void writeBlocksCGNS(int F,int B,int Z, Output& o) { int params[MAX_PARAMS]; - int E; + int E,S,Fs; cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; cgsize_t e_written=0; + int rank = PCU_Comm_Self() ; for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -366,8 +367,23 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); + /* create a centered solution */ + if (cg_sol_write(F, B, Z, "RankCellOwner", CG_CellCenter, &S) || + cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) + cgp_error_exit(); + /* create the field data for this process */ + int* d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = rank; + /* write the solution field data in parallel */ +// from example if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, d)) + if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + cgp_error_exit(); + + e_written=e_endg; // update count of elements written free(e); + free(d); } if(o.writeCGNSFiles > 2) { for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { @@ -399,6 +415,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) free(e); int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); getNaturalBCCodesCGNS(o, i, srfID); + // I am not sure if you want to put the code here to generate the face BC "node" but srfID has // a number from 1 to 6 for the same numbered surfaces as we use in the box From ed65be9ea4d6764219493d6fa5512d4dd8b69afe Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 10 Aug 2023 08:48:05 -0600 Subject: [PATCH 26/68] chef: Write ZoneBC/GridLocations for serial box meshes --- phasta/phCGNSgbc.cc | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1a7d4975d..e16889cd9 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -2,6 +2,7 @@ #include "phOutput.h" #include "phIO.h" #include "phiotimer.h" +#include #include #include #include @@ -415,10 +416,42 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) free(e); int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); getNaturalBCCodesCGNS(o, i, srfID); + printf("%ld, %ld \n", e_start+1, e_end); + + int num_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); + if (num_ranks > 1) { + printf("Boundary conditions cannot be written in parallel right now\n"); + } else { + // waaay too large, but works as proof of concept + cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); + cgsize_t bc_elems_count[6] = {0}; + for (int ne=0; ne Date: Thu, 10 Aug 2023 19:10:43 -0600 Subject: [PATCH 27/68] partition of writer helper arrays added to userData. Also added srfID same shape and numbered as boundary elements as a back door to parallel BC data. 
Also MPI data types are pulled from a function to inherit cgsize compile time flag. --- apf/apfCGNS.cc | 4 +- mds/mdsCGNS.cc | 2 +- phasta/phCGNSgbc.cc | 107 ++++++++++++++++++++++++++++++++++++-------- phasta/phOutput.cc | 6 --- phasta/phOutput.h | 2 +- 5 files changed, 94 insertions(+), 27 deletions(-) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index 93a85d12d..da3b2b419 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -1023,7 +1023,9 @@ void Write2DEdges(CGNS cgns, apf::Mesh *m, const Count &edgeCount, const Count & // Todo split this out into a list of calls to local functions to show process/work flow void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap) { -// static_assert(std::is_same::value, "cgsize_t not compiled as int"); + + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); + const auto myRank = PCU_Comm_Self(); const Count vertexCount = count(m, 0); diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index 2a591fd07..cf0230f3d 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -1051,7 +1051,7 @@ void ReadBCInfo(const int cgid, const int base, const int zone, const int nBocos apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCMap, const std::vector> &readMeshData) { -// static_assert(std::is_same::value, "cgsize_t not compiled as int"); + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); int cgid = -1; auto comm = PCU_Get_Comm(); diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1a7d4975d..5468b4439 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -17,8 +17,29 @@ typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER -namespace ph { +namespace { + +template +MPI_Datatype getMpiType(T) { + MPI_Datatype mpitype; + //determine the type based on what is being sent + if( std::is_same::value ) { + mpitype = MPI_DOUBLE; + } else if ( std::is_same::value ) { + mpitype = MPI_INT64_T; + } else if ( std::is_same::value ) { + mpitype = MPI_INT32_T; + } else { + assert(false); + fprintf(stderr, "Unknown type in %s... 
exiting\n", __func__); + exit(EXIT_FAILURE); + } + return mpitype; +} + +} +namespace ph { static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); @@ -160,7 +181,8 @@ void gen_ncorp(Output& o ) } maxfront=std::max(maxfront,lfront); for ( int iseg=0; iseg 2) { + cgsize_t eVolElm=e_written; for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -394,7 +433,8 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getBoundaryConnectivityCGNS(o, i, e); e_startg=1+e_written; // start for the elements of this topology - e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology + cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology + e_endg=e_written + numBelTP; // end for the elements of this topology switch(nvert){ case 3: if (cgp_section_write(F, B, Z, "Tri", CG_TETRA_4, e_startg, e_endg, 0, &E)) @@ -406,32 +446,50 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) break; } e_start=0; - MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+e_written; // my ranks global element start 1-based e_end=e_start+e_owned-1; // my ranks global element stop 1-based /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); free(e); - int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); + int* srfID = (int *)malloc( e_owned * sizeof(int)); + int* nBelVec = (int *)malloc( 1 * sizeof(int)); getNaturalBCCodesCGNS(o, i, srfID); - -// I am not sure if you want to put the code here to generate the face BC "node" but srfID has -// a number from 1 to 6 for the same numbered surfaces as we use in the box - + printf("%ld ", numBelTP); + /* create a centered solution on boundary faces ONLY for srfID */ + if ( cg_goto(F, B, "Zone_t", 1, NULL) || +//done above cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || + cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) + cgp_error_exit(); + /* create the field data for this process */ + e_start-=eVolElm; + e_end-=eVolElm; + nBelVec[0]=e_owned; + printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, nBelVec[0],rank,Fsb,Fsb2); +// for (int ibel=0; ibelcount(0); @@ -482,6 +540,19 @@ void writeCGNS(Output& o, std::string path) if(j==1) if(cgp_coord_write_data(F, B, Z, Cy, &start, &end, x)) cgp_error_exit(); if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } + /* create Helper array for number of elements on rank */ + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts, &Fs2)) + cgp_error_exit(); + /* create the field data for this process */ + int* nCoordVec = (int *)malloc( 1 * sizeof(int)); + nCoordVec[0]=o.iownnodes; + rank+=1; + printf("Coor %d, %d, %d, \n", nCoordVec[0],rank,Fs2); + if ( cgp_array_write_data(Fs2, &rank, &rank, nCoordVec)) + cgp_error_exit(); if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); diff --git a/phasta/phOutput.cc 
b/phasta/phOutput.cc index 648a928e4..d4b71028b 100644 --- a/phasta/phOutput.cc +++ b/phasta/phOutput.cc @@ -21,12 +21,6 @@ #include #include #include -#ifdef HAVE_CGNS -// -#include -#include -// -#endif namespace ph { diff --git a/phasta/phOutput.h b/phasta/phOutput.h index d31d7182f..ad417505a 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -169,9 +169,9 @@ struct Output cgsize_t local_start_id; /* this rank's first global node number (1 based) */ cgsize_t numGlobalNodes; cgsize_t numGlobalVolumeElements; + int iownnodes; /* how many node this rank owns */ #endif int writeCGNSFiles; - int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ int nlworkl; /* size of arrays.ilworkl */ From 51ee9ea9d403cc4a7d565d7824aa1bba8e1bcc5f Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 10 Aug 2023 21:51:00 -0600 Subject: [PATCH 28/68] fix: Correct boco index offset - Previously forgot to include the offset due to volume element indexing --- phasta/phCGNSgbc.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 222f3656c..2051addc9 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -475,7 +475,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID) || cgp_array_write_data(Fsb2, &rank, &rank, nBelVec)) cgp_error_exit(); - } printf("%ld, %ld \n", e_start+1, e_end); int num_ranks; @@ -486,9 +485,9 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) // waaay too large, but works as proof of concept cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); cgsize_t bc_elems_count[6] = {0}; - for (int ne=0; ne Date: Thu, 10 Aug 2023 21:52:08 -0600 Subject: [PATCH 29/68] style: Fix indent of for loop, remove commented code --- phasta/phCGNSgbc.cc | 161 +++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 84 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 2051addc9..5d6d699b6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -425,94 +425,87 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); } if(o.writeCGNSFiles > 2) { - cgsize_t eVolElm=e_written; - for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { - BlockKey& k = o.blocks.boundary.keys[i]; - params[0] = o.blocks.boundary.nElements[i]; - e_owned = params[0]; - int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; - cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getBoundaryConnectivityCGNS(o, i, e); - e_startg=1+e_written; // start for the elements of this topology - cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology - e_endg=e_written + numBelTP; // end for the elements of this topology - switch(nvert){ - case 3: - if (cgp_section_write(F, B, Z, "Tri", CG_TETRA_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - case 4: - if (cgp_section_write(F, B, Z, "Quad", CG_QUAD_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - } - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my ranks global element start 1-based - e_end=e_start+e_owned-1; // my ranks global element stop 1-based - /* write the element connectivity in parallel */ - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) - cgp_error_exit(); - free(e); - int* srfID = (int 
*)malloc( e_owned * sizeof(int)); - int* nBelVec = (int *)malloc( 1 * sizeof(int)); - getNaturalBCCodesCGNS(o, i, srfID); - printf("%ld ", numBelTP); - /* create a centered solution on boundary faces ONLY for srfID */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || -//done above cg_user_data_write("User Data") || - cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || - cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) - cgp_error_exit(); - /* create the field data for this process */ - e_start-=eVolElm; - e_end-=eVolElm; - nBelVec[0]=e_owned; - printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, nBelVec[0],rank,Fsb,Fsb2); -// for (int ibel=0; ibel 1) { - printf("Boundary conditions cannot be written in parallel right now\n"); - } else { - // waaay too large, but works as proof of concept - cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); - cgsize_t bc_elems_count[6] = {0}; - for (int elem_id=0; elem_id 1) { + printf("Boundary conditions cannot be written in parallel right now\n"); + } else { + // waaay too large, but works as proof of concept + cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); + cgsize_t bc_elems_count[6] = {0}; + for (int elem_id=0; elem_id Date: Fri, 11 Aug 2023 07:59:17 -0600 Subject: [PATCH 30/68] small cleanup --- phasta/phCGNSgbc.cc | 6 +----- pumi-meshes | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 5d6d699b6..15e87c47e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -412,7 +412,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* create Helper array for number of elements on rank */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || -//done for coords cg_user_data_write("User Data") || cg_gorel(F, "User Data", 0, NULL) || cgp_array_write("nIelOnRank", CG_Integer, 1, &num_parts, &Fs2)) cgp_error_exit(); @@ -461,7 +460,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) printf("%ld ", numBelTP); /* create a centered solution on boundary faces ONLY for srfID */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || - //done above cg_user_data_write("User Data") || cg_gorel(F, "User Data", 0, NULL) || cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) @@ -477,9 +475,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); printf("%ld, %ld \n", e_start+1, e_end); - int num_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); - if (num_ranks > 1) { + if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { // waaay too large, but works as proof of concept diff --git a/pumi-meshes b/pumi-meshes index 0cd77590d..4d07746d7 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit 0cd77590d748b9cb5e190ecd4a33126d9823bdbb +Subproject commit 4d07746d7e10bbc5a7da992ef2e0a18dd1be55be From fdefc1469558c78babeca4ac5abdc2eb38c784b6 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Fri, 11 Aug 2023 08:54:54 -0600 Subject: [PATCH 31/68] srfID moved out of boundary element topology loop and dimensioned to the size of the total on-rank boundary elements both for writing and for post processing into ZonalBCs --- phasta/phCGNSgbc.cc | 88 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 15e87c47e..4bad94016 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -425,6 +425,11 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) } if(o.writeCGNSFiles > 2) { cgsize_t eVolElm=e_written; + cgsize_t eBelWritten=0; + cgsize_t totOnRankBel=0; + for (int i = 0; i < o.blocks.boundary.getSize(); ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -454,52 +459,49 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); free(e); - int* srfID = (int *)malloc( e_owned * sizeof(int)); - int* nBelVec = (int *)malloc( 1 * sizeof(int)); - getNaturalBCCodesCGNS(o, i, srfID); - printf("%ld ", numBelTP); - /* create a centered solution on boundary faces ONLY for srfID */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || - cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || - cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) - cgp_error_exit(); - /* create the field data for this process */ - e_start-=eVolElm; - e_end-=eVolElm; - nBelVec[0]=e_owned; - printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, nBelVec[0],rank,Fsb,Fsb2); - // for (int ibel=0; ibel 1) { - printf("Boundary conditions cannot be written in parallel right now\n"); - } else { - // waaay too large, but works as proof of concept - cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); - cgsize_t bc_elems_count[6] = {0}; - for (int elem_id=0; elem_id 1) { + printf("Boundary conditions cannot be written in parallel right now\n"); + } else { + // waaay too large, but works as proof of concept + cgsize_t (*bc_elems)[totOnRankBel] = (cgsize_t (*)[totOnRankBel])calloc(6 * totOnRankBel, sizeof(cgsize_t)); + cgsize_t bc_elems_count[6] = {0}; + for (int elem_id=0; elem_id Date: Fri, 11 Aug 2023 09:22:30 -0600 Subject: [PATCH 32/68] not so ugly printf statements restored to find issue with tri-face bels. 
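The element-range bookkeeping these boundary-element patches keep adjusting follows one recipe throughout the series: MPI_Exscan over the on-rank element count gives each rank a 0-based offset among its peers, and adding the number of elements already written for earlier topologies, plus one, produces the 1-based inclusive range that cgp_elements_write_data expects. A standalone sketch of that arithmetic (the helper name and signature are illustrative, not from the patches; the MPI_Datatype must match cgsize_t, which is what getMpiType() arranges above):

    #include <mpi.h>
    #include <pcgnslib.h>

    // Compute this rank's 1-based, inclusive element range for the current
    // topology: e_owned elements live on this rank, and e_written elements of
    // earlier topologies are already in the file.
    static void elementRange(cgsize_t e_owned, cgsize_t e_written, MPI_Datatype t,
                             cgsize_t* e_start, cgsize_t* e_end)
    {
      cgsize_t offset = 0; // pre-zeroed: MPI_Exscan gives rank 0 no defined value
      MPI_Exscan(&e_owned, &offset, 1, t, MPI_SUM, MPI_COMM_WORLD);
      *e_start = e_written + offset + 1; // first element this rank writes
      *e_end   = *e_start + e_owned - 1; // last element this rank writes
    }

This is the same e_start/e_end computation the surrounding hunks repeat per topology block; the pre-zeroing matters because the MPI standard leaves the MPI_Exscan receive buffer on rank 0 undefined.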
--- phasta/phCGNSgbc.cc | 66 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 4bad94016..20b41d4de 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -206,7 +206,16 @@ void gen_ncorp(Output& o ) } MPI_Waitall(m, req, stat); } - +if(1==1) { + for (int ipart=0; ipart 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { @@ -521,7 +546,37 @@ void writeCGNS(Output& o, std::string path) cgsize_t sizes[3],*e, start, end, ncells; int num_nodes=m->count(0); - +// debug prints +if(0==1){ + for (int ipart=0; ipart Date: Fri, 11 Aug 2023 10:39:10 -0600 Subject: [PATCH 33/68] cut and paste error was the tri-face problem...fixed --- phasta/phCGNSgbc.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 20b41d4de..80078606b 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -458,7 +458,7 @@ if(1==1){ e_endg=e_written + numBelTP; // end for the elements of this topology switch(nvert){ case 3: - if (cgp_section_write(F, B, Z, "Tri", CG_TETRA_4, e_startg, e_endg, 0, &E)) + if (cgp_section_write(F, B, Z, "Tri", CG_TRI_3, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 4: From 1fdd6ed112d2c7b7f6090c3fc70128676ee6bfef Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 11 Aug 2023 13:35:53 -0600 Subject: [PATCH 34/68] Wedges working. Tets now positive volume. All boundary element have inward normals from crossing first two edges of the face directed along numbering. All but pyramids have been checked. --- phasta/phCGNSgbc.cc | 49 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 80078606b..83f1c5272 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -302,9 +302,18 @@ void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) int nelem = o.blocks.interior.nElements[block]; int nvert = o.blocks.interior.keys[block].nElementVertices; size_t i = 0; - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // input is 0-based, out is 1-based do drop the +1 + if(nvert==4) { //prepped for PHASTA's negative volume tets so flip second and third vertex + for (int elem = 0; elem < nelem; ++elem){ + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][0]]; + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][2]]; + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][1]]; + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][3]]; + } + } else { + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // input is 0-based, out is 1-based do drop the +1 + } PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -313,11 +322,30 @@ void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c) { int nelem = o.blocks.boundary.nElements[block]; // CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; + int nvertVol = o.blocks.boundary.keys[block].nElementVertices; int nvert = o.blocks.boundary.keys[block].nBoundaryFaceEdges; size_t i = 0; - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]]; +// int* lnode[4]; + std::vector lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) + // 
PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. We need it here too
+  // see hierarchic.f but note that is fortran numbering
+  if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume
+// if(nvertVol==5 && nvert==4) lnode={0, 1, 2, 3}; // pyramid quad is first 4
+  if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2
+  if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2
+// if(nvertVol==6 && nvert==3) lnode={0, 1, 2, -1}; // wedge tri first three
+// if(nvertVol==8) lnode={0, 1, 2, 3}; // hex first 4
+/* if(nvertVol==4) { //see interior above
+    for (int elem = 0; elem < nelem; ++elem){
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][0]];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][2]];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][1]];
+    }
+  } else { */
   for (int elem = 0; elem < nelem; ++elem)
     for (int vert = 0; vert < nvert; ++vert)
-      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]];
+// }
   PCU_ALWAYS_ASSERT(i == nelem*nvert);
 }
@@ -441,7 +469,7 @@ if(1==1){
 }
 if(o.writeCGNSFiles > 2) {
   cgsize_t eVolElm=e_written;
-  cgsize_t eBelWritten=0;
+  cgsize_t e_belWritten=0;
   cgsize_t totOnRankBel=0;
   for (int i = 0; i < o.blocks.boundary.getSize(); ++i)
     totOnRankBel += o.blocks.boundary.nElements[i];
@@ -483,8 +511,9 @@ if(1==1){
   }
 }
     free(e);
-    getNaturalBCCodesCGNS(o, i, &srfID[eBelWritten]);
-    eBelWritten+=e_owned;
+    getNaturalBCCodesCGNS(o, i, &srfID[e_belWritten]);
+    e_written+=e_owned;
+    e_belWritten+=e_owned;
   }
   printf("%ld ", totOnRankBel);
@@ -496,7 +525,7 @@ if(1==1){
     cgp_error_exit();
   /* write the user data for this process */
   e_start=1;
-  e_end=eBelWritten; // user data is ranged differently than field data
+  e_end=e_belWritten; // user data is ranged differently than field data
   printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, rank,Fsb,Fsb2);
   if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID) ||
       cgp_array_write_data(Fsb2, &rank, &rank, &e_end))

From cccf8b3a199359c37a771134740c29ef26ebb1d0 Mon Sep 17 00:00:00 2001
From: "Kenneth E. Jansen"
Date: Fri, 11 Aug 2023 16:04:09 -0600
Subject: [PATCH 35/68] mixed wedge and tet volume element meshes pass a sniff test in cgnsview. That required retooling/rearranging some of the CGNS write stuff and also required handling multiple boundary element meshes for tris (tris from wedges and tris from tets). We could consider merging these lists but for now they get enumerated to distinguish between them.
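A caution on the name buffers PATCH 35 introduces below: the size argument given to snprintf must not exceed the destination's actual size. The hunks below pair char UserDataName[12] with snprintf(UserDataName, 13, "n%sOnRank", Ename), and since "nQuad1OnRank" is exactly 12 characters, the terminating NUL lands one byte past the end of the array. A minimal sketch of the safer idiom (the 16-byte buffer is an illustrative choice, not from the patch):

    #include <cstdio>

    // Build the per-topology helper-array name, e.g. "nQuad1OnRank", which is
    // 12 characters plus the NUL; bounding the write by sizeof keeps snprintf
    // inside the buffer no matter what the topology name expands to.
    static void helperArrayName(const char* Ename, char (&out)[16])
    {
      snprintf(out, sizeof(out), "n%sOnRank", Ename);
    }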
--- phasta/phCGNSgbc.cc | 107 +++++++++++++++++++++++++++++--------------- pumi-meshes | 2 +- 2 files changed, 72 insertions(+), 37 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 83f1c5272..ac4dff54e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -392,6 +392,10 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) const int nparts = PCU_Comm_Peers(); cgsize_t num_parts=nparts; cgsize_t rank = PCU_Comm_Self() ; + /* create a centered solution */ + if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || + cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) + cgp_error_exit(); for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -403,21 +407,27 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* create data node for elements */ e_startg=1+e_written; // start for the elements of this topology e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology +// char Ename[33]; + char Ename[5]; switch(nvert){ case 4: - if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Tet"); + if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 5: - if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Pyr"); + if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 6: - if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 8: - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Hex"); + if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; } @@ -429,17 +439,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - /* create a centered solution */ - if (cg_sol_write(F, B, Z, "RankCellOwner", CG_CellCenter, &S) || - cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) - cgp_error_exit(); - /* create the field data for this process */ - int* d = (int *)malloc(e_owned * sizeof(int)); - for (int n = 0; n < e_owned; n++) - d[n] = rank; - /* write the solution field data in parallel */ - if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) - cgp_error_exit(); + e_written=e_endg; // update count of elements written if(1==1){ printf("interior cnn %d, %ld, %ld \n", rank, e_start, e_end); @@ -449,28 +449,47 @@ if(1==1){ printf("\n"); } } - - e_written=e_endg; // update count of elements written free(e); + +// /* create the field data for this process */ + int* d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = rank; +// /* write the solution field data in parallel */ + if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + cgp_error_exit(); free(d); - /* create Helper array for number of elements on rank */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || + +// char UserDataName[33]; +// snprintf(UserDataName, 33, "n%sOnRank", Ename); + char UserDataName[11]; + snprintf(UserDataName, 11, "n%sOnRank", Ename); + /* create Helper array for number of elements on rank of a given topology */ + if ( cg_goto(F, B, 
"Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("nIelOnRank", CG_Integer, 1, &num_parts, &Fs2)) +// cgp_array_write("nIelOnRank", CG_Integer, 1, &num_parts, &Fs2)) + cgp_array_write(UserDataName, CG_Integer, 1, &num_parts, &Fs2)) cgp_error_exit(); /* create the field data for this process */ - int* nIelVec = (int *)malloc( 1 * sizeof(int)); - nIelVec[0]=e_owned; - rank+=1; - printf("Intr %d, %d, %d, %d \n", nIelVec[0],rank,Fs,Fs2); - if ( cgp_array_write_data(Fs2, &rank, &rank, nIelVec)) +// int* nIelVec = (int *)malloc( 1 * sizeof(int)); +// nIelVec[0]=e_owned; + int nIelVec=e_owned; + cgsize_t rankP1=rank+1; + printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,rank,Fs,Fs2); + if ( cgp_array_write_data(Fs2, &rankP1, &rankP1, &nIelVec)) cgp_error_exit(); - } + } // end of loop over blocks + + + + if(o.writeCGNSFiles > 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; cgsize_t totOnRankBel=0; + int triCount=0; + int quadCount=0; for (int i = 0; i < o.blocks.boundary.getSize(); ++i) totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); @@ -484,13 +503,19 @@ if(1==1){ e_startg=1+e_written; // start for the elements of this topology cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology e_endg=e_written + numBelTP; // end for the elements of this topology + if(nvert==3) triCount++; + if(nvert==4) quadCount++; + char Ename[7]; + switch(nvert){ case 3: - if (cgp_section_write(F, B, Z, "Tri", CG_TRI_3, e_startg, e_endg, 0, &E)) + snprintf(Ename, 5, "Tri%d",triCount); + if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 4: - if (cgp_section_write(F, B, Z, "Quad", CG_QUAD_4, e_startg, e_endg, 0, &E)) + snprintf(Ename, 6, "Quad%d",quadCount); + if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; } @@ -514,24 +539,34 @@ if(1==1){ getNaturalBCCodesCGNS(o, i, &srfID[e_belWritten]); e_written+=e_owned; e_belWritten+=e_owned; - } + char UserDataName[12]; + snprintf(UserDataName, 13, "n%sOnRank", Ename); + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write(UserDataName, CG_Integer, 1, &num_parts, &Fsb2)) + cgp_error_exit(); + printf("Bndy %s, %ld, %ld %d, %d, %d, %d \n", UserDataName, e_start, e_end, rank,Fsb,Fsb2); + cgsize_t rankP1=rank+1; + if (cgp_array_write_data(Fsb2, &rankP1, &rankP1, &e_end)) + cgp_error_exit(); + + } +// srfID is for ALL Boundary faces printf("%ld ", totOnRankBel); - /* setup User Data for boundary faces */ + /* setup User Data for boundary faces */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,&totOnRankBel, &Fsb) || - cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) + cgp_array_write("srfID", CG_Integer, 1,&totOnRankBel, &Fsb)) cgp_error_exit(); /* write the user data for this process */ e_start=1; e_end=e_belWritten; // user data is ranged differently than field data - printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, rank,Fsb,Fsb2); - if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID) || - cgp_array_write_data(Fsb2, &rank, &rank, &e_end)) + printf("Bndy %s, %ld, %ld %d, %d, %d, %d \n", "srfID", e_start, e_end, rank,Fsb,Fsb2); + cgsize_t rankP1=rank+1; + if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID)) cgp_error_exit(); - if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel 
right now\n"); } else { diff --git a/pumi-meshes b/pumi-meshes index 4d07746d7..9dd816fea 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit 4d07746d7e10bbc5a7da992ef2e0a18dd1be55be +Subproject commit 9dd816fea029a235619d6b70cfc9a3c2506cf9f6 From a6fc0371edc5c893ae212111dbc75008167b4aa8 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 09:47:35 -0600 Subject: [PATCH 36/68] fixed bug in srfID write for parallel with multiple topologies --- phasta/phCGNSgbc.cc | 269 ++++++++++++++++++++++---------------------- phasta/phOutput.h | 2 - pumi-meshes | 2 +- 3 files changed, 137 insertions(+), 136 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index ac4dff54e..b3fc826e1 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -49,22 +49,20 @@ static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_ void gen_ncorp(Output& o ) { apf::Mesh* m = o.mesh; - int part; - int num_parts; int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = new cgsize_t[num_nodes]; + o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; cgsize_t local_start_id; cgsize_t gid; - MPI_Comm_rank(MPI_COMM_WORLD, &part); - MPI_Comm_size(MPI_COMM_WORLD, &num_parts); + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; - memset(o.arrays.ncorp, 0, sizeof(cgsize_t)*(num_nodes)); + for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); o.iownnodes = owned+local; @@ -76,7 +74,7 @@ void gen_ncorp(Output& o ) assert( owned+local <= num_nodes ); owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); - memset(owner_counts, 0, sizeof(lcorp_t)*num_parts); + for(int i=0; i < num_parts; i++) owner_counts[i]=0; owner_counts[part] = owned+local; #ifdef PRINT_EVERYTHING for(i=0;i 1) { // translating a commuInt out from PHASTA to c @@ -162,11 +153,19 @@ void gen_ncorp(Output& o ) int itag, iacc, iother, isgbeg; MPI_Datatype sevsegtype[numtask]; //first do what ctypes does for setup - int isbegin[maxseg]; - int lenseg[maxseg]; - int ioffset[maxseg]; - MPI_Request req[numtask]; - MPI_Status stat[numtask]; + int* isbegin; + int* lenseg; + int* ioffset; + isbegin = (int*) malloc(sizeof(int) * maxseg); + lenseg = (int*) malloc(sizeof(int) * maxseg); + ioffset = (int*) malloc(sizeof(int) * maxseg); +// no VLA MPI_Request req[numtask]; +// no VLA MPI_Status stat[numtask]; + + int maxtask=1000; + assert(maxtask>=numtask); + MPI_Request req[maxtask]; + MPI_Status stat[maxtask]; int maxfront=0; int lfront; itkbeg=0; @@ -187,6 +186,9 @@ void gen_ncorp(Output& o ) MPI_Type_commit (&sevsegtype[itask]); itkbeg+=4+2*numseg; } + free(isbegin); + free(lenseg); + free(ioffset); int m = 0; itkbeg=0; @@ -206,7 +208,7 @@ void gen_ncorp(Output& o ) } MPI_Waitall(m, req, stat); } -if(1==1) { +if(1==0) { for (int ipart=0; ipart lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) // PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. 
We need it here too // see hierarchic.f but note that is fortran numbering if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume -// if(nvertVol==5 && nvert==4) lnode={0, 1, 2, 3}; // pyramid quad is first 4 if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 -// if(nvertVol==6 && nvert==3) lnode={0, 1, 2, -1}; // wedge tri first three -// if(nvertVol==8) lnode={0, 1, 2, 3}; // hex first 4 -/* if(nvertVol==4) { //see interior above - for (int elem = 0; elem < nelem; ++elem){ - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][0]]; - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][2]]; - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][1]]; - } - } else { */ - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]]; -// } + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]]; PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -370,7 +358,7 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose PCU_ALWAYS_ASSERT(i == c.getSize()); } -// renamed but not updated yet +// renamed stripped down to just give srfID void getNaturalBCCodesCGNS(Output& o, int block, int* codes) { int nelem = o.blocks.boundary.nElements[block]; @@ -381,7 +369,7 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) // arbitrary combinations of BCs but leaving that out for now } -// renamed and calling the renamed functions above with output writes commented as they are PHASTA file style +// renamed and calling the renamed functions above with output writes now to CGNS void writeBlocksCGNS(int F,int B,int Z, Output& o) { int params[MAX_PARAMS]; @@ -389,10 +377,11 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; cgsize_t e_written=0; - const int nparts = PCU_Comm_Peers(); - cgsize_t num_parts=nparts; - cgsize_t rank = PCU_Comm_Self() ; - /* create a centered solution */ + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + /* create a centered solution */ if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); @@ -406,8 +395,8 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) getInteriorConnectivityCGNS(o, i, e); /* create data node for elements */ e_startg=1+e_written; // start for the elements of this topology - e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology -// char Ename[33]; + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=e_written + PCU_Add_Long(safeArg); // end for the elements of this topology char Ename[5]; switch(nvert){ case 4: @@ -434,17 +423,17 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my ranks global element start 1-based - e_end=e_start+e_owned-1; // my ranks global element stop 1-based + e_start+=1+e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based /* write 
the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); e_written=e_endg; // update count of elements written -if(1==1){ - printf("interior cnn %d, %ld, %ld \n", rank, e_start, e_end); +if(1==0){ + printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); for (int ne=0; ne 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; cgsize_t totOnRankBel=0; int triCount=0; int quadCount=0; - for (int i = 0; i < o.blocks.boundary.getSize(); ++i) + int nblkb = o.blocks.boundary.getSize(); + for (int i = 0; i < nblkb; ++i) totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -501,7 +486,8 @@ if(1==1){ cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getBoundaryConnectivityCGNS(o, i, e); e_startg=1+e_written; // start for the elements of this topology - cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology e_endg=e_written + numBelTP; // end for the elements of this topology if(nvert==3) triCount++; if(nvert==4) quadCount++; @@ -522,51 +508,69 @@ if(1==1){ e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my ranks global element start 1-based - e_end=e_start+e_owned-1; // my ranks global element stop 1-based + e_start+=1+e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); -if(1==1){ - printf("boundary cnn %d, %ld, %ld \n", rank, e_start, e_end); + printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); +if(1==0){ for (int ne=0; ne 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { @@ -599,22 +603,21 @@ void writeCGNS(Output& o, std::string path) { double t0 = PCU_Time(); apf::Mesh* m = o.mesh; - cgsize_t rank = PCU_Comm_Self() + 0; - int nparts; - MPI_Comm_size(MPI_COMM_WORLD, &nparts); - cgsize_t num_parts=nparts; + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + const cgsize_t num_parts_cg=num_parts; std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; - cgsize_t sizes[3],*e, start, end, ncells; + cgsize_t sizes[3],*e, start, end; + + int num_nodes=m->count(0); - int num_nodes=m->count(0); -// debug prints -if(0==1){ +if(0==1){ // ilwork debugging for (int ipart=0; ipart PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) -// o.numGlobalNodes - ncells=m->count(m->getDimension()); - ncells=PCU_Add_Long(ncells); - o.numGlobalVolumeElements = ncells; - - sizes[0]=o.numGlobalNodes; - sizes[1]=ncells; - sizes[2]=0; - if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; - if ( cgp_open(outfile, CG_MODE_WRITE, &F) || - 
cg_base_write(F, "Base", 3, 3, &B) || - cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) - cgp_error_exit(); + + long safeArg=o.iownnodes; // cgsize_t could be an int + sizes[0]=PCU_Add_Long(safeArg); + int ncells=m->count(m->getDimension()); // this ranks number of elements + safeArg=ncells; // cgsize_t could be an int + sizes[1]=PCU_Add_Long(safeArg); + sizes[2]=0; + if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; + if ( cgp_open(outfile, CG_MODE_WRITE, &F) || + cg_base_write(F, "Base", 3, 3, &B) || + cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) + cgp_error_exit(); /* create data nodes for coordinates */ - cg_set_file_type(CG_FILE_HDF5); + cg_set_file_type(CG_FILE_HDF5); - if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) - cgp_error_exit(); + if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) + cgp_error_exit(); -// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; - double* x = new double[o.iownnodes]; + double* x = (double *)malloc(o.iownnodes * sizeof(double)); for (int j = 0; j < 3; ++j) { int icount=0; for (int inode = 0; inode < num_nodes; ++inode){ @@ -692,19 +695,19 @@ if(0==1) { if(j==1) if(cgp_coord_write_data(F, B, Z, Cy, &start, &end, x)) cgp_error_exit(); if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } - /* create Helper array for number of elements on rank */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || - cg_user_data_write("User Data") || - cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts, &Fs2)) - cgp_error_exit(); - /* create the field data for this process */ - int* nCoordVec = (int *)malloc( 1 * sizeof(int)); - nCoordVec[0]=o.iownnodes; - rank+=1; - printf("Coor %d, %d, %d, \n", nCoordVec[0],rank,Fs2); - if ( cgp_array_write_data(Fs2, &rank, &rank, nCoordVec)) - cgp_error_exit(); + free (x); + /* create Helper array for number of elements on rank */ + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts_cg, &Fs2)) + cgp_error_exit(); + /* create the field data for this process */ + int nCoordVec=o.iownnodes; + cgsize_t partP1=part+1; + printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) + cgp_error_exit(); if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); diff --git a/phasta/phOutput.h b/phasta/phOutput.h index ad417505a..0e3c85351 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -167,8 +167,6 @@ struct Output int nOverlapEdges; #ifdef HAVE_CGNS cgsize_t local_start_id; /* this rank's first global node number (1 based) */ - cgsize_t numGlobalNodes; - cgsize_t numGlobalVolumeElements; int iownnodes; /* how many node this rank owns */ #endif int writeCGNSFiles; diff --git a/pumi-meshes b/pumi-meshes index 9dd816fea..fecc2dae4 160000 --- 
a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit 9dd816fea029a235619d6b70cfc9a3c2506cf9f6 +Subproject commit fecc2dae4d3e5a288022fd10ddf78fa60ba05e86 From f68aa5f9e7fb283d632bb2e6df069349e9c0eb83 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 13:18:58 -0600 Subject: [PATCH 37/68] parallel BC write almost working --- phasta/phCGNSgbc.cc | 114 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index b3fc826e1..956aaae77 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -40,6 +40,38 @@ MPI_Datatype getMpiType(T) { } +// https://www.geeksforgeeks.org/sorting-array-according-another-array-using-pair-stl/ +// Sort an array according to +// other using pair in STL. +#include +using namespace std; + +// Function to sort character array b[] +// according to the order defined by a[] +void pairsort(int a[], int b[], int n) +{ + pair pairt[n]; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. + sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } +} + + namespace ph { static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); @@ -468,7 +500,8 @@ if(1==0){ if(o.writeCGNSFiles > 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; - cgsize_t totOnRankBel=0; +// cgsize_t totOnRankBel=0; + int totOnRankBel=0; int triCount=0; int quadCount=0; int nblkb = o.blocks.boundary.getSize(); @@ -544,7 +577,8 @@ if(1==0){ // long safeArg=totOnRankBel; // is cgsize_t which could be an 32 or 64 bit int // cgsize_t totBel = PCU_Add_Long(safeArg); // number of elements of this topology cgsize_t totBel = e_written-eVolElm; - printf("%ld %ld ", totOnRankBel,totBel); +// printf("%ld %ld ", totOnRankBel,totBel); + printf("%d %ld ", totOnRankBel,totBel); /* setup User Data for boundary faces */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || @@ -571,6 +605,82 @@ if(1==0){ e_written += PCU_Add_Long(safeArg); // number of elements of this topology } // ZonalBC data When made parallel be mindful of srfID being in segments on each rank....NOT globally ordered but srIDidx gives global idx in same order. 
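One note before the gather below reorders this data: the pair pairt[n] buffer in pairsort above is a variable-length array, a compiler extension rather than standard C++. A minimal sketch of the same sort with the template arguments spelled out and std::vector owning the storage; comparing only the key under std::stable_sort keeps equal-keyed entries in their incoming (rank-gathered) order, whereas the whole-pair comparison also orders the indices within a key:

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Sort b[] according to the keys in a[], without a VLA.
    void pairsort(int a[], int b[], int n)
    {
      std::vector<std::pair<int, int> > pairt(n);
      for (int i = 0; i < n; i++)
        pairt[i] = std::make_pair(a[i], b[i]);
      std::stable_sort(pairt.begin(), pairt.end(),
          [](const std::pair<int, int>& l,
             const std::pair<int, int>& r) { return l.first < r.first; });
      for (int i = 0; i < n; i++) {
        a[i] = pairt[i].first;   // keys, now grouped
        b[i] = pairt[i].second;  // values, reordered to match
      }
    }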
+ int* srfIDG = (int *)malloc( totBel * sizeof(int)); + int* srfIDGidx = (int *)malloc( totBel * sizeof(int)); + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + auto type_cg = getMpiType( cgsize_t() ); + auto type_i = getMpiType( int() ); + MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); + displs[0]=0; + if(part==0){ + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; +if(1==1){ + for(int ip=0; ip< num_parts; ++ip) printf("%ld ", rcounts[ip]); + printf("\n"); + for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); + printf("\n"); +} + } + MPI_Gatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,0,MPI_COMM_WORLD); + MPI_Gatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,0,MPI_COMM_WORLD); +if(1==1){ + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); + printf("\n"); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); + printf("\n"); + } + printf("rank %d ",part); + printf(" srfID on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfID[is]); + printf("\n"); + printf(" srfIDidx on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); + printf("\n"); +} + if(part==0) pairsort(srfIDG,srfIDGidx,totBel); +if(1==1){ + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); + printf("\n"); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); + printf("\n"); + } +} + if(part==0) { + int BC_scan=0; + cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); + for (int BCid = 1; BCid < 7; BCid++) { + int imatch=0; + while (srfIDG[BC_scan]==BCid) { + eBC[imatch]=srfIDGidx[BC_scan]; + BC_scan++; + imatch++; + } + int BC_index; + char BC_name[33]; + snprintf(BC_name, 33, "SurfID_%d", BCid + 1); + if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) + cg_error_exit(); + if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; + if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); + +if(1==1) { + printf(" srfID =%d ",BCid); + for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); + printf("\n"); +} + } + free(eBC); + } + + +//James Work if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { From 0b09db1aca41751afa654a5c66ad37a424fb1cd8 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sat, 12 Aug 2023 13:56:02 -0600 Subject: [PATCH 38/68] gather is not good enough for cg_boco_write but committing the failure because this is so wrong to have to do an all gather for rank0 to be able to write --- phasta/phCGNSgbc.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 956aaae77..51ab4fe82 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -652,32 +652,32 @@ if(1==1){ printf("\n"); } } - if(part==0) { int BC_scan=0; cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; + if(part==0) { while (srfIDG[BC_scan]==BCid) { eBC[imatch]=srfIDGidx[BC_scan]; BC_scan++; imatch++; } +if(1==1) { + printf(" srfID =%d ",BCid); + for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); + printf("\n"); +} + } else imatch=1; int BC_index; char BC_name[33]; - snprintf(BC_name, 33, "SurfID_%d", BCid + 1); + snprintf(BC_name, 33, "SurfID_%d", BCid ); if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) cg_error_exit(); if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); -if(1==1) { - printf(" srfID =%d ",BCid); - for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); - printf("\n"); -} } free(eBC); - } //James Work From aaa56c5fd1b0b340935fe0ee67fc8c97c9fa059b Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 14:11:14 -0600 Subject: [PATCH 39/68] Gather->Allgather produces correct result....but what other insane performance landmines are out there? --- phasta/phCGNSgbc.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 51ab4fe82..6570ec036 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -611,9 +611,10 @@ if(1==0){ int* displs = (int *)malloc( num_parts * sizeof(int)); auto type_cg = getMpiType( cgsize_t() ); auto type_i = getMpiType( int() ); - MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); +//FAIL MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); + MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; - if(part==0){ +// if(part==0){ for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; if(1==1){ for(int ip=0; ip< num_parts; ++ip) printf("%ld ", rcounts[ip]); @@ -621,9 +622,9 @@ if(1==1){ for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); printf("\n"); } - } - MPI_Gatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,0,MPI_COMM_WORLD); - MPI_Gatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,0,MPI_COMM_WORLD); +// } + MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); + MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); if(1==1){ if(part==0) { printf(" srfID GLOBAL "); @@ -641,7 +642,8 @@ if(1==1){ for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); printf("\n"); } - if(part==0) pairsort(srfIDG,srfIDGidx,totBel); +// if(part==0) pairsort(srfIDG,srfIDGidx,totBel); + pairsort(srfIDG,srfIDGidx,totBel); if(1==1){ if(part==0) { printf(" srfID GLOBAL "); @@ -656,7 +658,7 @@ if(1==1){ cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; - if(part==0) { +// if(part==0) { while 
(srfIDG[BC_scan]==BCid) { eBC[imatch]=srfIDGidx[BC_scan]; BC_scan++; @@ -667,7 +669,7 @@ if(1==1) { for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); printf("\n"); } - } else imatch=1; +// } else imatch=1; int BC_index; char BC_name[33]; snprintf(BC_name, 33, "SurfID_%d", BCid ); From 2fd47b1d070162f664d33a46b4d0f3d21104658a Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 19:52:37 -0600 Subject: [PATCH 40/68] replaced the STL sort which the web says is Nlog_2N in distance between the elements with a deal 6 player. Obviously this is highly specialized to our boxes while the STL will always work. --- phasta/phCGNSgbc.cc | 142 +++++++++++++++++++++++++------------------- pumi-meshes | 2 +- 2 files changed, 82 insertions(+), 62 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 6570ec036..94463e0a6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -46,11 +46,11 @@ MPI_Datatype getMpiType(T) { #include using namespace std; -// Function to sort character array b[] +// Function to sort integer array b[] // according to the order defined by a[] void pairsort(int a[], int b[], int n) { - pair pairt[n]; + pair pairt[n]; // Storing the respective array // elements in pairs. @@ -70,6 +70,33 @@ void pairsort(int a[], int b[], int n) b[i] = pairt[i].second; } } +void pairDeal6sort(int a[], int b[], int n) +{ + int c[6]={0}; + for (int i = 0; i < n; i++) c[a[i]-1]++; // count number each type in a pre-scan + int** p = new int*[6]; + for (int i = 0; i < 6; i++) p[i]=new int[c[i]]; + int** idx = new int*[6]; + for (int i = 0; i < 6; i++) idx[i]=new int[c[i]]; + for (int i = 0; i < 6; i++) c[i]=0; + int isrfM1; + for (int i = 0; i < n; i++) + { + isrfM1=a[i]-1; + p[isrfM1][c[isrfM1]]=b[i]; + idx[isrfM1][c[isrfM1]]=a[i]; + c[isrfM1]++; + } + int igc=0; + for (int j = 0; j < 6; j++){ + for (int i = 0; i < c[j]; i++) { + b[igc] = p[j][i]; + a[igc] = idx[j][i]; + igc++; + } + } + assert(igc==n); +} namespace ph { @@ -611,78 +638,71 @@ if(1==0){ int* displs = (int *)malloc( num_parts * sizeof(int)); auto type_cg = getMpiType( cgsize_t() ); auto type_i = getMpiType( int() ); -//FAIL MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; -// if(part==0){ - for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; if(1==1){ - for(int ip=0; ip< num_parts; ++ip) printf("%ld ", rcounts[ip]); - printf("\n"); - for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); - printf("\n"); + for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); + printf("\n"); } -// } - MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); - MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); + MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); + MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); if(1==1){ - if(part==0) { - printf(" srfID GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); - printf("\n"); - printf(" srfIDidx GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); - printf("\n"); - } - printf("rank %d ",part); - printf(" srfID on Part "); - for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfID[is]); + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; 
++is) printf("%d ", srfIDG[is]); + printf("\n"); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); + printf("\n"); + } + printf("rank %d ",part); + printf(" srfID on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfID[is]); + printf("\n"); + printf(" srfIDidx on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); + printf("\n"); +} +// pairsort(srfIDG,srfIDGidx,totBel); + pairDeal6sort(srfIDG,srfIDGidx,totBel); +if(1==1){ + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); printf("\n"); - printf(" srfIDidx on Part "); - for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); printf("\n"); + } } -// if(part==0) pairsort(srfIDG,srfIDGidx,totBel); - pairsort(srfIDG,srfIDGidx,totBel); -if(1==1){ - if(part==0) { - printf(" srfID GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); - printf("\n"); - printf(" srfIDidx GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); - printf("\n"); + int BC_scan=0; + cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); + for (int BCid = 1; BCid < 7; BCid++) { + int imatch=0; + while (srfIDG[BC_scan]==BCid) { + eBC[imatch]=srfIDGidx[BC_scan]; + BC_scan++; + imatch++; } -} - int BC_scan=0; - cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); - for (int BCid = 1; BCid < 7; BCid++) { - int imatch=0; -// if(part==0) { - while (srfIDG[BC_scan]==BCid) { - eBC[imatch]=srfIDGidx[BC_scan]; - BC_scan++; - imatch++; - } if(1==1) { - printf(" srfID =%d ",BCid); - for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); - printf("\n"); + printf(" srfID =%d ",BCid); + for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); + printf("\n"); } -// } else imatch=1; - int BC_index; - char BC_name[33]; - snprintf(BC_name, 33, "SurfID_%d", BCid ); - if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) - cg_error_exit(); - if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; - if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); + int BC_index; + char BC_name[33]; + snprintf(BC_name, 33, "SurfID_%d", BCid ); + if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) + cg_error_exit(); + if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; + if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); - } - free(eBC); + } + free(eBC); - //James Work +/* if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { @@ -707,7 +727,7 @@ if(1==1) { } free(bc_elems); - } + } */ } } diff --git a/pumi-meshes b/pumi-meshes index fecc2dae4..a3a241a71 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit fecc2dae4d3e5a288022fd10ddf78fa60ba05e86 +Subproject commit a3a241a715de566f0e812d253f5cfc2a82705f62 From 5ef895dc5d973bc11a99055eadd24613be5e5f6e Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 13 Aug 2023 19:07:09 -0600 Subject: [PATCH 41/68] As I test on a bump mesh I realize the help message is a bit out of date. Also found some fossil code that usage went extinct. 
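Stepping back to the deal-style sort that PATCH 40 swaps in for the STL sort: it is a counting/bucket pass, O(n) because the keys are limited to the six surface ids. A compact sketch of the same idea with std::vector doing the allocation bookkeeping that the hand-rolled version does with new/delete:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Deal each (surface id, element index) pair into one of six piles,
    // then read the piles back in id order. Assumes every a[i] is 1..6.
    void pairDeal6sort(int a[], int b[], int n)
    {
      std::vector<std::vector<int> > pile(6);
      for (int i = 0; i < n; i++) {
        assert(a[i] >= 1 && a[i] <= 6);
        pile[a[i] - 1].push_back(b[i]);
      }
      int igc = 0;
      for (int j = 0; j < 6; j++) {
        for (std::size_t k = 0; k < pile[j].size(); k++) {
          a[igc] = j + 1;       // keys come back grouped in id order
          b[igc] = pile[j][k];  // values keep their within-id order
          igc++;
        }
      }
      assert(igc == n);
    }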
--- test/matchedNodeElmReader.cc | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/test/matchedNodeElmReader.cc b/test/matchedNodeElmReader.cc index 4dfdfabd0..1572b12e8 100644 --- a/test/matchedNodeElmReader.cc +++ b/test/matchedNodeElmReader.cc @@ -20,33 +20,6 @@ #include #include -/* from https://github.com/SCOREC/core/issues/205 -0=fully interior of the volume -1-6 =classified on face (not edge or vertex) -11-22 = classified on model edge (not end points which are model vertices) -31-38 = classified on a model vertex. -*/ - -/* tags on vertices */ -#define INTERIORTAG 0 -#define FACE 1 -#define FACE_LAST 6 -#define EDGE 11 -#define EDGE_LAST 22 -#define VERTEX 31 -#define VERTEX_LAST 38 - -/* model entity ids */ -//#define INTERIOR_REGION 0 -//int INTERIOR_REGION=0; // initialized but will be checked from read input - -//Manifold single region apf::ModelEntity* getMdlRgn(gmi_model* model) { -//Manifold single region apf::ModelEntity* rgn = reinterpret_cast( -//Manifold single region gmi_find(model, 3, INTERIOR_REGION)); -//Manifold single region PCU_ALWAYS_ASSERT(rgn); -//Manifold single region return rgn; -//Manifold single region } - apf::ModelEntity* getMdlRegion(apf::Mesh2* mesh, int tag) { apf::ModelEntity* region = mesh->findModelEntity(3,tag); @@ -749,14 +722,14 @@ int main(int argc, char** argv) int noVerify=0; // maintain default of verifying if not explicitly requesting it off if( argc < 11 ) { if( !PCU_Comm_Self() ) { - printf("Usage: %s " - " " + printf("Usage: %s no rank but .rank added to next 6 " + " " " " " " " " " " " " - " " + " " " " "turn off verify mesh if equal 1 (on if you give nothing)\n", argv[0]); From b448217bb21d406a96933c30f7f2299238d0a416 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 14 Aug 2023 06:50:08 -0600 Subject: [PATCH 42/68] Initial condition extracted and written to CGNS. For now, PHASTA restart is bypassed since code bundles extraction and destruction and in that case, solution not available to CGNS writer to extract. 
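The extraction this patch adds stores the solution component-major ("transposed"): component j of node i lands at data[j*n + i], so each of the five PHASTA components (p, u, v, w, T) becomes a contiguous slice that can be handed straight to cgp_field_write_data. A stand-alone sketch of that packing, assuming a hypothetical node-major input:

    #include <array>
    #include <cstddef>
    #include <vector>

    // Pack node-major (p,u,v,w,T per node) into component-major storage;
    // &data[j*n] is then the contiguous slice for solution component j.
    std::vector<double> packComponentMajor(
        const std::vector<std::array<double, 5> >& nodal)
    {
      const std::size_t n = nodal.size();
      std::vector<double> data(5 * n);
      for (std::size_t i = 0; i < n; ++i)
        for (int j = 0; j < 5; ++j)
          data[j * n + i] = nodal[i][j];
      return data;
    }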
--- phasta/phCGNSgbc.cc | 136 ++++++++++++++++++++++++++++++++++++++++++-- phasta/phCook.cc | 28 ++++----- phasta/phRestart.h | 2 + 3 files changed, 149 insertions(+), 17 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 94463e0a6..5b7e11f8d 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -9,6 +9,12 @@ #include #include #include +#include "phRestart.h" +#include +#include +#include "apfShape.h" + + #ifdef HAVE_CGNS // #include @@ -96,11 +102,67 @@ void pairDeal6sort(int a[], int b[], int n) } } assert(igc==n); + free(p); + free(idx); } namespace ph { +/* +void detachField( + apf::Field* f, + double*& data, + int& size) +{ + apf::Mesh* m = apf::getMesh(f); + size = apf::countComponents(f); + size_t n = m->count(0); + apf::NewArray c(size); + data = (double*)malloc(sizeof(double) * size * m->count(0)); + apf::MeshEntity* e; + size_t i = 0; + apf::MeshIterator* it = m->begin(0); + while ((e = m->iterate(it))) { + apf::getComponents(f, e, 0, &c[0]); + for (int j = 0; j < size; ++j) + data[j * n + i] = c[j]; + ++i; + } + m->end(it); + PCU_ALWAYS_ASSERT(i == n); + apf::destroyField(f); +} +*/ +/* +void detachField( + apf::Mesh* m, + const char* fieldname, + double*& data, + int& size) +{ + apf::Field* f = m->findField(fieldname); + PCU_ALWAYS_ASSERT(f); +// detachField(f, data, size); + size = apf::countComponents(f); + size_t n = m->count(0); + apf::NewArray c(size); + data = (double*)malloc(sizeof(double) * size * m->count(0)); + apf::MeshEntity* e; + size_t i = 0; + apf::MeshIterator* it = m->begin(0); + while ((e = m->iterate(it))) { + apf::getComponents(f, e, 0, &c[0]); + for (int j = 0; j < size; ++j) + data[j * n + i] = c[j]; + ++i; + } + m->end(it); + PCU_ALWAYS_ASSERT(i == n); + apf::destroyField(f); +} +*/ + static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); @@ -444,6 +506,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); + printf("S=%d \n",S); for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -641,13 +704,13 @@ if(1==0){ MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; -if(1==1){ +if(0==1){ for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); printf("\n"); } MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); -if(1==1){ +if(0==1){ if(part==0) { printf(" srfID GLOBAL "); for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); @@ -666,7 +729,7 @@ if(1==1){ } // pairsort(srfIDG,srfIDGidx,totBel); pairDeal6sort(srfIDG,srfIDGidx,totBel); -if(1==1){ +if(0==1){ if(part==0) { printf(" srfID GLOBAL "); for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); @@ -685,7 +748,7 @@ if(1==1){ BC_scan++; imatch++; } -if(1==1) { +if(0==1) { printf(" srfID =%d ",BCid); for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); printf("\n"); @@ -742,6 +805,8 @@ void writeCGNS(Output& o, std::string path) std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; + int Fp, Fu, Fv, Fw, 
FT; + int Sp, Su, Sv, Sw, ST; cgsize_t sizes[3],*e, start, end; int num_nodes=m->count(0); @@ -828,6 +893,66 @@ if(0==1) { if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); + /* create a nodal solution */ + char fieldName[12]; + snprintf(fieldName, 13, "solution"); + printf("solution=%s",fieldName); + double* data; + int size; + detachField(o.mesh, fieldName, data, size); + assert(size==5); + +// /* create the field data for this process */ + double* p = (double *)malloc(o.iownnodes * sizeof(double)); + double* u = (double *)malloc(o.iownnodes * sizeof(double)); + double* v = (double *)malloc(o.iownnodes * sizeof(double)); + double* w = (double *)malloc(o.iownnodes * sizeof(double)); + double* T = (double *)malloc(o.iownnodes * sizeof(double)); + int icount=0; + for (int n = 0; n < num_nodes; n++) { + gnod=o.arrays.ncorp[n]; + if(gnod >= start && gnod <= end) { // solution to write + p[icount]= data[0*num_nodes+n]; + u[icount]= data[1*num_nodes+n]; + v[icount]= data[2*num_nodes+n]; + w[icount]= data[3*num_nodes+n]; + T[icount]= data[4*num_nodes+n]; + icount++; + } + } +// /* write the solution field data in parallel */ + if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &Sp) || + cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Pressure", &Fp)) + cgp_error_exit(); + printf("Sp=%d \n",Sp); + if (cgp_field_write_data(F, B, Z, Sp, Fp, &start, &end, p)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityX", &Fu)) + cgp_error_exit(); + printf("Su=%d \n",Su); + if (cgp_field_write_data(F, B, Z, Sp, Fu, &start, &end, u)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityY", &Fv)) + cgp_error_exit(); + printf("Sv=%d \n",Sv); + if (cgp_field_write_data(F, B, Z, Sp, Fv, &start, &end, v)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityZ", &Fw)) + cgp_error_exit(); + printf("Sw=%d \n",Sw); + if (cgp_field_write_data(F, B, Z, Sp, Fw, &start, &end, w)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Temperature", &FT)) + cgp_error_exit(); + printf("ST=%d \n",ST); + if (cgp_field_write_data(F, B, Z, Sp, FT, &start, &end, T)) + cgp_error_exit(); + free(p); + free(u); + free(v); + free(w); + free(T); + free(data); /* create Helper array for number of elements on rank */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || @@ -843,5 +968,8 @@ if(0==1) { if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } // namespace diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 0c8b5ed6b..d701ba26e 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -197,19 +197,21 @@ namespace ph { ph::enterFilteredMatching(m, in, bcs); ph::generateOutput(in, bcs, m, out); ph::exitFilteredMatching(m); - // a path is not needed for inmem - if ( in.writeRestartFiles ) { - if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); - // store the value of the function pointer - FILE* (*fn)(Output& out, const char* path) = out.openfile_write; - // set function pointer for file writing - out.openfile_write = chef::openfile_write; - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart - // reset the function pointer to the original value - out.openfile_write = fn; - } - else { - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + if ( in.writeCGNSFiles ==0 ) { // for 
now, don't write restarts when writing CGNS since writing restarts is bundled with destroying fields + // a path is not needed for inmem + if ( in.writeRestartFiles ) { + if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); + // store the value of the function pointer + FILE* (*fn)(Output& out, const char* path) = out.openfile_write; + // set function pointer for file writing + out.openfile_write = chef::openfile_write; + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + // reset the function pointer to the original value + out.openfile_write = fn; + } + else { + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + } } if ( ! in.outMeshFileName.empty() ) m->writeNative(in.outMeshFileName.c_str()); diff --git a/phasta/phRestart.h b/phasta/phRestart.h index cd82967b6..f17b690d6 100644 --- a/phasta/phRestart.h +++ b/phasta/phRestart.h @@ -27,6 +27,8 @@ void detachAndWriteSolution(Input& in, Output& out, void attachZeroSolution(Input& in, apf::Mesh* m); void detachField(apf::Field* f, double*& data, int& size); +void detachField(apf::Mesh* m, const char* fieldname, double*& data, int& size); + } From 7cb7309433e6c082c6f591be1e590bd00b44a12d Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 16 Aug 2023 08:29:39 -0600 Subject: [PATCH 43/68] committing an aborted attempt to find matching faces through matched nodes in face connectivity...this approach would work if ncorp were what PETSc wanted without PHASTA's need for ilwork to have final owner (after periodicity) in ilwork but matched meshes currently foul ncorp which is why this approach is abandoned in an icomplete state and mothballed in this hash. --- phasta/phCGNSgbc.cc | 595 ++++++++++++++++++++------------------------ 1 file changed, 276 insertions(+), 319 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 5b7e11f8d..a7b6fd997 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -109,235 +109,162 @@ void pairDeal6sort(int a[], int b[], int n) namespace ph { -/* -void detachField( - apf::Field* f, - double*& data, - int& size) -{ - apf::Mesh* m = apf::getMesh(f); - size = apf::countComponents(f); - size_t n = m->count(0); - apf::NewArray c(size); - data = (double*)malloc(sizeof(double) * size * m->count(0)); - apf::MeshEntity* e; - size_t i = 0; - apf::MeshIterator* it = m->begin(0); - while ((e = m->iterate(it))) { - apf::getComponents(f, e, 0, &c[0]); - for (int j = 0; j < size; ++j) - data[j * n + i] = c[j]; - ++i; - } - m->end(it); - PCU_ALWAYS_ASSERT(i == n); - apf::destroyField(f); -} -*/ -/* -void detachField( - apf::Mesh* m, - const char* fieldname, - double*& data, - int& size) -{ - apf::Field* f = m->findField(fieldname); - PCU_ALWAYS_ASSERT(f); -// detachField(f, data, size); - size = apf::countComponents(f); - size_t n = m->count(0); - apf::NewArray c(size); - data = (double*)malloc(sizeof(double) * size * m->count(0)); - apf::MeshEntity* e; - size_t i = 0; - apf::MeshIterator* it = m->begin(0); - while ((e = m->iterate(it))) { - apf::getComponents(f, e, 0, &c[0]); - for (int j = 0; j < size; ++j) - data[j * n + i] = c[j]; - ++i; - } - m->end(it); - PCU_ALWAYS_ASSERT(i == n); - apf::destroyField(f); -} -*/ - static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); +void commuInt(Output& o, cgsize_t* global) +{ // translating a commuInt out from PHASTA to c + int numtask=o.arrays.ilwork[0]; + int itkbeg=0; + int 
maxseg=1; + int numseg; + for (int itask=0; itask=numtask); + MPI_Request req[maxtask]; + MPI_Status stat[maxtask]; + int maxfront=0; + int lfront; + itkbeg=0; + for (int itask=0; itaskcount(0); - o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); - lcorp_t owned; - lcorp_t local; - lcorp_t* owner_counts; - cgsize_t local_start_id; - cgsize_t gid; - - const int num_parts = PCU_Comm_Peers(); - const int part = PCU_Comm_Self() ; - - for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; - owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); - local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); - o.iownnodes = owned+local; + apf::Mesh* m = o.mesh; + int i; + lcorp_t nilwork = o.nlwork; + int num_nodes=m->count(0); + o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); + lcorp_t owned; + lcorp_t local; + lcorp_t* owner_counts; + cgsize_t local_start_id; + cgsize_t gid; + + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + + for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; + owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + o.iownnodes = owned+local; #ifdef PRINT_EVERYTHING - printf("%d: %d local only nodes\n", part, local); - printf("%d: %d owned nodes\n", part, owned); + printf("%d: %d local only nodes\n", part, local); + printf("%d: %d owned nodes\n", part, owned); #endif - assert( owned <= num_nodes ); - assert( owned+local <= num_nodes ); + assert( owned <= num_nodes ); + assert( owned+local <= num_nodes ); - owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); - for(int i=0; i < num_parts; i++) owner_counts[i]=0; - owner_counts[part] = owned+local; + owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); + for(int i=0; i < num_parts; i++) owner_counts[i]=0; + owner_counts[part] = owned+local; #ifdef PRINT_EVERYTHING - for(i=0;i=0); - for(i=0;i=0); - -// global so needs long long - gid++; - continue; - } - if(o.arrays.ncorp[i] == 0) - { - o.arrays.ncorp[i] = gid; - assert(o.arrays.ncorp[i]>=0); - gid++; - continue; - } - if(o.arrays.ncorp[i] == -1) - { - o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves - } - - } - //char code[] = "out"; - //int ione = 1; - - if(num_parts > 1) { -// translating a commuInt out from PHASTA to c - int numtask=o.arrays.ilwork[0]; - int itkbeg=0; - int maxseg=1; - int numseg; - for (int itask=0; itask=0); + for(i=0;i=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == 0) + { + o.arrays.ncorp[i] = gid; + assert(o.arrays.ncorp[i]>=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == -1) + o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves + } //char code[] = "out"; //int ione = 1; - int maxtask=1000; - assert(maxtask>=numtask); - MPI_Request req[maxtask]; - MPI_Status stat[maxtask]; - int maxfront=0; - int lfront; - itkbeg=0; - for (int itask=0; itask 1) + commuInt(o, o.arrays.ncorp); +if(1==1) { + for (int ipart=0; ipart 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; -// cgsize_t totOnRankBel=0; int totOnRankBel=0; int triCount=0; int quadCount=0; @@ -599,6 +526,11 @@ if(1==0){ totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int** srfIDCnn1 = new int*[nblkb]; + int** srfIDCnn2 = new int*[nblkb]; + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * 
sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { @@ -615,7 +547,6 @@ if(1==0){ if(nvert==3) triCount++; if(nvert==4) quadCount++; char Ename[7]; - switch(nvert){ case 3: snprintf(Ename, 5, "Tri%d",triCount); @@ -633,27 +564,43 @@ if(1==0){ MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based - /* write the element connectivity in parallel */ + // write the element connectivity in parallel if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(1==0){ - for (int ne=0; ne 1) { - printf("Boundary conditions cannot be written in parallel right now\n"); - } else { - // waaay too large, but works as proof of concept - cgsize_t (*bc_elems)[totOnRankBel] = (cgsize_t (*)[totOnRankBel])calloc(6 * totOnRankBel, sizeof(cgsize_t)); - cgsize_t bc_elems_count[6] = {0}; - for (int elem_id=0; elem_idcount(0); -if(0==1){ // ilwork debugging +if(1==1){ // ilwork debugging for (int ipart=0; ipart Date: Wed, 16 Aug 2023 23:02:39 -0600 Subject: [PATCH 44/68] SurfID1 and SurfID2 are in the same order to support periodicity of those two surfaces. Quick check of mixed meshs failed but will debug tomorrow. --- phasta/phCGNSgbc.cc | 256 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 198 insertions(+), 58 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index a7b6fd997..45011d2e3 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -52,6 +52,31 @@ MPI_Datatype getMpiType(T) { #include using namespace std; +// Function to sort integer array b[] +// according to the order defined by a[] +void pairsortDI(double a[], int b[], int n) +{ + pair pairt[n]; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. + sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } +} + // Function to sort integer array b[] // according to the order defined by a[] void pairsort(int a[], int b[], int n) @@ -102,8 +127,8 @@ void pairDeal6sort(int a[], int b[], int n) } } assert(igc==n); - free(p); - free(idx); + delete idx; + delete p; } @@ -189,7 +214,7 @@ void gen_ncorp(Output& o ) int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); + o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); //FIXME where to deallocate lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; @@ -257,7 +282,7 @@ void gen_ncorp(Output& o ) if(num_parts > 1) commuInt(o, o.arrays.ncorp); -if(1==1) { +if(1==0) { for (int ipart=0; ipartcount(0); size_t i = 0; + size_t phGnod = 0; std::vector lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) // PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. 
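The face centroids assembled in this routine feed the periodic matching further down; in isolation, that matching is a nearest-neighbor search in (x, y) between the two gathered centroid lists, since the two surfaces differ only by the translation in z. A hedged sketch of the fallback search, assuming cen1 and cen2 are the packed x,y,z triples produced by the MPI_Allgatherv calls in this patch:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // For face i on surface 1, return the surface-2 face with the
    // nearest (x,y) centroid, or -1 if none lies within tol.
    int matchFace(const std::vector<double>& cen1,
                  const std::vector<double>& cen2,
                  std::size_t i, double tol)
    {
      double best = HUGE_VAL;
      int jbest = -1;
      for (std::size_t j = 0; 3 * j + 2 < cen2.size(); ++j) {
        const double dx = cen1[3*i + 0] - cen2[3*j + 0];
        const double dy = cen1[3*i + 1] - cen2[3*j + 1];
        const double d2 = dx*dx + dy*dy;
        if (d2 < best) { best = d2; jbest = (int)j; }
      }
      return (best < tol * tol) ? jbest : -1;
    }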
We need it here too // see hierarchic.f but note that is fortran numbering if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]]; + for (int elem = 0; elem < nelem; ++elem){ + ecenx[elem]=0; + eceny[elem]=0; + ecenz[elem]=0; + for (int vert = 0; vert < nvert; ++vert){ + phGnod=o.arrays.ienb[block][elem][lnode[vert]]; //actually it is on-rank Global + c[i++] = o.arrays.ncorp[phGnod]; // PETSc truely global + ecenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; + eceny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; + ecenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; + } + ecenx[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + eceny[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + ecenz[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + } PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -434,7 +473,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); - printf("S=%d \n",S); for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -526,8 +564,8 @@ if(1==0){ totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - int** srfIDCnn1 = new int*[nblkb]; - int** srfIDCnn2 = new int*[nblkb]; + double** srfIDCen1 = new double*[nblkb]; + double** srfIDCen2 = new double*[nblkb]; int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); @@ -539,7 +577,10 @@ if(1==0){ e_owned = params[0]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getBoundaryConnectivityCGNS(o, i, e); + double* ecenx = (double *)malloc( e_owned * sizeof(double)); + double* eceny = (double *)malloc( e_owned * sizeof(double)); + double* ecenz = (double *)malloc( e_owned * sizeof(double)); + getBoundaryConnectivityCGNS(o, i, e,ecenx,eceny,ecenz); e_startg=1+e_written; // start for the elements of this topology long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology @@ -575,26 +616,29 @@ if(1==0){ int icnt1=0; int icnt2=0; for (int ne=0; ne search list srfID=2 list to find true match + vDSmin=vDistSq; + DistFails++; + for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be taken a lot then it could be narrowed e.g. 
j=max(0,i-50), j< i+min(matchFace,i+50), + iclose2=imapD2[j]; + d1=srfID1Gcen[(iclose1)*3+0]-srfID2Gcen[(iclose2)*3+0]; + d2=srfID1Gcen[(iclose1)*3+1]-srfID2Gcen[(iclose2)*3+1]; + vDistSq= d1*d1+d2*d2; + if(vDistSqcount(0); -if(1==1){ // ilwork debugging +if(1==0){ // ilwork debugging for (int ipart=0; ipart Date: Thu, 17 Aug 2023 09:00:20 -0600 Subject: [PATCH 45/68] CGNS standard for defining periodic interface compplete and tested for 1 and 2 process cases. All write a file that looks reasonable at first glance with cgnsview except the wedge-tet mixed case which probably has an issue I have not resolved yet --- phasta/phCGNSgbc.cc | 23 +++++++++++++++++------ pumi-meshes | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 45011d2e3..fc33d3c0f 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -718,6 +718,7 @@ if(1==0){ printf("Stack2 %d %d, %d, %d, %d, %d\n",part, GsrfID2cnt, ncon, nb int nmatchFace=GsrfID1cnt/3; double* srfID2Gcen = (double *)malloc( GsrfID2cnt * sizeof(double)); MPI_Allgatherv(srfIDCen2AllBlocks,ncon,type_d,srfID2Gcen,rcounts,displs,type_d,MPI_COMM_WORLD); + const float Lz=abs(srfID2Gcen[2]-srfID1Gcen[2]); if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1Gcen[ip*3+0]); printf("\n"); } if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1Gcen[ip*3+1]); printf("\n"); } if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1Gcen[ip*3+2]); printf("\n"); } @@ -805,6 +806,8 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++i // ZonalBC data int* srfIDG = (int *)malloc( totBel * sizeof(int)); int* srfIDGidx = (int *)malloc( totBel * sizeof(int)); + cgsize_t* donor2 = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); + cgsize_t* periodic1 = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); auto type_cg = getMpiType( cgsize_t() ); MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; @@ -837,15 +840,13 @@ if(1==0){ if(part==0) { } //reorder SurfID = 1 and 2 using idmapD{1,2} based on distance to support periodicity if(BCid==1) { - cgsize_t* eBCtmp = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); - for (int i = 0; i < nmatchFace; i++) eBCtmp[i]=eBC[imapD1[i]]; - for (int i = 0; i < nmatchFace; i++) eBC[i]=eBCtmp[i]; + for (int i = 0; i < nmatchFace; i++) periodic1[i]=eBC[imapD1[i]]; + for (int i = 0; i < nmatchFace; i++) eBC[i]=periodic1[i]; if(1==1&&part==1){ printf(" srfIDidx 1 "); for(int is=0; is< nmatchFace; ++is) printf("%d ", eBC[is]); printf("\n"); } } if(BCid==2) { - cgsize_t* eBCtmp = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); - for (int i = 0; i < nmatchFace; i++) eBCtmp[i]=eBC[imapD2[i]]; - for (int i = 0; i < nmatchFace; i++) eBC[i]=eBCtmp[i]; + for (int i = 0; i < nmatchFace; i++) donor2[i]=eBC[imapD2[i]]; + for (int i = 0; i < nmatchFace; i++) eBC[i]=donor2[i]; if(1==1&&part==1){ printf(" srfIDidx 2 "); for(int is=0; is< nmatchFace; ++is) printf("%d ", eBC[is]); printf("\n"); } } if(0==1) { @@ -859,6 +860,16 @@ if(0==1) { if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); } + int cgconn; + if (cg_conn_write(F, B, Z, "Periodic Connectivity", + CGNS_ENUMV(FaceCenter), CGNS_ENUMV(Abutting1to1), + CGNS_ENUMV(PointList), nmatchFace, periodic1, "Zone", + 
CGNS_ENUMV(Unstructured), CGNS_ENUMV(PointListDonor), + CGNS_ENUMV(Integer), nmatchFace, donor2, &cgconn)) cgp_error_exit(); + const float RotationCenter[3]={0}; + const float RotationAngle[3]={0}; + const float Translation[3]={0,0,-Lz}; + if (cg_conn_periodic_write(F, B, Z, cgconn, RotationCenter, RotationAngle, Translation)) cgp_error_exit(); free(imapD1); free(imapD2); free(eBC); free(srfIDG); free(srfIDGidx); } // processing boundary elments diff --git a/pumi-meshes b/pumi-meshes index a3a241a71..3355b3a95 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit a3a241a715de566f0e812d253f5cfc2a82705f62 +Subproject commit 3355b3a952b114f1c7c02b9bdb7fa4bb9db1b86e From ae008bd096ccb0f485fde4a207bb89dbe72f0362 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 18 Aug 2023 08:13:58 -0600 Subject: [PATCH 46/68] valgrind now only shows HDF5-MPIO issues --- phasta/phCGNSgbc.cc | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index fc33d3c0f..c3fafc23a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -127,8 +127,10 @@ void pairDeal6sort(int a[], int b[], int n) } } assert(igc==n); - delete idx; - delete p; + for (int i = 0; i < 6; i++) delete [] p[i]; + for (int i = 0; i < 6; i++) delete [] idx[i]; + delete [] idx; + delete [] p; } @@ -664,7 +666,7 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn // write the user data for this process e_written=0; //recycling eVolElm holds for (int i = 0; i < nblkb; ++i) { - int e_startB=startBelBlk[i]-eVolElm; // srfID is only for bel....matches linear order with eVolElm offset from + int e_startB=startBelBlk[i]-eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from // bel# that starts from last volume element e_owned=endBelBlk[i]-startBelBlk[i]+1; e_start=0; @@ -694,7 +696,9 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn for (int j = 0; j < srfID2OnBlk[i]*3; ++j) srfIDCen2AllBlocks[k2++]=srfIDCen2[i][j]; } free(srfID1OnBlk); free(srfID2OnBlk); - delete srfIDCen1; delete srfIDCen2; + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; int ncon=numsurfID1onRank*3; auto type_i = getMpiType( int() ); MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); @@ -829,15 +833,25 @@ if(1==0){ if(part==0) { printf(" srfIDidx GLOBAL "); for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); printf("\n"); } } int BC_scan=0; - int imatch1; cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; - while (srfIDG[BC_scan]==BCid) { +// valgrind likes this? 
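Why valgrind objected is worth spelling out: the mothballed while-form kept in the comment below tests srfIDG[BC_scan]==BCid before testing the bound, so the scan that finishes the last surface id indexes one element past the end of srfIDG before the bound can stop it. Putting the bound first lets && short-circuit the out-of-range read away; a sketch using the same names:

    // bound first, element test second: srfIDG is never indexed
    // at BC_scan == totBel because && short-circuits
    int imatch = 0;
    while (BC_scan < totBel && srfIDG[BC_scan] == BCid) {
      eBC[imatch] = srfIDGidx[BC_scan];
      BC_scan++;
      imatch++;
    }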
+ for (int ib = BC_scan; ib < totBel; ib++) { + if(srfIDG[ib]==BCid){ + eBC[imatch]=srfIDGidx[BC_scan]; + BC_scan++; + imatch++; + } else break; + } + +/* works but valgrind no likey + while (srfIDG[BC_scan]==BCid&&BC_scan Date: Fri, 18 Aug 2023 13:54:39 -0600 Subject: [PATCH 47/68] split writeBlocksCGNS into three for readability --- phasta/phCGNSgbc.cc | 263 +++++++++++++++++++++----------------------- 1 file changed, 123 insertions(+), 140 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index c3fafc23a..38cb6e4ee 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -368,10 +368,6 @@ static std::string buildCGNSFileName(std::string timestep_or_dat) return ss.str(); } -enum { - MAX_PARAMS = 12 -}; - // update is only a transpose to match CNGS. void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) { @@ -394,7 +390,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) } // update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHASTA wanted full volume -void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c, double* ecenx, double* eceny, double* ecenz) +void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c, double* eCenx, double* eCeny, double* eCenz) { int nelem = o.blocks.boundary.nElements[block]; int nvertVol = o.blocks.boundary.keys[block].nElementVertices; @@ -409,19 +405,19 @@ void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c, double* ecen if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 for (int elem = 0; elem < nelem; ++elem){ - ecenx[elem]=0; - eceny[elem]=0; - ecenz[elem]=0; + eCenx[elem]=0; + eCeny[elem]=0; + eCenz[elem]=0; for (int vert = 0; vert < nvert; ++vert){ phGnod=o.arrays.ienb[block][elem][lnode[vert]]; //actually it is on-rank Global c[i++] = o.arrays.ncorp[phGnod]; // PETSc truely global - ecenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; - eceny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; - ecenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; + eCenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; + eCeny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; + eCenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; } - ecenx[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison - eceny[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison - ecenz[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + eCenx[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCeny[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCenz[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison } PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -460,13 +456,11 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) // renamed and calling the renamed functions above with output writes now to CGNS -void writeBlocksCGNS(int F,int B,int Z, Output& o) +void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) { - int params[MAX_PARAMS]; int E,S,Fs,Fs2,Fsb,Fsb2; cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; - cgsize_t 
e_written=0; const int num_parts = PCU_Comm_Peers(); const cgsize_t num_parts_cg=num_parts; const int part = PCU_Comm_Self() ; @@ -478,15 +472,14 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); - params[0] = o.blocks.interior.nElements[i]; e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getInteriorConnectivityCGNS(o, i, e); // create data node for elements - e_startg=1+e_written; // start for the elements of this topology + e_startg=1+*e_written; // start for the elements of this topology long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int - e_endg=e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology char Ename[5]; switch(nvert){ case 4: @@ -513,12 +506,12 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my parts global element start 1-based + e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based // write the element connectivity in parallel if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - e_written=e_endg; // update count of elements written + *e_written=e_endg; // update count of elements written if(1==0){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); @@ -553,40 +546,38 @@ if(1==0){ if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) cgp_error_exit(); } // end of loop over interior blocks - - - if(o.writeCGNSFiles > 2) { - cgsize_t eVolElm=e_written; +} +void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int nblkb) +{ +// if(o.writeCGNSFiles > 2) { + int E,Fsb,Fsb2; + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + cgsize_t e_owned, e_start,e_end; + cgsize_t e_startg,e_endg; + cgsize_t eVolElm=*e_written; cgsize_t e_belWritten=0; - int totOnRankBel=0; int triCount=0; int quadCount=0; - int nblkb = o.blocks.boundary.getSize(); + int totOnRankBel=0; for (int i = 0; i < nblkb; ++i) totOnRankBel += o.blocks.boundary.nElements[i]; - int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); - int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - double** srfIDCen1 = new double*[nblkb]; - double** srfIDCen2 = new double*[nblkb]; - int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; - params[0] = o.blocks.boundary.nElements[i]; - e_owned = params[0]; + e_owned = o.blocks.boundary.nElements[i]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - double* ecenx = (double 
*)malloc( e_owned * sizeof(double)); - double* eceny = (double *)malloc( e_owned * sizeof(double)); - double* ecenz = (double *)malloc( e_owned * sizeof(double)); - getBoundaryConnectivityCGNS(o, i, e,ecenx,eceny,ecenz); - e_startg=1+e_written; // start for the elements of this topology + double* eCenx = (double *)malloc( e_owned * sizeof(double)); + double* eCeny = (double *)malloc( e_owned * sizeof(double)); + double* eCenz = (double *)malloc( e_owned * sizeof(double)); + getBoundaryConnectivityCGNS(o, i, e,eCenx,eCeny,eCenz); + e_startg=1+*e_written; // start for the elements of this topology long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology - e_endg=e_written + numBelTP; // end for the elements of this topology + e_endg=*e_written + numBelTP; // end for the elements of this topology if(nvert==3) triCount++; if(nvert==4) quadCount++; char Ename[7]; @@ -605,7 +596,7 @@ if(1==0){ e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my parts global element start 1-based + e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based // write the element connectivity in parallel if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) @@ -629,22 +620,22 @@ if(1==0){ int j2=0; for (int ne=0; ne search list srfID=2 list to find true match + } else {// Centroid for i-1 did not match-> search list srfID=2 list to find true match vDSmin=vDistSq; DistFails++; for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be taken a lot then it could be narrowed e.g. j=max(0,i-50), j< i+min(matchFace,i+50), iclose2=imapD2[j]; - d1=srfID1Gcen[(iclose1)*3+0]-srfID2Gcen[(iclose2)*3+0]; - d2=srfID1Gcen[(iclose1)*3+1]-srfID2Gcen[(iclose2)*3+1]; + d1=srfID1GCen[(iclose1)*3+0]-srfID2GCen[(iclose2)*3+0]; + d2=srfID1GCen[(iclose1)*3+1]-srfID2GCen[(iclose2)*3+1]; vDistSq= d1*d1+d2*d2; if(vDistSq 1) - writeBlocksCGNS(F,B,Z, o); +// if(o.writeCGNSFiles > 1) +// got split into 4 writeBlocksCGNS(F,B,Z, o); + cgsize_t e_written=0; + cgsize_t totBel; + writeBlocksCGNSinteror(F,B,Z,o,&e_written); + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); + free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; if(cgp_close(F)) cgp_error_exit(); double t1 = PCU_Time(); if (!PCU_Comm_Self()) From 
0bf531316bd2bb436e849ddf44b4adad8bd86e09 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 18 Aug 2023 15:55:32 -0600 Subject: [PATCH 48/68] more helper functions to improve readability --- phasta/phCGNSgbc.cc | 252 ++++++++++++++++++++++---------------------- 1 file changed, 128 insertions(+), 124 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 38cb6e4ee..58c416e7d 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -649,25 +649,18 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn } *totBel = *e_written-eVolElm; } -void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int nblkb) +void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, cgsize_t *eVolElm, int nblkb) { -// srfID is for ALL Boundary faces - const int num_parts = PCU_Comm_Peers(); - const cgsize_t num_parts_cg=num_parts; - const int part = PCU_Comm_Self() ; - const cgsize_t part_cg=part; cgsize_t e_owned, e_start,e_end; int Fsb; - cgsize_t eVolElm = *e_written-*totBel; // setup User Data for boundary faces if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || cgp_array_write("srfID", CG_Integer, 1,totBel, &Fsb)) cgp_error_exit(); // write the user data for this process - *e_written=0; //recycling eVolElm holds for (int i = 0; i < nblkb; ++i) { - int e_startB=startBelBlk[i]-eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from + int e_startB=startBelBlk[i]-*eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from // bel# that starts from last volume element e_owned=endBelBlk[i]-startBelBlk[i]+1; e_start=0; @@ -675,64 +668,20 @@ void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based - printf("Bndy %s, %ld, %ld, %ld, %d, %d, %d \n", "srfID", e_start, e_end, e_owned, i, part,Fsb); + printf("Bndy %s, %ld, %ld, %ld, %d, %d \n", "srfID", e_start, e_end, e_owned, i, Fsb); if (cgp_array_write_data(Fsb, &e_start, &e_end, &srfID[e_startB])) cgp_error_exit(); long safeArg=e_owned; // is cgsize_t which could be an 32 or 64 bit int *e_written += PCU_Add_Long(safeArg); // number of elements of this topology } -// stack connectivities on rank before gather (should preserve order) - int* rcounts = (int *)malloc( num_parts * sizeof(int)); - int* displs = (int *)malloc( num_parts * sizeof(int)); - int numsurfID1onRank=0; - int numsurfID2onRank=0; - for (int i = 0; i < nblkb; ++i) numsurfID1onRank+=srfID1OnBlk[i]; - for (int i = 0; i < nblkb; ++i) numsurfID2onRank+=srfID2OnBlk[i]; - double* srfIDCen1AllBlocks = (double *)malloc(numsurfID1onRank*3 * sizeof(double)); - double* srfIDCen2AllBlocks = (double *)malloc(numsurfID2onRank*3 * sizeof(double)); - int k1=0; - int k2=0; - for (int i = 0; i < nblkb; ++i) { - for (int j = 0; j < srfID1OnBlk[i]*3; ++j) srfIDCen1AllBlocks[k1++]=srfIDCen1[i][j]; - for (int j = 0; j < srfID2OnBlk[i]*3; ++j) srfIDCen2AllBlocks[k2++]=srfIDCen2[i][j]; - } - int ncon=numsurfID1onRank*3; - auto type_i = getMpiType( int() ); - MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); - displs[0]=0; - for 
(int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; -if(1==0){ printf("displs1 %d ",part);for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } - int GsrfID1cnt=displs[num_parts-1]+rcounts[num_parts-1]; -if(1==0){ printf("Stack1 %d %d, %d, %d, %d, %d\n",part, GsrfID1cnt, ncon, nblkb, numsurfID1onRank, numsurfID2onRank);} - double* srfID1GCen = (double *)malloc( GsrfID1cnt * sizeof(double)); - auto type_d = getMpiType( double() ); - MPI_Allgatherv(srfIDCen1AllBlocks,ncon,type_d,srfID1GCen,rcounts,displs,type_d,MPI_COMM_WORLD); -// srfID=2 repeats - ncon=numsurfID2onRank*3; - MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); - displs[0]=0; - for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; -if(1==0){ printf("displs2 %d ",part);for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } - int GsrfID2cnt=displs[num_parts-1]+rcounts[num_parts-1]; -if(1==0){ printf("Stack2 %d %d, %d, %d, %d, %d\n",part, GsrfID2cnt, ncon, nblkb, numsurfID1onRank, numsurfID2onRank);} - assert(GsrfID1cnt==GsrfID2cnt); - int nmatchFace=GsrfID1cnt/3; - double* srfID2GCen = (double *)malloc( GsrfID2cnt * sizeof(double)); - MPI_Allgatherv(srfIDCen2AllBlocks,ncon,type_d,srfID2GCen,rcounts,displs,type_d,MPI_COMM_WORLD); - const float Lz=abs(srfID2GCen[2]-srfID1GCen[2]); -if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+0]); printf("\n"); } -if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+1]); printf("\n"); } -if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+2]); printf("\n"); } - PCU_Barrier(); -if(1==0){ printf("%d part srfID 2 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+0]); printf("\n"); } -if(1==0){ printf("%d part srfID 2 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+1]); printf("\n"); } -if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+2]); printf("\n"); } - free(srfIDCen1AllBlocks); free(srfIDCen2AllBlocks); + +} +void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* imapD1, int*imapD2) +{ + int* imapD2v = (int *)malloc( nmatchFace * sizeof(int)); double* srfID1distSq = (double *)malloc( nmatchFace * sizeof(double)); double* srfID2distSq = (double *)malloc( nmatchFace * sizeof(double)); - int* imapD1 = (int *)malloc( nmatchFace * sizeof(int)); - int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); - int* imapD2v = (int *)malloc( nmatchFace * sizeof(int)); + const int part = PCU_Comm_Self() ; double xc=10; // true cubes with uniform meshes set up ties (good for debugging/verifying that dumb search backup works) for (int i = 0; i < nmatchFace; ++i) { srfID1distSq[i]=(srfID1GCen[i*3+0]-xc)*(srfID1GCen[i*3+0]-xc) @@ -800,9 +749,67 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++i printf(" imapD1 GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD1[is]); printf("\n"); printf(" srfID2dist GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%f ", srfID2distSq[is]); printf("\n"); printf(" imapD2 GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD2[is]); printf("\n"); } - free(srfID1GCen); free(srfID2GCen); free(srfID1distSq); free(srfID2distSq); free(imapD2v); +} +void gatherCentroid(double** srfIDCen,int* srfIDOnBlk, 
double** srfIDGCen, int *nmatchFace, int nblkb) +{ +// stack connectivities on rank before gather (should preserve order) + const int num_parts = PCU_Comm_Peers(); + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + int numSurfIDOnRank=0; + for (int i = 0; i < nblkb; ++i) numSurfIDOnRank+=srfIDOnBlk[i]; + double* srfIDCenAllBlocks = (double *)malloc(numSurfIDOnRank*3 * sizeof(double)); + int k1=0; + for (int i = 0; i < nblkb; ++i) + for (int j = 0; j < srfIDOnBlk[i]*3; ++j) srfIDCenAllBlocks[k1++]=srfIDCen[i][j]; + int ncon=numSurfIDOnRank*3; + auto type_i = getMpiType( int() ); + MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); + displs[0]=0; + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; + int GsrfIDcnt=displs[num_parts-1]+rcounts[num_parts-1]; + *nmatchFace=GsrfIDcnt/3; + *srfIDGCen = (double *)malloc( GsrfIDcnt * sizeof(double)); +if(1==0){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } + auto type_d = getMpiType( double() ); + MPI_Allgatherv(srfIDCenAllBlocks,ncon,type_d,*srfIDGCen,rcounts,displs,type_d,MPI_COMM_WORLD); + free(srfIDCenAllBlocks); +} + +void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int nblkb) +{ +// srfID is for ALL Boundary faces + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + cgsize_t e_owned, e_start,e_end; + int Fsb; + cgsize_t eVolElm = *e_written-*totBel; + *e_written=0; //recycling eVolElm holds + writeCGNS_UserData(F,B, srfID, startBelBlk, endBelBlk, e_written, totBel, &eVolElm, nblkb); + double* srfID1GCen; + double* srfID2GCen; + int nmatchFace1,nmatchFace; + gatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); + gatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); + assert(nmatchFace1==nmatchFace); + const float Lz=abs(srfID2GCen[2]-srfID1GCen[2]); +if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+0]); printf("\n"); } +if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+1]); printf("\n"); } +if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+2]); printf("\n"); } + PCU_Barrier(); +if(1==0){ printf("%d part srfID 2 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+0]); printf("\n"); } +if(1==0){ printf("%d part srfID 2 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+1]); printf("\n"); } +if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+2]); printf("\n"); } + int* imapD1 = (int *)malloc( nmatchFace * sizeof(int)); + int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); + sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); + free(srfID1GCen); free(srfID2GCen); // ZonalBC data int* srfIDG = (int *)malloc( *totBel * sizeof(int)); int* srfIDGidx = (int *)malloc( *totBel * sizeof(int)); @@ -812,6 +819,7 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; 
ip< nmatchFace; ++i int totOnRankBel=0; for (int i = 0; i < nblkb; ++i) totOnRankBel += o.blocks.boundary.nElements[i]; + auto type_i = getMpiType( int() ); MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; @@ -834,7 +842,6 @@ if(1==0){ if(part==0) { cgsize_t* eBC = (cgsize_t *)malloc(*totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; -// valgrind likes this? for (int ib = BC_scan; ib < *totBel; ib++) { if(srfIDG[ib]==BCid){ eBC[imatch]=srfIDGidx[BC_scan]; @@ -842,14 +849,6 @@ if(1==0){ if(part==0) { imatch++; } else break; } - -/* works but valgrind no likey - while (srfIDG[BC_scan]==BCid&&BC_scan<*totBel) { - eBC[imatch]=srfIDGidx[BC_scan]; - BC_scan++; - imatch++; - } -*/ //reorder SurfID = 1 and 2 using idmapD{1,2} based on distance to support periodicity if(BCid==1) { for (int i = 0; i < nmatchFace; i++) periodic1[i]=eBC[imapD1[i]]; @@ -885,6 +884,63 @@ if(0==1) { free(imapD1); free(imapD2); free(eBC); free(srfIDG); free(srfIDGidx); } +void CGNS_NodalSolution(int F,int B,int Z, Output& o) +{ + // create a nodal solution + char fieldName[12]; + snprintf(fieldName, 13, "solution"); + printf("solution=%s",fieldName); + double* data; + int size, S,Q; + detachField(o.mesh, fieldName, data, size); + assert(size==5); + +// create the field data for this process + double* p = (double *)malloc(o.iownnodes * sizeof(double)); + double* u = (double *)malloc(o.iownnodes * sizeof(double)); + double* v = (double *)malloc(o.iownnodes * sizeof(double)); + double* w = (double *)malloc(o.iownnodes * sizeof(double)); + double* T = (double *)malloc(o.iownnodes * sizeof(double)); + int icount=0; + int num_nodes=o.mesh->count(0); + cgsize_t gnod,start,end; + start=o.local_start_id; + end=start+o.iownnodes-1; + for (int n = 0; n < num_nodes; n++) { + gnod=o.arrays.ncorp[n]; + if(gnod >= start && gnod <= end) { // solution to write + p[icount]= data[0*num_nodes+n]; + u[icount]= data[1*num_nodes+n]; + v[icount]= data[2*num_nodes+n]; + w[icount]= data[3*num_nodes+n]; + T[icount]= data[4*num_nodes+n]; + icount++; + } + } +// write the solution field data in parallel + if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &S) || + cgp_field_write(F, B, Z, S, CG_RealDouble, "Pressure", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, p)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityX", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, u)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityY", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, v)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityZ", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, w)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "Temperature", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, T)) + cgp_error_exit(); + free(p); free(u); free(v); free(w); free(T); free(data); +} void writeCGNS(Output& o, std::string path) { @@ -897,8 +953,6 @@ void writeCGNS(Output& o, std::string path) std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; - int Fp, Fu, Fv, Fw, FT; - int Sp, Su, Sv, Sw, ST; cgsize_t sizes[3],*e, start, end; int num_nodes=m->count(0); @@ -985,56 +1039,7 @@ if(0==1) { if(j==2) 
if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); - // create a nodal solution - char fieldName[12]; - snprintf(fieldName, 13, "solution"); - printf("solution=%s",fieldName); - double* data; - int size; - detachField(o.mesh, fieldName, data, size); - assert(size==5); - -// create the field data for this process - double* p = (double *)malloc(o.iownnodes * sizeof(double)); - double* u = (double *)malloc(o.iownnodes * sizeof(double)); - double* v = (double *)malloc(o.iownnodes * sizeof(double)); - double* w = (double *)malloc(o.iownnodes * sizeof(double)); - double* T = (double *)malloc(o.iownnodes * sizeof(double)); - int icount=0; - for (int n = 0; n < num_nodes; n++) { - gnod=o.arrays.ncorp[n]; - if(gnod >= start && gnod <= end) { // solution to write - p[icount]= data[0*num_nodes+n]; - u[icount]= data[1*num_nodes+n]; - v[icount]= data[2*num_nodes+n]; - w[icount]= data[3*num_nodes+n]; - T[icount]= data[4*num_nodes+n]; - icount++; - } - } -// write the solution field data in parallel - if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &Sp) || - cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Pressure", &Fp)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fp, &start, &end, p)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityX", &Fu)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fu, &start, &end, u)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityY", &Fv)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fv, &start, &end, v)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityZ", &Fw)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fw, &start, &end, w)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Temperature", &FT)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, FT, &start, &end, T)) - cgp_error_exit(); - free(p); free(u); free(v); free(w); free(T); free(data); + CGNS_NodalSolution(F,B,Z,o); // create Helper array for number of elements on rank if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || @@ -1047,8 +1052,7 @@ if(0==1) { printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) cgp_error_exit(); -// if(o.writeCGNSFiles > 1) -// got split into 4 writeBlocksCGNS(F,B,Z, o); + cgsize_t e_written=0; cgsize_t totBel; writeBlocksCGNSinteror(F,B,Z,o,&e_written); From cb1131abcea4279a782fd1d178dd72ce37ba4853 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 18 Aug 2023 18:43:20 -0600 Subject: [PATCH 49/68] helper functions broke up remaining code such that all functions fit in a 105 row vi window even with copious debug print statements. Valgrind also checked in for np=1 and 2 where no leaks are attributed to arrays we allocate-- same for other errors which seem to be wholly within HDF5. --- phasta/phCGNSgbc.cc | 150 +++++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 72 deletions(-) diff --git a/phasta/phCGNSgbc.cc index 58c416e7d..bab28466e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -48,7 +48,7 @@ MPI_Datatype getMpiType(T) { // https://www.geeksforgeeks.org/sorting-array-according-another-array-using-pair-stl/ // Sort an array according to -// other using pair in STL.
Modified to be real-int pair (for distance matching) and in a separate routine, two integers (for idx sort by surfID) #include using namespace std; @@ -649,7 +649,7 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn } *totBel = *e_written-eVolElm; } -void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, cgsize_t *eVolElm, int nblkb) +void writeCGNS_UserData_srfID(int F,int B, int* srfID, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, cgsize_t *eVolElm, int nblkb) { cgsize_t e_owned, e_start,e_end; int Fsb; @@ -660,7 +660,7 @@ void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelB cgp_error_exit(); // write the user data for this process for (int i = 0; i < nblkb; ++i) { - int e_startB=startBelBlk[i]-*eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from + int e_startB=0; //startBelBlk[i]-*eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from // bel# that starts from last volume element e_owned=endBelBlk[i]-startBelBlk[i]+1; e_start=0; @@ -668,7 +668,7 @@ void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelB MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based - printf("Bndy %s, %ld, %ld, %ld, %d, %d \n", "srfID", e_start, e_end, e_owned, i, Fsb); + printf("BndyUserData %s, %ld, %ld, %ld, %d, %d %d \n", "srfID", e_start, e_end, e_owned, i, e_startB,*totBel); if (cgp_array_write_data(Fsb, &e_start, &e_end, &srfID[e_startB])) cgp_error_exit(); long safeArg=e_owned; // is cgsize_t which could be an 32 or 64 bit int @@ -701,8 +701,8 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i printf(" srfID2dist GLOBAL B "); for(int is=0; is< nmatchFace; ++is) printf("%f ", srfID2distSq[is]); printf("\n"); printf(" imapD2 GLOBAL B "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD2[is]); printf("\n"); } } - pairsortDI(srfID1distSq,imapD1,nmatchFace); - pairsortDI(srfID2distSq,imapD2,nmatchFace); + pairsortDI(srfID1distSq,imapD1,nmatchFace); // imapD1 puts elements with srfID=1 in order of increasing disatnce from pt 10, 0 0 + pairsortDI(srfID2distSq,imapD2,nmatchFace); // imapD1 puts elements with srfID=2 in order of increasing disatnce from pt 10, 0 0 if(1==0){ if(part==0) { printf(" srfID1dist GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%f ", srfID1distSq[is]); printf("\n"); @@ -713,7 +713,6 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i printf(" imapD2 GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD2[is]); printf("\n"); } } - double tol=1.0e-12; double tol2=1.0e-14; int jclosest, iclose1, iclose2; double d1,d2,vDistSq,vDSmin; @@ -726,7 +725,7 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i vDistSq= d1*d1+d2*d2; if(vDistSq < tol2) { imapD2v[i]=imapD2[i]; - } else {// Centroid for i-1 did not match-> search list srfID=2 list to find true match + } else {// Centroid for i did not match-> search list srfID=2 list to find true match vDSmin=vDistSq; DistFails++; for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be taken a lot then it could be narrowed e.g. 
j=max(0,i-50), j< i+min(matchFace,i+50), @@ -737,8 +736,10 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i if(vDistSqcount(0); + cgsize_t gnod; + cgsize_t start=o.local_start_id; + cgsize_t end=start+o.iownnodes-1; + double* x = (double *)malloc(o.iownnodes * sizeof(double)); + for (int j = 0; j < 3; ++j) { + int icount=0; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= start && gnod <= end) { // coordinate to write + x[icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + } +if(0==1) { + printf("%ld, %ld \n", start, end); + for (int ne=0; necount(0); - if(1==0){ // ilwork debugging for (int ipart=0; ipart PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) - long safeArg=o.iownnodes; // cgsize_t could be an int sizes[0]=PCU_Add_Long(safeArg); int ncells=m->count(m->getDimension()); // this ranks number of elements @@ -1009,36 +1045,7 @@ if(1==0){ cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5); - - if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) - cgp_error_exit(); - -// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. - cgsize_t gnod; - start=o.local_start_id; - end=start+o.iownnodes-1; - double* x = (double *)malloc(o.iownnodes * sizeof(double)); - for (int j = 0; j < 3; ++j) { - int icount=0; - for (int inode = 0; inode < num_nodes; ++inode){ - gnod=o.arrays.ncorp[inode]; - if(gnod >= start && gnod <= end) { // coordinate to write - x[icount]= o.arrays.coordinates[j*num_nodes+inode]; - icount++; - } - } -if(0==1) { - printf("%ld, %ld \n", start, end); - for (int ne=0; ne Date: Sat, 19 Aug 2023 13:01:05 -0600 Subject: [PATCH 50/68] Computing translation vector by difference of the centroids of the first ordered elements, but I find the documentation unclear so it might be the negation. Also moved the pair sort data structure to the heap (was on stack, which is not nice for large meshes -> segfault without increasing stack). Also limited debug prints with an extern defined variable (better way??) and a few other cleanups of unused arrays. Eliminated output model as an argument (9) from matchedNodeElmReader because it is never used. --- phasta/phCGNSgbc.cc | 88 ++++++++++++++++++------------------ pumi-meshes | 2 +- test/matchedNodeElmReader.cc | 7 ++- 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/phasta/phCGNSgbc.cc index bab28466e..996c97e62 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -23,6 +23,8 @@ #endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER +extern cgsize_t nDbgCG=50; +extern int nDbgI=50; namespace { @@ -56,7 +58,7 @@ using namespace std; // according to the order defined by a[] void pairsortDI(double a[], int b[], int n) { - pair<double, int> pairt[n]; + pair<double, int> *pairt = new pair<double, int>[n]; // when done delete [] pairt; // Storing the respective array // elements in pairs.
@@ -75,13 +77,14 @@ void pairsortDI(double a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } + delete [] pairt; } // Function to sort integer array b[] // according to the order defined by a[] void pairsort(int a[], int b[], int n) { - pair<int, int> pairt[n]; + pair<int, int> *pairt = new pair<int, int>[n]; // Storing the respective array // elements in pairs. @@ -100,6 +103,7 @@ void pairsort(int a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } + delete [] pairt; } void pairDeal6sort(int a[], int b[], int n) { @@ -515,7 +519,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) if(1==0){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); - for (int ne=0; ne " " " " " - " " "turn off verify mesh if equal 1 (on if you give nothing)\n", argv[0]); } @@ -740,14 +739,14 @@ int main(int argc, char** argv) gmi_register_mesh(); gmi_register_null(); - if( argc == 11 ) noVerify=atoi(argv[10]); + if( argc == 11 ) noVerify=atoi(argv[9]); double t0 = PCU_Time(); MeshInfo m; readMesh(argv[2],argv[3],argv[4],argv[5],argv[6],argv[7],argv[8],m); bool isMatched = true; - if( !strcmp(argv[3], "NULL") ) + if( !strcmp(argv[4], "NULL") ) isMatched = false; if(!PCU_Comm_Self()) @@ -795,7 +794,7 @@ int main(int argc, char** argv) outMap.clear(); apf::writeVtkFiles("rendered",mesh); - mesh->writeNative(argv[10]); + mesh->writeNative(argv[9]); if(noVerify != 1) mesh->verify(); mesh->destroyNative(); From 449025c61eb46191650868f0bf6d0df289f1b4ab Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 19 Aug 2023 14:33:15 -0600 Subject: [PATCH 51/68] Centroid sort now only done on rank 0 and results bcast back to other ranks since CGNS requires its serial writers to all have the same information --- phasta/phCGNSgbc.cc | 54 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc index 996c97e62..cc188d4d0 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -757,6 +757,32 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i free(srfID1distSq); free(srfID2distSq); free(imapD2v); } +void GatherCentroid(double** srfIDCen,int* srfIDOnBlk, double** srfIDGCen, int *nmatchFace, int nblkb) +{ +// stack connectivities on rank before gather (should preserve order) + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + int numSurfIDOnRank=0; + for (int i = 0; i < nblkb; ++i) numSurfIDOnRank+=srfIDOnBlk[i]; + double* srfIDCenAllBlocks = (double *)malloc(numSurfIDOnRank*3 * sizeof(double)); + int k1=0; + for (int i = 0; i < nblkb; ++i) + for (int j = 0; j < srfIDOnBlk[i]*3; ++j) srfIDCenAllBlocks[k1++]=srfIDCen[i][j]; + int ncon=numSurfIDOnRank*3; + auto type_i = getMpiType( int() ); + MPI_Gather(&ncon,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); + displs[0]=0; + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; + int GsrfIDcnt=displs[num_parts-1]+rcounts[num_parts-1]; + *nmatchFace=GsrfIDcnt/3; + if(part==0) *srfIDGCen = (double *)malloc( GsrfIDcnt * sizeof(double)); +if(1==0){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } + auto type_d = getMpiType( double() ); + MPI_Gatherv(srfIDCenAllBlocks,ncon,type_d,*srfIDGCen,rcounts,displs,type_d,0, MPI_COMM_WORLD); + free(srfIDCenAllBlocks); +} void AllgatherCentroid(double** srfIDCen,int*
srfIDOnBlk, double** srfIDGCen, int *nmatchFace, int nblkb) { // stack connectivities on rank before gather (should preserve order) @@ -784,8 +810,8 @@ if(1==0){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", } void Allgather2IntAndSort(int* srfID, int* srfIDidx,Output& o,int* srfIDG, int* srfIDGidx, int nblkb) { - const int num_parts = PCU_Comm_Peers(); const int part = PCU_Comm_Self() ; + const int num_parts = PCU_Comm_Peers(); const cgsize_t part_cg=part; int* rcounts = (int *)malloc( num_parts * sizeof(int)); int* displs = (int *)malloc( num_parts * sizeof(int)); @@ -827,11 +853,15 @@ void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double* srfID1GCen; double* srfID2GCen; int nmatchFace1,nmatchFace; - AllgatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); - AllgatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); - assert(nmatchFace1==nmatchFace); +// AllgatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); +// AllgatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); + GatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); + GatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); + if(part==0) printf("matchface %d, %d", nmatchFace1, nmatchFace); + if(part==0) assert(nmatchFace1==nmatchFace); // compute the translation while we still have ordered centroids data Assuming Translation = donor minus periodic but documents unclear - const float Translation[3]={ (srfID2GCen[0]-srfID1GCen[0]), (srfID2GCen[1]-srfID1GCen[1]), (srfID2GCen[2]-srfID1GCen[2])}; + double TranslationD[3]; + if (part==0){ TranslationD[0]=srfID2GCen[0]-srfID1GCen[0]; TranslationD[1]=srfID2GCen[1]-srfID1GCen[1];TranslationD[2]=srfID2GCen[2]-srfID1GCen[2];} if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID1GCen[ip*3+0]); printf("\n"); } if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID1GCen[ip*3+1]); printf("\n"); } if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID1GCen[ip*3+2]); printf("\n"); } @@ -839,10 +869,18 @@ if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< std::min(nDbgI, if(1==0){ printf("%d part srfID 2 xc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID2GCen[ip*3+0]); printf("\n"); } if(1==0){ printf("%d part srfID 2 yc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID2GCen[ip*3+1]); printf("\n"); } if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID2GCen[ip*3+2]); printf("\n"); } + auto type_i = getMpiType( int() ); + MPI_Bcast(&nmatchFace,1,type_i,0, MPI_COMM_WORLD); int* imapD1 = (int *)malloc( nmatchFace * sizeof(int)); int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); - sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); - free(srfID1GCen); free(srfID2GCen); + if(part==0) sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); + PCU_Barrier(); + printf("Barrier %d %d",part,nmatchFace); + MPI_Bcast(imapD1,nmatchFace,type_i,0, MPI_COMM_WORLD); + MPI_Bcast(imapD2,nmatchFace,type_i,0, MPI_COMM_WORLD); + auto type_d = getMpiType( double() ); + MPI_Bcast(TranslationD,3,type_d,0, MPI_COMM_WORLD); + if(part==0) {free(srfID1GCen); free(srfID2GCen);} // ZonalBC data int* srfIDG = (int *)malloc( 
*totBel * sizeof(int)); int* srfIDGidx = (int *)malloc( *totBel * sizeof(int)); @@ -890,6 +928,8 @@ if(0==1) { CGNS_ENUMV(Integer), nmatchFace, donor2, &cgconn)) cgp_error_exit(); const float RotationCenter[3]={0}; const float RotationAngle[3]={0}; + const float Translation[3]={TranslationD[0],TranslationD[1],TranslationD[2]}; + if (cg_conn_periodic_write(F, B, Z, cgconn, RotationCenter, RotationAngle, Translation)) cgp_error_exit(); free(imapD1); free(imapD2); free(eBC); free(srfIDG); free(srfIDGidx); From 1c8507d7e9478bccb4b34ad9d0fb19a2fbcc5b08 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 21 Aug 2023 21:54:53 -0600 Subject: [PATCH 52/68] incomplete fix of matchedNodeElmReader...forgot to change the expected argument count. --- phasta/phCGNSgbc.cc | 2 +- test/matchedNodeElmReader.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/phasta/phCGNSgbc.cc index cc188d4d0..847413b98 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -517,7 +517,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) cgp_error_exit(); *e_written=e_endg; // update count of elements written -if(1==0){ +if(1==1){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); for (int ne=0; ne no rank but .rank added to next 6 " " " From 61d2cbbbf42e9304512996fb8b7af03261661f02 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Tue, 22 Aug 2023 00:01:57 -0600 Subject: [PATCH 53/68] hacky way of handling multiple topologies; wrote connectivity without hanging but must be done better --- phasta/phCGNSgbc.cc | 70 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/phasta/phCGNSgbc.cc index 847413b98..b97408a59 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -473,9 +473,33 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); - for (int i = 0; i < o.blocks.interior.getSize(); ++i) { + int nblki= o.blocks.interior.getSize(); + if(nblki==1) { // this part has only one topology + int nvert = o.blocks.interior.keys[0].nElementVertices; + if( nvert==4) {// need to make an empty wedge block + e_owned=0; + // cgsize_t* e = (cgsize_t *)malloc(nvert * 1 * sizeof(cgsize_t)); + e_startg=1+*e_written; // start for the elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + char Ename[5]; + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) + cgp_error_exit(); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based +// fail??
e_end=e_start+e_owned-1; // my parts global element stop 1-based + e_end=e_start; // my parts global element stop 1-based + // write the element connectivity in parallel + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, NULL)) + cgp_error_exit(); + } + //free(e); + } + for (int i = 0; i < nblki; ++i) { BlockKey& k = o.blocks.interior.keys[i]; - std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); @@ -519,13 +543,13 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) if(1==1){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); - for (int ne=0; ne Date: Wed, 23 Aug 2023 10:02:23 -0600 Subject: [PATCH 54/68] multi-topology hopefully handled properly now. Passes small tests but could still be corner cases laying in wait. --- phasta/phCGNSgbc.cc | 477 +++++++++++++++++++++++--------------------- 1 file changed, 250 insertions(+), 227 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index b97408a59..1ff65654a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -458,8 +458,54 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) // arbitrary combinations of BCs but leaving that out for now } -// renamed and calling the renamed functions above with output writes now to CGNS +void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 4: + snprintf(Ename, 4, "Tet"); + if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 5: + snprintf(Ename, 4, "Pyr"); + if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 6: + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 8: + snprintf(Ename, 4, "Hex"); + if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } + printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} +void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 3: + snprintf(Ename, 4, "Tri"); + if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 4: + snprintf(Ename, 5, "Quad"); + if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } + printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} +// renamed and calling the renamed functions above with output writes now to CGNS void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) { int E,S,Fs,Fs2,Fsb,Fsb2; @@ -474,136 +520,85 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); - if(nblki==1) { // this part has only one toplogy - int nvert = o.blocks.interior.keys[0].nElementVertices; - if( nvert==4) {// need to make an empty wedge block - e_owned=0; - // cgsize_t* e = (cgsize_t *)malloc(nvert * 1 * sizeof(cgsize_t)); - e_startg=1+*e_written; // start for the elements of this topology - long safeArg=e_owned; // e_owned is cgsize_t which could be 
an 32 or 64 bit int - e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology - char Ename[5]; - snprintf(Ename, 4, "Wdg"); - if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+*e_written; // my parts global element start 1-based -// fail?? e_end=e_start+e_owned-1; // my parts global element stop 1-based - e_end=e_start; // my parts global element stop 1-based - // write the element connectivity in parallel - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, NULL)) - cgp_error_exit(); - } - //free(e); - } - for (int i = 0; i < nblki; ++i) { - BlockKey& k = o.blocks.interior.keys[i]; - e_owned = o.blocks.interior.nElements[i]; - int nvert = o.blocks.interior.keys[i].nElementVertices; - cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getInteriorConnectivityCGNS(o, i, e); - // create data node for elements - e_startg=1+*e_written; // start for the elements of this topology - long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int - e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology - char Ename[5]; - switch(nvert){ - case 4: - snprintf(Ename, 4, "Tet"); - if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - case 5: - snprintf(Ename, 4, "Pyr"); - if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &E)) - cgp_error_exit(); + int nvMap[4] = {4,5,6,8}; + int nvC,nvert,nvAll,invC,iblkC; + for (int i = 0; i < 4; ++i) { // check all topologies + nvAll=0; + nvC=nvMap[i]; + for (int j = 0; j < nblki; ++j) { // check all blocks + BlockKey& k = o.blocks.interior.keys[j]; + nvert = o.blocks.interior.keys[j].nElementVertices; + if(nvC==nvert) { + invC=1; + iblkC=j; break; - case 6: - snprintf(Ename, 4, "Wdg"); - if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) + } else invC=0; + } + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; // = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + if(nvAll!=0) { //nvC present on at least 1 rank + if(invC!=0){ //nvC present on my rank + e_owned = o.blocks.interior.nElements[iblkC]; + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, iblkC, e); + } + else e_owned=0; + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + e_startg=1+*e_written; // start for the elements of this topology + char Ename[5]; + topoSwitch(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + *e_written=e_endg; + if(invC!=0) free(e); + // create the field data for this process + int* d = NULL; + if(invC!=0){ //nvC present on my rank +//KEN LEARN int* d = (int *)malloc(e_owned * sizeof(int)); + d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = part; + // write the solution field data in parallel + } + if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + cgp_error_exit(); 
+ if(invC!=0) free(d); + char UserDataName[11]; + snprintf(UserDataName, 11, "n%sOnRank", Ename); + // create Helper array for number of elements on part of a given topology + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write(UserDataName, CG_Integer, 1, &num_parts_cg, &Fs2)) cgp_error_exit(); - break; - case 8: - snprintf(Ename, 4, "Hex"); - if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &E)) + // create the field data for this process + int nIelVec=e_owned; + cgsize_t partP1=part+1; + printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) cgp_error_exit(); - break; - } - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+*e_written; // my parts global element start 1-based - e_end=e_start+e_owned-1; // my parts global element stop 1-based - // write the element connectivity in parallel - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) - cgp_error_exit(); - *e_written=e_endg; // update count of elements written if(1==1){ - printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); + printf("interior cnn %s %d %ld %ld \n", Ename,part, e_start, e_end); // for (int ne=0; ne 2) { int E,Fsb,Fsb2; const int num_parts = PCU_Comm_Peers(); const cgsize_t num_parts_cg=num_parts; @@ -613,97 +608,114 @@ void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfI cgsize_t e_startg,e_endg; cgsize_t eVolElm=*e_written; cgsize_t e_belWritten=0; - int triCount=0; - int quadCount=0; - int totOnRankBel=0; - for (int i = 0; i < nblkb; ++i) - totOnRankBel += o.blocks.boundary.nElements[i]; - - for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { - BlockKey& k = o.blocks.boundary.keys[i]; - e_owned = o.blocks.boundary.nElements[i]; - int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; - cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - double* eCenx = (double *)malloc( e_owned * sizeof(double)); - double* eCeny = (double *)malloc( e_owned * sizeof(double)); - double* eCenz = (double *)malloc( e_owned * sizeof(double)); - getBoundaryConnectivityCGNS(o, i, e,eCenx,eCeny,eCenz); - e_startg=1+*e_written; // start for the elements of this topology - long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int - cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology - e_endg=*e_written + numBelTP; // end for the elements of this topology - if(nvert==3) triCount++; - if(nvert==4) quadCount++; - char Ename[7]; - switch(nvert){ - case 3: - snprintf(Ename, 5, "Tri%d",triCount); - if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - case 4: - snprintf(Ename, 6, "Quad%d",quadCount); - if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; + int nvMap[2] = {3,4}; + int iblkC[2]; + int estart[2]; + int nvC,nvert,nvAll,invC; + for (int j = 0; j < nblkb; ++j) { // check all blocks + BlockKey& k = o.blocks.boundary.keys[j]; + nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; + } + for (int i = 0; i < 2; ++i) { // check all topologies + nvAll=0; + nvC=nvMap[i]; + invC=0; + int icountB=0; + for (int j = 0; j < nblkb; ++j) { // check all blocks + BlockKey& k = o.blocks.boundary.keys[j]; + nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; + if(nvert==nvC) { + invC=1; + 
iblkC[icountB]=j; // mark the block numbers (could be more than one) that have current topology + icountB++; + } } - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+*e_written; // my parts global element start 1-based - e_end=e_start+e_owned-1; // my parts global element stop 1-based - // write the element connectivity in parallel - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) - cgp_error_exit(); - printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; double* eCenx=NULL; double* eCeny=NULL; double* eCenz=NULL; + if(nvAll!=0) { //nvC present on at least 1 rank + e_owned=0; + if(invC!=0){ //nvC present on my rank + for (int j = 0; j < icountB; ++j) { // combine blocks + estart[j]=e_owned; + e_owned += o.blocks.boundary.nElements[iblkC[j]]; + } + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + eCenx = (double *)malloc( e_owned * sizeof(double)); + eCeny = (double *)malloc( e_owned * sizeof(double)); + eCenz = (double *)malloc( e_owned * sizeof(double)); + for (int j = 0; j < icountB; ++j) {// combine blocks + getBoundaryConnectivityCGNS(o, iblkC[j], &e[estart[j]], &eCenx[estart[j]], + &eCeny[estart[j]], &eCenz[estart[j]]); // stack repeated topologies + getNaturalBCCodesCGNS(o, iblkC[j], &srfID[e_belWritten+estart[j]]); // note e_owned counts all same topo + } + (*nStackedOnRank)++; // no longer have nblkb blocks so count them as you stack them + } + e_startg=1+*e_written; // start for the elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology + e_endg=*e_written + numBelTP; // end for the elements of this topology + char Ename[6]; + topoSwitchB(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + // write the element connectivity in parallel + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(1==0){ for (int ne=0; ne 2) { int nblkb = o.blocks.boundary.getSize(); - double** srfIDCen1 = new double*[nblkb]; + double** srfIDCen1 = new double*[nblkb]; // might not all be used double** srfIDCen2 = new double*[nblkb]; int totOnRankBel=0; for (int i = 0; i < nblkb; ++i) @@ -1166,17 +1187,19 @@ if(1==0){ int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); - writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); + int nStackedOnRank; + writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); free(srfID); free(srfIDidx); 
free(srfID1OnBlk); free(srfID2OnBlk); free(startBelBlk); free(endBelBlk); - for (int i = 0; i < nblkb; ++i) delete [] srfIDCen1[i]; - for (int i = 0; i < nblkb; ++i) delete [] srfIDCen2[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; delete [] srfIDCen1; delete [] srfIDCen2; if(cgp_close(F)) cgp_error_exit(); double t1 = PCU_Time(); if (!PCU_Comm_Self()) lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); + } } } // namespace From 35426c66875d4db6b4157578ec6c15a9f4e42218 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 25 Aug 2023 11:42:02 -0600 Subject: [PATCH 55/68] Adding a text file to describe developments, current assumptions/limitations, and future paths to improvement --- phasta/CGNSFileWritingDev.txt | 76 +++++++++++++++++++++++++++++++++++ pumi-meshes | 2 +- 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 phasta/CGNSFileWritingDev.txt diff --git a/phasta/CGNSFileWritingDev.txt b/phasta/CGNSFileWritingDev.txt new file mode 100644 index 000000000..4441cf355 --- /dev/null +++ b/phasta/CGNSFileWritingDev.txt @@ -0,0 +1,76 @@ +CGNS output from Chef + + + +This document describes work done to get CGNS output from Chef. + + + +Before doing that, I am going to list EXPECTATIONS of CGNS and how they align or not with classic Chef/PHASTA vs. PETSc/CEED-PHASTA (definitions: rank = part = process; I will use part). + +I) CGNS expects global numbering for mesh nodes and elements and that numbering MUST start from 1 (not zero). + +II) The global numbering of elements is inclusive of both volume elements and boundary elements and also inclusive of all topologies, with the numbering-start determined by what order you write them to file (might not be a requirement but it is the simplest way when streaming). + +III) If using parallel writing (which we will have to do for any realistic size mesh), the ownership of the writer must be exclusive (write no data you don't own), continuous (no skipped global numbers), and linearly increasing with part number (e.g., rank0 starts from 1 and ends on nOwnedByRank0, rank1 starts from nOwnedByRank0+1 and ends on nOwnedByRank1+nOwnedByRank0, and so on). + + + + + +Going to a separate enumeration to discuss how that translated to our work on that now: + +Starting with the most basic, CGNS has the concept of a Base. We keep life simple and only have 1 base. +CGNS has the concept of a Zone. Someday if we get into overset grids (not likely) we might have more, but for now we only support 1 zone. +Within a Zone we will always be type Unstructured, and a few things must be described while others are optional. CGNS provides writer "functions" cg_ or cgp_, and these have a structure in which one function establishes the file-node in the file/database and then you are able to call a second function to write the data at that node (this is a little bit like PHASTAIO's notion of write/read header followed by write/read data). cg_ means all parallel processes must have identical data to write, while cgp_ allows each process to write its portion of the data and CGNS collects (interpret collect as MPIO collective operations) that data within an HDF5 file. +Chef was co-developed with PHASTA to avoid global numbering and instead number from 0 to n_entity-1 on each rank when parallel, and to have separate data structures which tracked which rank owned a given entity and which ranks had remote copies of those entities.
Chef created data structures for PHASTA to use to manage this partition-specific ownership. Thus, before we can write any parallel distributed data with the CGNS functions described in 3., we needed to create a map from PHASTA's numbering to a numbering that satisfies I)-III). Since that global node numbering is basically the same as PETSc's with a shift by 1, I copied code from PHASTA that did that for use with PETSc solvers (common/gen_ncorp.c) and modified it. That also needed the functionality of commuInt.f to communicate ownership on part boundaries back to all the replicas on other parts (which in turn required a chunk of code from ctypes.f), translated to C. All of this code makes use of the ilwork data structure that helps PHASTA know how to set up and efficiently perform peer-to-peer communication. At the end of this code insertion/translation, we have an ncorp array that maps from PHASTA/Chef numbering to CGNS numbering on each part, and thus we can now start to describe the arrays that are written.
+CGNS of course has to store coordinates. It does so as flat double lists one dimension at a time, so that means CoordinatesX, then CoordinatesY, then CoordinatesZ for us. To be clear, to use the cgp mid-level functions to write these in parallel, PHASTA/Chef's part coordinate list must be sifted down to just its owners using ncorp described in 4., and that compact ownership array (data satisfying I)-III)) is passed through the cgp_write functions (both file-node creation and parallel data write).
+Next in our output, though not absolutely required, is Solution. Similarly to step 5., CGNS has a function to create a file-node for Solution and then you add as many fields as needed to that (currently I have only coded Pressure, VelocityX, VelocityY, VelocityZ, and Temperature). Note CGNS is a standard and it mandates the names of these and any additional fields we might want to add, so read the docs. As with 5., these have to be sifted and mapped through ncorp to convert PHASTA/Chef's numbering to a compact array that can be written in parallel using the cgp writers. Note, as of 4.4.0, it looks to be possible to aggregate the writes described in 5. and 6. through cgp_coord_multi_* and cgp_field_multi_* respectively, but this has not been explored yet.
+Next in the file is some User Data that was a backdoor to writing some data in parallel and to support parallel read with less work; I may describe it more later, but it is not required by CGNS so I skip it for now.
+Next is a cell-centered solution file (that just means one value per 3D element or cell) that I put the RankOfWriter field in. This is likely what the PETSc reader will use to understand the partition that Chef used to write this file, and if that part-count matches the PETSc reader and solver, the file can be read and processed to derive all the parallel data structures PETSc/CEED-PHASTA needs. A CGNS issue was filed to determine if they have a standard for this and, if not, interest in developing one.
+Next is the first 3D element topology connectivity. Basically we create a separate node for each element topology and establish global numbering by rank (easy, as there are no replicas of elements and thus the ownership range was established definitively by the partitioner; the ownership range just jumps by the number of that element type on a given part). If multi-topology, this repeats for the rest of the 3D element topologies.
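+As an aside, the exclusive, contiguous ranges demanded by III) fall out of a single MPI_Exscan per topology. A minimal sketch of the range computation follows (an illustration, not the actual code; it assumes a 64-bit cgsize_t so MPI_LONG_LONG is the matching MPI type, and the variable names merely mirror phCGNSgbc.cc):
+
+    #include <mpi.h>
+    // Compute this part's inclusive, 1-based global element range for one
+    // topology, continuing after e_written elements from earlier sections.
+    void ownedRange(long long e_owned, long long e_written,
+                    long long* e_start, long long* e_end) {
+      long long below = 0; // sum of e_owned over all lower ranks
+      MPI_Exscan(&e_owned, &below, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
+      int self; MPI_Comm_rank(MPI_COMM_WORLD, &self);
+      if (self == 0) below = 0; // MPI_Exscan leaves rank 0's result undefined
+      *e_start = below + 1 + e_written;  // 1-based start for this part
+      *e_end   = *e_start + e_owned - 1; // inclusive stop for this part
+    }
+
+Rank 0 thus starts at 1+e_written, rank 1 continues immediately after rank 0's last element, and so on, which is exactly the ownership pattern III) requires.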
+Next is the first 2D boundary element topology, which follows the concepts of 9., as do the subsequent 2D boundary element topologies. At this time we have elected to write all the elements of a given topology in a single CGNS file-node even if they are distributed across multiple geometric model surfaces (not the only option). Note, since ALL cgp and cg writes are collective, all ranks, even those without boundary elements (or interior elements of a given topology), must participate. Obviously the same holds for the MPI_Exscan and PCU collectives.
+It was decided in the first pass to forgo writing ZonalBCs based on nodes in favor of writing them as mesh-sets (CGNS calls them PointLists, abstracting the face numbers to the non-existent point at the centroid of the mesh face), which are face numbers with a particular surfID set in the smd (GUI if Simmetrix model-based) or spj (flat text file if working with a dmg model, as we do with the MATLAB->MGEN-MNER or SIMMETRIX->{MDLCONVERT,CONVERT(withExtrude)} workflows) to get to chef inputs. PETSc will then parse these mesh-sets into DMLabels for the boundary of the mesh. Then, it will handle Dirichlet and Neumann boundary conditions as it normally does (based on yaml input as to what type of BC is on a particular surfID number). For now we have a rather rigid prototype code that is limited to processing and writing 6 distinct mesh sets (one for each of the 6 faces of our topological box). It should not be hard to extend and generalize this code, but we took this shortcut in the first version. CGNS clearly supports direct nodal/Dirichlet PointSet but we have CHOSEN not to pursue this in the first pass.
+Last but certainly not least is a file-node called ZonalGridConnectivity, which is how CGNS encodes periodic boundary conditions, as can be seen in the first/only leaf under that file-node, Periodic Connectivity. This has been set up rigidly to assume that the faces listed in PointList are ordered in the same way as the faces listed in PointListDonor and, further, that surfID=2 is the donor and surfID=1 is the periodic partner of the donor. This is again a shortcut or hardcoded link that assumes the spj file has put surfID=1 on the face that is the periodic match for the face that has surfID=2. These meshes obviously need to be matched meshes, and this creates an issue we still need to resolve (will describe soon). The code currently computes the translation between the two periodic planes. I found the documentation unclear but assumed that vector was FROM the donor TO the periodic plane. In the current inputs to the test codes the donor (surfID=2) is at zMax while the periodic plane (surfID=1) is at zMin, so this makes Translation[3]={0,0,-Lz}, but that might be backwards and would certainly be flipped if I got the FROM/TO flipped (here Lz is unsigned as it is the spanwise domain width). I made the code general by using the first surfID=2 element's centroid coordinates minus the first surfID=1 element's centroid coordinates (this picks up a y component of 1e-21 due to roundoff).
+
+While the above is functional, the already mentioned ambiguity and the following issues/limitations remain unresolved:
+
+If we feed the current code a matched mesh, ncorp will be computed incorrectly for every point that is on a part boundary that is also matched. The reason for this is that ilwork was set up for PHASTA's needs and capabilities.
As noted above, PHASTA has replica nodes as REAL nodes (nodes with local node ranges) that it uses for all on-rank work, and then the on-rank numbers do their parallel assembly with the true OWNER node, which in this case is not the node they physically share on the periodic plane; instead ilwork sets up a communication with the donor for that node. Consequently, if we use the ilwork data structure as it is made for PHASTA, ncorp will map that node to a global node number on the donor plane. Again, ilwork is only used in PHASTA for assembling equations, so this is right for PHASTA but will foul PETSc by providing a connectivity that gives global node numbers with coordinates on the donor plane.
+Currently ZonalBC does not support parallel BC writing (cg available but not cgp). James is working with the CGNS development group to develop cgp_ptset_* for reading/writing PointSet data (which is also used for ZonalGridConnectivity), but for now we are doing MPI_Allgather{v} operations so that cg is correct. Note it is Allgather and not Gather because CGNS does not let part 0 write in serial but instead requires all ranks to have the same data and all to call cg with that same data to have this work correctly. We are told by CGNS developers (and this seems like it has to be true) that only part=0 is actually writing, but we have observed that any non-matching data on part!=0 results in a failure of cg. This is a potential scalability issue but seems likely to be addressed through the development of cgp for ZonalBCs.
+
+Discussion of ISSUE 1) I just put this question to Jed and James in the GitHub PR, but in doing so I think it is clear that CGNS does need global node numbers for the periodic replicas of the owner/donor nodes. Thus we do need to do one of the following:
+
+A) Turn off matching (creates another conflict),
+
+B) suppress it during certain stages of chef's work, or similarly
+
+C) alter the code to not add this type of mapping in ilwork.
+
+The conflict with A) might be limited to Simmetrix wedge-tet meshes that won't match when there is an unstructured mesh region (like tets). TBH, on small meshes I can't get matching to work anyway. This is what we are currently doing, and it has forced us to re-discover matching through reordering the donor and periodic mesh sets to have their centroids match. To make this tractable, I did an MPI_Gather of the data to part 0 and did a serial sort there. This will eventually hit scalability issues (less of an issue for Q3 meshes as they are 3^d more coarse than Q1 meshes) but it is still not pretty. If we could keep matching on AND we were able to disable it from ilwork so that we build the global numbering that CGNS wants (periodic-nodes/replicas have a global node number), then we MIGHT be able to use matching to order the periodic-mesh set to match the order of the donor-mesh-set in parallel and avoid this serial bottleneck.
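+To make ISSUE 1 concrete, here is a toy illustration (hypothetical numbers, not from any real mesh): let periodic-plane node P be global node 1 at z=0 and its donor D be global node 2 at z=Lz on another part. A runnable sketch of the symptom:
+
+    #include <cstdio>
+    int main() {
+      const double Lz = 1.0;
+      double zOfGlobal[] = {0.0, Lz}; // z of global nodes 1 (P) and 2 (D)
+      int ncorpFromIlwork = 2; // matching-aware ilwork: P mapped to donor D
+      int ncorpCGNSwants  = 1; // CGNS: P keeps its own global number
+      // connectivity written through the matched ncorp hits the donor plane
+      std::printf("matched ncorp: corner at z=%g (donor plane, wrong)\n",
+                  zOfGlobal[ncorpFromIlwork - 1]);
+      std::printf("desired ncorp: corner at z=%g (periodic plane, right)\n",
+                  zOfGlobal[ncorpCGNSwants - 1]);
+      return 0;
+    }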
+
+CWS and KEJ discussed the following organization of options:
+
+I) Use SCOREC/core matching
+
+II) Order periodic faces without use of SCOREC/core matching information
+
+I) further breaks into the following steps and branches:
+
+To have matching available requires one of the following: a) replication of ilwork to an ilworkCGNS structure without accounting for periodic matching, so that it will build an ncorp as if matching were not present, or b) POSSIBLY, if the filterMatching flag is set, existing ilwork will create a PHASTA input set that is lacking periodicity and thus correct for CGNS, and yet the matching information is saved and can be restored for use in CGNS code to determine matching.
+The second aspect is how to do that matching, since inputs coming from the matchedNodeElement reader ONLY have VERTEX matching. Here again 3 options are possible: a) make the matching check for matches of nodes through face connectivity, b) make inputs to MNER richer to include face matching (likely available during mesh generation), c) develop code within MNER to elevate matching information to edges and faces (as is done with classification, though that is far easier because classification is to a geometric model simple enough to be on all ranks AND model entities are far fewer; this is currently limited to extrusions anyway, but so is periodicity, so not really a limitation).
+
+II) also breaks into at least 2 branches:
+
+distance of centroid, collected to rank0 and sorted (current approach), which we will likely use as long as the mesh size on the periodic plane does not make this intractable,
+OR a breadth-first search that starts with a single matched face (seed) and then adds neighboring faces to a list from which the next face in some order (could be centroidal distance or other) is chosen. If the mesh is matched, this ordering can proceed in parallel for both the donor and the periodic mesh set. When a face is added that touches a part boundary, existing part boundary adjacency information is used to continue the search on another rank.

diff --git a/pumi-meshes b/pumi-meshes
index 8b920cf7e..b7860281c 160000
--- a/pumi-meshes
+++ b/pumi-meshes
@@ -1 +1 @@
-Subproject commit 8b920cf7e0590befcce7a2af6e2c3f3ec6c89712
+Subproject commit b7860281c513fa44ee2047f7a3ad615b921d67cd

From 2931f6bfb69e7435a8e94c6a3ea48f5730a82101 Mon Sep 17 00:00:00 2001
From: "Kenneth E.
Jansen" Date: Sun, 27 Aug 2023 18:19:29 +0000 Subject: [PATCH 56/68] Enums neededed to be compatable with non-Spack builds --- phasta/phCGNSgbc.cc | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1ff65654a..b7d505a39 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -464,22 +464,22 @@ void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_star switch(nvert){ case 4: snprintf(Ename, 4, "Tet"); - if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TETRA_4), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 5: snprintf(Ename, 4, "Pyr"); - if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PYRA_5), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 6: snprintf(Ename, 4, "Wdg"); - if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PENTA_6), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 8: snprintf(Ename, 4, "Hex"); - if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(HEXA_8), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; } @@ -492,12 +492,12 @@ void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_sta switch(nvert){ case 3: snprintf(Ename, 4, "Tri"); - if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TRI_3), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 4: snprintf(Ename, 5, "Quad"); - if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(QUAD_4), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; } @@ -516,8 +516,8 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; // create a centered solution - if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || - cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) + if (cg_sol_write(F, B, Z, "RankOfWriter", CGNS_ENUMV(CellCenter), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "RankOfWriter", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); int nvMap[4] = {4,5,6,8}; @@ -574,7 +574,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) // create Helper array for number of elements on part of a given topology if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write(UserDataName, CG_Integer, 1, &num_parts_cg, &Fs2)) + cgp_array_write(UserDataName, CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) cgp_error_exit(); // create the field data for this process int nIelVec=e_owned; @@ -705,7 +705,7 @@ if(1==1){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn char UserDataName[12]; snprintf(UserDataName, 13, "n%sOnRank", Ename); if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write(UserDataName, CG_Integer, 1, &num_parts_cg, &Fsb2)) + cgp_array_write(UserDataName, CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fsb2)) cgp_error_exit(); printf("Bndy %s, %ld, %d, %d \n", UserDataName, e_owned, part,Fsb2); cgsize_t partP1=part+1; @@ -722,7 +722,7 @@ void 
writeCGNS_UserData_srfID(int F,int B, int* srfID, int* startBelBlk, int *e // setup User Data for boundary faces if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,totBel, &Fsb)) + cgp_array_write("srfID", CGNS_ENUMV(Integer), 1,totBel, &Fsb)) cgp_error_exit(); // write the user data for this process int nvMap[2] = {3,4}; @@ -1038,24 +1038,24 @@ void CGNS_NodalSolution(int F,int B,int Z, Output& o) } } // write the solution field data in parallel - if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &S) || - cgp_field_write(F, B, Z, S, CG_RealDouble, "Pressure", &Q)) + if (cg_sol_write(F, B, Z, "Solution", CGNS_ENUMV(Vertex), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Pressure", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, p)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityX", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityX", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, u)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityY", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityY", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, v)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityZ", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityZ", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, w)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "Temperature", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Temperature", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, T)) cgp_error_exit(); @@ -1064,9 +1064,9 @@ void CGNS_NodalSolution(int F,int B,int Z, Output& o) void CGNS_Coordinates(int F,int B,int Z,Output& o) { int Cx,Cy,Cz; - if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) + if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateZ", &Cz)) cgp_error_exit(); // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. @@ -1153,7 +1153,7 @@ if(1==0){ if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; if ( cgp_open(outfile, CG_MODE_WRITE, &F) || cg_base_write(F, "Base", 3, 3, &B) || - cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) + cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5); @@ -1163,7 +1163,7 @@ if(1==0){ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts_cg, &Fs2)) + cgp_array_write("nCoordsOnRank", CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) cgp_error_exit(); // create the field data for this process int nCoordVec=o.iownnodes; From 0d96a635bf16d123c9c129ab8244130c19bf78cc Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sun, 27 Aug 2023 12:21:04 -0600 Subject: [PATCH 57/68] srfID for BEL only and thus index must be offset by eVolElm --- phasta/phCGNSgbc.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1ff65654a..2f5c78f7a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -595,7 +595,6 @@ if(1==1){ } // end if ANY rank has this topology } // end of loop over ALL topologies PCU_Barrier(); - printf("rank=%d reached end of BlockInterior\n",part); } void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { @@ -697,8 +696,8 @@ if(1==0){ free(eCenx); free(eCeny); free(eCenz); if(1==1){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icnt2, j1, j2, e_owned, srfID1OnBlk[i],srfID2OnBlk[i]);} for (int j = 0; j < (int) e_owned; ++j) srfIDidx[e_belWritten+j]=e_start+j; - startBelBlk[idx]=e_start; // provides start point for each block in srfID - endBelBlk[idx]=e_end; // provides end point for each block in srfID + startBelBlk[idx]=e_start-eVolElm; // provides start point for each block in srfID + endBelBlk[idx]=e_end-eVolElm; // provides end point for each block in srfID } *e_written=e_endg; e_belWritten+=e_owned; // this is tracking written by this rank as we unpack srfID later @@ -945,7 +944,6 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< std::min(nDbgI, int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); if(part==0) sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); PCU_Barrier(); - printf("Barrier %d %d",part,nmatchFace); MPI_Bcast(imapD1,nmatchFace,type_i,0, MPI_COMM_WORLD); MPI_Bcast(imapD2,nmatchFace,type_i,0, MPI_COMM_WORLD); auto type_d = getMpiType( double() ); From 93d90329d9d7cbcba173e705076908837598c6e7 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sun, 27 Aug 2023 14:06:02 -0600 Subject: [PATCH 58/68] all debugging out encased in 0==1 conditional --- phasta/phCGNSgbc.cc | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 2f2581ec2..b61b5531a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -483,7 +483,7 @@ void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_star cgp_error_exit(); break; } - printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); *E=Ep; } void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) @@ -501,7 +501,7 @@ void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_sta cgp_error_exit(); break; } - printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); *E=Ep; } @@ -579,11 +579,11 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) // create the field data for this process int nIelVec=e_owned; cgsize_t partP1=part+1; - printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); +if(0==1) printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) cgp_error_exit(); -if(1==1){ +if(0==1){ printf("interior cnn %s %d %ld %ld \n", Ename,part, e_start, e_end); // for (int ne=0; necount(0); -if(1==0){ // ilwork debugging +if(0==1){ // ilwork debugging for (int ipart=0; ipart Date: Sun, 27 Aug 2023 16:08:30 -0600 Subject: [PATCH 59/68] valgrind found some slopiness --- phasta/phCGNSgbc.cc | 68 ++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index b61b5531a..0e35aa4a6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -77,7 +77,7 @@ void pairsortDI(double a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } - delete pairt; + delete [] pairt; } // Function to sort integer array b[] @@ -103,7 +103,7 @@ void pairsort(int a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } - delete pairt; + delete [] pairt; } void pairDeal6sort(int a[], int b[], int n) { @@ -611,10 +611,6 @@ void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfI int iblkC[2]; int estart[2]; int nvC,nvert,nvAll,invC; - for (int j = 0; j < nblkb; ++j) { // check all blocks - BlockKey& k = o.blocks.boundary.keys[j]; - nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; - } for (int i = 0; i < 2; ++i) { // check all topologies nvAll=0; nvC=nvMap[i]; @@ -667,7 +663,8 @@ if(0==1) printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(1==0){ for (int ne=0; ne Date: Mon, 28 Aug 2023 15:09:30 -0600 Subject: [PATCH 60/68] added 13 tests --- test/testing.cmake | 84 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/test/testing.cmake b/test/testing.cmake index ed5c92888..ea870d0de 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -69,6 +69,90 @@ else() set(GXT dmg) endif() +if(ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-multitopology1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + add_test(NAME 
chef-CGNS-multitopology1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + + mpi_test(chef-CGNS-multitopology2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + add_test(NAME chef-CGNS-multitopology2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + + mpi_test(chef-CGNS-multitopology4 4 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) + add_test(NAME chef-CGNS-multitopology4-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) +endif() + +if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND AND ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-8hex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + add_test(NAME chef-CGNS-8hex1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-8hex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-8hex2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-smallTet1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-smallTet1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-smallTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-smallTet2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllHex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllHex1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllHex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllHex2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllTet 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllTet-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllTet2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllWedge1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllWedge1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllWedge2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllWedge2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY 
${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) +endif() + set(MDIR ${MESHES}/phasta/dg) if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND) From 71bf7b4ba638f3f3535345f30a2ed576734ed3da Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 28 Aug 2023 16:34:26 -0600 Subject: [PATCH 61/68] removed some dead code --- phasta/phCGNSgbc.cc | 64 +++++++++++++-------------------------------- pumi-meshes | 2 +- 2 files changed, 19 insertions(+), 47 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 0e35aa4a6..424cfab34 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -23,8 +23,8 @@ #endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER -extern cgsize_t nDbgCG=50; -extern int nDbgI=50; +static cgsize_t nDbgCG=50; +static int nDbgI=50; namespace { @@ -288,7 +288,7 @@ void gen_ncorp(Output& o ) if(num_parts > 1) commuInt(o, o.arrays.ncorp); -if(1==0) { +if(0==1) { for (int ipart=0; ipart Date: Tue, 29 Aug 2023 06:22:29 -0600 Subject: [PATCH 62/68] fixed matchedNodeElmReader tests to eliminate unused argument 9 --- test/testing.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testing.cmake b/test/testing.cmake index ea870d0de..08e5ae7d8 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -297,7 +297,7 @@ mpi_test(matchedNodeElementReader_p1 1 "${MDIR}/1part/geom3D.fathr" "NULL" "${MDIR}/1part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") mpi_test(matchedNodeElementReader_p4 4 ./matchedNodeElmReader @@ -309,7 +309,7 @@ mpi_test(matchedNodeElementReader_p4 4 "${MDIR}/4part/geom3D.fathr" "NULL" "${MDIR}/4part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") set(MDIR ${MESHES}/gmsh) mpi_test(gmshv2TwoQuads 1 From 364895c4fe9bd0dc522cd423e344ba1389bdb908 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Tue, 29 Aug 2023 08:17:44 -0600 Subject: [PATCH 63/68] fixed swapDoubles-- needed a call to MPI_Finalize to pass with openMPI 4 --- test/swapDoubles.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/test/swapDoubles.cc b/test/swapDoubles.cc index beacc27d2..f85c2a531 100644 --- a/test/swapDoubles.cc +++ b/test/swapDoubles.cc @@ -25,5 +25,6 @@ int main(int argc, char** argv) { } delete [] d_orig; delete [] d; + MPI_Finalize(); return 0; } From 20b9a7d3acaba891d54f8ee97c4cc8a55e1f3514 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Thu, 31 Aug 2023 20:00:05 -0600 Subject: [PATCH 64/68] Testing disables prior CGNS tests that are hardwired to 32 bit --- CMakeLists.txt | 9 +------ phasta/phCGNSgbc.cc | 66 ++++++++++++++++++++++----------------------- test/testing.cmake | 4 +-- 3 files changed, 35 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 395bc43d8..ab2af3869 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,14 +131,7 @@ if(ENABLE_OMEGA_H) endif() if(ENABLE_CGNS) - set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) - bob_public_dep(CGNS) - #CGNS does not provide cmake targets :( - include_directories(SYSTEM ${CGNS_INCLUDE_DIR}) - set(SCOREC_USE_HDF5_DEFAULT ${ENABLE_CGNS}) - bob_public_dep(HDF5) - add_definitions(-DHAVE_CGNS) -else() + option(ENABLE_CGNS_MULTI_BASE "Enable the CGNS Multi Base tests" OFF) set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) bob_public_dep(CGNS) #CGNS does not provide cmake targets :( diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 424cfab34..1d98a1a2a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -597,23 +597,21 @@ if(0==1){ } void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { - int E,Fsb,Fsb2; + int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; const int num_parts = PCU_Comm_Peers(); const cgsize_t num_parts_cg=num_parts; const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; - cgsize_t e_owned, e_start,e_end; - cgsize_t e_startg,e_endg; + cgsize_t e_owned, e_start,e_end, e_startg,e_endg; cgsize_t eVolElm=*e_written; cgsize_t e_belWritten=0; int nvMap[2] = {3,4}; int iblkC[2]; int estart[2]; - int nvC,nvert,nvAll,invC; for (int i = 0; i < 2; ++i) { // check all topologies nvAll=0; - nvC=nvMap[i]; invC=0; + nvC=nvMap[i]; int icountB=0; for (int j = 0; j < nblkb; ++j) { // check all blocks BlockKey& k = o.blocks.boundary.keys[j]; @@ -662,8 +660,7 @@ if(0==1) printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(0==1){ for (int ne=0; ne 2) + writeBlocksCGNSinteror(F,B,Z,o,&e_written); if(o.writeCGNSFiles > 2) { - int nblkb = o.blocks.boundary.getSize(); - double** srfIDCen1 = new double*[nblkb]; // might not all be used - double** srfIDCen2 = new double*[nblkb]; - int totOnRankBel=0; - for (int i = 0; i < nblkb; ++i) - totOnRankBel += o.blocks.boundary.nElements[i]; - int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); - int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - int nStackedOnRank=0; - writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); - writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); - free(srfID); free(srfIDidx); - free(srfID1OnBlk); free(srfID2OnBlk); - free(startBelBlk); free(endBelBlk); - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; - delete [] srfIDCen1; delete [] srfIDCen2; - if(cgp_close(F)) cgp_error_exit(); - double 
t1 = PCU_Time(); - if (!PCU_Comm_Self()) - lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); + cgsize_t totBel; + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; // might not all be used + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int nStackedOnRank=0; + writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); + free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; + if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } } // namespace diff --git a/test/testing.cmake b/test/testing.cmake index 08e5ae7d8..8deea0a63 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -599,7 +599,7 @@ if(ENABLE_ZOLTAN) ) endif() -if(ENABLE_CGNS AND ENABLE_ZOLTAN) +if(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) # # sort of an arbitrary choice set(numProcs 4) @@ -684,7 +684,7 @@ mpi_test(cgns_bcs_3 ${numProcs} bcs3.smb additional) -endif(ENABLE_CGNS AND ENABLE_ZOLTAN) +endif(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) mpi_test(construct 4 ./construct From dd2b713711953fc9dbbcfcab425386a816aa4237 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 2 Sep 2023 21:21:04 -0600 Subject: [PATCH 65/68] Valgrind leaks, added VertexRank, replaced RankOfWriter with CellRank, added a couple of boundary element diagnostic fields, replaced cgnsdiff with hdf5diff which returns 1 if different for testing. 
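Note on the new diagnostic fields below: they follow the two-step cgp pattern described in CGNSFileWritingDev.txt, i.e., every rank collectively creates the solution and field file-nodes with identical arguments, then each rank writes only its owned slice. A minimal sketch of that pattern for one vertex-centered integer field (F, B, Z, o.local_start_id, and o.iownnodes as in the diff; the std::vector is illustrative, the code itself uses malloc, and <vector> is assumed included):

// Sketch: collective creation of a vertex-centered integer field, then a
// parallel write of just this rank's owned node range (1-based, inclusive).
int S, Fs;
if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &S) ||
    cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "VertexRank", &Fs))
  cgp_error_exit();
cgsize_t start = o.local_start_id;         // first node this rank owns
cgsize_t end   = start + o.iownnodes - 1;  // last node this rank owns
std::vector<int> d(o.iownnodes, PCU_Comm_Self()); // field value = writer rank
if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, d.data()))
  cgp_error_exit();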
--- phasta/phCGNSgbc.cc | 85 +++++++++++++++++++++++++++++++++++++++++---- phasta/phOutput.cc | 1 + test/testing.cmake | 26 +++++++------- 3 files changed, 93 insertions(+), 19 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1d98a1a2a..71d414cb6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -220,7 +220,7 @@ void gen_ncorp(Output& o ) int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); //FIXME where to deallocate + o.arrays.ncorp = new cgsize_t[num_nodes]; lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; @@ -297,6 +297,7 @@ if(0==1) { PCU_Barrier(); } } +free(owner_counts); } static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) @@ -516,8 +517,8 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; // create a centered solution - if (cg_sol_write(F, B, Z, "RankOfWriter", CGNS_ENUMV(CellCenter), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "RankOfWriter", &Fs)) + if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "CellRank", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); int nvMap[4] = {4,5,6,8}; @@ -661,6 +662,68 @@ if(0==1){ for (int ne=0; ne=start && en<=end) { + dv[en-start]= part; + } + } + } + } + if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + cgp_error_exit(); +// more tricky to put srfID on nodes to see in PV (approximately) through vertex field + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &Fs)) + cgp_error_exit(); + // create the field data for this process + for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; + if(invC!=0) { + for (int ibel = 0; ibel < e_owned; ++ibel){ + for (int ilv=0; ilv < nvC; ilv++) { + en=e[ibel*nvC+ilv]; + if(en>=start && en<=end) { + dv[en-start]= srfID[ibel]; +// printf("%d %d %d %d %d %d %d\n ", part,ibel, ilv, en, en-start, dv[en-start], srfID[ibel]); + } + } + } + } + if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + cgp_error_exit(); + free(dv); if(invC!=0) { free(e); //moved above getNaturalBCCodesCGNS(o, iblkC[, &srfID[e_belWritten]); @@ -861,7 +924,7 @@ void GatherCentroid(double** srfIDCen,int* srfIDOnBlk, double** srfIDGCen, int * if(0==1){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } auto type_d = getMpiType( double() ); MPI_Gatherv(srfIDCenAllBlocks,ncon,type_d,*srfIDGCen,rcounts,displs,type_d,0, MPI_COMM_WORLD); - free(srfIDCenAllBlocks); + free(srfIDCenAllBlocks); free(rcounts); free(displs); } void Allgather2IntAndSort(int* srfID, int* srfIDidx,Output& o,int* srfIDG, int* srfIDGidx, int totOnRankBel) { @@ -907,7 +970,7 @@ void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, int nmatchFace1,nmatchFace; GatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nStackedOnRank); GatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nStackedOnRank); - if(part==0) printf("matchface %d, %d", nmatchFace1, nmatchFace); +if(0==1) if(part==0) printf("matchface %d, %d\n", nmatchFace1, nmatchFace); if(part==0) assert(nmatchFace1==nmatchFace); // compute the translation while we still have ordered centroids data Assuming Translation = donor minus periodic but documents unclear 
double TranslationD[3]; @@ -981,7 +1044,7 @@ if(0==1) { if (cg_conn_periodic_write(F, B, Z, cgconn, RotationCenter, RotationAngle, Translation)) cgp_error_exit(); free(imapD1); free(imapD2); - free(eBC); free(srfIDG); free(srfIDGidx); + free(eBC); free(srfIDG); free(srfIDGidx); free(donor2); free(periodic1); } void CGNS_NodalSolution(int F,int B,int Z, Output& o) { @@ -1073,6 +1136,16 @@ if(0==1) { if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); + int S2,Fs2; + const int part = PCU_Comm_Self() ; + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &S2) || + cgp_field_write(F, B, Z, S2, CGNS_ENUMV(Integer), "VertexRank", &Fs2)) + cgp_error_exit(); + int* d = (int *)malloc(o.iownnodes * sizeof(int)); + for (int inode = 0; inode < o.iownnodes; ++inode) d[inode]= part; + if (cgp_field_write_data(F, B, Z, S2, Fs2, &start, &end, d)) + cgp_error_exit(); + free(d); } void writeCGNS(Output& o, std::string path) { diff --git a/phasta/phOutput.cc b/phasta/phOutput.cc index d4b71028b..fd5e73a69 100644 --- a/phasta/phOutput.cc +++ b/phasta/phOutput.cc @@ -997,6 +997,7 @@ Output::~Output() //nOwnedNodes will still be zero. if(!nOwnedNodes) return; + delete [] arrays.ncorp; delete [] arrays.coordinates; delete [] arrays.ilwork; delete [] arrays.ilworkf; diff --git a/test/testing.cmake b/test/testing.cmake index 8deea0a63..d256c6b30 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -74,19 +74,19 @@ if(ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) mpi_test(chef-CGNS-multitopology1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) add_test(NAME chef-CGNS-multitopology1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) mpi_test(chef-CGNS-multitopology2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) add_test(NAME chef-CGNS-multitopology2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) mpi_test(chef-CGNS-multitopology4 4 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) add_test(NAME chef-CGNS-multitopology4-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) endif() @@ -95,61 +95,61 @@ if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND AND EN mpi_test(chef-CGNS-8hex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) add_test(NAME chef-CGNS-8hex1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) mpi_test(chef-CGNS-8hex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) add_test(NAME chef-CGNS-8hex2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) mpi_test(chef-CGNS-smallTet1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) add_test(NAME chef-CGNS-smallTet1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) mpi_test(chef-CGNS-smallTet2 2 
${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) add_test(NAME chef-CGNS-smallTet2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) mpi_test(chef-CGNS-AllHex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) add_test(NAME chef-CGNS-AllHex1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) mpi_test(chef-CGNS-AllHex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) add_test(NAME chef-CGNS-AllHex2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) mpi_test(chef-CGNS-AllTet 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) add_test(NAME chef-CGNS-AllTet-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) mpi_test(chef-CGNS-AllTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) add_test(NAME chef-CGNS-AllTet2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) mpi_test(chef-CGNS-AllWedge1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) add_test(NAME chef-CGNS-AllWedge1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) mpi_test(chef-CGNS-AllWedge2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) add_test(NAME chef-CGNS-AllWedge2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) endif() From bdc26b58f14cfbbf2c0af23aa467653f85974e8a Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 3 Sep 2023 12:02:45 -0600 Subject: [PATCH 66/68] checking this in with some extra commented code on my failure to get BoundaryCellRank to be a FaceCenter field. This branch also provides a hacky way to get around ParaView only being able to visualize the first nodal field in the CGNS file by circulating the file-node creation order. 
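The circulation below can be read as a rotation of the solution-node creation order (an assumption consistent with the diff: only the FIRST solution node matters to ParaView, and the relative order of the remaining nodes does not). A compact, runnable sketch of the idea with stand-in creation calls:

#include <cstdio>
#include <functional>
int main() {
  // Stand-ins for the cg_sol_write calls made in writeCGNS below.
  std::function<void()> createSol[4] = {
    []{ std::puts("create nodal Solution"); },
    []{ std::puts("create VertexRank"); },
    []{ std::puts("create BoundaryVertexRank"); },
    []{ std::puts("create BoundaryVertexSrfID"); }};
  int writeCGNSFiles = 3;           // flag in 2..5 selects which node leads
  int first = writeCGNSFiles - 2;
  for (int k = 0; k < 4; ++k)
    createSol[(first + k) % 4]();   // the chosen node is created first
  return 0;
}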
--- phasta/phCGNSgbc.cc | 143 ++++++++++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 50 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 71d414cb6..f66a219ef 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -507,9 +507,9 @@ if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); } // renamed and calling the renamed functions above with output writes now to CGNS -void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) +void writeBlocksCGNSinterior(int F,int B,int Z, int SCR, Output& o, cgsize_t *e_written) { - int E,S,Fs,Fs2,Fsb,Fsb2; + int E,Fs,Fs2,Fsb,Fsb2; cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; const int num_parts = PCU_Comm_Peers(); @@ -517,8 +517,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; // create a centered solution - if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "CellRank", &Fs)) + if ( cgp_field_write(F, B, Z, SCR, CGNS_ENUMV(Integer), "CellRank", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); int nvMap[4] = {4,5,6,8}; @@ -566,7 +565,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) d[n] = part; // write the solution field data in parallel } - if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + if (cgp_field_write_data(F, B, Z, SCR, Fs, &e_start, &e_end, d)) cgp_error_exit(); if(invC!=0) free(d); char UserDataName[11]; @@ -596,7 +595,7 @@ if(0==1){ } // end of loop over ALL topologies PCU_Barrier(); } -void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) +void writeBlocksCGNSboundary(int F,int B,int Z, int SBVR, int SBVS, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; const int num_parts = PCU_Comm_Peers(); @@ -663,7 +662,7 @@ if(0==1){ } int idx =((*nStackedOnRank) - 1); - int S, Fs; + int FsR, FsS; // if (cg_sol_write(F, B, Z, "BoundaryCellRank", CGNS_ENUMV(FaceCenter), &S) || // cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryCellRank", &Fs)) @@ -686,8 +685,7 @@ if(0==1){ int* dv = (int *)malloc(o.iownnodes * sizeof(int)); cgsize_t start=o.local_start_id; cgsize_t end=start+o.iownnodes-1; - if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryVertexRank", &Fs)) + if ( cgp_field_write(F, B, Z, SBVR, CGNS_ENUMV(Integer), "BoundaryVertexRank", &FsR)) cgp_error_exit(); // create the field data for this process for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; @@ -702,11 +700,10 @@ if(0==1){ } } } - if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) cgp_error_exit(); // more tricky to put srfID on nodes to see in PV (approximately) through vertex field - if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &Fs)) + if ( 
cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) cgp_error_exit(); // create the field data for this process for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; @@ -721,7 +718,7 @@ if(0==1){ } } } - if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) cgp_error_exit(); free(dv); if(invC!=0) { @@ -1103,7 +1100,7 @@ if(0==1) printf("solution=%s",fieldName); cgp_error_exit(); free(p); free(u); free(v); free(w); free(T); free(data); } -void CGNS_Coordinates(int F,int B,int Z,Output& o) +void CGNS_Coordinates(int F,int B,int Z, Output& o) { int Cx,Cy,Cz; if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx) || @@ -1136,14 +1133,18 @@ if(0==1) { if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); +} +void CGNS_VertexRank(int F,int B,int Z, int SVR, Output& o) +{ int S2,Fs2; const int part = PCU_Comm_Self() ; - if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &S2) || - cgp_field_write(F, B, Z, S2, CGNS_ENUMV(Integer), "VertexRank", &Fs2)) + cgsize_t start=o.local_start_id; + cgsize_t end=start+o.iownnodes-1; + if ( cgp_field_write(F, B, Z, SVR, CGNS_ENUMV(Integer), "VertexRank", &Fs2)) cgp_error_exit(); int* d = (int *)malloc(o.iownnodes * sizeof(int)); for (int inode = 0; inode < o.iownnodes; ++inode) d[inode]= part; - if (cgp_field_write_data(F, B, Z, S2, Fs2, &start, &end, d)) + if (cgp_field_write_data(F, B, Z, SVR, Fs2, &start, &end, d)) cgp_error_exit(); free(d); } @@ -1156,7 +1157,7 @@ void writeCGNS(Output& o, std::string path) const cgsize_t num_parts_cg=num_parts; std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; - int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; + int F, B, Z, E, S, SCR, SVR, SBVR, SBVS, Fs, Fs2, A, Cx, Cy, Cz; cgsize_t sizes[3],*e, start, end; int num_nodes=m->count(0); if(0==1){ // ilwork debugging @@ -1209,8 +1210,53 @@ if(0==1){ cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5); - CGNS_Coordinates(F,B,Z,o); - CGNS_NodalSolution(F,B,Z,o); + CGNS_Coordinates(F,B,Z, o); +// Paraview will only viz the first sol node created so control that with writeCGNSFiles flag + +// notes on FaceCenter Fails +// int ec0=cg_sol_write(F, B, Z, "BoundaryCellRank2", CGNS_ENUMV(CellCenter), &S); +// ec0 returns 0 GOOD and ec2 below is also 0 so CellCenter works +// int ec1=cg_sol_write(F, B, Z, "BoundaryCellRank", CGNS_ENUMV(FaceCenter), &S); +// ec1 returns 1 ERROR causing ec2 to also fail since S is junk +// int ec2= cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryCellRank", &Fs); + + if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &SCR)) + cgp_error_exit(); + if(o.writeCGNSFiles == 2) { // Solution + CGNS_NodalSolution(F,B,Z,o); + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + }else if(o.writeCGNSFiles == 3) { // Vertex Rank + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + CGNS_NodalSolution(F,B,Z,o); + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + }else if(o.writeCGNSFiles == 4) { // Boundary 
Vertex Rank + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + CGNS_NodalSolution(F,B,Z,o); + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + }else if(o.writeCGNSFiles == 5) { // Boundary Vertex SrfID + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + CGNS_NodalSolution(F,B,Z,o); + } + CGNS_VertexRank(F,B,Z,SVR, o); +// CGNS_NodalSolution(F,B,Z,o); // create Helper array for number of elements on rank if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || @@ -1224,35 +1270,32 @@ if(0==1) printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) cgp_error_exit(); cgsize_t e_written=0; - if(o.writeCGNSFiles > 2) - writeBlocksCGNSinteror(F,B,Z,o,&e_written); - if(o.writeCGNSFiles > 2) { - cgsize_t totBel; - int nblkb = o.blocks.boundary.getSize(); - double** srfIDCen1 = new double*[nblkb]; // might not all be used - double** srfIDCen2 = new double*[nblkb]; - int totOnRankBel=0; - for (int i = 0; i < nblkb; ++i) - totOnRankBel += o.blocks.boundary.nElements[i]; - int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); - int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - int nStackedOnRank=0; - writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); - writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); - free(srfID); free(srfIDidx); - free(srfID1OnBlk); free(srfID2OnBlk); - free(startBelBlk); free(endBelBlk); - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; - delete [] srfIDCen1; delete [] srfIDCen2; - if(cgp_close(F)) cgp_error_exit(); - double t1 = PCU_Time(); - if (!PCU_Comm_Self()) - lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); - } + writeBlocksCGNSinterior(F,B,Z,SCR,o,&e_written); + cgsize_t totBel; + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; // might not all be used + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int nStackedOnRank=0; + writeBlocksCGNSboundary(F,B,Z, SBVR, SBVS, o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, 
srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); + free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; + if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } // namespace From d24287582e5797db22cccabf52de3277199e7004 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 3 Sep 2023 12:57:40 -0600 Subject: [PATCH 67/68] cleaned dead/commented code, valgrind check again, created more helpder functions to keep long functions under 105 lines. --- phasta/phCGNSgbc.cc | 195 +++++++++++++++++++------------------------- 1 file changed, 86 insertions(+), 109 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index f66a219ef..9e84bc6bd 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -157,7 +157,6 @@ void commuInt(Output& o, cgsize_t* global) } int itag, iacc, iother, isgbeg; - MPI_Datatype sevsegtype[numtask]; //first do what ctypes does for setup int* isbegin; int* lenseg; @@ -165,12 +164,15 @@ void commuInt(Output& o, cgsize_t* global) isbegin = (int*) malloc(sizeof(int) * maxseg); lenseg = (int*) malloc(sizeof(int) * maxseg); ioffset = (int*) malloc(sizeof(int) * maxseg); -// no VLA MPI_Request req[numtask]; +// no VLA but could not figure out how to malloc so maxtask FIXME/HELP MPI_Request req[numtask]; // no VLA MPI_Status stat[numtask]; +// no VLA MPI_Datatype sevsegtype[numtask]; int maxtask=1000; assert(maxtask>=numtask); MPI_Request req[maxtask]; MPI_Status stat[maxtask]; + MPI_Datatype sevsegtype[maxtask]; +// FIXME/HELP int maxfront=0; int lfront; itkbeg=0; @@ -256,7 +258,7 @@ void gen_ncorp(Output& o ) printf("\n"); #endif local_start_id=0; - for(i=0;i=start && en<=end) + dv[en-start]= part; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) + cgp_error_exit(); + // more tricky to put srfID on nodes to see in PV (approximately) through vertex field + if ( cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) + cgp_error_exit(); + // create the field data for this process + for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; + if(invC!=0) { + for (int ibel = 0; ibel < e_owned; ++ibel){ + for (int ilv=0; ilv < nvC; ilv++) { + en=e[ibel*nvC+ilv]; + if(en>=start && en<=end) + dv[en-start]= srfID[ibel]; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) + cgp_error_exit(); +} void writeBlocksCGNSboundary(int F,int B,int Z, int SBVR, int SBVS, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; @@ -661,69 +703,9 @@ if(0==1){ for (int ne=0; ne=start && en<=end) { - dv[en-start]= part; - } - } - } - } - if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) - cgp_error_exit(); -// more tricky to put srfID on nodes to see in PV (approximately) through vertex field - if ( cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) - cgp_error_exit(); - // create the field data for this process - for (int inode = 0; 
inode < o.iownnodes; ++inode) dv[inode]= -1; - if(invC!=0) { - for (int ibel = 0; ibel < e_owned; ++ibel){ - for (int ilv=0; ilv < nvC; ilv++) { - en=e[ibel*nvC+ilv]; - if(en>=start && en<=end) { - dv[en-start]= srfID[ibel]; -// printf("%d %d %d %d %d %d %d\n ", part,ibel, ilv, en, en-start, dv[en-start], srfID[ibel]); - } - } - } - } - if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) - cgp_error_exit(); - free(dv); + writeBoundaryVertexToSol(F,B,Z, SBVR, SBVS, o, srfID, part,invC, e_owned, nvC, e); if(invC!=0) { free(e); -//moved above getNaturalBCCodesCGNS(o, iblkC[, &srfID[e_belWritten]); int icnt1=0; int icnt2=0; for (int ne=0; ne Date: Thu, 6 Jun 2024 13:18:29 -0600 Subject: [PATCH 68/68] _rebase was a trainwreck getting things from there back onto this branch --- phasta/phCGNSgbc.cc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 9e84bc6bd..e86eaf227 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -164,15 +164,12 @@ void commuInt(Output& o, cgsize_t* global) isbegin = (int*) malloc(sizeof(int) * maxseg); lenseg = (int*) malloc(sizeof(int) * maxseg); ioffset = (int*) malloc(sizeof(int) * maxseg); -// no VLA but could not figure out how to malloc so maxtask FIXME/HELP MPI_Request req[numtask]; -// no VLA MPI_Status stat[numtask]; -// no VLA MPI_Datatype sevsegtype[numtask]; - int maxtask=1000; - assert(maxtask>=numtask); - MPI_Request req[maxtask]; - MPI_Status stat[maxtask]; - MPI_Datatype sevsegtype[maxtask]; -// FIXME/HELP + MPI_Request* req; + req = (MPI_Request*) malloc(sizeof(MPI_Request) * numtask); + MPI_Status* stat; + stat = (MPI_Status*) malloc(sizeof(MPI_Status) * numtask); + MPI_Datatype* sevsegtype; + sevsegtype = (MPI_Datatype*) malloc(sizeof(MPI_Datatype) * numtask); int maxfront=0; int lfront; itkbeg=0; @@ -1223,8 +1220,14 @@ if(0==1){ sizes[2]=0; if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; if ( cgp_open(outfile, CG_MODE_WRITE, &F) || - cg_base_write(F, "Base", 3, 3, &B) || - cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) + cg_base_write(F, "Base", 3, 3, &B) ) + cgp_error_exit(); + if ( cg_goto(F,B,"end")) + cgp_error_exit(); + if ( cg_dataclass_write(CGNS_ENUMV(Dimensional))) + cgp_error_exit(); + cg_units_write(CGNS_ENUMV(Kilogram),CGNS_ENUMV(Meter),CGNS_ENUMV(Second),CGNS_ENUMV(Kelvin),CGNS_ENUMV(Degree)); + if ( cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5);
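A closing note on the commuInt allocations above: the heap-allocated request/status/datatype arrays need matching teardown, and any derived datatypes that were committed need MPI_Type_free before the holding array is freed. A sketch of the cleanup, assuming each sevsegtype[i] was committed with MPI_Type_commit (placement at the end of commuInt is illustrative):

// Sketch: release committed derived datatypes first, then the arrays.
for (int itask = 0; itask < numtask; ++itask)
  MPI_Type_free(&sevsegtype[itask]); // frees the derived type object itself
free(sevsegtype);
free(req);
free(stat);
free(isbegin); free(lenseg); free(ioffset);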