From 86016edf2d6b98c23adc0c960820f07e2f56a707 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 31 Jul 2023 15:58:49 -0600 Subject: [PATCH 01/68] starting with a merge of work done to bring CGNS to compile with C++11 --- CMakeLists.txt | 26 ++++++++++++++--------- apf/apfCGNS.cc | 52 +++++++++++++++++++++++++++++----------------- mds/mdsCGNS.cc | 14 ++++++------- test/cgns.cc | 2 +- test/testing.cmake | 2 +- 5 files changed, 58 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7648e54d3..395bc43d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,9 +16,7 @@ option(USE_XSDK_DEFAULTS "enable the XDSK v0.3.0 default configuration" NO) #requre c++11 without extensions set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSION OFF) -if(NOT ENABLE_CGNS) - set(CMAKE_CXX_STANDARD 11) -endif() +set(CMAKE_CXX_STANDARD 11) xsdk_begin_package() bob_begin_package() @@ -27,8 +25,7 @@ if(USE_XSDK_DEFAULTS) xsdk_compiler_flags() endif() -# require c++14 -option(ENABLE_CGNS "Enable the CGNS reader: requires c++14 extensions" OFF) +option(ENABLE_CGNS "Enable the CGNS reader" OFF) message(STATUS "ENABLE_CGNS: ${ENABLE_CGNS}") # Set some default compiler flags that should always be used @@ -37,10 +34,7 @@ if(NOT USE_XSDK_DEFAULTS) bob_begin_cxx_flags() bob_end_cxx_flags() set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}") - if(ENABLE_CGNS) #takes precedence over SCOREC_ENABLE_CXX11 - message(STATUS "enabling cxx14") - bob_cxx14_flags() - elseif(SCOREC_ENABLE_CXX11) + if(SCOREC_ENABLE_CXX11) bob_cxx11_flags() endif() endif() @@ -60,6 +54,8 @@ message(STATUS "IS_TESTING: ${IS_TESTING}") set(MESHES "${CMAKE_SOURCE_DIR}/pumi-meshes" CACHE STRING "Directory of test meshes") message(STATUS "MESHES: ${MESHES}") +get_filename_component(MESHES ${MESHES} ABSOLUTE) +message(STATUS "Using absolute file path MESHES: ${MESHES}") option(BUILD_EXES "Build executables" ON) message(STATUS "BUILD_EXES: ${BUILD_EXES}") @@ -142,6 +138,14 @@ if(ENABLE_CGNS) set(SCOREC_USE_HDF5_DEFAULT ${ENABLE_CGNS}) bob_public_dep(HDF5) add_definitions(-DHAVE_CGNS) +else() + set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) + bob_public_dep(CGNS) + #CGNS does not provide cmake targets :( + include_directories(SYSTEM ${CGNS_INCLUDE_DIR}) + set(SCOREC_USE_HDF5_DEFAULT ${ENABLE_CGNS}) + bob_public_dep(HDF5) + add_definitions(-DHAVE_CGNS) endif() # Include the SCOREC project packages @@ -172,8 +176,10 @@ add_library(core INTERFACE) target_link_libraries(core INTERFACE ${SCOREC_EXPORTED_TARGETS}) if(ENABLE_CGNS) target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen - target_compile_features(core INTERFACE cxx_std_14) + # target_compile_features(core INTERFACE cxx_std_14) + target_compile_features(core INTERFACE cxx_std_11) else() + target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen target_compile_features(core INTERFACE cxx_std_11) endif() scorec_export_library(core) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index 4debbb58b..d76757be0 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -11,6 +11,7 @@ #include "apfNumberingClass.h" #include "apfShape.h" #include "apfFieldData.h" +#include #include #include // @@ -269,9 +270,16 @@ void WriteTags(const CGNS &cgns, const std::vector> &orderedEnts, const std::vector> &ranges, const std::vector &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) + +typedef std::vector VecMeshEntity_t; +typedef std::pair CGRange_t; + +void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const std::vector &ranges, const 
VecMeshEntity_t &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) { - const auto writeField = [&m, &cgns](apf::Field *f, const auto &orderedEnts, const int &solIndex, const auto &inner, const auto &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { + typedef std::function *fieldData, std::vector &ddata, const int &numComponents, const int &component)> innerLambda_t; + typedef std::function &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex)> postLambda_t; + + const auto writeField = [&m, &cgns](apf::Field *f, const VecMeshEntity_t &orderedEnt, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { std::vector data; cgsize_t rmin[3]; @@ -281,7 +289,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData = f->getData(); - for (const auto &e : orderedEnts) + for (const auto &e : orderedEnt) { if (fieldData->hasEntity(e) && m->isOwned(e)) { @@ -310,7 +318,7 @@ void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const std::vector &ranges) { for (int i = 0; i < m->countFields(); ++i) { apf::Field *f = m->getField(i); @@ -335,7 +343,7 @@ void WriteFields(const CGNS &cgns, const std::vectorcountFields(); ++i) { apf::Field *f = m->getField(i); @@ -352,12 +360,12 @@ void WriteFields(const CGNS &cgns, const std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { + const postLambda_t postLambda = [&cgns](const int &solIndex, std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { if (globalSize > 0) { if (cgp_field_write_data(cgns.index, cgns.base, cgns.zone, solIndex, fieldIndex, &rmin[0], &rmax[0], @@ -366,7 +374,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData, std::vector &ddata, const int &numComponents, const int &component) { + const innerLambda_t innerLambda = [](apf::MeshEntity *elem, apf::FieldDataOf *fieldData, std::vector &ddata, const int &numComponents, const int &component) { std::vector vals(numComponents, -12345); fieldData->get(elem, vals.data()); //std::cout << numComponents << " " << component << " " << vals[0] << std::endl; @@ -390,7 +398,7 @@ void WriteFields(const CGNS &cgns, const std::vector WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) { int Cx = -1; int Cy = -1; @@ -412,7 +420,7 @@ auto WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) cgp_error_exit(); } - std::vector orderedVertices; + VecMeshEntity_t orderedVertices; cgsize_t vertexMin[3]; cgsize_t vertexMax[3]; cgsize_t contigRange = -1; @@ -574,7 +582,10 @@ CellElementReturn WriteElements(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbe void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, const int &cellCount, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap, const std::map &apf2cgns, apf::GlobalNumbering *gvn) { - const auto EdgeLoop = [&m](const auto &lambda, apf::MeshTag *edgeTag) { + typedef std::function LambdaMeshEntity_t; + typedef std::vector VecCGNSInfo_t; + + const auto EdgeLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *edgeTag) { apf::MeshIterator *edgeIter = m->begin(1); apf::MeshEntity *edge = 
nullptr; int vals[1]; @@ -591,7 +602,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(edgeIter); }; - const auto FaceLoop = [&m](const auto &lambda, apf::MeshTag *faceTag) { + const auto FaceLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *faceTag) { apf::MeshIterator *faceIter = m->begin(2); apf::MeshEntity *face = nullptr; int vals[1]; @@ -608,7 +619,8 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(faceIter); }; - const auto BCEntityAdder = [&apf2cgns, &m, &cgns, &gvn](const auto &Looper, const auto &bcGroup, int &startingLocation) { + + const auto BCEntityAdder = [&apf2cgns, &m, &cgns, &gvn](const std::function &Looper, const apf::CGNSInfo &bcGroup, int &startingLocation) { std::map> bcEntTypes; for (const auto &r : apf2cgns) bcEntTypes.insert(std::make_pair(r.first, std::vector())); @@ -715,7 +727,9 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, PCU_Get_Comm()); }; - const auto doVertexBC = [&](const auto &iter) { + typedef std::map, std::vector>::const_iterator MapCGNSInfo_t; + + const auto doVertexBC = [&](const MapCGNSInfo_t &iter) { for (const auto &p : iter->second) { std::vector bcList; @@ -751,7 +765,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doEdgeBC = [&](const auto &iter, int &startingLocation) { + const auto doEdgeBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(EdgeLoop, p, startingLocation); @@ -774,7 +788,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doFaceBC = [&](const auto &iter, int &startingLocation) { + const auto doFaceBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(FaceLoop, p, startingLocation); @@ -797,7 +811,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doCellBC = [&](const auto &iter, const int &) { + const auto doCellBC = [&](const MapCGNSInfo_t &iter, const int &) { for (const auto &p : iter->second) { std::vector bcList; @@ -1051,11 +1065,11 @@ void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap auto communicator = PCU_Get_Comm(); cgp_mpi_comm(communicator); // - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); + cgp_pio_mode(CGP_INDEPENDENT); CGNS cgns; cgns.fname = std::string(prefix); - if (cgp_open(prefix, CGNS_ENUMV(CG_MODE_WRITE), &cgns.index)) + if (cgp_open(prefix, CG_MODE_WRITE, &cgns.index)) cgp_error_exit(); { diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index acdef4aae..0d24e67ec 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -177,19 +177,19 @@ struct MeshDataGroup if (components.size() == 1) { std::cout << "Scalar Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 3) { std::cout << "Vector Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 9) { std::cout << "Matrix Group has " << components.size() << " related 
componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else @@ -265,7 +265,7 @@ void Kill(const int fid) } } -auto ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) +std::map> ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) { // Read min required as defined by consecutive range // make one based as ReadElements makes zero based @@ -389,7 +389,7 @@ void SimpleElementPartition(std::vector &numberToReadPerProc, std::vec using Pair = std::pair; using LocalElementRanges = std::vector; // one based -auto ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) +std::tuple, cgsize_t> ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) { std::vector numberToReadPerProc; std::vector startingIndex; @@ -1056,8 +1056,8 @@ apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCM int cgid = -1; auto comm = PCU_Get_Comm(); cgp_mpi_comm(comm); - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); - cgp_open(fname.c_str(), CGNS_ENUMV(CG_MODE_READ), &cgid); + cgp_pio_mode(CGP_INDEPENDENT); + cgp_open(fname.c_str(), CG_MODE_READ, &cgid); int nbases = -1; cg_nbases(cgid, &nbases); diff --git a/test/cgns.cc b/test/cgns.cc index 6f853d2e0..a3c57d777 100644 --- a/test/cgns.cc +++ b/test/cgns.cc @@ -119,7 +119,7 @@ pMesh toPumi(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) return pm; } -auto additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) +std::function additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) { // seems essential to make pm first before calling balance or reorder... auto pm = toPumi(prefix, g, mesh); diff --git a/test/testing.cmake b/test/testing.cmake index 069006a26..ed5c92888 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -560,7 +560,7 @@ mpi_test(cgns_3d_2 ${numProcs} # # 3D BCS tests # -set(numProcs 5) +set(numProcs 4) # set(CGNSDIR ${MESHES}/cgns/withBCS/3D) # From 7aa1eb91273e247487b785eec913a42526f91ce6 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 5 Aug 2023 07:56:13 -0600 Subject: [PATCH 02/68] baby step of copying phGeomBC.cc to phCGNSgbc.cc and making the mods to compute a flat connectivity array transposed to CGNS needs and the same transpose plus reduction from volume connectivity to surface connectivity for boundary elements. Compiles but not tested as we still need to modify the actual writing function in this file to open and write a CGNS file. 
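In outline, each getter becomes the flattening sketched below (a minimal sketch with illustrative types, using a plain vector in place of Output's ragged ien arrays, not the committed code). The CGNS ordering runs element-major with the vertex index fastest, the transpose of the PHASTA layout, and for boundary blocks nvert is the face vertex count (nBoundaryFaceEdges) rather than the full element vertex count PHASTA wrote:

#include <cassert>
#include <vector>

// Flatten a zero-based [nelem][nvert] connectivity table into the single
// flat, one-based (Fortran-style), element-major array CGNS expects.
std::vector<int> flattenForCGNS(const std::vector<std::vector<int> > &ien,
                                int nelem, int nvert)
{
  std::vector<int> c(static_cast<size_t>(nelem) * nvert);
  size_t i = 0;
  for (int elem = 0; elem < nelem; ++elem)
    for (int vert = 0; vert < nvert; ++vert)
      c[i++] = ien[elem][vert] + 1; // 0-based index in, 1-based id out
  assert(i == c.size());
  return c;
}
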
Further, nothing done yet for parallel with regard to global numbering --- phasta/CMakeLists.txt | 1 + phasta/phCGNSgbc.cc | 173 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 phasta/phCGNSgbc.cc diff --git a/phasta/CMakeLists.txt b/phasta/CMakeLists.txt index 0a785e268..4e5ff54ab 100644 --- a/phasta/CMakeLists.txt +++ b/phasta/CMakeLists.txt @@ -6,6 +6,7 @@ set(SOURCES phOutput.cc phLinks.cc phGeomBC.cc + phCGNSgbc.cc phBlock.cc phAdapt.cc phRestart.cc diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc new file mode 100644 index 000000000..d455096d2 --- /dev/null +++ b/phasta/phCGNSgbc.cc @@ -0,0 +1,173 @@ +#include +#include "phOutput.h" +#include "phIO.h" +#include "phiotimer.h" +#include +#include +#include +#include + +namespace ph { + +// renamed, retained but not yet updated +static std::string buildCGNSgbcFileName(std::string timestep_or_dat) +{ + std::stringstream ss; + int rank = PCU_Comm_Self() + 1; + ss << "geombc." << timestep_or_dat << "." << rank; + return ss.str(); +} + +enum { + MAX_PARAMS = 12 +}; + +// renamed, update is only a transpose to match CNGS. Parallel will require mapping here or later to global numbering +void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +{ + int nelem = o.blocks.interior.nElements[block]; + int nvert = o.blocks.interior.keys[block].nElementVertices; + c.setSize(nelem * nvert); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ien[block][elem][vert] + 1; /* FORTRAN indexing */ + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +//renamed, update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHAST wanted full volume +void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +{ + int nelem = o.blocks.boundary.nElements[block]; +// CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; + int nvert = o.blocks.boundary.keys[block].nBoundaryFaceEdges; + c.setSize(nelem * nvert); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ienb[block][elem][vert] + 1; + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +void getInterfaceConnectivityCGNS // not extended yet other than transpose +( + Output& o, + int block, + apf::DynamicArray& c +) +{ + int nelem = o.blocks.interface.nElements[block]; + int nvert0 = o.blocks.interface.keys[block].nElementVertices; + int nvert1 = o.blocks.interface.keys[block].nElementVertices1; + c.setSize(nelem * (nvert0 + nvert1)); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert0; ++vert) + c[i++] = o.arrays.ienif0[block][elem][vert] + 1; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert1; ++vert) + c[i++] = o.arrays.ienif1[block][elem][vert] + 1; + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +// renamed but not updated yet +void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) +{ + int nelem = o.blocks.boundary.nElements[block]; + codes.setSize(nelem * 2); + size_t i = 0; + for (int j = 0; j < 2; ++j) + for (int elem = 0; elem < nelem; ++elem) + codes[i++] = o.arrays.ibcb[block][elem][j]; + PCU_ALWAYS_ASSERT(i == codes.getSize()); +} + +// renamed and calling the renamed functions above with output writes commented as they are PHASTA file style +void writeBlocksCGNS(FILE* f, Output& o) +{ + apf::DynamicArray 
c; + int params[MAX_PARAMS]; + for (int i = 0; i < o.blocks.interior.getSize(); ++i) { + BlockKey& k = o.blocks.interior.keys[i]; + std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); + params[0] = o.blocks.interior.nElements[i]; +// fillBlockKeyParams(params, k); + getInteriorConnectivityCGNS(o, i, c); +// ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 7, params); + } + for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { + BlockKey& k = o.blocks.boundary.keys[i]; + std::string phrase = getBlockKeyPhrase(k, "connectivity boundary "); + params[0] = o.blocks.boundary.nElements[i]; +// fillBlockKeyParams(params, k); + getBoundaryConnectivityCGNS(o, i, c); +// ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 8, params); +// this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on + phrase = getBlockKeyPhrase(k, "nbc codes "); + apf::DynamicArray codes; + getNaturalBCCodesCGNS(o, i, codes); +// ph_write_ints(f, phrase.c_str(), &codes[0], codes.getSize(), 8, params); + } + +} + + + +// retaining in case it is useful but only renamed at this point +void writeCGNSgbc(Output& o, std::string path, int timestep) +{ + double t0 = PCU_Time(); + apf::Mesh* m = o.mesh; + std::stringstream tss; + std::string timestep_or_dat; + if (! timestep) + timestep_or_dat = "dat"; + else { + tss << timestep; + timestep_or_dat = tss.str(); + } + path += buildCGNSgbcFileName(timestep_or_dat); + phastaio_setfile(GEOMBC_WRITE); + FILE* f = o.openfile_write(o, path.c_str()); + if (!f) { + lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); + abort(); + } + ph_write_preamble(f); + int params[MAX_PARAMS]; +/* all of these strings are looked for by the other programs + reading this format, so don't fix spelling errors or + other silliness, it has already been set in stone */ +/* + writeInt(f, "number of nodes", m->count(0)); + writeInt(f, "number of modes", o.nOverlapNodes); + writeInt(f, "number of shapefunctions soved on processor", 0); + writeInt(f, "number of global modes", 0); + writeInt(f, "number of interior elements", m->count(m->getDimension())); + writeInt(f, "number of boundary elements", o.nBoundaryElements); + writeInt(f, "maximum number of element nodes", o.nMaxElementNodes); + writeInt(f, "number of interior tpblocks", o.blocks.interior.getSize()); + writeInt(f, "number of boundary tpblocks", o.blocks.boundary.getSize()); + writeInt(f, "number of nodes with Dirichlet BCs", o.nEssentialBCNodes); + + params[0] = m->count(0); + params[1] = 3; + ph_write_doubles(f, "co-ordinates", o.arrays.coordinates, + params[0] * params[1], 2, params); + writeInt(f, "number of processors", PCU_Comm_Peers()); + writeInt(f, "size of ilwork array", o.nlwork); + params[0] = m->count(0); + writeInts(f, " mode number map from partition to global", + o.arrays.globalNodeNumbers, m->count(0)); + writeBlocksCGNS(f, o); + writeInts(f, "bc mapping array", o.arrays.nbc, m->count(0)); + writeInts(f, "bc codes array", o.arrays.ibc, o.nEssentialBCNodes); + apf::DynamicArray bc; + PHASTAIO_CLOSETIME(fclose(f);) + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"geombc file written in %f seconds\n", t1 - t0); +*/ +} + +} From 07e45c4e4fb04f1799a78aacb2ee045be27f1184 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sat, 5 Aug 2023 15:27:08 -0600 Subject: [PATCH 03/68] compiles with code to generate PETSc-style-global-node-number =ncorp[on-rank-node-number] --- phasta/phCGNSgbc.cc | 173 +++++++++++++++++++++++++++++++++++++++++++- phasta/phOutput.h | 2 + 2 files changed, 174 insertions(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index d455096d2..56b420ebe 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -6,9 +6,178 @@ #include #include #include +#include +#include +typedef int lcorp_t; +#define NCORP_MPI_T MPI_INTEGER +typedef long long int gcorp_t; namespace ph { + +static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); +static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); + + +void gen_ncorp(Output& o) +{ + apf::Mesh* m = o.mesh; + int part; + int num_parts; + int i; + lcorp_t nilwork = o.nlwork; + int num_nodes=m->count(0); + o.arrays.ncorp = new gcorp_t[num_nodes]; + lcorp_t owned; + lcorp_t local; + lcorp_t* owner_counts; + gcorp_t local_start_id; + gcorp_t gid; + + MPI_Comm_rank(MPI_COMM_WORLD, &part); + MPI_Comm_size(MPI_COMM_WORLD, &num_parts); + + memset(o.arrays.ncorp, 0, sizeof(gcorp_t)*(num_nodes)); + owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + // conpar.iownnodes = owned+local; +#ifdef PRINT_EVERYTHING + printf("%d: %d local only nodes\n", part, local); + printf("%d: %d owned nodes\n", part, owned); +#endif + assert( owned <= num_nodes ); + assert( owned+local <= num_nodes ); + + owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); + memset(owner_counts, 0, sizeof(lcorp_t)*num_parts); + owner_counts[part] = owned+local; +#ifdef PRINT_EVERYTHING + for(i=0;i=0); + for(i=0;i=0); + +// global so needs long long + gid++; + continue; + } + if(o.arrays.ncorp[i] == 0) + { + o.arrays.ncorp[i] = gid; + assert(o.arrays.ncorp[i]>=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == -1) + { + o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves + } + + } + //char code[] = "out"; + //int ione = 1; + +} + +static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +{ + int i; + lcorp_t num_local = 0; + for(i=0;i 1)); + } + return(num_local); +} +static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +{ + int numtask = ilwork[0]; + int itkbeg = 0; //task offset + int owned = 0; + int i,j,k; + for(i=0;i= 0 && iacc <= 1); + int iother = ilwork[itkbeg+3]-1; //other rank (see ctypes.f for off by one) + int numseg = ilwork[itkbeg+4]; //number of segments + for(j=0;j Date: Sat, 5 Aug 2023 15:40:15 -0600 Subject: [PATCH 04/68] possibly correct (compiles a least) of connectivity data to global numbering --- phasta/phCGNSgbc.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 56b420ebe..2bc6b8e25 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -200,7 +200,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ien[block][elem][vert] + 1; /* FORTRAN indexing */ + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // plus 1 built in + 1; /* FORTRAN indexing */ PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -214,7 +214,7 @@ void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& 
c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ienb[block][elem][vert] + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]]; // plus 1 built in + 1; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -232,10 +232,10 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert0; ++vert) - c[i++] = o.arrays.ienif0[block][elem][vert] + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]]; // plus 1 built in + 1; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert1; ++vert) - c[i++] = o.arrays.ienif1[block][elem][vert] + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]]; // plus 1 built in + 1; PCU_ALWAYS_ASSERT(i == c.getSize()); } From 81a707b8c64a339abe476c9a060f9928862a4f2a Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 5 Aug 2023 16:56:25 -0600 Subject: [PATCH 05/68] Cleaned up a few bugs, added useful numbers to the o data structure for doing the owned-node data condensation (coordinates and later solution), and wrote a potential coordinate condensation (though it might be better to change it to what the PETSc CGNS writer does...looked at that too late to copy in first pass --- phasta/phCGNSgbc.cc | 34 ++++++++++++++++++++++++++++------ phasta/phOutput.h | 5 ++++- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 2bc6b8e25..8ca313683 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -19,7 +19,7 @@ static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_n static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); -void gen_ncorp(Output& o) +void gen_ncorp(Output& o ) { apf::Mesh* m = o.mesh; int part; @@ -40,7 +40,7 @@ void gen_ncorp(Output& o) memset(o.arrays.ncorp, 0, sizeof(gcorp_t)*(num_nodes)); owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); - // conpar.iownnodes = owned+local; + o.iownnodes = owned+local; #ifdef PRINT_EVERYTHING printf("%d: %d local only nodes\n", part, local); printf("%d: %d owned nodes\n", part, owned); @@ -74,6 +74,7 @@ void gen_ncorp(Output& o) local_start_id += owner_counts[i]; } local_start_id++; //Fortran numbering + o.local_start_id = local_start_id; #ifdef PRINT_EVERYTHING printf("%d: %d\n", part, local_start_id); #endif @@ -200,7 +201,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // plus 1 built in + 1; /* FORTRAN indexing */ + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]-1]; // input is 0-based, out is 1-based do drop the +1 PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -214,7 +215,7 @@ void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& c size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]]; // plus 1 built in + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -232,10 +233,10 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 
0; vert < nvert0; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]]; // plus 1 built in + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]-1]; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert1; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]]; // plus 1 built in + 1; + c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -291,6 +292,26 @@ void writeCGNSgbc(Output& o, std::string path, int timestep) std::string timestep_or_dat; // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); +// o carries +// o.arrays.ncorp[on-rank-node-number(0-based)] => PETSc global node number (1-based) +// o.iownnodes => nodes owned by this rank +// o.local_start_id => this rank's first node number (1-based and also which must be a long long int) + + +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. + int num_nodes=m->count(0); + int icount=0; + gcorp_t gnod; + double* x = new double[o.iownnodes * 3]; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write + for (int j = 0; j < 3; ++j) + x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + } + if (! timestep) timestep_or_dat = "dat"; @@ -307,6 +328,7 @@ void writeCGNSgbc(Output& o, std::string path, int timestep) } ph_write_preamble(f); int params[MAX_PARAMS]; + /* all of these strings are looked for by the other programs reading this format, so don't fix spelling errors or other silliness, it has already been set in stone */ diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 3a08ad666..ce95c8f07 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -136,7 +136,7 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 int* ifather; /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; -/* an array of integers of size nfather that has nsons in each entry */ +/* an array that maps on-rank-node-number (input) to PETSc global-node-number */ long long int* ncorp; }; @@ -155,6 +155,8 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; + long long int local_start_id; /* this rank's first global node number (1 based) */ + int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ int nlworkl; /* size of arrays.ilworkl */ @@ -170,6 +172,7 @@ struct Output void generateOutput(Input& in, BCs& bcs, apf::Mesh* mesh, Output& o); void writeGeomBC(Output& o, std::string path, int timestep_or_dat = 0); +void writeCGNSgbc(Output& o, std::string path, int timestep_or_dat = 0); } From b920292e20d7b0c3c3648ba7635c12a0bc97581b Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sat, 5 Aug 2023 23:04:52 -0600 Subject: [PATCH 06/68] Added input flag (writeCGNSFiles 1 in adapt.inp) to call writeCGNS though it does not yet actually write a CGNS file in the function writeCGNS --- phasta/phCGNSgbc.cc | 29 +++++++++++------------------ phasta/phCook.cc | 2 ++ phasta/phInput.cc | 2 ++ phasta/phInput.h | 2 ++ phasta/phOutput.h | 2 +- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 8ca313683..bd779e9ff 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -180,7 +180,7 @@ static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_n // renamed, retained but not yet updated -static std::string buildCGNSgbcFileName(std::string timestep_or_dat) +static std::string buildCGNSFileName(std::string timestep_or_dat) { std::stringstream ss; int rank = PCU_Comm_Self() + 1; @@ -283,13 +283,12 @@ void writeBlocksCGNS(FILE* f, Output& o) -// retaining in case it is useful but only renamed at this point -void writeCGNSgbc(Output& o, std::string path, int timestep) +// WIP +void writeCGNS(Output& o, std::string path) { double t0 = PCU_Time(); apf::Mesh* m = o.mesh; std::stringstream tss; - std::string timestep_or_dat; // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); // o carries @@ -313,20 +312,14 @@ void writeCGNSgbc(Output& o, std::string path, int timestep) } - if (! timestep) - timestep_or_dat = "dat"; - else { - tss << timestep; - timestep_or_dat = tss.str(); - } - path += buildCGNSgbcFileName(timestep_or_dat); - phastaio_setfile(GEOMBC_WRITE); - FILE* f = o.openfile_write(o, path.c_str()); - if (!f) { - lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); - abort(); - } - ph_write_preamble(f); +// path += buildCGNSFileName(timestep_or_dat); +// phastaio_setfile(GEOMBC_WRITE); +// FILE* f = o.openfile_write(o, path.c_str()); +// if (!f) { +// lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); +// abort(); +// } +// ph_write_preamble(f); int params[MAX_PARAMS]; /* all of these strings are looked for by the other programs diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 983570d43..5b67b8405 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -224,6 +224,8 @@ namespace ph { out.openfile_write = fn; } ph::writeGeomBC(out, subDirPath); //write geombc + if ( in.writeCGNSFiles ) + ph::writeCGNS(out, subDirPath); //write CGNS if(!PCU_Comm_Self()) ph::writeAuxiliaryFiles(path, in.timeStepNumber); m->verify(); diff --git a/phasta/phInput.cc b/phasta/phInput.cc index ffc9989c2..65118d677 100644 --- a/phasta/phInput.cc +++ b/phasta/phInput.cc @@ -60,6 +60,7 @@ static void setDefaults(Input& in) in.axisymmetry = 0; in.parmaLoops = 3; //a magical value in.parmaVerbosity = 1; //fairly quiet + in.writeCGNSFiles = 0; // write CGNS Files in.writeGeomBCFiles = 0; // write additional geombc file for vis in streaming in.writeRestartFiles = 0; // write additional restart file for vis in streaming in.writeVTK = 0; @@ -153,6 +154,7 @@ static void formMaps(Input& in, StringMap& stringMap, IntMap& intMap, DblMap& db intMap["parmaLoops"] = &in.parmaLoops; intMap["parmaVerbosity"] = &in.parmaVerbosity; intMap["writeVTK"] = &in.writeVTK; + intMap["writeCGNSFiles"] = &in.writeCGNSFiles; intMap["writeGeomBCFiles"] = &in.writeGeomBCFiles; intMap["writeRestartFiles"] = &in.writeRestartFiles; intMap["ramdisk"] = &in.ramdisk; diff --git a/phasta/phInput.h b/phasta/phInput.h index a6bf88c90..28123f805 100644 --- 
a/phasta/phInput.h +++ b/phasta/phInput.h @@ -147,6 +147,8 @@ class Input /** \brief write the geombc file during in-memory data transfer between phasta and chef. */ int writeGeomBCFiles; + /* \brief write CGNS files for pre-processing */ + int writeCGNSFiles; /** \brief write the restart file during in-memory data transfer between phasta and chef. */ int writeRestartFiles; diff --git a/phasta/phOutput.h b/phasta/phOutput.h index ce95c8f07..21120ee3d 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -172,7 +172,7 @@ struct Output void generateOutput(Input& in, BCs& bcs, apf::Mesh* mesh, Output& o); void writeGeomBC(Output& o, std::string path, int timestep_or_dat = 0); -void writeCGNSgbc(Output& o, std::string path, int timestep_or_dat = 0); +void writeCGNS(Output& o, std::string path); } From 885d5779b0f184fbf6cb84a37e756008ef9affe6 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 10:48:50 -0600 Subject: [PATCH 07/68] opened CGNS file and computed global counts it needs --- phasta/phCGNSgbc.cc | 41 +++++++++++++++++++++++++++++++++++++++-- phasta/phOutput.h | 2 ++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index bd779e9ff..d3130504b 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -8,6 +8,12 @@ #include #include #include +#ifdef HAVE_CGNS +// +#include +#include +// +#endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER typedef long long int gcorp_t; @@ -75,6 +81,12 @@ void gen_ncorp(Output& o ) } local_start_id++; //Fortran numbering o.local_start_id = local_start_id; + +// also get the global number of nodes + o.numGlobalNodes=0; + for(i=0;i 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); + // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); // o carries // o.arrays.ncorp[on-rank-node-number(0-based)] => PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) +// o.numGlobalNodes + int numel=m->count(m->getDimension()); + PCU_Add_Ints(&numel,1); + o.numGlobalVolumeElements = numel; // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 21120ee3d..6f72cc9c4 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -156,6 +156,8 @@ struct Output int nEssentialBCNodes; int nOverlapEdges; long long int local_start_id; /* this rank's first global node number (1 based) */ + long long int numGlobalNodes; + long long int numGlobalVolumeElements; int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From aa48cae12de5b9f85b7fbbc2a2eb865d21a566d4 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sun, 6 Aug 2023 11:56:06 -0600 Subject: [PATCH 08/68] writing coordinates compiles --- phasta/phCGNSgbc.cc | 56 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index d3130504b..284e18faa 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -301,7 +301,6 @@ void writeCGNS(Output& o, std::string path) { double t0 = PCU_Time(); apf::Mesh* m = o.mesh; - int cgid = -1; std::string timestep_or_dat; // if (! timestep) @@ -311,31 +310,67 @@ void writeCGNS(Output& o, std::string path) // timestep_or_dat = tss.str(); // } // cgp_mpi_comm(); -// cgp_open('chefOut.cgns', CG_MODE_WRITE, &cgid); +// cgp_open('chefOut.cgns', CG_MODE_WRITE, &F); //static std::string buildCGNSFileName(std::string timestep_or_dat) // path += buildCGNSFileName(timestep_or_dat); static char *outfile = "chefOut.cgns"; + int F, B, Z, E, S, Fs, A, Cx, Cy, Cz; + cgsize_t sizes[3],*e, start, end, ncells; +// ^^^^^^ need to be sure this is long since using PCU_Add_Long below even when not needed // if (!PCU_Comm_Self()) - cgp_mpi_comm(MPI_COMM_WORLD); - cgp_open(outfile, CG_MODE_READ, &cgid); -//FAILED cgp_open('chefO.cgns', CG_MODE_READ, &cgid); -// PetscCheck(cgid > 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); +//FAILED cgp_open('chefO.cgns', CG_MODE_READ, &F); +// PetscCheck(F > 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering - gen_ncorp( o ); + gen_ncorp( o ); // o carries // o.arrays.ncorp[on-rank-node-number(0-based)] => PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) // o.numGlobalNodes - int numel=m->count(m->getDimension()); - PCU_Add_Ints(&numel,1); - o.numGlobalVolumeElements = numel; + ncells=m->count(m->getDimension()); + ncells=PCU_Add_Long(ncells); +// may not need o.numGlobalVolumeElements = ncells; + + sizes[0]=o.numGlobalNodes; + sizes[1]=ncells; + sizes[0]; + cgp_mpi_comm(MPI_COMM_WORLD); + if ( cgp_open(outfile, CG_MODE_READ, &F) || + cg_base_write(F, "Base", 3, 3, &B) || + cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) + cgp_error_exit(); + /* create data nodes for coordinates */ + + if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) + cgp_error_exit(); // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. 
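// Worked example of the ownership window used just below (illustrative
// values, not from a real mesh): with ncorp = {7, 3, 8, 9},
// local_start_id = 7 and iownnodes = 3, the owned range is [7, 9], so
// local nodes 0, 2 and 3 are packed into x and written by this rank,
// while local node 1 (global node 3) is written by the rank that owns it.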
int num_nodes=m->count(0); //V2 + gcorp_t gnod; + start=o.local_start_id; + end=start+o.iownnodes-1; + double* x = new double[o.iownnodes]; + for (int j = 0; j < 3; ++j) { + int icount=0; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= start && gnod <= end) { // coordinate to write + x[icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + if(j==0) cgp_coord_write_data(F, B, Z, Cx, &start, &end, x); + if(j==1) cgp_coord_write_data(F, B, Z, Cy, &start, &end, x); + if(j==2) cgp_coord_write_data(F, B, Z, Cz, &start, &end, x); + } + } +//V1 that KEJ wrote mothballed for V2 that mimics PETSc +/* int icount=0; gcorp_t gnod; double* x = new double[o.iownnodes * 3]; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write for (int j = 0; j < 3; ++j) x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } } */ From ec7e650e854755bf2e972bc28cb27136ae3602f1 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 12:59:09 -0600 Subject: [PATCH 09/68] in pretty far over my C++ skill level at this point....the CGNS library seems to think cgsize_t is int which will never fly for our meshes with global numbering so probably need to find a way to tell Spack or other I want long long int there...I am also uncertain if SCOREC convention to call this gcorp_t is going to play nice with CGNS calling it cgsize_t but pushing this up to get help --- phasta/phCGNSgbc.cc | 97 +++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 57 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 284e18faa..bf08ad78e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -206,30 +206,30 @@ enum { }; // renamed, update is only a transpose to match CNGS. Parallel will require mapping here or later to global numbering -void getInteriorConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +void getInteriorConnectivityCGNS(Output& o, int block, gcorp_t* c) { int nelem = o.blocks.interior.nElements[block]; int nvert = o.blocks.interior.keys[block].nElementVertices; // c.setSize(nelem * nvert); size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]-1]; // input is 0-based, out is 1-based do drop the +1 PCU_ALWAYS_ASSERT(i == nelem*nvert); } //renamed, update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHAST wanted full volume -void getBoundaryConnectivityCGNS(Output& o, int block, apf::DynamicArray& c) +void getBoundaryConnectivityCGNS(Output& o, int block, gcorp_t* c) { int nelem = o.blocks.boundary.nElements[block]; // CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; int nvert = o.blocks.boundary.keys[block].nBoundaryFaceEdges; //c.setSize(nelem * nvert); size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == nelem*nvert); } void getInterfaceConnectivityCGNS // not extended yet other than transpose ( Output& o, int block, apf::DynamicArray& c ) { int nelem = o.blocks.interface.nElements[block]; int nvert0 = o.blocks.interface.keys[block].nElementVertices; int nvert1 = o.blocks.interface.keys[block].nElementVertices1; c.setSize(nelem * (nvert0 + nvert1)); size_t i = 0; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert0; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]-1]; for (int elem = 0; elem < nelem; ++elem) for (int vert = 0; vert < nvert1; ++vert) c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]-1]; PCU_ALWAYS_ASSERT(i == c.getSize()); } @@ -266,32 +266,56 @@ void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) } // renamed and calling the renamed functions above with output writes commented as they are PHASTA file style -void 
writeBlocksCGNS(int F,int B,int Z, Output& o) { - apf::DynamicArray c; int params[MAX_PARAMS]; + + int E; + gcorp_t e_owned, e_start,e_end; + + /* create data node for elements */ + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + for (int i = 0; i < o.blocks.interior.getSize(); ++i) { + BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); params[0] = o.blocks.interior.nElements[i]; // fillBlockKeyParams(params, k); - getInteriorConnectivityCGNS(o, i, c); -// ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 7, params); + e_owned = o.blocks.interior.nElements[i]; + int nvert = o.blocks.interior.keys[i].nElementVertices; + gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, i, &e); + /* create data node for elements */ + // will start testing with single topology, all hex so allow hardcode for pass 1 + //nvert can case switch this or enumv like PETSc + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + e_end=e_start+e_owned -1; + /* write the element connectivity in parallel */ + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + free(e); } for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity boundary "); params[0] = o.blocks.boundary.nElements[i]; + e_owned = params[0]; + int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; + gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); // fillBlockKeyParams(params, k); - getBoundaryConnectivityCGNS(o, i, c); + getBoundaryConnectivityCGNS(o, i, &e); // ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 8, params); // this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on phrase = getBlockKeyPhrase(k, "nbc codes "); apf::DynamicArray codes; getNaturalBCCodesCGNS(o, i, codes); + free(e); // ph_write_ints(f, phrase.c_str(), &codes[0], codes.getSize(), 8, params); } - } @@ -331,7 +355,7 @@ void writeCGNS(Output& o, std::string path) // o.numGlobalNodes ncells=m->count(m->getDimension()); ncells=PCU_Add_Long(ncells); -// may not need o.numGlobalVolumeElements = ncells; + o.numGlobalVolumeElements = ncells; sizes[0]=o.numGlobalNodes; sizes[1]=ncells; @@ -384,49 +408,8 @@ void writeCGNS(Output& o, std::string path) } */ - -// path += buildCGNSFileName(timestep_or_dat); -// phastaio_setfile(GEOMBC_WRITE); -// FILE* f = o.openfile_write(o, path.c_str()); -// if (!f) { -// lion_eprint(1,"failed to open \"%s\"!\n", path.c_str()); -// abort(); -// } -// ph_write_preamble(f); - int params[MAX_PARAMS]; - -/* all of these strings are looked for by the other programs - reading this format, so don't fix spelling errors or - other silliness, it has already been set in stone */ -/* - writeInt(f, "number of nodes", m->count(0)); - writeInt(f, "number of modes", o.nOverlapNodes); - writeInt(f, "number of shapefunctions soved on processor", 0); - writeInt(f, "number of global modes", 0); - writeInt(f, "number of interior elements", m->count(m->getDimension())); - writeInt(f, "number of boundary elements", o.nBoundaryElements); - writeInt(f, "maximum number of element nodes", o.nMaxElementNodes); - writeInt(f, "number of interior tpblocks", 
o.blocks.interior.getSize()); - writeInt(f, "number of boundary tpblocks", o.blocks.boundary.getSize()); - writeInt(f, "number of nodes with Dirichlet BCs", o.nEssentialBCNodes); - - params[0] = m->count(0); - params[1] = 3; - ph_write_doubles(f, "co-ordinates", o.arrays.coordinates, - params[0] * params[1], 2, params); - writeInt(f, "number of processors", PCU_Comm_Peers()); - writeInt(f, "size of ilwork array", o.nlwork); - params[0] = m->count(0); - writeInts(f, " mode number map from partition to global", - o.arrays.globalNodeNumbers, m->count(0)); - writeBlocksCGNS(f, o); - writeInts(f, "bc mapping array", o.arrays.nbc, m->count(0)); - writeInts(f, "bc codes array", o.arrays.ibc, o.nEssentialBCNodes); - apf::DynamicArray bc; - PHASTAIO_CLOSETIME(fclose(f);) - double t1 = PCU_Time(); - if (!PCU_Comm_Self()) - lion_oprint(1,"geombc file written in %f seconds\n", t1 - t0); -*/ + writeBlocksCGNS(F,B,Z, o); +// if (!PCU_Comm_Self()) +// lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } From 3a934bb8006b153c6fa829e875dc6f5b27e4b0a5 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 14:07:49 -0600 Subject: [PATCH 10/68] compiling version that, if I made no errors, will write a CGNS coordinates and connectivity file for hexes (hard coded at this point --- apf/apfCGNS.cc | 2 +- mds/mdsCGNS.cc | 2 +- phasta/phCGNSgbc.cc | 35 +++++++++++++++++------------------ phasta/phOutput.h | 8 ++++---- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index d76757be0..93a85d12d 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -1023,7 +1023,7 @@ void Write2DEdges(CGNS cgns, apf::Mesh *m, const Count &edgeCount, const Count & // Todo split this out into a list of calls to local functions to show process/work flow void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); +// static_assert(std::is_same::value, "cgsize_t not compiled as int"); const auto myRank = PCU_Comm_Self(); const Count vertexCount = count(m, 0); diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index 0d24e67ec..2a591fd07 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -1051,7 +1051,7 @@ void ReadBCInfo(const int cgid, const int base, const int zone, const int nBocos apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCMap, const std::vector> &readMeshData) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); +// static_assert(std::is_same::value, "cgsize_t not compiled as int"); int cgid = -1; auto comm = PCU_Get_Comm(); diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index bf08ad78e..3b7aae558 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -16,13 +16,12 @@ #endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER -typedef long long int gcorp_t; namespace ph { -static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); -static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes); +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); +static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); void gen_ncorp(Output& o ) @@ -33,17 +32,17 @@ void gen_ncorp(Output& o ) int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = new gcorp_t[num_nodes]; + o.arrays.ncorp = new cgsize_t[num_nodes]; lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; - gcorp_t 
local_start_id; - gcorp_t gid; + cgsize_t local_start_id; + cgsize_t gid; MPI_Comm_rank(MPI_COMM_WORLD, &part); MPI_Comm_size(MPI_COMM_WORLD, &num_parts); - memset(o.arrays.ncorp, 0, sizeof(gcorp_t)*(num_nodes)); + memset(o.arrays.ncorp, 0, sizeof(cgsize_t)*(num_nodes)); owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); o.iownnodes = owned+local; @@ -127,7 +126,7 @@ void gen_ncorp(Output& o ) } -static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) { int i; lcorp_t num_local = 0; @@ -139,7 +138,7 @@ static lcorp_t count_local(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_n } return(num_local); } -static lcorp_t count_owned(int* ilwork, int nlwork,gcorp_t* ncorp_tmp, int num_nodes) +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) { int numtask = ilwork[0]; int itkbeg = 0; //task offset @@ -206,7 +205,7 @@ enum { }; // renamed, update is only a transpose to match CNGS. Parallel will require mapping here or later to global numbering -void getInteriorConnectivityCGNS(Output& o, int block, gcorp_t* c) +void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) { int nelem = o.blocks.interior.nElements[block]; int nvert = o.blocks.interior.keys[block].nElementVertices; @@ -219,7 +218,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, gcorp_t* c) } //renamed, update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHAST wanted full volume -void getBoundaryConnectivityCGNS(Output& o, int block, gcorp_t* c) +void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c) { int nelem = o.blocks.boundary.nElements[block]; // CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; @@ -271,7 +270,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) int params[MAX_PARAMS]; int E; - gcorp_t e_owned, e_start,e_end; + cgsize_t e_owned, e_start,e_end; /* create data node for elements */ if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) @@ -285,8 +284,8 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) // fillBlockKeyParams(params, k); e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; - gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getInteriorConnectivityCGNS(o, i, &e); + cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, i, e); /* create data node for elements */ // will start testing with single topology, all hex so allow hardcode for pass 1 //nvert can case switch this or enumv like PETSc @@ -305,9 +304,9 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) params[0] = o.blocks.boundary.nElements[i]; e_owned = params[0]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; - gcorp_t e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); + cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); // fillBlockKeyParams(params, k); - getBoundaryConnectivityCGNS(o, i, &e); + getBoundaryConnectivityCGNS(o, i, e); // ph_write_ints(f, phrase.c_str(), &c[0], c.getSize(), 8, params); // this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on phrase = 
getBlockKeyPhrase(k, "nbc codes "); @@ -376,7 +375,7 @@ void writeCGNS(Output& o, std::string path) // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. int num_nodes=m->count(0); //V2 - gcorp_t gnod; + cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; double* x = new double[o.iownnodes]; @@ -396,7 +395,7 @@ void writeCGNS(Output& o, std::string path) //V1 that KEJ wrote mothballed for V2 that mimics PETSc /* int icount=0; - gcorp_t gnod; + cgsize_t gnod; double* x = new double[o.iownnodes * 3]; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 6f72cc9c4..444740891 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -137,7 +137,7 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; /* an array that maps on-rank-node-number (input) to PETSc global-node-number */ - long long int* ncorp; + long int* ncorp; }; @@ -155,9 +155,9 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; - long long int local_start_id; /* this rank's first global node number (1 based) */ - long long int numGlobalNodes; - long long int numGlobalVolumeElements; + long int local_start_id; /* this rank's first global node number (1 based) */ + long int numGlobalNodes; + long int numGlobalVolumeElements; int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From dac4caa6c5582a28c85b9cc1c70f2daa85575eab Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 18:05:35 -0600 Subject: [PATCH 11/68] runs through chef and produces a chefOut.cgns but it crashes paraview so probably more bugs to find. 
--- phasta/phCGNSgbc.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 3b7aae558..0b9a7d9f7 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -271,11 +271,9 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) int E; cgsize_t e_owned, e_start,e_end; +// int num_parts; +// MPI_Comm_size(MPI_COMM_WORLD, &num_parts); - /* create data node for elements */ - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) - cgp_error_exit(); - for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; @@ -291,10 +289,13 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) //nvert can case switch this or enumv like PETSc if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) cgp_error_exit(); + e_start=0; +// if(num_parts !=1) MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - e_end=e_start+e_owned -1; + + e_end=e_start+e_owned; /* write the element connectivity in parallel */ - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e)) cgp_error_exit(); free(e); } @@ -360,7 +361,7 @@ void writeCGNS(Output& o, std::string path) sizes[1]=ncells; sizes[0]; cgp_mpi_comm(MPI_COMM_WORLD); - if ( cgp_open(outfile, CG_MODE_READ, &F) || + if ( cgp_open(outfile, CG_MODE_WRITE, &F) || cg_base_write(F, "Base", 3, 3, &B) || cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) cgp_error_exit(); From 742b226b834120a92b3f14c8d486f0cb4afbde51 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 6 Aug 2023 21:53:36 -0600 Subject: [PATCH 12/68] first big bug squashed but there are more --- phasta/phCGNSgbc.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 0b9a7d9f7..791bbd341 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -388,10 +388,10 @@ void writeCGNS(Output& o, std::string path) x[icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } - if(j==0) cgp_coord_write_data(F, B, Z, Cx, &start, &end, x); - if(j==1) cgp_coord_write_data(F, B, Z, Cy, &start, &end, x); - if(j==2) cgp_coord_write_data(F, B, Z, Cz, &start, &end, x); } + if(j==0) cgp_coord_write_data(F, B, Z, Cx, &start, &end, x); + if(j==1) cgp_coord_write_data(F, B, Z, Cy, &start, &end, x); + if(j==2) cgp_coord_write_data(F, B, Z, Cz, &start, &end, x); } //V1 that KEJ wrote mothballed for V2 that mimics PETSc /* From 3959fad7f0700ba252e950337e1ba952ca95761d Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Date: Sun, 6 Aug 2023 23:14:45 -0600
Subject: [PATCH 13/68] fixed the usual +/-1 C and Fortran indexing bug

---
 phasta/phCGNSgbc.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc
index 791bbd341..5b23d8601 100644
--- a/phasta/phCGNSgbc.cc
+++ b/phasta/phCGNSgbc.cc
@@ -213,7 +213,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c)
   size_t i = 0;
   for (int elem = 0; elem < nelem; ++elem)
     for (int vert = 0; vert < nvert; ++vert)
-      c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]-1]; // input is 0-based, out is 1-based do drop the +1
+      c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // input is 0-based, out is 1-based do drop the +1
   PCU_ALWAYS_ASSERT(i == nelem*nvert);
 }
@@ -227,7 +227,7 @@ void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c)
   size_t i = 0;
   for (int elem = 0; elem < nelem; ++elem)
     for (int vert = 0; vert < nvert; ++vert)
-      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]-1];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]];
   PCU_ALWAYS_ASSERT(i == nelem*nvert);
 }
@@ -359,7 +359,7 @@ void writeCGNS(Output& o, std::string path)
   sizes[0]=o.numGlobalNodes;
   sizes[1]=ncells;
-  sizes[0];
+  sizes[2]=0;
   cgp_mpi_comm(MPI_COMM_WORLD);
   if ( cgp_open(outfile, CG_MODE_WRITE, &F) ||
       cg_base_write(F, "Base", 3, 3, &B) ||
       cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z))
     cgp_error_exit();
@@ -409,6 +409,7 @@ void writeCGNS(Output& o, std::string path)
 */
   writeBlocksCGNS(F,B,Z, o);
+  cgp_close(F);
 // if (!PCU_Comm_Self())
 //   lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0);
 }

From 32fcbaa2861aac7362033e9e3ed1a7c225d6184e Mon Sep 17 00:00:00 2001
From: "Kenneth E. Jansen"
Date: Sun, 6 Aug 2023 23:46:23 -0600
Subject: [PATCH 14/68] fprintf confirms data for coordinates and connectivity look correct

---
 phasta/phCGNSgbc.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc
index 5b23d8601..e38b5b2b5 100644
--- a/phasta/phCGNSgbc.cc
+++ b/phasta/phCGNSgbc.cc
@@ -297,6 +297,12 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o)
   /* write the element connectivity in parallel */
   if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e))
     cgp_error_exit();
+  printf("%ld, %ld \n", e_start+1, e_end);
+  for (int ne=0; ne
Date: Mon, 7 Aug 2023 09:09:34 -0600
Subject: [PATCH 15/68] every cgpxx call is wrapped with cgp_error_exit() which I assume means none of those calls is returning an error as the code runs through

---
 phasta/phCGNSgbc.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc
index e38b5b2b5..f38c7c45b 100644
--- a/phasta/phCGNSgbc.cc
+++ b/phasta/phCGNSgbc.cc
@@ -366,7 +366,7 @@ void writeCGNS(Output& o, std::string path)
   sizes[0]=o.numGlobalNodes;
   sizes[1]=ncells;
   sizes[2]=0;
-  cgp_mpi_comm(MPI_COMM_WORLD);
+  if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit;
   if ( cgp_open(outfile, CG_MODE_WRITE, &F) ||
       cg_base_write(F, "Base", 3, 3, &B) ||
       cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z))
     cgp_error_exit();
@@ -398,9 +398,9 @@ void writeCGNS(Output& o, std::string path)
   printf("%ld, %ld \n", start, end);
   for (int ne=0; ne
Date: Mon, 7 Aug 2023 15:16:43 -0600
Subject: [PATCH 16/68] added switch/case statement to handle tets. I have not cross-checked whether it is set up for mixed meshes yet, but I am going to do that after I get boundary elements writing and parallel tested.
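One caveat on PATCH 15 above: the added line if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; ends without parentheses, so it names the handler but never calls it; the statement evaluates the function's address and discards it, and an error from cgp_mpi_comm() passes silently. The call form is presumably what was intended; a minimal sketch, assuming the parallel CGNS header pcgnslib.h and an illustrative wrapper name:

    #include <mpi.h>
    #include <pcgnslib.h>

    // Attach the communicator that all subsequent cgp_* calls will use.
    // `cgp_error_exit;` alone is a no-op expression statement; only the
    // parenthesized form below actually aborts on error.
    static void setCgnsComm(void)
    {
      if (cgp_mpi_comm(MPI_COMM_WORLD))
        cgp_error_exit();
    }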
--- phasta/phCGNSgbc.cc | 25 +++++++++++++++++++++++-- phasta/phOutput.cc | 6 ++++++ phasta/phOutput.h | 20 ++++++++++++++++---- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index f38c7c45b..e792d2193 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -287,8 +287,24 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* create data node for elements */ // will start testing with single topology, all hex so allow hardcode for pass 1 //nvert can case switch this or enumv like PETSc - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) - cgp_error_exit(); + switch(nvert){ + case 4: + if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + case 5: + if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + case 6: + if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + case 8: + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + cgp_error_exit(); + break; + } e_start=0; // if(num_parts !=1) MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); @@ -297,11 +313,13 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e)) cgp_error_exit(); +if(0==1){ printf("%ld, %ld \n", e_start+1, e_end); for (int ne=0; ne #include #include +#ifdef HAVE_CGNS +// +#include +#include +// +#endif namespace ph { diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 444740891..1507784cc 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -4,6 +4,13 @@ #include "phInput.h" #include "phBlock.h" #include "phBC.h" +#ifdef HAVE_CGNS +// +#include +#include +// +#endif + namespace apf { class Mesh; @@ -137,7 +144,10 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; /* an array that maps on-rank-node-number (input) to PETSc global-node-number */ - long int* ncorp; +// worked but long int* ncorp; +#ifdef HAVE_CGNS + cgsize_t* ncorp; +#endif }; @@ -155,9 +165,11 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; - long int local_start_id; /* this rank's first global node number (1 based) */ - long int numGlobalNodes; - long int numGlobalVolumeElements; +#ifdef HAVE_CGNS + cgsize_t local_start_id; /* this rank's first global node number (1 based) */ + cgsize_t numGlobalNodes; + cgsize_t numGlobalVolumeElements; +#endif int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From ebf92d8c255c6b574d60bfe7885b8a9c64363c86 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Mon, 7 Aug 2023 19:02:06 -0600 Subject: [PATCH 17/68] seems like boundary elements are running through except the mixed wedge tet case...not viewable in PV so far so others will have to test --- phasta/phCGNSgbc.cc | 52 ++++++++++++++++++++++++++++----------------- pumi-meshes | 2 +- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index e792d2193..3749932f7 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -268,51 +268,48 @@ void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) void writeBlocksCGNS(int F,int B,int Z, Output& o) { int params[MAX_PARAMS]; - int E; cgsize_t e_owned, e_start,e_end; -// int num_parts; -// MPI_Comm_size(MPI_COMM_WORLD, &num_parts); - + cgsize_t e_startg,e_endg; + cgsize_t e_written=0; for (int i = 0; i < o.blocks.interior.getSize(); ++i) { - BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); params[0] = o.blocks.interior.nElements[i]; -// fillBlockKeyParams(params, k); e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getInteriorConnectivityCGNS(o, i, e); /* create data node for elements */ - // will start testing with single topology, all hex so allow hardcode for pass 1 - //nvert can case switch this or enumv like PETSc + e_startg=1+e_written; // start for the elements of this topology + e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology switch(nvert){ case 4: - if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 5: - if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 6: - if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 8: - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) +// if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, 1, o.numGlobalVolumeElements, 0, &E)) + if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; } e_start=0; -// if(num_parts !=1) MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - - e_end=e_start+e_owned; + e_start+=1+e_written; // my ranks global element start 1-based + e_end=e_start+e_owned-1; // my ranks global element stop 1-based /* write the element connectivity in parallel */ - if (cgp_elements_write_data(F, B, Z, E, e_start+1, e_end, e)) + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); + e_written=e_endg; // update count of elements written if(0==1){ printf("%ld, %ld \n", e_start+1, e_end); for (int ne=0; ne codes; getNaturalBCCodesCGNS(o, i, codes); free(e); -// ph_write_ints(f, phrase.c_str(), &codes[0], codes.getSize(), 8, params); } } diff --git a/pumi-meshes b/pumi-meshes index c00ba9c16..0cd77590d 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit c00ba9c16cacbb361ee538c03a3ec694ddb989f2 +Subproject commit 0cd77590d748b9cb5e190ecd4a33126d9823bdbb From d5022925c4b0fbcdf487ebeddfa963091a489494 Mon Sep 17 00:00:00 2001 From: "Kenneth 
E. Jansen" Date: Mon, 7 Aug 2023 21:45:12 -0600 Subject: [PATCH 18/68] srfID extracted and currently being printed to the screen. Output looks reasonable. --- phasta/phCGNSgbc.cc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 3749932f7..16a4a480a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -253,15 +253,14 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose } // renamed but not updated yet -void getNaturalBCCodesCGNS(Output& o, int block, apf::DynamicArray& codes) +void getNaturalBCCodesCGNS(Output& o, int block, int* codes) { int nelem = o.blocks.boundary.nElements[block]; - codes.setSize(nelem * 2); size_t i = 0; - for (int j = 0; j < 2; ++j) - for (int elem = 0; elem < nelem; ++elem) - codes[i++] = o.arrays.ibcb[block][elem][j]; - PCU_ALWAYS_ASSERT(i == codes.getSize()); + for (int elem = 0; elem < nelem; ++elem) + codes[i++] = o.arrays.ibcb[block][elem][1]; //srfID is the second number so 1 +// if we wanted we could use PHASTA's bit in coding in the first number to us attributes to set +// arbitrary combinations of BCs but leaving that out for now } // renamed and calling the renamed functions above with output writes commented as they are PHASTA file style @@ -289,6 +288,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); break; case 5: + free(e); if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; @@ -321,7 +321,6 @@ if(0==1){ } for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; - std::string phrase = getBlockKeyPhrase(k, "connectivity boundary "); params[0] = o.blocks.boundary.nElements[i]; e_owned = params[0]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; @@ -346,11 +345,15 @@ if(0==1){ /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); -// this is probably the easiest path to getting the list that tells us the face (through surfID of smd) that each boundary element face is on - phrase = getBlockKeyPhrase(k, "nbc codes "); - apf::DynamicArray codes; - getNaturalBCCodesCGNS(o, i, codes); free(e); + int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); + getNaturalBCCodesCGNS(o, i, srfID); + printf("%ld, %ld \n", e_start+1, e_end); + for (int ne=0; ne Date: Tue, 8 Aug 2023 09:20:04 -0600 Subject: [PATCH 19/68] fix: String constant must be char[], not char* - In C those are identical to each other. 
So I'm not sure what C++ is doing there --- phasta/phCGNSgbc.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 16a4a480a..f3e2048f3 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -376,7 +376,7 @@ void writeCGNS(Output& o, std::string path) // cgp_open('chefOut.cgns', CG_MODE_WRITE, &F); //static std::string buildCGNSFileName(std::string timestep_or_dat) // path += buildCGNSFileName(timestep_or_dat); - static char *outfile = "chefOut.cgns"; + static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, A, Cx, Cy, Cz; cgsize_t sizes[3],*e, start, end, ncells; // ^^^^^^ need to be sure this is long since using PCU_Add_Long below even when not needed From c49e7a5e502f1b081489b0769d320c1690066a85 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 8 Aug 2023 09:20:57 -0600 Subject: [PATCH 20/68] style: Misc formatting, trailing spaces --- phasta/phCGNSgbc.cc | 51 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index f3e2048f3..3a2053ece 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -83,7 +83,7 @@ void gen_ncorp(Output& o ) // also get the global number of nodes o.numGlobalNodes=0; - for(i=0;i 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) did not return a valid file ID", filename); - + // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering gen_ncorp( o ); // o carries @@ -395,7 +394,7 @@ void writeCGNS(Output& o, std::string path) ncells=m->count(m->getDimension()); ncells=PCU_Add_Long(ncells); o.numGlobalVolumeElements = ncells; - + sizes[0]=o.numGlobalNodes; sizes[1]=ncells; sizes[2]=0; @@ -413,10 +412,10 @@ void writeCGNS(Output& o, std::string path) cgp_error_exit(); -// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. 
int num_nodes=m->count(0); //V2 - cgsize_t gnod; + cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; double* x = new double[o.iownnodes]; @@ -424,7 +423,7 @@ void writeCGNS(Output& o, std::string path) int icount=0; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; - if(gnod >= start && gnod <= end) { // coordinate to write + if(gnod >= start && gnod <= end) { // coordinate to write x[icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } @@ -441,12 +440,12 @@ if(0==1) { //V1 that KEJ wrote mothballed for V2 that mimics PETSc /* int icount=0; - cgsize_t gnod; + cgsize_t gnod; double* x = new double[o.iownnodes * 3]; for (int inode = 0; inode < num_nodes; ++inode){ gnod=o.arrays.ncorp[inode]; - if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write - for (int j = 0; j < 3; ++j) + if(gnod >= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write + for (int j = 0; j < 3; ++j) x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; icount++; } @@ -454,7 +453,7 @@ if(0==1) { */ writeBlocksCGNS(F,B,Z, o); - if(cgp_close(F)) cgp_error_exit(); + if(cgp_close(F)) cgp_error_exit(); // if (!PCU_Comm_Self()) // lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } From e54436f1ae7a18b73966eed43869d5e7c6ec5524 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 9 Aug 2023 21:47:50 -0600 Subject: [PATCH 21/68] messy debug code that I save to potentially help debug later but next commit will clean up --- phasta/phCGNSgbc.cc | 149 ++++++++++++++++++++++++++++++++++++++++++-- phasta/phCook.cc | 3 +- phasta/phOutput.h | 1 + 3 files changed, 148 insertions(+), 5 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 3a2053ece..a4b2462dc 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -1,4 +1,5 @@ #include +#include "phInput.h" #include "phOutput.h" #include "phIO.h" #include "phiotimer.h" @@ -123,6 +124,97 @@ void gen_ncorp(Output& o ) } //char code[] = "out"; //int ione = 1; + int rank = PCU_Comm_Self() + 0; + for (int ipart=0; ipart 1) { +// translating a commuInt out from PHASTA to c + int numtask=o.arrays.ilwork[0]; + int itkbeg = 0; // 0-based arrays + int itag, iacc, iother, numseg, isgbeg; + MPI_Datatype sevsegtype[numtask]; +//first do what ctypes does for setup +//other stuff long int? + int maxseg=30; // set to 30,0000 for real problems + int isbegin[maxseg]; + int lenseg[maxseg]; + int ioffset[maxseg]; + MPI_Request req[numtask]; + MPI_Status stat[numtask]; + int maxfront=0; + int lfront; + for (int itask=0; itask 2) { for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -346,16 +447,23 @@ if(0==1){ /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - free(e); + printf("boundary cnn %d, %ld, %ld \n", rank, e_start, e_end); + for (int ne=0; ne 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) 
did not return a valid file ID", filename); // copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering +if(0==1) { + int igo=0; + double work=9.0e33; + while (igo==0) { + work=work*0.9999999999; + if(work<=1) igo=1; + } +} + int num_nodes=m->count(0); +// debug prints:w +// for (int ipart=0; ipart PETSc global node number (1-based) @@ -413,7 +555,6 @@ void writeCGNS(Output& o, std::string path) // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. - int num_nodes=m->count(0); //V2 cgsize_t gnod; start=o.local_start_id; @@ -451,7 +592,7 @@ if(0==1) { } } */ - + if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); // if (!PCU_Comm_Self()) diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 5b67b8405..0c8b5ed6b 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -224,7 +224,8 @@ namespace ph { out.openfile_write = fn; } ph::writeGeomBC(out, subDirPath); //write geombc - if ( in.writeCGNSFiles ) + out.writeCGNSFiles=in.writeCGNSFiles; + if ( in.writeCGNSFiles > 0 ) ph::writeCGNS(out, subDirPath); //write CGNS if(!PCU_Comm_Self()) ph::writeAuxiliaryFiles(path, in.timeStepNumber); diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 1507784cc..d31d7182f 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -170,6 +170,7 @@ struct Output cgsize_t numGlobalNodes; cgsize_t numGlobalVolumeElements; #endif + int writeCGNSFiles; int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ From 5457288853b843584463504c4e864aa06e13ce04 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 9 Aug 2023 22:47:02 -0600 Subject: [PATCH 22/68] cleaned up but failing AllHex 128 element case. --- phasta/phCGNSgbc.cc | 163 +++++--------------------------------------- 1 file changed, 17 insertions(+), 146 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index a4b2462dc..534e4f1c5 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -1,5 +1,4 @@ #include -#include "phInput.h" #include "phOutput.h" #include "phIO.h" #include "phiotimer.h" @@ -124,46 +123,23 @@ void gen_ncorp(Output& o ) } //char code[] = "out"; //int ione = 1; - int rank = PCU_Comm_Self() + 0; - for (int ipart=0; ipart 1) { // translating a commuInt out from PHASTA to c int numtask=o.arrays.ilwork[0]; - int itkbeg = 0; // 0-based arrays - int itag, iacc, iother, numseg, isgbeg; + int itkbeg=0; + int maxseg=1; + int numseg; + for (int itask=0; itask 2) { @@ -447,18 +396,9 @@ if(0==1){ /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - printf("boundary cnn %d, %ld, %ld \n", rank, e_start, e_end); - for (int ne=0; ne 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "cg_open(\"%s\",...) 
did not return a valid file ID", filename); - -// copied gen_ncorp from PHASTA to help map on-rank numbering to CGNS/PETSC friendly global numbering -if(0==1) { - int igo=0; - double work=9.0e33; - while (igo==0) { - work=work*0.9999999999; - if(work<=1) igo=1; - } -} int num_nodes=m->count(0); -// debug prints:w -// for (int ipart=0; ipart PETSc global node number (1-based) @@ -553,9 +447,7 @@ if(0==1) { cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) cgp_error_exit(); - // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. -//V2 cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; @@ -569,33 +461,12 @@ if(0==1) { icount++; } } -if(0==1) { - printf("%ld, %ld \n", start, end); - for (int ne=0; ne= o.local_start_id && gnod <= o.local_start_id + o.iownnodes -1) { // coordinate to write - for (int j = 0; j < 3; ++j) - x[j*o.iownnodes+icount]= o.arrays.coordinates[j*num_nodes+inode]; - icount++; - } - } -*/ if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); -// if (!PCU_Comm_Self()) -// lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); -} } +} // namespace From d724b5477b3f93ef4e60d0e4f2c4b73fb0bc7a04 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 9 Aug 2023 23:01:01 -0600 Subject: [PATCH 23/68] AllHex working for 2 and 4 processes --- phasta/phCGNSgbc.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 534e4f1c5..e6f65a2ee 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -147,6 +147,7 @@ void gen_ncorp(Output& o ) MPI_Status stat[numtask]; int maxfront=0; int lfront; + itkbeg=0; for (int itask=0; itask Date: Wed, 9 Aug 2023 23:10:02 -0600 Subject: [PATCH 24/68] spurious paste of a free command from who knows where --- phasta/phCGNSgbc.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index e6f65a2ee..c5ee015b0 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -347,7 +347,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); break; case 5: - free(e); if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; From 08cf07666fd3f03d6ccbac3999ab51957673723c Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Thu, 10 Aug 2023 09:07:09 -0600 Subject: [PATCH 25/68] now writing the rank of writer as a cell centered value as a test field --- phasta/phCGNSgbc.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index c5ee015b0..1a7d4975d 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -326,10 +326,11 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) void writeBlocksCGNS(int F,int B,int Z, Output& o) { int params[MAX_PARAMS]; - int E; + int E,S,Fs; cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; cgsize_t e_written=0; + int rank = PCU_Comm_Self() ; for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -366,8 +367,23 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); + /* create a centered solution */ + if (cg_sol_write(F, B, Z, "RankCellOwner", CG_CellCenter, &S) || + cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) + cgp_error_exit(); + /* create the field data for this process */ + int* d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = rank; + /* write the solution field data in parallel */ +// from example if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, d)) + if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + cgp_error_exit(); + + e_written=e_endg; // update count of elements written free(e); + free(d); } if(o.writeCGNSFiles > 2) { for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { @@ -399,6 +415,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) free(e); int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); getNaturalBCCodesCGNS(o, i, srfID); + // I am not sure if you want to put the code here to generate the face BC "node" but srfID has // a number from 1 to 6 for the same numbered surfaces as we use in the box From ed65be9ea4d6764219493d6fa5512d4dd8b69afe Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 10 Aug 2023 08:48:05 -0600 Subject: [PATCH 26/68] chef: Write ZoneBC/GridLocations for serial box meshes --- phasta/phCGNSgbc.cc | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1a7d4975d..e16889cd9 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -2,6 +2,7 @@ #include "phOutput.h" #include "phIO.h" #include "phiotimer.h" +#include #include #include #include @@ -415,10 +416,42 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) free(e); int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); getNaturalBCCodesCGNS(o, i, srfID); + printf("%ld, %ld \n", e_start+1, e_end); + + int num_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); + if (num_ranks > 1) { + printf("Boundary conditions cannot be written in parallel right now\n"); + } else { + // waaay too large, but works as proof of concept + cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); + cgsize_t bc_elems_count[6] = {0}; + for (int ne=0; ne Date: Thu, 10 Aug 2023 19:10:43 -0600 Subject: [PATCH 27/68] partition of writer helper arrays added to userData. Also added srfID same shape and numbered as boundary elements as a back door to parallel BC data. 
Also MPI data types are pulled from a function to inherit cgsize compile time flag. --- apf/apfCGNS.cc | 4 +- mds/mdsCGNS.cc | 2 +- phasta/phCGNSgbc.cc | 107 ++++++++++++++++++++++++++++++++++++-------- phasta/phOutput.cc | 6 --- phasta/phOutput.h | 2 +- 5 files changed, 94 insertions(+), 27 deletions(-) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index 93a85d12d..da3b2b419 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -1023,7 +1023,9 @@ void Write2DEdges(CGNS cgns, apf::Mesh *m, const Count &edgeCount, const Count & // Todo split this out into a list of calls to local functions to show process/work flow void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap) { -// static_assert(std::is_same::value, "cgsize_t not compiled as int"); + + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); + const auto myRank = PCU_Comm_Self(); const Count vertexCount = count(m, 0); diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index 2a591fd07..cf0230f3d 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -1051,7 +1051,7 @@ void ReadBCInfo(const int cgid, const int base, const int zone, const int nBocos apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCMap, const std::vector> &readMeshData) { -// static_assert(std::is_same::value, "cgsize_t not compiled as int"); + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); int cgid = -1; auto comm = PCU_Get_Comm(); diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1a7d4975d..5468b4439 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -17,8 +17,29 @@ typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER -namespace ph { +namespace { + +template +MPI_Datatype getMpiType(T) { + MPI_Datatype mpitype; + //determine the type based on what is being sent + if( std::is_same::value ) { + mpitype = MPI_DOUBLE; + } else if ( std::is_same::value ) { + mpitype = MPI_INT64_T; + } else if ( std::is_same::value ) { + mpitype = MPI_INT32_T; + } else { + assert(false); + fprintf(stderr, "Unknown type in %s... 
exiting\n", __func__); + exit(EXIT_FAILURE); + } + return mpitype; +} + +} +namespace ph { static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); @@ -160,7 +181,8 @@ void gen_ncorp(Output& o ) } maxfront=std::max(maxfront,lfront); for ( int iseg=0; iseg 2) { + cgsize_t eVolElm=e_written; for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -394,7 +433,8 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getBoundaryConnectivityCGNS(o, i, e); e_startg=1+e_written; // start for the elements of this topology - e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology + cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology + e_endg=e_written + numBelTP; // end for the elements of this topology switch(nvert){ case 3: if (cgp_section_write(F, B, Z, "Tri", CG_TETRA_4, e_startg, e_endg, 0, &E)) @@ -406,32 +446,50 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) break; } e_start=0; - MPI_Exscan(&e_owned, &e_start, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+e_written; // my ranks global element start 1-based e_end=e_start+e_owned-1; // my ranks global element stop 1-based /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); free(e); - int* srfID = (int *)malloc(nvert * e_owned * sizeof(int)); + int* srfID = (int *)malloc( e_owned * sizeof(int)); + int* nBelVec = (int *)malloc( 1 * sizeof(int)); getNaturalBCCodesCGNS(o, i, srfID); - -// I am not sure if you want to put the code here to generate the face BC "node" but srfID has -// a number from 1 to 6 for the same numbered surfaces as we use in the box - + printf("%ld ", numBelTP); + /* create a centered solution on boundary faces ONLY for srfID */ + if ( cg_goto(F, B, "Zone_t", 1, NULL) || +//done above cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || + cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) + cgp_error_exit(); + /* create the field data for this process */ + e_start-=eVolElm; + e_end-=eVolElm; + nBelVec[0]=e_owned; + printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, nBelVec[0],rank,Fsb,Fsb2); +// for (int ibel=0; ibelcount(0); @@ -482,6 +540,19 @@ void writeCGNS(Output& o, std::string path) if(j==1) if(cgp_coord_write_data(F, B, Z, Cy, &start, &end, x)) cgp_error_exit(); if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } + /* create Helper array for number of elements on rank */ + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts, &Fs2)) + cgp_error_exit(); + /* create the field data for this process */ + int* nCoordVec = (int *)malloc( 1 * sizeof(int)); + nCoordVec[0]=o.iownnodes; + rank+=1; + printf("Coor %d, %d, %d, \n", nCoordVec[0],rank,Fs2); + if ( cgp_array_write_data(Fs2, &rank, &rank, nCoordVec)) + cgp_error_exit(); if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); diff --git a/phasta/phOutput.cc 
b/phasta/phOutput.cc index 648a928e4..d4b71028b 100644 --- a/phasta/phOutput.cc +++ b/phasta/phOutput.cc @@ -21,12 +21,6 @@ #include #include #include -#ifdef HAVE_CGNS -// -#include -#include -// -#endif namespace ph { diff --git a/phasta/phOutput.h b/phasta/phOutput.h index d31d7182f..ad417505a 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -169,9 +169,9 @@ struct Output cgsize_t local_start_id; /* this rank's first global node number (1 based) */ cgsize_t numGlobalNodes; cgsize_t numGlobalVolumeElements; + int iownnodes; /* how many node this rank owns */ #endif int writeCGNSFiles; - int iownnodes; /* how many node this rank owns */ int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ int nlworkl; /* size of arrays.ilworkl */ From 51ee9ea9d403cc4a7d565d7824aa1bba8e1bcc5f Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 10 Aug 2023 21:51:00 -0600 Subject: [PATCH 28/68] fix: Correct boco index offset - Previously forgot to include the offset due to volume element indexing --- phasta/phCGNSgbc.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 222f3656c..2051addc9 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -475,7 +475,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID) || cgp_array_write_data(Fsb2, &rank, &rank, nBelVec)) cgp_error_exit(); - } printf("%ld, %ld \n", e_start+1, e_end); int num_ranks; @@ -486,9 +485,9 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) // waaay too large, but works as proof of concept cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); cgsize_t bc_elems_count[6] = {0}; - for (int ne=0; ne Date: Thu, 10 Aug 2023 21:52:08 -0600 Subject: [PATCH 29/68] style: Fix indent of for loop, remove commented code --- phasta/phCGNSgbc.cc | 161 +++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 84 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 2051addc9..5d6d699b6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -425,94 +425,87 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); } if(o.writeCGNSFiles > 2) { - cgsize_t eVolElm=e_written; - for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { - BlockKey& k = o.blocks.boundary.keys[i]; - params[0] = o.blocks.boundary.nElements[i]; - e_owned = params[0]; - int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; - cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getBoundaryConnectivityCGNS(o, i, e); - e_startg=1+e_written; // start for the elements of this topology - cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology - e_endg=e_written + numBelTP; // end for the elements of this topology - switch(nvert){ - case 3: - if (cgp_section_write(F, B, Z, "Tri", CG_TETRA_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - case 4: - if (cgp_section_write(F, B, Z, "Quad", CG_QUAD_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - } - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my ranks global element start 1-based - e_end=e_start+e_owned-1; // my ranks global element stop 1-based - /* write the element connectivity in parallel */ - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) - cgp_error_exit(); - free(e); - int* srfID = (int 
*)malloc( e_owned * sizeof(int)); - int* nBelVec = (int *)malloc( 1 * sizeof(int)); - getNaturalBCCodesCGNS(o, i, srfID); - printf("%ld ", numBelTP); - /* create a centered solution on boundary faces ONLY for srfID */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || -//done above cg_user_data_write("User Data") || - cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || - cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) - cgp_error_exit(); - /* create the field data for this process */ - e_start-=eVolElm; - e_end-=eVolElm; - nBelVec[0]=e_owned; - printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, nBelVec[0],rank,Fsb,Fsb2); -// for (int ibel=0; ibel 1) { - printf("Boundary conditions cannot be written in parallel right now\n"); - } else { - // waaay too large, but works as proof of concept - cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); - cgsize_t bc_elems_count[6] = {0}; - for (int elem_id=0; elem_id 1) { + printf("Boundary conditions cannot be written in parallel right now\n"); + } else { + // waaay too large, but works as proof of concept + cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); + cgsize_t bc_elems_count[6] = {0}; + for (int elem_id=0; elem_id Date: Fri, 11 Aug 2023 07:59:17 -0600 Subject: [PATCH 30/68] small cleanup --- phasta/phCGNSgbc.cc | 6 +----- pumi-meshes | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 5d6d699b6..15e87c47e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -412,7 +412,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* create Helper array for number of elements on rank */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || -//done for coords cg_user_data_write("User Data") || cg_gorel(F, "User Data", 0, NULL) || cgp_array_write("nIelOnRank", CG_Integer, 1, &num_parts, &Fs2)) cgp_error_exit(); @@ -461,7 +460,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) printf("%ld ", numBelTP); /* create a centered solution on boundary faces ONLY for srfID */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || - //done above cg_user_data_write("User Data") || cg_gorel(F, "User Data", 0, NULL) || cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) @@ -477,9 +475,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgp_error_exit(); printf("%ld, %ld \n", e_start+1, e_end); - int num_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); - if (num_ranks > 1) { + if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { // waaay too large, but works as proof of concept diff --git a/pumi-meshes b/pumi-meshes index 0cd77590d..4d07746d7 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit 0cd77590d748b9cb5e190ecd4a33126d9823bdbb +Subproject commit 4d07746d7e10bbc5a7da992ef2e0a18dd1be55be From fdefc1469558c78babeca4ac5abdc2eb38c784b6 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Fri, 11 Aug 2023 08:54:54 -0600 Subject: [PATCH 31/68] srfID moved out of boundary element topology loop and dimensioned to the size of the total on-rank boundary elements both for writing and for post processing into ZonalBCs --- phasta/phCGNSgbc.cc | 88 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 15e87c47e..4bad94016 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -425,6 +425,11 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) } if(o.writeCGNSFiles > 2) { cgsize_t eVolElm=e_written; + cgsize_t eBelWritten=0; + cgsize_t totOnRankBel=0; + for (int i = 0; i < o.blocks.boundary.getSize(); ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -454,52 +459,49 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); free(e); - int* srfID = (int *)malloc( e_owned * sizeof(int)); - int* nBelVec = (int *)malloc( 1 * sizeof(int)); - getNaturalBCCodesCGNS(o, i, srfID); - printf("%ld ", numBelTP); - /* create a centered solution on boundary faces ONLY for srfID */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || - cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,&numBelTP, &Fsb) || - cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) - cgp_error_exit(); - /* create the field data for this process */ - e_start-=eVolElm; - e_end-=eVolElm; - nBelVec[0]=e_owned; - printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, nBelVec[0],rank,Fsb,Fsb2); - // for (int ibel=0; ibel 1) { - printf("Boundary conditions cannot be written in parallel right now\n"); - } else { - // waaay too large, but works as proof of concept - cgsize_t (*bc_elems)[e_owned] = (cgsize_t (*)[e_owned])calloc(6 * e_owned, sizeof(cgsize_t)); - cgsize_t bc_elems_count[6] = {0}; - for (int elem_id=0; elem_id 1) { + printf("Boundary conditions cannot be written in parallel right now\n"); + } else { + // waaay too large, but works as proof of concept + cgsize_t (*bc_elems)[totOnRankBel] = (cgsize_t (*)[totOnRankBel])calloc(6 * totOnRankBel, sizeof(cgsize_t)); + cgsize_t bc_elems_count[6] = {0}; + for (int elem_id=0; elem_id Date: Fri, 11 Aug 2023 09:22:30 -0600 Subject: [PATCH 32/68] not so ugly printf statements restored to find issue with tri-face bels. 
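The element-range bookkeeping these boundary-element patches keep adjusting follows one recipe throughout the series: MPI_Exscan over the on-rank element count gives each rank a 0-based offset among its peers, and adding the number of elements already written for earlier topologies, plus one, produces the 1-based inclusive range that cgp_elements_write_data expects. A standalone sketch of that arithmetic (the helper name and signature are illustrative, not from the patches; the MPI_Datatype must match cgsize_t, which is what getMpiType() arranges above):

    #include <mpi.h>
    #include <pcgnslib.h>

    // Compute this rank's 1-based, inclusive element range for the current
    // topology: e_owned elements live on this rank, and e_written elements of
    // earlier topologies are already in the file.
    static void elementRange(cgsize_t e_owned, cgsize_t e_written, MPI_Datatype t,
                             cgsize_t* e_start, cgsize_t* e_end)
    {
      cgsize_t offset = 0; // pre-zeroed: MPI_Exscan gives rank 0 no defined value
      MPI_Exscan(&e_owned, &offset, 1, t, MPI_SUM, MPI_COMM_WORLD);
      *e_start = e_written + offset + 1; // first element this rank writes
      *e_end   = *e_start + e_owned - 1; // last element this rank writes
    }

This is the same e_start/e_end computation the surrounding hunks repeat per topology block; the pre-zeroing matters because the MPI standard leaves the MPI_Exscan receive buffer on rank 0 undefined.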
--- phasta/phCGNSgbc.cc | 66 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 4bad94016..20b41d4de 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -206,7 +206,16 @@ void gen_ncorp(Output& o ) } MPI_Waitall(m, req, stat); } - +if(1==1) { + for (int ipart=0; ipart 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { @@ -521,7 +546,37 @@ void writeCGNS(Output& o, std::string path) cgsize_t sizes[3],*e, start, end, ncells; int num_nodes=m->count(0); - +// debug prints +if(0==1){ + for (int ipart=0; ipart Date: Fri, 11 Aug 2023 10:39:10 -0600 Subject: [PATCH 33/68] cut and paste error was the tri-face problem...fixed --- phasta/phCGNSgbc.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 20b41d4de..80078606b 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -458,7 +458,7 @@ if(1==1){ e_endg=e_written + numBelTP; // end for the elements of this topology switch(nvert){ case 3: - if (cgp_section_write(F, B, Z, "Tri", CG_TETRA_4, e_startg, e_endg, 0, &E)) + if (cgp_section_write(F, B, Z, "Tri", CG_TRI_3, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 4: From 1fdd6ed112d2c7b7f6090c3fc70128676ee6bfef Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 11 Aug 2023 13:35:53 -0600 Subject: [PATCH 34/68] Wedges working. Tets now positive volume. All boundary element have inward normals from crossing first two edges of the face directed along numbering. All but pyramids have been checked. --- phasta/phCGNSgbc.cc | 49 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 80078606b..83f1c5272 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -302,9 +302,18 @@ void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) int nelem = o.blocks.interior.nElements[block]; int nvert = o.blocks.interior.keys[block].nElementVertices; size_t i = 0; - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // input is 0-based, out is 1-based do drop the +1 + if(nvert==4) { //prepped for PHASTA's negative volume tets so flip second and third vertex + for (int elem = 0; elem < nelem; ++elem){ + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][0]]; + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][2]]; + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][1]]; + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][3]]; + } + } else { + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ien[block][elem][vert]]; // input is 0-based, out is 1-based do drop the +1 + } PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -313,11 +322,30 @@ void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c) { int nelem = o.blocks.boundary.nElements[block]; // CGNS wants surface elements int nvert = o.blocks.boundary.keys[block].nElementVertices; + int nvertVol = o.blocks.boundary.keys[block].nElementVertices; int nvert = o.blocks.boundary.keys[block].nBoundaryFaceEdges; size_t i = 0; - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]]; +// int* lnode[4]; + std::vector lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) + // 
PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. We need it here too
+  // see hierarchic.f but note that is fortran numbering
+  if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume
+// if(nvertVol==5 && nvert==4) lnode={0, 1, 2, 3}; // pyramid quad is first 4
+  if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2
+  if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2
+// if(nvertVol==6 && nvert==3) lnode={0, 1, 2, -1}; // wedge tri first three
+// if(nvertVol==8) lnode={0, 1, 2, 3}; // hex first 4
+/* if(nvertVol==4) { //see interior above
+    for (int elem = 0; elem < nelem; ++elem){
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][0]];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][2]];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][1]];
+    }
+  } else { */
   for (int elem = 0; elem < nelem; ++elem)
     for (int vert = 0; vert < nvert; ++vert)
-      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][vert]];
+      c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]];
+// }
   PCU_ALWAYS_ASSERT(i == nelem*nvert);
 }
@@ -441,7 +469,7 @@ if(1==1){
 }
 if(o.writeCGNSFiles > 2) {
   cgsize_t eVolElm=e_written;
-  cgsize_t eBelWritten=0;
+  cgsize_t e_belWritten=0;
   cgsize_t totOnRankBel=0;
   for (int i = 0; i < o.blocks.boundary.getSize(); ++i)
     totOnRankBel += o.blocks.boundary.nElements[i];
@@ -483,8 +511,9 @@ if(1==1){
   }
 }
     free(e);
-    getNaturalBCCodesCGNS(o, i, &srfID[eBelWritten]);
-    eBelWritten+=e_owned;
+    getNaturalBCCodesCGNS(o, i, &srfID[e_belWritten]);
+    e_written+=e_owned;
+    e_belWritten+=e_owned;
   }
   printf("%ld ", totOnRankBel);
@@ -496,7 +525,7 @@ if(1==1){
     cgp_error_exit();
   /* write the user data for this process */
   e_start=1;
-  e_end=eBelWritten; // user data is ranged differently than field data
+  e_end=e_belWritten; // user data is ranged differently than field data
   printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, rank,Fsb,Fsb2);
   if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID) ||
       cgp_array_write_data(Fsb2, &rank, &rank, &e_end))

From cccf8b3a199359c37a771134740c29ef26ebb1d0 Mon Sep 17 00:00:00 2001
From: "Kenneth E. Jansen"
Date: Fri, 11 Aug 2023 16:04:09 -0600
Subject: [PATCH 35/68] mixed wedge and tet volume element meshes pass a sniff test in cgnsview. That required retooling/rearranging some of the CGNS write stuff and also required handling multiple boundary element meshes for tris (tris from wedges and tris from tets). We could consider merging these lists but for now they get enumerated to distinguish between them.
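A caution on the name buffers PATCH 35 introduces below: the size argument given to snprintf must not exceed the destination's actual size. The hunks below pair char UserDataName[12] with snprintf(UserDataName, 13, "n%sOnRank", Ename), and since "nQuad1OnRank" is exactly 12 characters, the terminating NUL lands one byte past the end of the array. A minimal sketch of the safer idiom (the 16-byte buffer is an illustrative choice, not from the patch):

    #include <cstdio>

    // Build the per-topology helper-array name, e.g. "nQuad1OnRank", which is
    // 12 characters plus the NUL; bounding the write by sizeof keeps snprintf
    // inside the buffer no matter what the topology name expands to.
    static void helperArrayName(const char* Ename, char (&out)[16])
    {
      snprintf(out, sizeof(out), "n%sOnRank", Ename);
    }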
--- phasta/phCGNSgbc.cc | 107 +++++++++++++++++++++++++++++--------------- pumi-meshes | 2 +- 2 files changed, 72 insertions(+), 37 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 83f1c5272..ac4dff54e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -392,6 +392,10 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) const int nparts = PCU_Comm_Peers(); cgsize_t num_parts=nparts; cgsize_t rank = PCU_Comm_Self() ; + /* create a centered solution */ + if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || + cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) + cgp_error_exit(); for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -403,21 +407,27 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* create data node for elements */ e_startg=1+e_written; // start for the elements of this topology e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology +// char Ename[33]; + char Ename[5]; switch(nvert){ case 4: - if (cgp_section_write(F, B, Z, "Tet", CG_TETRA_4, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Tet"); + if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 5: - if (cgp_section_write(F, B, Z, "Pyr", CG_PYRA_5, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Pyr"); + if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 6: - if (cgp_section_write(F, B, Z, "Wdg", CG_PENTA_6, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 8: - if (cgp_section_write(F, B, Z, "Hex", CG_HEXA_8, e_startg, e_endg, 0, &E)) + snprintf(Ename, 4, "Hex"); + if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; } @@ -429,17 +439,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - /* create a centered solution */ - if (cg_sol_write(F, B, Z, "RankCellOwner", CG_CellCenter, &S) || - cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) - cgp_error_exit(); - /* create the field data for this process */ - int* d = (int *)malloc(e_owned * sizeof(int)); - for (int n = 0; n < e_owned; n++) - d[n] = rank; - /* write the solution field data in parallel */ - if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) - cgp_error_exit(); + e_written=e_endg; // update count of elements written if(1==1){ printf("interior cnn %d, %ld, %ld \n", rank, e_start, e_end); @@ -449,28 +449,47 @@ if(1==1){ printf("\n"); } } - - e_written=e_endg; // update count of elements written free(e); + +// /* create the field data for this process */ + int* d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = rank; +// /* write the solution field data in parallel */ + if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + cgp_error_exit(); free(d); - /* create Helper array for number of elements on rank */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || + +// char UserDataName[33]; +// snprintf(UserDataName, 33, "n%sOnRank", Ename); + char UserDataName[11]; + snprintf(UserDataName, 11, "n%sOnRank", Ename); + /* create Helper array for number of elements on rank of a given topology */ + if ( cg_goto(F, B, 
"Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("nIelOnRank", CG_Integer, 1, &num_parts, &Fs2)) +// cgp_array_write("nIelOnRank", CG_Integer, 1, &num_parts, &Fs2)) + cgp_array_write(UserDataName, CG_Integer, 1, &num_parts, &Fs2)) cgp_error_exit(); /* create the field data for this process */ - int* nIelVec = (int *)malloc( 1 * sizeof(int)); - nIelVec[0]=e_owned; - rank+=1; - printf("Intr %d, %d, %d, %d \n", nIelVec[0],rank,Fs,Fs2); - if ( cgp_array_write_data(Fs2, &rank, &rank, nIelVec)) +// int* nIelVec = (int *)malloc( 1 * sizeof(int)); +// nIelVec[0]=e_owned; + int nIelVec=e_owned; + cgsize_t rankP1=rank+1; + printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,rank,Fs,Fs2); + if ( cgp_array_write_data(Fs2, &rankP1, &rankP1, &nIelVec)) cgp_error_exit(); - } + } // end of loop over blocks + + + + if(o.writeCGNSFiles > 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; cgsize_t totOnRankBel=0; + int triCount=0; + int quadCount=0; for (int i = 0; i < o.blocks.boundary.getSize(); ++i) totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); @@ -484,13 +503,19 @@ if(1==1){ e_startg=1+e_written; // start for the elements of this topology cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology e_endg=e_written + numBelTP; // end for the elements of this topology + if(nvert==3) triCount++; + if(nvert==4) quadCount++; + char Ename[7]; + switch(nvert){ case 3: - if (cgp_section_write(F, B, Z, "Tri", CG_TRI_3, e_startg, e_endg, 0, &E)) + snprintf(Ename, 5, "Tri%d",triCount); + if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; case 4: - if (cgp_section_write(F, B, Z, "Quad", CG_QUAD_4, e_startg, e_endg, 0, &E)) + snprintf(Ename, 6, "Quad%d",quadCount); + if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &E)) cgp_error_exit(); break; } @@ -514,24 +539,34 @@ if(1==1){ getNaturalBCCodesCGNS(o, i, &srfID[e_belWritten]); e_written+=e_owned; e_belWritten+=e_owned; - } + char UserDataName[12]; + snprintf(UserDataName, 13, "n%sOnRank", Ename); + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write(UserDataName, CG_Integer, 1, &num_parts, &Fsb2)) + cgp_error_exit(); + printf("Bndy %s, %ld, %ld %d, %d, %d, %d \n", UserDataName, e_start, e_end, rank,Fsb,Fsb2); + cgsize_t rankP1=rank+1; + if (cgp_array_write_data(Fsb2, &rankP1, &rankP1, &e_end)) + cgp_error_exit(); + + } +// srfID is for ALL Boundary faces printf("%ld ", totOnRankBel); - /* setup User Data for boundary faces */ + /* setup User Data for boundary faces */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,&totOnRankBel, &Fsb) || - cgp_array_write("nBelOnRank", CG_Integer, 1, &num_parts, &Fsb2)) + cgp_array_write("srfID", CG_Integer, 1,&totOnRankBel, &Fsb)) cgp_error_exit(); /* write the user data for this process */ e_start=1; e_end=e_belWritten; // user data is ranged differently than field data - printf("Bndy %ld, %ld %d, %d, %d, %d \n", e_start, e_end, rank,Fsb,Fsb2); - if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID) || - cgp_array_write_data(Fsb2, &rank, &rank, &e_end)) + printf("Bndy %s, %ld, %ld %d, %d, %d, %d \n", "srfID", e_start, e_end, rank,Fsb,Fsb2); + cgsize_t rankP1=rank+1; + if (cgp_array_write_data(Fsb, &e_start, &e_end, srfID)) cgp_error_exit(); - if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel 
right now\n"); } else { diff --git a/pumi-meshes b/pumi-meshes index 4d07746d7..9dd816fea 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit 4d07746d7e10bbc5a7da992ef2e0a18dd1be55be +Subproject commit 9dd816fea029a235619d6b70cfc9a3c2506cf9f6 From a6fc0371edc5c893ae212111dbc75008167b4aa8 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 09:47:35 -0600 Subject: [PATCH 36/68] fixed bug in srfID write for parallel with multiple topologies --- phasta/phCGNSgbc.cc | 269 ++++++++++++++++++++++---------------------- phasta/phOutput.h | 2 - pumi-meshes | 2 +- 3 files changed, 137 insertions(+), 136 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index ac4dff54e..b3fc826e1 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -49,22 +49,20 @@ static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_ void gen_ncorp(Output& o ) { apf::Mesh* m = o.mesh; - int part; - int num_parts; int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = new cgsize_t[num_nodes]; + o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; cgsize_t local_start_id; cgsize_t gid; - MPI_Comm_rank(MPI_COMM_WORLD, &part); - MPI_Comm_size(MPI_COMM_WORLD, &num_parts); + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; - memset(o.arrays.ncorp, 0, sizeof(cgsize_t)*(num_nodes)); + for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); o.iownnodes = owned+local; @@ -76,7 +74,7 @@ void gen_ncorp(Output& o ) assert( owned+local <= num_nodes ); owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); - memset(owner_counts, 0, sizeof(lcorp_t)*num_parts); + for(int i=0; i < num_parts; i++) owner_counts[i]=0; owner_counts[part] = owned+local; #ifdef PRINT_EVERYTHING for(i=0;i 1) { // translating a commuInt out from PHASTA to c @@ -162,11 +153,19 @@ void gen_ncorp(Output& o ) int itag, iacc, iother, isgbeg; MPI_Datatype sevsegtype[numtask]; //first do what ctypes does for setup - int isbegin[maxseg]; - int lenseg[maxseg]; - int ioffset[maxseg]; - MPI_Request req[numtask]; - MPI_Status stat[numtask]; + int* isbegin; + int* lenseg; + int* ioffset; + isbegin = (int*) malloc(sizeof(int) * maxseg); + lenseg = (int*) malloc(sizeof(int) * maxseg); + ioffset = (int*) malloc(sizeof(int) * maxseg); +// no VLA MPI_Request req[numtask]; +// no VLA MPI_Status stat[numtask]; + + int maxtask=1000; + assert(maxtask>=numtask); + MPI_Request req[maxtask]; + MPI_Status stat[maxtask]; int maxfront=0; int lfront; itkbeg=0; @@ -187,6 +186,9 @@ void gen_ncorp(Output& o ) MPI_Type_commit (&sevsegtype[itask]); itkbeg+=4+2*numseg; } + free(isbegin); + free(lenseg); + free(ioffset); int m = 0; itkbeg=0; @@ -206,7 +208,7 @@ void gen_ncorp(Output& o ) } MPI_Waitall(m, req, stat); } -if(1==1) { +if(1==0) { for (int ipart=0; ipart lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) // PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. 
We need it here too // see hierarchic.f but note that is fortran numbering if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume -// if(nvertVol==5 && nvert==4) lnode={0, 1, 2, 3}; // pyramid quad is first 4 if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 -// if(nvertVol==6 && nvert==3) lnode={0, 1, 2, -1}; // wedge tri first three -// if(nvertVol==8) lnode={0, 1, 2, 3}; // hex first 4 -/* if(nvertVol==4) { //see interior above - for (int elem = 0; elem < nelem; ++elem){ - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][0]]; - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][2]]; - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][1]]; - } - } else { */ - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]]; -// } + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]]; PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -370,7 +358,7 @@ void getInterfaceConnectivityCGNS // not extended yet other than transpose PCU_ALWAYS_ASSERT(i == c.getSize()); } -// renamed but not updated yet +// renamed stripped down to just give srfID void getNaturalBCCodesCGNS(Output& o, int block, int* codes) { int nelem = o.blocks.boundary.nElements[block]; @@ -381,7 +369,7 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) // arbitrary combinations of BCs but leaving that out for now } -// renamed and calling the renamed functions above with output writes commented as they are PHASTA file style +// renamed and calling the renamed functions above with output writes now to CGNS void writeBlocksCGNS(int F,int B,int Z, Output& o) { int params[MAX_PARAMS]; @@ -389,10 +377,11 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; cgsize_t e_written=0; - const int nparts = PCU_Comm_Peers(); - cgsize_t num_parts=nparts; - cgsize_t rank = PCU_Comm_Self() ; - /* create a centered solution */ + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + /* create a centered solution */ if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); @@ -406,8 +395,8 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) getInteriorConnectivityCGNS(o, i, e); /* create data node for elements */ e_startg=1+e_written; // start for the elements of this topology - e_endg=e_written + PCU_Add_Long(e_owned); // end for the elements of this topology -// char Ename[33]; + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=e_written + PCU_Add_Long(safeArg); // end for the elements of this topology char Ename[5]; switch(nvert){ case 4: @@ -434,17 +423,17 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my ranks global element start 1-based - e_end=e_start+e_owned-1; // my ranks global element stop 1-based + e_start+=1+e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based /* write 
the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); e_written=e_endg; // update count of elements written -if(1==1){ - printf("interior cnn %d, %ld, %ld \n", rank, e_start, e_end); +if(1==0){ + printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); for (int ne=0; ne 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; cgsize_t totOnRankBel=0; int triCount=0; int quadCount=0; - for (int i = 0; i < o.blocks.boundary.getSize(); ++i) + int nblkb = o.blocks.boundary.getSize(); + for (int i = 0; i < nblkb; ++i) totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; params[0] = o.blocks.boundary.nElements[i]; @@ -501,7 +486,8 @@ if(1==1){ cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getBoundaryConnectivityCGNS(o, i, e); e_startg=1+e_written; // start for the elements of this topology - cgsize_t numBelTP = PCU_Add_Long(e_owned); // number of elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology e_endg=e_written + numBelTP; // end for the elements of this topology if(nvert==3) triCount++; if(nvert==4) quadCount++; @@ -522,51 +508,69 @@ if(1==1){ e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my ranks global element start 1-based - e_end=e_start+e_owned-1; // my ranks global element stop 1-based + e_start+=1+e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based /* write the element connectivity in parallel */ if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); -if(1==1){ - printf("boundary cnn %d, %ld, %ld \n", rank, e_start, e_end); + printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); +if(1==0){ for (int ne=0; ne 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { @@ -599,22 +603,21 @@ void writeCGNS(Output& o, std::string path) { double t0 = PCU_Time(); apf::Mesh* m = o.mesh; - cgsize_t rank = PCU_Comm_Self() + 0; - int nparts; - MPI_Comm_size(MPI_COMM_WORLD, &nparts); - cgsize_t num_parts=nparts; + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + const cgsize_t num_parts_cg=num_parts; std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; - cgsize_t sizes[3],*e, start, end, ncells; + cgsize_t sizes[3],*e, start, end; + + int num_nodes=m->count(0); - int num_nodes=m->count(0); -// debug prints -if(0==1){ +if(0==1){ // ilwork debugging for (int ipart=0; ipart PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) -// o.numGlobalNodes - ncells=m->count(m->getDimension()); - ncells=PCU_Add_Long(ncells); - o.numGlobalVolumeElements = ncells; - - sizes[0]=o.numGlobalNodes; - sizes[1]=ncells; - sizes[2]=0; - if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; - if ( cgp_open(outfile, CG_MODE_WRITE, &F) || - 
cg_base_write(F, "Base", 3, 3, &B) || - cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) - cgp_error_exit(); + + long safeArg=o.iownnodes; // cgsize_t could be an int + sizes[0]=PCU_Add_Long(safeArg); + int ncells=m->count(m->getDimension()); // this ranks number of elements + safeArg=ncells; // cgsize_t could be an int + sizes[1]=PCU_Add_Long(safeArg); + sizes[2]=0; + if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; + if ( cgp_open(outfile, CG_MODE_WRITE, &F) || + cg_base_write(F, "Base", 3, 3, &B) || + cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) + cgp_error_exit(); /* create data nodes for coordinates */ - cg_set_file_type(CG_FILE_HDF5); + cg_set_file_type(CG_FILE_HDF5); - if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) - cgp_error_exit(); + if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) + cgp_error_exit(); -// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. Seeing now PETSc CGNS writer did one coordinate at a time which is probably better....feel free to rewrite. +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. cgsize_t gnod; start=o.local_start_id; end=start+o.iownnodes-1; - double* x = new double[o.iownnodes]; + double* x = (double *)malloc(o.iownnodes * sizeof(double)); for (int j = 0; j < 3; ++j) { int icount=0; for (int inode = 0; inode < num_nodes; ++inode){ @@ -692,19 +695,19 @@ if(0==1) { if(j==1) if(cgp_coord_write_data(F, B, Z, Cy, &start, &end, x)) cgp_error_exit(); if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } - /* create Helper array for number of elements on rank */ - if ( cg_goto(F, B, "Zone_t", 1, NULL) || - cg_user_data_write("User Data") || - cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts, &Fs2)) - cgp_error_exit(); - /* create the field data for this process */ - int* nCoordVec = (int *)malloc( 1 * sizeof(int)); - nCoordVec[0]=o.iownnodes; - rank+=1; - printf("Coor %d, %d, %d, \n", nCoordVec[0],rank,Fs2); - if ( cgp_array_write_data(Fs2, &rank, &rank, nCoordVec)) - cgp_error_exit(); + free (x); + /* create Helper array for number of elements on rank */ + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts_cg, &Fs2)) + cgp_error_exit(); + /* create the field data for this process */ + int nCoordVec=o.iownnodes; + cgsize_t partP1=part+1; + printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) + cgp_error_exit(); if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); diff --git a/phasta/phOutput.h b/phasta/phOutput.h index ad417505a..0e3c85351 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -167,8 +167,6 @@ struct Output int nOverlapEdges; #ifdef HAVE_CGNS cgsize_t local_start_id; /* this rank's first global node number (1 based) */ - cgsize_t numGlobalNodes; - cgsize_t numGlobalVolumeElements; int iownnodes; /* how many node this rank owns */ #endif int writeCGNSFiles; diff --git a/pumi-meshes b/pumi-meshes index 9dd816fea..fecc2dae4 160000 --- 
a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit 9dd816fea029a235619d6b70cfc9a3c2506cf9f6 +Subproject commit fecc2dae4d3e5a288022fd10ddf78fa60ba05e86 From f68aa5f9e7fb283d632bb2e6df069349e9c0eb83 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 13:18:58 -0600 Subject: [PATCH 37/68] parallel BC write almost working --- phasta/phCGNSgbc.cc | 114 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index b3fc826e1..956aaae77 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -40,6 +40,38 @@ MPI_Datatype getMpiType(T) { } +// https://www.geeksforgeeks.org/sorting-array-according-another-array-using-pair-stl/ +// Sort an array according to +// other using pair in STL. +#include +using namespace std; + +// Function to sort character array b[] +// according to the order defined by a[] +void pairsort(int a[], int b[], int n) +{ + pair pairt[n]; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. + sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } +} + + namespace ph { static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); @@ -468,7 +500,8 @@ if(1==0){ if(o.writeCGNSFiles > 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; - cgsize_t totOnRankBel=0; +// cgsize_t totOnRankBel=0; + int totOnRankBel=0; int triCount=0; int quadCount=0; int nblkb = o.blocks.boundary.getSize(); @@ -544,7 +577,8 @@ if(1==0){ // long safeArg=totOnRankBel; // is cgsize_t which could be an 32 or 64 bit int // cgsize_t totBel = PCU_Add_Long(safeArg); // number of elements of this topology cgsize_t totBel = e_written-eVolElm; - printf("%ld %ld ", totOnRankBel,totBel); +// printf("%ld %ld ", totOnRankBel,totBel); + printf("%d %ld ", totOnRankBel,totBel); /* setup User Data for boundary faces */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || @@ -571,6 +605,82 @@ if(1==0){ e_written += PCU_Add_Long(safeArg); // number of elements of this topology } // ZonalBC data When made parallel be mindful of srfID being in segments on each rank....NOT globally ordered but srIDidx gives global idx in same order. 
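One note before the gather below reorders this data: the pair pairt[n] buffer in pairsort above is a variable-length array, a compiler extension rather than standard C++. A minimal sketch of the same sort with the template arguments spelled out and std::vector owning the storage; comparing only the key under std::stable_sort keeps equal-keyed entries in their incoming (rank-gathered) order, whereas the whole-pair comparison also orders the indices within a key:

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Sort b[] according to the keys in a[], without a VLA.
    void pairsort(int a[], int b[], int n)
    {
      std::vector<std::pair<int, int> > pairt(n);
      for (int i = 0; i < n; i++)
        pairt[i] = std::make_pair(a[i], b[i]);
      std::stable_sort(pairt.begin(), pairt.end(),
          [](const std::pair<int, int>& l,
             const std::pair<int, int>& r) { return l.first < r.first; });
      for (int i = 0; i < n; i++) {
        a[i] = pairt[i].first;   // keys, now grouped
        b[i] = pairt[i].second;  // values, reordered to match
      }
    }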
+ int* srfIDG = (int *)malloc( totBel * sizeof(int)); + int* srfIDGidx = (int *)malloc( totBel * sizeof(int)); + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + auto type_cg = getMpiType( cgsize_t() ); + auto type_i = getMpiType( int() ); + MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); + displs[0]=0; + if(part==0){ + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; +if(1==1){ + for(int ip=0; ip< num_parts; ++ip) printf("%ld ", rcounts[ip]); + printf("\n"); + for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); + printf("\n"); +} + } + MPI_Gatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,0,MPI_COMM_WORLD); + MPI_Gatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,0,MPI_COMM_WORLD); +if(1==1){ + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); + printf("\n"); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); + printf("\n"); + } + printf("rank %d ",part); + printf(" srfID on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfID[is]); + printf("\n"); + printf(" srfIDidx on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); + printf("\n"); +} + if(part==0) pairsort(srfIDG,srfIDGidx,totBel); +if(1==1){ + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); + printf("\n"); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); + printf("\n"); + } +} + if(part==0) { + int BC_scan=0; + cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); + for (int BCid = 1; BCid < 7; BCid++) { + int imatch=0; + while (srfIDG[BC_scan]==BCid) { + eBC[imatch]=srfIDGidx[BC_scan]; + BC_scan++; + imatch++; + } + int BC_index; + char BC_name[33]; + snprintf(BC_name, 33, "SurfID_%d", BCid + 1); + if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) + cg_error_exit(); + if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; + if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); + +if(1==1) { + printf(" srfID =%d ",BCid); + for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); + printf("\n"); +} + } + free(eBC); + } + + +//James Work if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { From 0b09db1aca41751afa654a5c66ad37a424fb1cd8 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sat, 12 Aug 2023 13:56:02 -0600 Subject: [PATCH 38/68] gather is not good enough for cg_boco_write but committing the failure because this is so wrong to have to do an all gather for rank0 to be able to write --- phasta/phCGNSgbc.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 956aaae77..51ab4fe82 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -652,32 +652,32 @@ if(1==1){ printf("\n"); } } - if(part==0) { int BC_scan=0; cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; + if(part==0) { while (srfIDG[BC_scan]==BCid) { eBC[imatch]=srfIDGidx[BC_scan]; BC_scan++; imatch++; } +if(1==1) { + printf(" srfID =%d ",BCid); + for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); + printf("\n"); +} + } else imatch=1; int BC_index; char BC_name[33]; - snprintf(BC_name, 33, "SurfID_%d", BCid + 1); + snprintf(BC_name, 33, "SurfID_%d", BCid ); if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) cg_error_exit(); if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); -if(1==1) { - printf(" srfID =%d ",BCid); - for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); - printf("\n"); -} } free(eBC); - } //James Work From aaa56c5fd1b0b340935fe0ee67fc8c97c9fa059b Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 14:11:14 -0600 Subject: [PATCH 39/68] Gather->Allgather produces correct result....but what other insane performance landmines are out there? --- phasta/phCGNSgbc.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 51ab4fe82..6570ec036 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -611,9 +611,10 @@ if(1==0){ int* displs = (int *)malloc( num_parts * sizeof(int)); auto type_cg = getMpiType( cgsize_t() ); auto type_i = getMpiType( int() ); - MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); +//FAIL MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); + MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; - if(part==0){ +// if(part==0){ for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; if(1==1){ for(int ip=0; ip< num_parts; ++ip) printf("%ld ", rcounts[ip]); @@ -621,9 +622,9 @@ if(1==1){ for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); printf("\n"); } - } - MPI_Gatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,0,MPI_COMM_WORLD); - MPI_Gatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,0,MPI_COMM_WORLD); +// } + MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); + MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); if(1==1){ if(part==0) { printf(" srfID GLOBAL "); @@ -641,7 +642,8 @@ if(1==1){ for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); printf("\n"); } - if(part==0) pairsort(srfIDG,srfIDGidx,totBel); +// if(part==0) pairsort(srfIDG,srfIDGidx,totBel); + pairsort(srfIDG,srfIDGidx,totBel); if(1==1){ if(part==0) { printf(" srfID GLOBAL "); @@ -656,7 +658,7 @@ if(1==1){ cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; - if(part==0) { +// if(part==0) { while 
(srfIDG[BC_scan]==BCid) { eBC[imatch]=srfIDGidx[BC_scan]; BC_scan++; @@ -667,7 +669,7 @@ if(1==1) { for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); printf("\n"); } - } else imatch=1; +// } else imatch=1; int BC_index; char BC_name[33]; snprintf(BC_name, 33, "SurfID_%d", BCid ); From 2fd47b1d070162f664d33a46b4d0f3d21104658a Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 12 Aug 2023 19:52:37 -0600 Subject: [PATCH 40/68] replaced the STL sort which the web says is Nlog_2N in distance between the elements with a deal 6 player. Obviously this is highly specialized to our boxes while the STL will always work. --- phasta/phCGNSgbc.cc | 142 +++++++++++++++++++++++++------------------- pumi-meshes | 2 +- 2 files changed, 82 insertions(+), 62 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 6570ec036..94463e0a6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -46,11 +46,11 @@ MPI_Datatype getMpiType(T) { #include using namespace std; -// Function to sort character array b[] +// Function to sort integer array b[] // according to the order defined by a[] void pairsort(int a[], int b[], int n) { - pair pairt[n]; + pair pairt[n]; // Storing the respective array // elements in pairs. @@ -70,6 +70,33 @@ void pairsort(int a[], int b[], int n) b[i] = pairt[i].second; } } +void pairDeal6sort(int a[], int b[], int n) +{ + int c[6]={0}; + for (int i = 0; i < n; i++) c[a[i]-1]++; // count number each type in a pre-scan + int** p = new int*[6]; + for (int i = 0; i < 6; i++) p[i]=new int[c[i]]; + int** idx = new int*[6]; + for (int i = 0; i < 6; i++) idx[i]=new int[c[i]]; + for (int i = 0; i < 6; i++) c[i]=0; + int isrfM1; + for (int i = 0; i < n; i++) + { + isrfM1=a[i]-1; + p[isrfM1][c[isrfM1]]=b[i]; + idx[isrfM1][c[isrfM1]]=a[i]; + c[isrfM1]++; + } + int igc=0; + for (int j = 0; j < 6; j++){ + for (int i = 0; i < c[j]; i++) { + b[igc] = p[j][i]; + a[igc] = idx[j][i]; + igc++; + } + } + assert(igc==n); +} namespace ph { @@ -611,78 +638,71 @@ if(1==0){ int* displs = (int *)malloc( num_parts * sizeof(int)); auto type_cg = getMpiType( cgsize_t() ); auto type_i = getMpiType( int() ); -//FAIL MPI_Gather(&totOnRankBel,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; -// if(part==0){ - for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; if(1==1){ - for(int ip=0; ip< num_parts; ++ip) printf("%ld ", rcounts[ip]); - printf("\n"); - for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); - printf("\n"); + for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); + printf("\n"); } -// } - MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); - MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); + MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); + MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); if(1==1){ - if(part==0) { - printf(" srfID GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); - printf("\n"); - printf(" srfIDidx GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); - printf("\n"); - } - printf("rank %d ",part); - printf(" srfID on Part "); - for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfID[is]); + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; 
++is) printf("%d ", srfIDG[is]); + printf("\n"); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); + printf("\n"); + } + printf("rank %d ",part); + printf(" srfID on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfID[is]); + printf("\n"); + printf(" srfIDidx on Part "); + for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); + printf("\n"); +} +// pairsort(srfIDG,srfIDGidx,totBel); + pairDeal6sort(srfIDG,srfIDGidx,totBel); +if(1==1){ + if(part==0) { + printf(" srfID GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); printf("\n"); - printf(" srfIDidx on Part "); - for(int is=0; is< totOnRankBel; ++is) printf("%d ", srfIDidx[is]); + printf(" srfIDidx GLOBAL "); + for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); printf("\n"); + } } -// if(part==0) pairsort(srfIDG,srfIDGidx,totBel); - pairsort(srfIDG,srfIDGidx,totBel); -if(1==1){ - if(part==0) { - printf(" srfID GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); - printf("\n"); - printf(" srfIDidx GLOBAL "); - for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); - printf("\n"); + int BC_scan=0; + cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); + for (int BCid = 1; BCid < 7; BCid++) { + int imatch=0; + while (srfIDG[BC_scan]==BCid) { + eBC[imatch]=srfIDGidx[BC_scan]; + BC_scan++; + imatch++; } -} - int BC_scan=0; - cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); - for (int BCid = 1; BCid < 7; BCid++) { - int imatch=0; -// if(part==0) { - while (srfIDG[BC_scan]==BCid) { - eBC[imatch]=srfIDGidx[BC_scan]; - BC_scan++; - imatch++; - } if(1==1) { - printf(" srfID =%d ",BCid); - for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); - printf("\n"); + printf(" srfID =%d ",BCid); + for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); + printf("\n"); } -// } else imatch=1; - int BC_index; - char BC_name[33]; - snprintf(BC_name, 33, "SurfID_%d", BCid ); - if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) - cg_error_exit(); - if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; - if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); + int BC_index; + char BC_name[33]; + snprintf(BC_name, 33, "SurfID_%d", BCid ); + if(cg_boco_write(F, B, Z, BC_name, CGNS_ENUMV(BCTypeUserDefined), CGNS_ENUMV(PointList), imatch, eBC, &BC_index)) + cg_error_exit(); + if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; + if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); - } - free(eBC); + } + free(eBC); - //James Work +/* if (num_parts > 1) { printf("Boundary conditions cannot be written in parallel right now\n"); } else { @@ -707,7 +727,7 @@ if(1==1) { } free(bc_elems); - } + } */ } } diff --git a/pumi-meshes b/pumi-meshes index fecc2dae4..a3a241a71 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit fecc2dae4d3e5a288022fd10ddf78fa60ba05e86 +Subproject commit a3a241a715de566f0e812d253f5cfc2a82705f62 From 5ef895dc5d973bc11a99055eadd24613be5e5f6e Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 13 Aug 2023 19:07:09 -0600 Subject: [PATCH 41/68] As I test on a bump mesh I realize the help message is a bit out of date. Also found some fossil code that usage went extinct. 
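Stepping back to the deal-style sort that PATCH 40 swaps in for the STL sort: it is a counting/bucket pass, O(n) because the keys are limited to the six surface ids. A compact sketch of the same idea with std::vector doing the allocation bookkeeping that the hand-rolled version does with new/delete:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Deal each (surface id, element index) pair into one of six piles,
    // then read the piles back in id order. Assumes every a[i] is 1..6.
    void pairDeal6sort(int a[], int b[], int n)
    {
      std::vector<std::vector<int> > pile(6);
      for (int i = 0; i < n; i++) {
        assert(a[i] >= 1 && a[i] <= 6);
        pile[a[i] - 1].push_back(b[i]);
      }
      int igc = 0;
      for (int j = 0; j < 6; j++) {
        for (std::size_t k = 0; k < pile[j].size(); k++) {
          a[igc] = j + 1;       // keys come back grouped in id order
          b[igc] = pile[j][k];  // values keep their within-id order
          igc++;
        }
      }
      assert(igc == n);
    }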
--- test/matchedNodeElmReader.cc | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/test/matchedNodeElmReader.cc b/test/matchedNodeElmReader.cc index 4dfdfabd0..1572b12e8 100644 --- a/test/matchedNodeElmReader.cc +++ b/test/matchedNodeElmReader.cc @@ -20,33 +20,6 @@ #include #include -/* from https://github.com/SCOREC/core/issues/205 -0=fully interior of the volume -1-6 =classified on face (not edge or vertex) -11-22 = classified on model edge (not end points which are model vertices) -31-38 = classified on a model vertex. -*/ - -/* tags on vertices */ -#define INTERIORTAG 0 -#define FACE 1 -#define FACE_LAST 6 -#define EDGE 11 -#define EDGE_LAST 22 -#define VERTEX 31 -#define VERTEX_LAST 38 - -/* model entity ids */ -//#define INTERIOR_REGION 0 -//int INTERIOR_REGION=0; // initialized but will be checked from read input - -//Manifold single region apf::ModelEntity* getMdlRgn(gmi_model* model) { -//Manifold single region apf::ModelEntity* rgn = reinterpret_cast( -//Manifold single region gmi_find(model, 3, INTERIOR_REGION)); -//Manifold single region PCU_ALWAYS_ASSERT(rgn); -//Manifold single region return rgn; -//Manifold single region } - apf::ModelEntity* getMdlRegion(apf::Mesh2* mesh, int tag) { apf::ModelEntity* region = mesh->findModelEntity(3,tag); @@ -749,14 +722,14 @@ int main(int argc, char** argv) int noVerify=0; // maintain default of verifying if not explicitly requesting it off if( argc < 11 ) { if( !PCU_Comm_Self() ) { - printf("Usage: %s " - " " + printf("Usage: %s no rank but .rank added to next 6 " + " " " " " " " " " " " " - " " + " " " " "turn off verify mesh if equal 1 (on if you give nothing)\n", argv[0]); From b448217bb21d406a96933c30f7f2299238d0a416 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 14 Aug 2023 06:50:08 -0600 Subject: [PATCH 42/68] Initial condition extracted and written to CGNS. For now, PHASTA restart is bypassed since code bundles extraction and destruction and in that case, solution not available to CGNS writer to extract. 
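The extraction this patch adds stores the solution component-major ("transposed"): component j of node i lands at data[j*n + i], so each of the five PHASTA components (p, u, v, w, T) becomes a contiguous slice that can be handed straight to cgp_field_write_data. A stand-alone sketch of that packing, assuming a hypothetical node-major input:

    #include <array>
    #include <cstddef>
    #include <vector>

    // Pack node-major (p,u,v,w,T per node) into component-major storage;
    // &data[j*n] is then the contiguous slice for solution component j.
    std::vector<double> packComponentMajor(
        const std::vector<std::array<double, 5> >& nodal)
    {
      const std::size_t n = nodal.size();
      std::vector<double> data(5 * n);
      for (std::size_t i = 0; i < n; ++i)
        for (int j = 0; j < 5; ++j)
          data[j * n + i] = nodal[i][j];
      return data;
    }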
--- phasta/phCGNSgbc.cc | 136 ++++++++++++++++++++++++++++++++++++++++++-- phasta/phCook.cc | 28 ++++----- phasta/phRestart.h | 2 + 3 files changed, 149 insertions(+), 17 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 94463e0a6..5b7e11f8d 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -9,6 +9,12 @@ #include #include #include +#include "phRestart.h" +#include +#include +#include "apfShape.h" + + #ifdef HAVE_CGNS // #include @@ -96,11 +102,67 @@ void pairDeal6sort(int a[], int b[], int n) } } assert(igc==n); + free(p); + free(idx); } namespace ph { +/* +void detachField( + apf::Field* f, + double*& data, + int& size) +{ + apf::Mesh* m = apf::getMesh(f); + size = apf::countComponents(f); + size_t n = m->count(0); + apf::NewArray c(size); + data = (double*)malloc(sizeof(double) * size * m->count(0)); + apf::MeshEntity* e; + size_t i = 0; + apf::MeshIterator* it = m->begin(0); + while ((e = m->iterate(it))) { + apf::getComponents(f, e, 0, &c[0]); + for (int j = 0; j < size; ++j) + data[j * n + i] = c[j]; + ++i; + } + m->end(it); + PCU_ALWAYS_ASSERT(i == n); + apf::destroyField(f); +} +*/ +/* +void detachField( + apf::Mesh* m, + const char* fieldname, + double*& data, + int& size) +{ + apf::Field* f = m->findField(fieldname); + PCU_ALWAYS_ASSERT(f); +// detachField(f, data, size); + size = apf::countComponents(f); + size_t n = m->count(0); + apf::NewArray c(size); + data = (double*)malloc(sizeof(double) * size * m->count(0)); + apf::MeshEntity* e; + size_t i = 0; + apf::MeshIterator* it = m->begin(0); + while ((e = m->iterate(it))) { + apf::getComponents(f, e, 0, &c[0]); + for (int j = 0; j < size; ++j) + data[j * n + i] = c[j]; + ++i; + } + m->end(it); + PCU_ALWAYS_ASSERT(i == n); + apf::destroyField(f); +} +*/ + static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); @@ -444,6 +506,7 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); + printf("S=%d \n",S); for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -641,13 +704,13 @@ if(1==0){ MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; -if(1==1){ +if(0==1){ for(int ip=0; ip< num_parts; ++ip) printf("%ld ", displs[ip]); printf("\n"); } MPI_Allgatherv(srfID,totOnRankBel,type_i,srfIDG,rcounts,displs,type_i,MPI_COMM_WORLD); MPI_Allgatherv(srfIDidx,totOnRankBel,type_i,srfIDGidx,rcounts,displs,type_i,MPI_COMM_WORLD); -if(1==1){ +if(0==1){ if(part==0) { printf(" srfID GLOBAL "); for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); @@ -666,7 +729,7 @@ if(1==1){ } // pairsort(srfIDG,srfIDGidx,totBel); pairDeal6sort(srfIDG,srfIDGidx,totBel); -if(1==1){ +if(0==1){ if(part==0) { printf(" srfID GLOBAL "); for(int is=0; is< totBel; ++is) printf("%d ", srfIDG[is]); @@ -685,7 +748,7 @@ if(1==1){ BC_scan++; imatch++; } -if(1==1) { +if(0==1) { printf(" srfID =%d ",BCid); for(int is=0; is< imatch; ++is) printf("%d ", eBC[is]); printf("\n"); @@ -742,6 +805,8 @@ void writeCGNS(Output& o, std::string path) std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; + int Fp, Fu, Fv, Fw, 
FT; + int Sp, Su, Sv, Sw, ST; cgsize_t sizes[3],*e, start, end; int num_nodes=m->count(0); @@ -828,6 +893,66 @@ if(0==1) { if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); + /* create a nodal solution */ + char fieldName[12]; + snprintf(fieldName, 13, "solution"); + printf("solution=%s",fieldName); + double* data; + int size; + detachField(o.mesh, fieldName, data, size); + assert(size==5); + +// /* create the field data for this process */ + double* p = (double *)malloc(o.iownnodes * sizeof(double)); + double* u = (double *)malloc(o.iownnodes * sizeof(double)); + double* v = (double *)malloc(o.iownnodes * sizeof(double)); + double* w = (double *)malloc(o.iownnodes * sizeof(double)); + double* T = (double *)malloc(o.iownnodes * sizeof(double)); + int icount=0; + for (int n = 0; n < num_nodes; n++) { + gnod=o.arrays.ncorp[n]; + if(gnod >= start && gnod <= end) { // solution to write + p[icount]= data[0*num_nodes+n]; + u[icount]= data[1*num_nodes+n]; + v[icount]= data[2*num_nodes+n]; + w[icount]= data[3*num_nodes+n]; + T[icount]= data[4*num_nodes+n]; + icount++; + } + } +// /* write the solution field data in parallel */ + if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &Sp) || + cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Pressure", &Fp)) + cgp_error_exit(); + printf("Sp=%d \n",Sp); + if (cgp_field_write_data(F, B, Z, Sp, Fp, &start, &end, p)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityX", &Fu)) + cgp_error_exit(); + printf("Su=%d \n",Su); + if (cgp_field_write_data(F, B, Z, Sp, Fu, &start, &end, u)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityY", &Fv)) + cgp_error_exit(); + printf("Sv=%d \n",Sv); + if (cgp_field_write_data(F, B, Z, Sp, Fv, &start, &end, v)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityZ", &Fw)) + cgp_error_exit(); + printf("Sw=%d \n",Sw); + if (cgp_field_write_data(F, B, Z, Sp, Fw, &start, &end, w)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Temperature", &FT)) + cgp_error_exit(); + printf("ST=%d \n",ST); + if (cgp_field_write_data(F, B, Z, Sp, FT, &start, &end, T)) + cgp_error_exit(); + free(p); + free(u); + free(v); + free(w); + free(T); + free(data); /* create Helper array for number of elements on rank */ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || @@ -843,5 +968,8 @@ if(0==1) { if(o.writeCGNSFiles > 1) writeBlocksCGNS(F,B,Z, o); if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } // namespace diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 0c8b5ed6b..d701ba26e 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -197,19 +197,21 @@ namespace ph { ph::enterFilteredMatching(m, in, bcs); ph::generateOutput(in, bcs, m, out); ph::exitFilteredMatching(m); - // a path is not needed for inmem - if ( in.writeRestartFiles ) { - if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); - // store the value of the function pointer - FILE* (*fn)(Output& out, const char* path) = out.openfile_write; - // set function pointer for file writing - out.openfile_write = chef::openfile_write; - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart - // reset the function pointer to the original value - out.openfile_write = fn; - } - else { - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + if ( in.writeCGNSFiles ==0 ) { // for 
now, don't write restarts when writing CGNS since writing restarts is bundled with destroying fields + // a path is not needed for inmem + if ( in.writeRestartFiles ) { + if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); + // store the value of the function pointer + FILE* (*fn)(Output& out, const char* path) = out.openfile_write; + // set function pointer for file writing + out.openfile_write = chef::openfile_write; + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + // reset the function pointer to the original value + out.openfile_write = fn; + } + else { + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + } } if ( ! in.outMeshFileName.empty() ) m->writeNative(in.outMeshFileName.c_str()); diff --git a/phasta/phRestart.h b/phasta/phRestart.h index cd82967b6..f17b690d6 100644 --- a/phasta/phRestart.h +++ b/phasta/phRestart.h @@ -27,6 +27,8 @@ void detachAndWriteSolution(Input& in, Output& out, void attachZeroSolution(Input& in, apf::Mesh* m); void detachField(apf::Field* f, double*& data, int& size); +void detachField(apf::Mesh* m, const char* fieldname, double*& data, int& size); + } From 7cb7309433e6c082c6f591be1e590bd00b44a12d Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Wed, 16 Aug 2023 08:29:39 -0600 Subject: [PATCH 43/68] committing an aborted attempt to find matching faces through matched nodes in face connectivity...this approach would work if ncorp were what PETSc wanted without PHASTA's need for ilwork to have final owner (after periodicity) in ilwork but matched meshes currently foul ncorp which is why this approach is abandoned in an icomplete state and mothballed in this hash. --- phasta/phCGNSgbc.cc | 595 ++++++++++++++++++++------------------------ 1 file changed, 276 insertions(+), 319 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 5b7e11f8d..a7b6fd997 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -109,235 +109,162 @@ void pairDeal6sort(int a[], int b[], int n) namespace ph { -/* -void detachField( - apf::Field* f, - double*& data, - int& size) -{ - apf::Mesh* m = apf::getMesh(f); - size = apf::countComponents(f); - size_t n = m->count(0); - apf::NewArray c(size); - data = (double*)malloc(sizeof(double) * size * m->count(0)); - apf::MeshEntity* e; - size_t i = 0; - apf::MeshIterator* it = m->begin(0); - while ((e = m->iterate(it))) { - apf::getComponents(f, e, 0, &c[0]); - for (int j = 0; j < size; ++j) - data[j * n + i] = c[j]; - ++i; - } - m->end(it); - PCU_ALWAYS_ASSERT(i == n); - apf::destroyField(f); -} -*/ -/* -void detachField( - apf::Mesh* m, - const char* fieldname, - double*& data, - int& size) -{ - apf::Field* f = m->findField(fieldname); - PCU_ALWAYS_ASSERT(f); -// detachField(f, data, size); - size = apf::countComponents(f); - size_t n = m->count(0); - apf::NewArray c(size); - data = (double*)malloc(sizeof(double) * size * m->count(0)); - apf::MeshEntity* e; - size_t i = 0; - apf::MeshIterator* it = m->begin(0); - while ((e = m->iterate(it))) { - apf::getComponents(f, e, 0, &c[0]); - for (int j = 0; j < size; ++j) - data[j * n + i] = c[j]; - ++i; - } - m->end(it); - PCU_ALWAYS_ASSERT(i == n); - apf::destroyField(f); -} -*/ - static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); +void commuInt(Output& o, cgsize_t* global) +{ // translating a commuInt out from PHASTA to c + int numtask=o.arrays.ilwork[0]; + int itkbeg=0; + int 
maxseg=1; + int numseg; + for (int itask=0; itask=numtask); + MPI_Request req[maxtask]; + MPI_Status stat[maxtask]; + int maxfront=0; + int lfront; + itkbeg=0; + for (int itask=0; itaskcount(0); - o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); - lcorp_t owned; - lcorp_t local; - lcorp_t* owner_counts; - cgsize_t local_start_id; - cgsize_t gid; - - const int num_parts = PCU_Comm_Peers(); - const int part = PCU_Comm_Self() ; - - for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; - owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); - local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); - o.iownnodes = owned+local; + apf::Mesh* m = o.mesh; + int i; + lcorp_t nilwork = o.nlwork; + int num_nodes=m->count(0); + o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); + lcorp_t owned; + lcorp_t local; + lcorp_t* owner_counts; + cgsize_t local_start_id; + cgsize_t gid; + + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + + for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; + owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + o.iownnodes = owned+local; #ifdef PRINT_EVERYTHING - printf("%d: %d local only nodes\n", part, local); - printf("%d: %d owned nodes\n", part, owned); + printf("%d: %d local only nodes\n", part, local); + printf("%d: %d owned nodes\n", part, owned); #endif - assert( owned <= num_nodes ); - assert( owned+local <= num_nodes ); + assert( owned <= num_nodes ); + assert( owned+local <= num_nodes ); - owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); - for(int i=0; i < num_parts; i++) owner_counts[i]=0; - owner_counts[part] = owned+local; + owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); + for(int i=0; i < num_parts; i++) owner_counts[i]=0; + owner_counts[part] = owned+local; #ifdef PRINT_EVERYTHING - for(i=0;i=0); - for(i=0;i=0); - -// global so needs long long - gid++; - continue; - } - if(o.arrays.ncorp[i] == 0) - { - o.arrays.ncorp[i] = gid; - assert(o.arrays.ncorp[i]>=0); - gid++; - continue; - } - if(o.arrays.ncorp[i] == -1) - { - o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves - } - - } - //char code[] = "out"; - //int ione = 1; - - if(num_parts > 1) { -// translating a commuInt out from PHASTA to c - int numtask=o.arrays.ilwork[0]; - int itkbeg=0; - int maxseg=1; - int numseg; - for (int itask=0; itask=0); + for(i=0;i=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == 0) + { + o.arrays.ncorp[i] = gid; + assert(o.arrays.ncorp[i]>=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == -1) + o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves + } //char code[] = "out"; //int ione = 1; - int maxtask=1000; - assert(maxtask>=numtask); - MPI_Request req[maxtask]; - MPI_Status stat[maxtask]; - int maxfront=0; - int lfront; - itkbeg=0; - for (int itask=0; itask 1) + commuInt(o, o.arrays.ncorp); +if(1==1) { + for (int ipart=0; ipart 2) { cgsize_t eVolElm=e_written; cgsize_t e_belWritten=0; -// cgsize_t totOnRankBel=0; int totOnRankBel=0; int triCount=0; int quadCount=0; @@ -599,6 +526,11 @@ if(1==0){ totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int** srfIDCnn1 = new int*[nblkb]; + int** srfIDCnn2 = new int*[nblkb]; + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * 
sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { @@ -615,7 +547,6 @@ if(1==0){ if(nvert==3) triCount++; if(nvert==4) quadCount++; char Ename[7]; - switch(nvert){ case 3: snprintf(Ename, 5, "Tri%d",triCount); @@ -633,27 +564,43 @@ if(1==0){ MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based - /* write the element connectivity in parallel */ + // write the element connectivity in parallel if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(1==0){ - for (int ne=0; ne 1) { - printf("Boundary conditions cannot be written in parallel right now\n"); - } else { - // waaay too large, but works as proof of concept - cgsize_t (*bc_elems)[totOnRankBel] = (cgsize_t (*)[totOnRankBel])calloc(6 * totOnRankBel, sizeof(cgsize_t)); - cgsize_t bc_elems_count[6] = {0}; - for (int elem_id=0; elem_idcount(0); -if(0==1){ // ilwork debugging +if(1==1){ // ilwork debugging for (int ipart=0; ipart Date: Wed, 16 Aug 2023 23:02:39 -0600 Subject: [PATCH 44/68] SurfID1 and SurfID2 are in the same order to support periodicity of those two surfaces. Quick check of mixed meshs failed but will debug tomorrow. --- phasta/phCGNSgbc.cc | 256 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 198 insertions(+), 58 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index a7b6fd997..45011d2e3 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -52,6 +52,31 @@ MPI_Datatype getMpiType(T) { #include using namespace std; +// Function to sort integer array b[] +// according to the order defined by a[] +void pairsortDI(double a[], int b[], int n) +{ + pair pairt[n]; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. + sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } +} + // Function to sort integer array b[] // according to the order defined by a[] void pairsort(int a[], int b[], int n) @@ -102,8 +127,8 @@ void pairDeal6sort(int a[], int b[], int n) } } assert(igc==n); - free(p); - free(idx); + delete idx; + delete p; } @@ -189,7 +214,7 @@ void gen_ncorp(Output& o ) int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); + o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); //FIXME where to deallocate lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; @@ -257,7 +282,7 @@ void gen_ncorp(Output& o ) if(num_parts > 1) commuInt(o, o.arrays.ncorp); -if(1==1) { +if(1==0) { for (int ipart=0; ipartcount(0); size_t i = 0; + size_t phGnod = 0; std::vector lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) // PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. 
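The face centroids assembled in this routine feed the periodic matching further down; in isolation, that matching is a nearest-neighbor search in (x, y) between the two gathered centroid lists, since the two surfaces differ only by the translation in z. A hedged sketch of the fallback search, assuming cen1 and cen2 are the packed x,y,z triples produced by the MPI_Allgatherv calls in this patch:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // For face i on surface 1, return the surface-2 face with the
    // nearest (x,y) centroid, or -1 if none lies within tol.
    int matchFace(const std::vector<double>& cen1,
                  const std::vector<double>& cen2,
                  std::size_t i, double tol)
    {
      double best = HUGE_VAL;
      int jbest = -1;
      for (std::size_t j = 0; 3 * j + 2 < cen2.size(); ++j) {
        const double dx = cen1[3*i + 0] - cen2[3*j + 0];
        const double dy = cen1[3*i + 1] - cen2[3*j + 1];
        const double d2 = dx*dx + dy*dy;
        if (d2 < best) { best = d2; jbest = (int)j; }
      }
      return (best < tol * tol) ? jbest : -1;
    }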
We need it here too // see hierarchic.f but note that is fortran numbering if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 - for (int elem = 0; elem < nelem; ++elem) - for (int vert = 0; vert < nvert; ++vert) - c[i++] = o.arrays.ncorp[o.arrays.ienb[block][elem][lnode[vert]]]; + for (int elem = 0; elem < nelem; ++elem){ + ecenx[elem]=0; + eceny[elem]=0; + ecenz[elem]=0; + for (int vert = 0; vert < nvert; ++vert){ + phGnod=o.arrays.ienb[block][elem][lnode[vert]]; //actually it is on-rank Global + c[i++] = o.arrays.ncorp[phGnod]; // PETSc truely global + ecenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; + eceny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; + ecenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; + } + ecenx[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + eceny[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + ecenz[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + } PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -434,7 +473,6 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); - printf("S=%d \n",S); for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); @@ -526,8 +564,8 @@ if(1==0){ totOnRankBel += o.blocks.boundary.nElements[i]; int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - int** srfIDCnn1 = new int*[nblkb]; - int** srfIDCnn2 = new int*[nblkb]; + double** srfIDCen1 = new double*[nblkb]; + double** srfIDCen2 = new double*[nblkb]; int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); @@ -539,7 +577,10 @@ if(1==0){ e_owned = params[0]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getBoundaryConnectivityCGNS(o, i, e); + double* ecenx = (double *)malloc( e_owned * sizeof(double)); + double* eceny = (double *)malloc( e_owned * sizeof(double)); + double* ecenz = (double *)malloc( e_owned * sizeof(double)); + getBoundaryConnectivityCGNS(o, i, e,ecenx,eceny,ecenz); e_startg=1+e_written; // start for the elements of this topology long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology @@ -575,26 +616,29 @@ if(1==0){ int icnt1=0; int icnt2=0; for (int ne=0; ne search list srfID=2 list to find true match + vDSmin=vDistSq; + DistFails++; + for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be taken a lot then it could be narrowed e.g. 
j=max(0,i-50), j< i+min(matchFace,i+50), + iclose2=imapD2[j]; + d1=srfID1Gcen[(iclose1)*3+0]-srfID2Gcen[(iclose2)*3+0]; + d2=srfID1Gcen[(iclose1)*3+1]-srfID2Gcen[(iclose2)*3+1]; + vDistSq= d1*d1+d2*d2; + if(vDistSqcount(0); -if(1==1){ // ilwork debugging +if(1==0){ // ilwork debugging for (int ipart=0; ipart Date: Thu, 17 Aug 2023 09:00:20 -0600 Subject: [PATCH 45/68] CGNS standard for defining periodic interface compplete and tested for 1 and 2 process cases. All write a file that looks reasonable at first glance with cgnsview except the wedge-tet mixed case which probably has an issue I have not resolved yet --- phasta/phCGNSgbc.cc | 23 +++++++++++++++++------ pumi-meshes | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 45011d2e3..fc33d3c0f 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -718,6 +718,7 @@ if(1==0){ printf("Stack2 %d %d, %d, %d, %d, %d\n",part, GsrfID2cnt, ncon, nb int nmatchFace=GsrfID1cnt/3; double* srfID2Gcen = (double *)malloc( GsrfID2cnt * sizeof(double)); MPI_Allgatherv(srfIDCen2AllBlocks,ncon,type_d,srfID2Gcen,rcounts,displs,type_d,MPI_COMM_WORLD); + const float Lz=abs(srfID2Gcen[2]-srfID1Gcen[2]); if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1Gcen[ip*3+0]); printf("\n"); } if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1Gcen[ip*3+1]); printf("\n"); } if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1Gcen[ip*3+2]); printf("\n"); } @@ -805,6 +806,8 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++i // ZonalBC data int* srfIDG = (int *)malloc( totBel * sizeof(int)); int* srfIDGidx = (int *)malloc( totBel * sizeof(int)); + cgsize_t* donor2 = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); + cgsize_t* periodic1 = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); auto type_cg = getMpiType( cgsize_t() ); MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; @@ -837,15 +840,13 @@ if(1==0){ if(part==0) { } //reorder SurfID = 1 and 2 using idmapD{1,2} based on distance to support periodicity if(BCid==1) { - cgsize_t* eBCtmp = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); - for (int i = 0; i < nmatchFace; i++) eBCtmp[i]=eBC[imapD1[i]]; - for (int i = 0; i < nmatchFace; i++) eBC[i]=eBCtmp[i]; + for (int i = 0; i < nmatchFace; i++) periodic1[i]=eBC[imapD1[i]]; + for (int i = 0; i < nmatchFace; i++) eBC[i]=periodic1[i]; if(1==1&&part==1){ printf(" srfIDidx 1 "); for(int is=0; is< nmatchFace; ++is) printf("%d ", eBC[is]); printf("\n"); } } if(BCid==2) { - cgsize_t* eBCtmp = (cgsize_t *)malloc(nmatchFace * sizeof(cgsize_t)); - for (int i = 0; i < nmatchFace; i++) eBCtmp[i]=eBC[imapD2[i]]; - for (int i = 0; i < nmatchFace; i++) eBC[i]=eBCtmp[i]; + for (int i = 0; i < nmatchFace; i++) donor2[i]=eBC[imapD2[i]]; + for (int i = 0; i < nmatchFace; i++) eBC[i]=donor2[i]; if(1==1&&part==1){ printf(" srfIDidx 2 "); for(int is=0; is< nmatchFace; ++is) printf("%d ", eBC[is]); printf("\n"); } } if(0==1) { @@ -859,6 +860,16 @@ if(0==1) { if(cg_goto(F, B, "Zone_t", 1, "ZoneBC_t", 1, "BC_t", BC_index, "end")) cg_error_exit();; if(cg_gridlocation_write(CGNS_ENUMV(FaceCenter))) cg_error_exit(); } + int cgconn; + if (cg_conn_write(F, B, Z, "Periodic Connectivity", + CGNS_ENUMV(FaceCenter), CGNS_ENUMV(Abutting1to1), + CGNS_ENUMV(PointList), nmatchFace, periodic1, "Zone", + 
CGNS_ENUMV(Unstructured), CGNS_ENUMV(PointListDonor), + CGNS_ENUMV(Integer), nmatchFace, donor2, &cgconn)) cgp_error_exit(); + const float RotationCenter[3]={0}; + const float RotationAngle[3]={0}; + const float Translation[3]={0,0,-Lz}; + if (cg_conn_periodic_write(F, B, Z, cgconn, RotationCenter, RotationAngle, Translation)) cgp_error_exit(); free(imapD1); free(imapD2); free(eBC); free(srfIDG); free(srfIDGidx); } // processing boundary elments diff --git a/pumi-meshes b/pumi-meshes index a3a241a71..3355b3a95 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit a3a241a715de566f0e812d253f5cfc2a82705f62 +Subproject commit 3355b3a952b114f1c7c02b9bdb7fa4bb9db1b86e From ae008bd096ccb0f485fde4a207bb89dbe72f0362 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 18 Aug 2023 08:13:58 -0600 Subject: [PATCH 46/68] valgrind now only shows HDF5-MPIO issues --- phasta/phCGNSgbc.cc | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index fc33d3c0f..c3fafc23a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -127,8 +127,10 @@ void pairDeal6sort(int a[], int b[], int n) } } assert(igc==n); - delete idx; - delete p; + for (int i = 0; i < 6; i++) delete [] p[i]; + for (int i = 0; i < 6; i++) delete [] idx[i]; + delete [] idx; + delete [] p; } @@ -664,7 +666,7 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn // write the user data for this process e_written=0; //recycling eVolElm holds for (int i = 0; i < nblkb; ++i) { - int e_startB=startBelBlk[i]-eVolElm; // srfID is only for bel....matches linear order with eVolElm offset from + int e_startB=startBelBlk[i]-eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from // bel# that starts from last volume element e_owned=endBelBlk[i]-startBelBlk[i]+1; e_start=0; @@ -694,7 +696,9 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn for (int j = 0; j < srfID2OnBlk[i]*3; ++j) srfIDCen2AllBlocks[k2++]=srfIDCen2[i][j]; } free(srfID1OnBlk); free(srfID2OnBlk); - delete srfIDCen1; delete srfIDCen2; + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; int ncon=numsurfID1onRank*3; auto type_i = getMpiType( int() ); MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); @@ -829,15 +833,25 @@ if(1==0){ if(part==0) { printf(" srfIDidx GLOBAL "); for(int is=0; is< totBel; ++is) printf("%d ", srfIDGidx[is]); printf("\n"); } } int BC_scan=0; - int imatch1; cgsize_t* eBC = (cgsize_t *)malloc(totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; - while (srfIDG[BC_scan]==BCid) { +// valgrind likes this? 
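Why valgrind objected is worth spelling out: the mothballed while-form kept in the comment below tests srfIDG[BC_scan]==BCid before testing the bound, so the scan that finishes the last surface id indexes one element past the end of srfIDG before the bound can stop it. Putting the bound first lets && short-circuit the out-of-range read away; a sketch using the same names:

    // bound first, element test second: srfIDG is never indexed
    // at BC_scan == totBel because && short-circuits
    int imatch = 0;
    while (BC_scan < totBel && srfIDG[BC_scan] == BCid) {
      eBC[imatch] = srfIDGidx[BC_scan];
      BC_scan++;
      imatch++;
    }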
+ for (int ib = BC_scan; ib < totBel; ib++) { + if(srfIDG[ib]==BCid){ + eBC[imatch]=srfIDGidx[BC_scan]; + BC_scan++; + imatch++; + } else break; + } + +/* works but valgrind no likey + while (srfIDG[BC_scan]==BCid&&BC_scan Date: Fri, 18 Aug 2023 13:54:39 -0600 Subject: [PATCH 47/68] split writeBlocksCGNS into three for readability --- phasta/phCGNSgbc.cc | 263 +++++++++++++++++++++----------------------- 1 file changed, 123 insertions(+), 140 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index c3fafc23a..38cb6e4ee 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -368,10 +368,6 @@ static std::string buildCGNSFileName(std::string timestep_or_dat) return ss.str(); } -enum { - MAX_PARAMS = 12 -}; - // update is only a transpose to match CNGS. void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) { @@ -394,7 +390,7 @@ void getInteriorConnectivityCGNS(Output& o, int block, cgsize_t* c) } // update is both a transpose to match CNGS and reduction to only filling the first number of vertices on the boundary whereas PHASTA wanted full volume -void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c, double* ecenx, double* eceny, double* ecenz) +void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c, double* eCenx, double* eCeny, double* eCenz) { int nelem = o.blocks.boundary.nElements[block]; int nvertVol = o.blocks.boundary.keys[block].nElementVertices; @@ -409,19 +405,19 @@ void getBoundaryConnectivityCGNS(Output& o, int block, cgsize_t* c, double* ecen if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 for (int elem = 0; elem < nelem; ++elem){ - ecenx[elem]=0; - eceny[elem]=0; - ecenz[elem]=0; + eCenx[elem]=0; + eCeny[elem]=0; + eCenz[elem]=0; for (int vert = 0; vert < nvert; ++vert){ phGnod=o.arrays.ienb[block][elem][lnode[vert]]; //actually it is on-rank Global c[i++] = o.arrays.ncorp[phGnod]; // PETSc truely global - ecenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; - eceny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; - ecenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; + eCenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; + eCeny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; + eCenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; } - ecenx[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison - eceny[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison - ecenz[elem]/=nvert; // only necessary if you really want to use this as a correct centroid rather than comparison + eCenx[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCeny[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCenz[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison } PCU_ALWAYS_ASSERT(i == nelem*nvert); } @@ -460,13 +456,11 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) // renamed and calling the renamed functions above with output writes now to CGNS -void writeBlocksCGNS(int F,int B,int Z, Output& o) +void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) { - int params[MAX_PARAMS]; int E,S,Fs,Fs2,Fsb,Fsb2; cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; - cgsize_t 
e_written=0; const int num_parts = PCU_Comm_Peers(); const cgsize_t num_parts_cg=num_parts; const int part = PCU_Comm_Self() ; @@ -478,15 +472,14 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) for (int i = 0; i < o.blocks.interior.getSize(); ++i) { BlockKey& k = o.blocks.interior.keys[i]; std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); - params[0] = o.blocks.interior.nElements[i]; e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); getInteriorConnectivityCGNS(o, i, e); // create data node for elements - e_startg=1+e_written; // start for the elements of this topology + e_startg=1+*e_written; // start for the elements of this topology long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int - e_endg=e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology char Ename[5]; switch(nvert){ case 4: @@ -513,12 +506,12 @@ void writeBlocksCGNS(int F,int B,int Z, Output& o) e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my parts global element start 1-based + e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based // write the element connectivity in parallel if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) cgp_error_exit(); - e_written=e_endg; // update count of elements written + *e_written=e_endg; // update count of elements written if(1==0){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); @@ -553,40 +546,38 @@ if(1==0){ if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) cgp_error_exit(); } // end of loop over interior blocks - - - if(o.writeCGNSFiles > 2) { - cgsize_t eVolElm=e_written; +} +void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int nblkb) +{ +// if(o.writeCGNSFiles > 2) { + int E,Fsb,Fsb2; + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + cgsize_t e_owned, e_start,e_end; + cgsize_t e_startg,e_endg; + cgsize_t eVolElm=*e_written; cgsize_t e_belWritten=0; - int totOnRankBel=0; int triCount=0; int quadCount=0; - int nblkb = o.blocks.boundary.getSize(); + int totOnRankBel=0; for (int i = 0; i < nblkb; ++i) totOnRankBel += o.blocks.boundary.nElements[i]; - int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); - int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - double** srfIDCen1 = new double*[nblkb]; - double** srfIDCen2 = new double*[nblkb]; - int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { BlockKey& k = o.blocks.boundary.keys[i]; - params[0] = o.blocks.boundary.nElements[i]; - e_owned = params[0]; + e_owned = o.blocks.boundary.nElements[i]; int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - double* ecenx = (double 
*)malloc( e_owned * sizeof(double)); - double* eceny = (double *)malloc( e_owned * sizeof(double)); - double* ecenz = (double *)malloc( e_owned * sizeof(double)); - getBoundaryConnectivityCGNS(o, i, e,ecenx,eceny,ecenz); - e_startg=1+e_written; // start for the elements of this topology + double* eCenx = (double *)malloc( e_owned * sizeof(double)); + double* eCeny = (double *)malloc( e_owned * sizeof(double)); + double* eCenz = (double *)malloc( e_owned * sizeof(double)); + getBoundaryConnectivityCGNS(o, i, e,eCenx,eCeny,eCenz); + e_startg=1+*e_written; // start for the elements of this topology long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology - e_endg=e_written + numBelTP; // end for the elements of this topology + e_endg=*e_written + numBelTP; // end for the elements of this topology if(nvert==3) triCount++; if(nvert==4) quadCount++; char Ename[7]; @@ -605,7 +596,7 @@ if(1==0){ e_start=0; auto type = getMpiType( cgsize_t() ); MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+e_written; // my parts global element start 1-based + e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based // write the element connectivity in parallel if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) @@ -629,22 +620,22 @@ if(1==0){ int j2=0; for (int ne=0; ne search list srfID=2 list to find true match + } else {// Centroid for i-1 did not match-> search list srfID=2 list to find true match vDSmin=vDistSq; DistFails++; for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be taken a lot then it could be narrowed e.g. j=max(0,i-50), j< i+min(matchFace,i+50), iclose2=imapD2[j]; - d1=srfID1Gcen[(iclose1)*3+0]-srfID2Gcen[(iclose2)*3+0]; - d2=srfID1Gcen[(iclose1)*3+1]-srfID2Gcen[(iclose2)*3+1]; + d1=srfID1GCen[(iclose1)*3+0]-srfID2GCen[(iclose2)*3+0]; + d2=srfID1GCen[(iclose1)*3+1]-srfID2GCen[(iclose2)*3+1]; vDistSq= d1*d1+d2*d2; if(vDistSq 1) - writeBlocksCGNS(F,B,Z, o); +// if(o.writeCGNSFiles > 1) +// got split into 4 writeBlocksCGNS(F,B,Z, o); + cgsize_t e_written=0; + cgsize_t totBel; + writeBlocksCGNSinteror(F,B,Z,o,&e_written); + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); + free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nblkb; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; if(cgp_close(F)) cgp_error_exit(); double t1 = PCU_Time(); if (!PCU_Comm_Self()) From 
0bf531316bd2bb436e849ddf44b4adad8bd86e09 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 18 Aug 2023 15:55:32 -0600 Subject: [PATCH 48/68] more helper functions to improve readability --- phasta/phCGNSgbc.cc | 252 ++++++++++++++++++++++---------------------- 1 file changed, 128 insertions(+), 124 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 38cb6e4ee..58c416e7d 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -649,25 +649,18 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn } *totBel = *e_written-eVolElm; } -void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int nblkb) +void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, cgsize_t *eVolElm, int nblkb) { -// srfID is for ALL Boundary faces - const int num_parts = PCU_Comm_Peers(); - const cgsize_t num_parts_cg=num_parts; - const int part = PCU_Comm_Self() ; - const cgsize_t part_cg=part; cgsize_t e_owned, e_start,e_end; int Fsb; - cgsize_t eVolElm = *e_written-*totBel; // setup User Data for boundary faces if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || cgp_array_write("srfID", CG_Integer, 1,totBel, &Fsb)) cgp_error_exit(); // write the user data for this process - *e_written=0; //recycling eVolElm holds for (int i = 0; i < nblkb; ++i) { - int e_startB=startBelBlk[i]-eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from + int e_startB=startBelBlk[i]-*eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from // bel# that starts from last volume element e_owned=endBelBlk[i]-startBelBlk[i]+1; e_start=0; @@ -675,64 +668,20 @@ void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based - printf("Bndy %s, %ld, %ld, %ld, %d, %d, %d \n", "srfID", e_start, e_end, e_owned, i, part,Fsb); + printf("Bndy %s, %ld, %ld, %ld, %d, %d \n", "srfID", e_start, e_end, e_owned, i, Fsb); if (cgp_array_write_data(Fsb, &e_start, &e_end, &srfID[e_startB])) cgp_error_exit(); long safeArg=e_owned; // is cgsize_t which could be an 32 or 64 bit int *e_written += PCU_Add_Long(safeArg); // number of elements of this topology } -// stack connectivities on rank before gather (should preserve order) - int* rcounts = (int *)malloc( num_parts * sizeof(int)); - int* displs = (int *)malloc( num_parts * sizeof(int)); - int numsurfID1onRank=0; - int numsurfID2onRank=0; - for (int i = 0; i < nblkb; ++i) numsurfID1onRank+=srfID1OnBlk[i]; - for (int i = 0; i < nblkb; ++i) numsurfID2onRank+=srfID2OnBlk[i]; - double* srfIDCen1AllBlocks = (double *)malloc(numsurfID1onRank*3 * sizeof(double)); - double* srfIDCen2AllBlocks = (double *)malloc(numsurfID2onRank*3 * sizeof(double)); - int k1=0; - int k2=0; - for (int i = 0; i < nblkb; ++i) { - for (int j = 0; j < srfID1OnBlk[i]*3; ++j) srfIDCen1AllBlocks[k1++]=srfIDCen1[i][j]; - for (int j = 0; j < srfID2OnBlk[i]*3; ++j) srfIDCen2AllBlocks[k2++]=srfIDCen2[i][j]; - } - int ncon=numsurfID1onRank*3; - auto type_i = getMpiType( int() ); - MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); - displs[0]=0; - for 
(int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; -if(1==0){ printf("displs1 %d ",part);for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } - int GsrfID1cnt=displs[num_parts-1]+rcounts[num_parts-1]; -if(1==0){ printf("Stack1 %d %d, %d, %d, %d, %d\n",part, GsrfID1cnt, ncon, nblkb, numsurfID1onRank, numsurfID2onRank);} - double* srfID1GCen = (double *)malloc( GsrfID1cnt * sizeof(double)); - auto type_d = getMpiType( double() ); - MPI_Allgatherv(srfIDCen1AllBlocks,ncon,type_d,srfID1GCen,rcounts,displs,type_d,MPI_COMM_WORLD); -// srfID=2 repeats - ncon=numsurfID2onRank*3; - MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); - displs[0]=0; - for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; -if(1==0){ printf("displs2 %d ",part);for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } - int GsrfID2cnt=displs[num_parts-1]+rcounts[num_parts-1]; -if(1==0){ printf("Stack2 %d %d, %d, %d, %d, %d\n",part, GsrfID2cnt, ncon, nblkb, numsurfID1onRank, numsurfID2onRank);} - assert(GsrfID1cnt==GsrfID2cnt); - int nmatchFace=GsrfID1cnt/3; - double* srfID2GCen = (double *)malloc( GsrfID2cnt * sizeof(double)); - MPI_Allgatherv(srfIDCen2AllBlocks,ncon,type_d,srfID2GCen,rcounts,displs,type_d,MPI_COMM_WORLD); - const float Lz=abs(srfID2GCen[2]-srfID1GCen[2]); -if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+0]); printf("\n"); } -if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+1]); printf("\n"); } -if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+2]); printf("\n"); } - PCU_Barrier(); -if(1==0){ printf("%d part srfID 2 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+0]); printf("\n"); } -if(1==0){ printf("%d part srfID 2 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+1]); printf("\n"); } -if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+2]); printf("\n"); } - free(srfIDCen1AllBlocks); free(srfIDCen2AllBlocks); + +} +void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* imapD1, int*imapD2) +{ + int* imapD2v = (int *)malloc( nmatchFace * sizeof(int)); double* srfID1distSq = (double *)malloc( nmatchFace * sizeof(double)); double* srfID2distSq = (double *)malloc( nmatchFace * sizeof(double)); - int* imapD1 = (int *)malloc( nmatchFace * sizeof(int)); - int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); - int* imapD2v = (int *)malloc( nmatchFace * sizeof(int)); + const int part = PCU_Comm_Self() ; double xc=10; // true cubes with uniform meshes set up ties (good for debugging/verifying that dumb search backup works) for (int i = 0; i < nmatchFace; ++i) { srfID1distSq[i]=(srfID1GCen[i*3+0]-xc)*(srfID1GCen[i*3+0]-xc) @@ -800,9 +749,67 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++i printf(" imapD1 GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD1[is]); printf("\n"); printf(" srfID2dist GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%f ", srfID2distSq[is]); printf("\n"); printf(" imapD2 GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD2[is]); printf("\n"); } - free(srfID1GCen); free(srfID2GCen); free(srfID1distSq); free(srfID2distSq); free(imapD2v); +} +void gatherCentroid(double** srfIDCen,int* srfIDOnBlk, 
double** srfIDGCen, int *nmatchFace, int nblkb) +{ +// stack connectivities on rank before gather (should preserve order) + const int num_parts = PCU_Comm_Peers(); + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + int numSurfIDOnRank=0; + for (int i = 0; i < nblkb; ++i) numSurfIDOnRank+=srfIDOnBlk[i]; + double* srfIDCenAllBlocks = (double *)malloc(numSurfIDOnRank*3 * sizeof(double)); + int k1=0; + for (int i = 0; i < nblkb; ++i) + for (int j = 0; j < srfIDOnBlk[i]*3; ++j) srfIDCenAllBlocks[k1++]=srfIDCen[i][j]; + int ncon=numSurfIDOnRank*3; + auto type_i = getMpiType( int() ); + MPI_Allgather(&ncon,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); + displs[0]=0; + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; + int GsrfIDcnt=displs[num_parts-1]+rcounts[num_parts-1]; + *nmatchFace=GsrfIDcnt/3; + *srfIDGCen = (double *)malloc( GsrfIDcnt * sizeof(double)); +if(1==0){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } + auto type_d = getMpiType( double() ); + MPI_Allgatherv(srfIDCenAllBlocks,ncon,type_d,*srfIDGCen,rcounts,displs,type_d,MPI_COMM_WORLD); + free(srfIDCenAllBlocks); +} + +void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int nblkb) +{ +// srfID is for ALL Boundary faces + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + cgsize_t e_owned, e_start,e_end; + int Fsb; + cgsize_t eVolElm = *e_written-*totBel; + *e_written=0; //recycling eVolElm holds + writeCGNS_UserData(F,B, srfID, startBelBlk, endBelBlk, e_written, totBel, &eVolElm, nblkb); + double* srfID1GCen; + double* srfID2GCen; + int nmatchFace1,nmatchFace; + gatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); + gatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); + assert(nmatchFace1==nmatchFace); + const float Lz=abs(srfID2GCen[2]-srfID1GCen[2]); +if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+0]); printf("\n"); } +if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+1]); printf("\n"); } +if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID1GCen[ip*3+2]); printf("\n"); } + PCU_Barrier(); +if(1==0){ printf("%d part srfID 2 xc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+0]); printf("\n"); } +if(1==0){ printf("%d part srfID 2 yc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+1]); printf("\n"); } +if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< nmatchFace; ++ip) printf("%f ", srfID2GCen[ip*3+2]); printf("\n"); } + int* imapD1 = (int *)malloc( nmatchFace * sizeof(int)); + int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); + sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); + free(srfID1GCen); free(srfID2GCen); // ZonalBC data int* srfIDG = (int *)malloc( *totBel * sizeof(int)); int* srfIDGidx = (int *)malloc( *totBel * sizeof(int)); @@ -812,6 +819,7 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; 
ip< nmatchFace; ++i int totOnRankBel=0; for (int i = 0; i < nblkb; ++i) totOnRankBel += o.blocks.boundary.nElements[i]; + auto type_i = getMpiType( int() ); MPI_Allgather(&totOnRankBel,1,type_i,rcounts,1,type_i,MPI_COMM_WORLD); displs[0]=0; for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; @@ -834,7 +842,6 @@ if(1==0){ if(part==0) { cgsize_t* eBC = (cgsize_t *)malloc(*totBel * sizeof(cgsize_t)); for (int BCid = 1; BCid < 7; BCid++) { int imatch=0; -// valgrind likes this? for (int ib = BC_scan; ib < *totBel; ib++) { if(srfIDG[ib]==BCid){ eBC[imatch]=srfIDGidx[BC_scan]; @@ -842,14 +849,6 @@ if(1==0){ if(part==0) { imatch++; } else break; } - -/* works but valgrind no likey - while (srfIDG[BC_scan]==BCid&&BC_scan<*totBel) { - eBC[imatch]=srfIDGidx[BC_scan]; - BC_scan++; - imatch++; - } -*/ //reorder SurfID = 1 and 2 using idmapD{1,2} based on distance to support periodicity if(BCid==1) { for (int i = 0; i < nmatchFace; i++) periodic1[i]=eBC[imapD1[i]]; @@ -885,6 +884,63 @@ if(0==1) { free(imapD1); free(imapD2); free(eBC); free(srfIDG); free(srfIDGidx); } +void CGNS_NodalSolution(int F,int B,int Z, Output& o) +{ + // create a nodal solution + char fieldName[12]; + snprintf(fieldName, 13, "solution"); + printf("solution=%s",fieldName); + double* data; + int size, S,Q; + detachField(o.mesh, fieldName, data, size); + assert(size==5); + +// create the field data for this process + double* p = (double *)malloc(o.iownnodes * sizeof(double)); + double* u = (double *)malloc(o.iownnodes * sizeof(double)); + double* v = (double *)malloc(o.iownnodes * sizeof(double)); + double* w = (double *)malloc(o.iownnodes * sizeof(double)); + double* T = (double *)malloc(o.iownnodes * sizeof(double)); + int icount=0; + int num_nodes=o.mesh->count(0); + cgsize_t gnod,start,end; + start=o.local_start_id; + end=start+o.iownnodes-1; + for (int n = 0; n < num_nodes; n++) { + gnod=o.arrays.ncorp[n]; + if(gnod >= start && gnod <= end) { // solution to write + p[icount]= data[0*num_nodes+n]; + u[icount]= data[1*num_nodes+n]; + v[icount]= data[2*num_nodes+n]; + w[icount]= data[3*num_nodes+n]; + T[icount]= data[4*num_nodes+n]; + icount++; + } + } +// write the solution field data in parallel + if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &S) || + cgp_field_write(F, B, Z, S, CG_RealDouble, "Pressure", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, p)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityX", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, u)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityY", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, v)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityZ", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, w)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "Temperature", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, T)) + cgp_error_exit(); + free(p); free(u); free(v); free(w); free(T); free(data); +} void writeCGNS(Output& o, std::string path) { @@ -897,8 +953,6 @@ void writeCGNS(Output& o, std::string path) std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; - int Fp, Fu, Fv, Fw, FT; - int Sp, Su, Sv, Sw, ST; cgsize_t sizes[3],*e, start, end; int num_nodes=m->count(0); @@ -985,56 +1039,7 @@ if(0==1) { if(j==2) 
if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); - // create a nodal solution - char fieldName[12]; - snprintf(fieldName, 13, "solution"); - printf("solution=%s",fieldName); - double* data; - int size; - detachField(o.mesh, fieldName, data, size); - assert(size==5); - -// create the field data for this process - double* p = (double *)malloc(o.iownnodes * sizeof(double)); - double* u = (double *)malloc(o.iownnodes * sizeof(double)); - double* v = (double *)malloc(o.iownnodes * sizeof(double)); - double* w = (double *)malloc(o.iownnodes * sizeof(double)); - double* T = (double *)malloc(o.iownnodes * sizeof(double)); - int icount=0; - for (int n = 0; n < num_nodes; n++) { - gnod=o.arrays.ncorp[n]; - if(gnod >= start && gnod <= end) { // solution to write - p[icount]= data[0*num_nodes+n]; - u[icount]= data[1*num_nodes+n]; - v[icount]= data[2*num_nodes+n]; - w[icount]= data[3*num_nodes+n]; - T[icount]= data[4*num_nodes+n]; - icount++; - } - } -// write the solution field data in parallel - if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &Sp) || - cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Pressure", &Fp)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fp, &start, &end, p)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityX", &Fu)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fu, &start, &end, u)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityY", &Fv)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fv, &start, &end, v)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "VelocityZ", &Fw)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, Fw, &start, &end, w)) - cgp_error_exit(); - if ( cgp_field_write(F, B, Z, Sp, CG_RealDouble, "Temperature", &FT)) - cgp_error_exit(); - if (cgp_field_write_data(F, B, Z, Sp, FT, &start, &end, T)) - cgp_error_exit(); - free(p); free(u); free(v); free(w); free(T); free(data); + CGNS_NodalSolution(F,B,Z,o); // create Helper array for number of elements on rank if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || @@ -1047,8 +1052,7 @@ if(0==1) { printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) cgp_error_exit(); -// if(o.writeCGNSFiles > 1) -// got split into 4 writeBlocksCGNS(F,B,Z, o); + cgsize_t e_written=0; cgsize_t totBel; writeBlocksCGNSinteror(F,B,Z,o,&e_written); From cb1131abcea4279a782fd1d178dd72ce37ba4853 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 18 Aug 2023 18:43:20 -0600 Subject: [PATCH 49/68] helper functions broke up remaining code such that all functions fit in a 105 row vi window even with copious debug print statements. Valgrind also checked in for np=1 and 2 where no leaks are attributed to arrays we allocate-- same for other errors which seem to be wholly within HDF5. --- phasta/phCGNSgbc.cc | 150 +++++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 72 deletions(-) diff --git a/phasta/phCGNSgbc.cc index 58c416e7d..bab28466e 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -48,7 +48,7 @@ MPI_Datatype getMpiType(T) { // https://www.geeksforgeeks.org/sorting-array-according-another-array-using-pair-stl/ // Sort an array according to -// other using pair in STL.
Modified to be real-int pair (for distance matching) and in a separate routine, two integers (for idx sort by surfID) #include using namespace std; @@ -649,7 +649,7 @@ if(1==0){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn } *totBel = *e_written-eVolElm; } -void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, cgsize_t *eVolElm, int nblkb) +void writeCGNS_UserData_srfID(int F,int B, int* srfID, int* startBelBlk, int *endBelBlk, cgsize_t *e_written, cgsize_t *totBel, cgsize_t *eVolElm, int nblkb) { cgsize_t e_owned, e_start,e_end; int Fsb; @@ -660,7 +660,7 @@ void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelB cgp_error_exit(); // write the user data for this process for (int i = 0; i < nblkb; ++i) { - int e_startB=startBelBlk[i]-*eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from + int e_startB=0; //startBelBlk[i]-*eVolElm-1; // srfID is only for bel....matches linear order with eVolElm offset from // bel# that starts from last volume element e_owned=endBelBlk[i]-startBelBlk[i]+1; e_start=0; @@ -668,7 +668,7 @@ void writeCGNS_UserData(int F,int B, int* srfID, int* startBelBlk, int *endBelB MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); e_start+=1+*e_written; // my parts global element start 1-based e_end=e_start+e_owned-1; // my parts global element stop 1-based - printf("Bndy %s, %ld, %ld, %ld, %d, %d \n", "srfID", e_start, e_end, e_owned, i, Fsb); + printf("BndyUserData %s, %ld, %ld, %ld, %d, %d %d \n", "srfID", e_start, e_end, e_owned, i, e_startB,*totBel); if (cgp_array_write_data(Fsb, &e_start, &e_end, &srfID[e_startB])) cgp_error_exit(); long safeArg=e_owned; // is cgsize_t which could be an 32 or 64 bit int @@ -701,8 +701,8 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i printf(" srfID2dist GLOBAL B "); for(int is=0; is< nmatchFace; ++is) printf("%f ", srfID2distSq[is]); printf("\n"); printf(" imapD2 GLOBAL B "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD2[is]); printf("\n"); } } - pairsortDI(srfID1distSq,imapD1,nmatchFace); - pairsortDI(srfID2distSq,imapD2,nmatchFace); + pairsortDI(srfID1distSq,imapD1,nmatchFace); // imapD1 puts elements with srfID=1 in order of increasing disatnce from pt 10, 0 0 + pairsortDI(srfID2distSq,imapD2,nmatchFace); // imapD1 puts elements with srfID=2 in order of increasing disatnce from pt 10, 0 0 if(1==0){ if(part==0) { printf(" srfID1dist GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%f ", srfID1distSq[is]); printf("\n"); @@ -713,7 +713,6 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i printf(" imapD2 GLOBAL "); for(int is=0; is< nmatchFace; ++is) printf("%d ", imapD2[is]); printf("\n"); } } - double tol=1.0e-12; double tol2=1.0e-14; int jclosest, iclose1, iclose2; double d1,d2,vDistSq,vDSmin; @@ -726,7 +725,7 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i vDistSq= d1*d1+d2*d2; if(vDistSq < tol2) { imapD2v[i]=imapD2[i]; - } else {// Centroid for i-1 did not match-> search list srfID=2 list to find true match + } else {// Centroid for i did not match-> search list srfID=2 list to find true match vDSmin=vDistSq; DistFails++; for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be taken a lot then it could be narrowed e.g. 
j=max(0,i-50), j< i+min(matchFace,i+50), @@ -737,8 +736,10 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i if(vDistSqcount(0); + cgsize_t gnod; + cgsize_t start=o.local_start_id; + cgsize_t end=start+o.iownnodes-1; + double* x = (double *)malloc(o.iownnodes * sizeof(double)); + for (int j = 0; j < 3; ++j) { + int icount=0; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= start && gnod <= end) { // coordinate to write + x[icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + } +if(0==1) { + printf("%ld, %ld \n", start, end); + for (int ne=0; necount(0); - if(1==0){ // ilwork debugging for (int ipart=0; ipart PETSc global node number (1-based) // o.iownnodes => nodes owned by this rank // o.local_start_id => this rank's first node number (1-based and also which must be a long long int) - long safeArg=o.iownnodes; // cgsize_t could be an int sizes[0]=PCU_Add_Long(safeArg); int ncells=m->count(m->getDimension()); // this ranks number of elements @@ -1009,36 +1045,7 @@ if(1==0){ cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5); - - if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) - cgp_error_exit(); - -// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. - cgsize_t gnod; - start=o.local_start_id; - end=start+o.iownnodes-1; - double* x = (double *)malloc(o.iownnodes * sizeof(double)); - for (int j = 0; j < 3; ++j) { - int icount=0; - for (int inode = 0; inode < num_nodes; ++inode){ - gnod=o.arrays.ncorp[inode]; - if(gnod >= start && gnod <= end) { // coordinate to write - x[icount]= o.arrays.coordinates[j*num_nodes+inode]; - icount++; - } - } -if(0==1) { - printf("%ld, %ld \n", start, end); - for (int ne=0; ne Date: Sat, 19 Aug 2023 13:01:05 -0600 Subject: [PATCH 50/68] Computing translation vector by difference of the centroids of the first ordered elements, but I find the documentation unclear so it might be the negation. Also moved the pair sort data structure to the heap (was on stack, which is not nice for large meshes -> segfault without increasing stack). Also limited debug prints with an extern defined variable (better way??) and a few other cleanups of unused arrays. Eliminated output model as an argument (9) from matchedNodeElmReader because it is never used. --- phasta/phCGNSgbc.cc | 88 ++++++++++++++++++------------------ pumi-meshes | 2 +- test/matchedNodeElmReader.cc | 7 ++- 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/phasta/phCGNSgbc.cc index bab28466e..996c97e62 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -23,6 +23,8 @@ #endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER +extern cgsize_t nDbgCG=50; +extern int nDbgI=50; namespace { @@ -56,7 +58,7 @@ using namespace std; // according to the order defined by a[] void pairsortDI(double a[], int b[], int n) { - pair<double, int> pairt[n]; + pair<double, int> *pairt = new pair<double, int>[n]; // when done delete [] pairt; // Storing the respective array // elements in pairs.
@@ -75,13 +77,14 @@ void pairsortDI(double a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } + delete [] pairt; } // Function to sort integer array b[] // according to the order defined by a[] void pairsort(int a[], int b[], int n) { - pair<int, int> pairt[n]; + pair<int, int> *pairt = new pair<int, int>[n]; // Storing the respective array // elements in pairs. @@ -100,6 +103,7 @@ void pairsort(int a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } + delete [] pairt; } void pairDeal6sort(int a[], int b[], int n) { @@ -515,7 +519,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) if(1==0){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); - for (int ne=0; ne " " " " " - " " "turn off verify mesh if equal 1 (on if you give nothing)\n", argv[0]); } @@ -740,14 +739,14 @@ int main(int argc, char** argv) gmi_register_mesh(); gmi_register_null(); - if( argc == 11 ) noVerify=atoi(argv[10]); + if( argc == 11 ) noVerify=atoi(argv[9]); double t0 = PCU_Time(); MeshInfo m; readMesh(argv[2],argv[3],argv[4],argv[5],argv[6],argv[7],argv[8],m); bool isMatched = true; - if( !strcmp(argv[3], "NULL") ) + if( !strcmp(argv[4], "NULL") ) isMatched = false; if(!PCU_Comm_Self()) @@ -795,7 +794,7 @@ int main(int argc, char** argv) outMap.clear(); apf::writeVtkFiles("rendered",mesh); - mesh->writeNative(argv[10]); + mesh->writeNative(argv[9]); if(noVerify != 1) mesh->verify(); mesh->destroyNative(); From 449025c61eb46191650868f0bf6d0df289f1b4ab Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 19 Aug 2023 14:33:15 -0600 Subject: [PATCH 51/68] Centroid sort now only done on rank 0 and results bcast back to other ranks since CGNS requires its serial writers to all have the same information --- phasta/phCGNSgbc.cc | 54 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/phasta/phCGNSgbc.cc index 996c97e62..cc188d4d0 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -757,6 +757,32 @@ void sortID1andID2(double* srfID1GCen,double* srfID2GCen, int nmatchFace, int* i free(srfID1distSq); free(srfID2distSq); free(imapD2v); } +void GatherCentroid(double** srfIDCen,int* srfIDOnBlk, double** srfIDGCen, int *nmatchFace, int nblkb) +{ +// stack connectivities on rank before gather (should preserve order) + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + int* rcounts = (int *)malloc( num_parts * sizeof(int)); + int* displs = (int *)malloc( num_parts * sizeof(int)); + int numSurfIDOnRank=0; + for (int i = 0; i < nblkb; ++i) numSurfIDOnRank+=srfIDOnBlk[i]; + double* srfIDCenAllBlocks = (double *)malloc(numSurfIDOnRank*3 * sizeof(double)); + int k1=0; + for (int i = 0; i < nblkb; ++i) + for (int j = 0; j < srfIDOnBlk[i]*3; ++j) srfIDCenAllBlocks[k1++]=srfIDCen[i][j]; + int ncon=numSurfIDOnRank*3; + auto type_i = getMpiType( int() ); + MPI_Gather(&ncon,1,type_i,rcounts,1,type_i,0,MPI_COMM_WORLD); + displs[0]=0; + for (int i = 1; i < num_parts; ++i) displs[i]=displs[i-1]+rcounts[i-1]; + int GsrfIDcnt=displs[num_parts-1]+rcounts[num_parts-1]; + *nmatchFace=GsrfIDcnt/3; + if(part==0) *srfIDGCen = (double *)malloc( GsrfIDcnt * sizeof(double)); +if(1==0){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } + auto type_d = getMpiType( double() ); + MPI_Gatherv(srfIDCenAllBlocks,ncon,type_d,*srfIDGCen,rcounts,displs,type_d,0, MPI_COMM_WORLD); + free(srfIDCenAllBlocks); +} void AllgatherCentroid(double** srfIDCen,int*
srfIDOnBlk, double** srfIDGCen, int *nmatchFace, int nblkb) { // stack connectivities on rank before gather (should preserve order) @@ -784,8 +810,8 @@ if(1==0){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", } void Allgather2IntAndSort(int* srfID, int* srfIDidx,Output& o,int* srfIDG, int* srfIDGidx, int nblkb) { - const int num_parts = PCU_Comm_Peers(); const int part = PCU_Comm_Self() ; + const int num_parts = PCU_Comm_Peers(); const cgsize_t part_cg=part; int* rcounts = (int *)malloc( num_parts * sizeof(int)); int* displs = (int *)malloc( num_parts * sizeof(int)); @@ -827,11 +853,15 @@ void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double* srfID1GCen; double* srfID2GCen; int nmatchFace1,nmatchFace; - AllgatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); - AllgatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); - assert(nmatchFace1==nmatchFace); +// AllgatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); +// AllgatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); + GatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nblkb); + GatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nblkb); + if(part==0) printf("matchface %d, %d", nmatchFace1, nmatchFace); + if(part==0) assert(nmatchFace1==nmatchFace); // compute the translation while we still have ordered centroids data Assuming Translation = donor minus periodic but documents unclear - const float Translation[3]={ (srfID2GCen[0]-srfID1GCen[0]), (srfID2GCen[1]-srfID1GCen[1]), (srfID2GCen[2]-srfID1GCen[2])}; + double TranslationD[3]; + if (part==0){ TranslationD[0]=srfID2GCen[0]-srfID1GCen[0]; TranslationD[1]=srfID2GCen[1]-srfID1GCen[1];TranslationD[2]=srfID2GCen[2]-srfID1GCen[2];} if(1==0){ printf("%d part srfID 1 xc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID1GCen[ip*3+0]); printf("\n"); } if(1==0){ printf("%d part srfID 1 yc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID1GCen[ip*3+1]); printf("\n"); } if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID1GCen[ip*3+2]); printf("\n"); } @@ -839,10 +869,18 @@ if(1==0){ printf("%d part srfID 1 zc ",part); for(int ip=0; ip< std::min(nDbgI, if(1==0){ printf("%d part srfID 2 xc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID2GCen[ip*3+0]); printf("\n"); } if(1==0){ printf("%d part srfID 2 yc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID2GCen[ip*3+1]); printf("\n"); } if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< std::min(nDbgI,nmatchFace); ++ip) printf("%f ", srfID2GCen[ip*3+2]); printf("\n"); } + auto type_i = getMpiType( int() ); + MPI_Bcast(&nmatchFace,1,type_i,0, MPI_COMM_WORLD); int* imapD1 = (int *)malloc( nmatchFace * sizeof(int)); int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); - sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); - free(srfID1GCen); free(srfID2GCen); + if(part==0) sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); + PCU_Barrier(); + printf("Barrier %d %d",part,nmatchFace); + MPI_Bcast(imapD1,nmatchFace,type_i,0, MPI_COMM_WORLD); + MPI_Bcast(imapD2,nmatchFace,type_i,0, MPI_COMM_WORLD); + auto type_d = getMpiType( double() ); + MPI_Bcast(TranslationD,3,type_d,0, MPI_COMM_WORLD); + if(part==0) {free(srfID1GCen); free(srfID2GCen);} // ZonalBC data int* srfIDG = (int *)malloc( 
*totBel * sizeof(int)); int* srfIDGidx = (int *)malloc( *totBel * sizeof(int)); @@ -890,6 +928,8 @@ if(0==1) { CGNS_ENUMV(Integer), nmatchFace, donor2, &cgconn)) cgp_error_exit(); const float RotationCenter[3]={0}; const float RotationAngle[3]={0}; + const float Translation[3]={TranslationD[0],TranslationD[1],TranslationD[2]}; + if (cg_conn_periodic_write(F, B, Z, cgconn, RotationCenter, RotationAngle, Translation)) cgp_error_exit(); free(imapD1); free(imapD2); free(eBC); free(srfIDG); free(srfIDGidx); From 1c8507d7e9478bccb4b34ad9d0fb19a2fbcc5b08 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 21 Aug 2023 21:54:53 -0600 Subject: [PATCH 52/68] incomplete fix of matchedNodeElmReader...forgot to change the expected argument count. --- phasta/phCGNSgbc.cc | 2 +- test/matchedNodeElmReader.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/phasta/phCGNSgbc.cc index cc188d4d0..847413b98 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -517,7 +517,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) cgp_error_exit(); *e_written=e_endg; // update count of elements written -if(1==0){ +if(1==1){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); for (int ne=0; ne no rank but .rank added to next 6 " " " From 61d2cbbbf42e9304512996fb8b7af03261661f02 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Tue, 22 Aug 2023 00:01:57 -0600 Subject: [PATCH 53/68] hacky way of handling multiple topologies; wrote connectivity without hanging but must be done better --- phasta/phCGNSgbc.cc | 70 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/phasta/phCGNSgbc.cc index 847413b98..b97408a59 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -473,9 +473,33 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); - for (int i = 0; i < o.blocks.interior.getSize(); ++i) { + int nblki= o.blocks.interior.getSize(); + if(nblki==1) { // this part has only one topology + int nvert = o.blocks.interior.keys[0].nElementVertices; + if( nvert==4) {// need to make an empty wedge block + e_owned=0; + // cgsize_t* e = (cgsize_t *)malloc(nvert * 1 * sizeof(cgsize_t)); + e_startg=1+*e_written; // start for the elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + char Ename[5]; + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) + cgp_error_exit(); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based +// fail??
e_end=e_start+e_owned-1; // my parts global element stop 1-based + e_end=e_start; // my parts global element stop 1-based + // write the element connectivity in parallel + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, NULL)) + cgp_error_exit(); + } + //free(e); + } + for (int i = 0; i < nblki; ++i) { BlockKey& k = o.blocks.interior.keys[i]; - std::string phrase = getBlockKeyPhrase(k, "connectivity interior "); e_owned = o.blocks.interior.nElements[i]; int nvert = o.blocks.interior.keys[i].nElementVertices; cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); @@ -519,13 +543,13 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) if(1==1){ printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); - for (int ne=0; ne Date: Wed, 23 Aug 2023 10:02:23 -0600 Subject: [PATCH 54/68] multi-topology hopefully handled properly now. Passes small tests but could still be corner cases laying in wait. --- phasta/phCGNSgbc.cc | 477 +++++++++++++++++++++++--------------------- 1 file changed, 250 insertions(+), 227 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index b97408a59..1ff65654a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -458,8 +458,54 @@ void getNaturalBCCodesCGNS(Output& o, int block, int* codes) // arbitrary combinations of BCs but leaving that out for now } -// renamed and calling the renamed functions above with output writes now to CGNS +void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 4: + snprintf(Ename, 4, "Tet"); + if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 5: + snprintf(Ename, 4, "Pyr"); + if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 6: + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 8: + snprintf(Ename, 4, "Hex"); + if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } + printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} +void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 3: + snprintf(Ename, 4, "Tri"); + if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 4: + snprintf(Ename, 5, "Quad"); + if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } + printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} +// renamed and calling the renamed functions above with output writes now to CGNS void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) { int E,S,Fs,Fs2,Fsb,Fsb2; @@ -474,136 +520,85 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); - if(nblki==1) { // this part has only one toplogy - int nvert = o.blocks.interior.keys[0].nElementVertices; - if( nvert==4) {// need to make an empty wedge block - e_owned=0; - // cgsize_t* e = (cgsize_t *)malloc(nvert * 1 * sizeof(cgsize_t)); - e_startg=1+*e_written; // start for the elements of this topology - long safeArg=e_owned; // e_owned is cgsize_t which could be 
an 32 or 64 bit int - e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology - char Ename[5]; - snprintf(Ename, 4, "Wdg"); - if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+*e_written; // my parts global element start 1-based -// fail?? e_end=e_start+e_owned-1; // my parts global element stop 1-based - e_end=e_start; // my parts global element stop 1-based - // write the element connectivity in parallel - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, NULL)) - cgp_error_exit(); - } - //free(e); - } - for (int i = 0; i < nblki; ++i) { - BlockKey& k = o.blocks.interior.keys[i]; - e_owned = o.blocks.interior.nElements[i]; - int nvert = o.blocks.interior.keys[i].nElementVertices; - cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - getInteriorConnectivityCGNS(o, i, e); - // create data node for elements - e_startg=1+*e_written; // start for the elements of this topology - long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int - e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology - char Ename[5]; - switch(nvert){ - case 4: - snprintf(Ename, 4, "Tet"); - if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - case 5: - snprintf(Ename, 4, "Pyr"); - if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &E)) - cgp_error_exit(); + int nvMap[4] = {4,5,6,8}; + int nvC,nvert,nvAll,invC,iblkC; + for (int i = 0; i < 4; ++i) { // check all topologies + nvAll=0; + nvC=nvMap[i]; + for (int j = 0; j < nblki; ++j) { // check all blocks + BlockKey& k = o.blocks.interior.keys[j]; + nvert = o.blocks.interior.keys[j].nElementVertices; + if(nvC==nvert) { + invC=1; + iblkC=j; break; - case 6: - snprintf(Ename, 4, "Wdg"); - if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &E)) + } else invC=0; + } + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; // = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + if(nvAll!=0) { //nvC present on at least 1 rank + if(invC!=0){ //nvC present on my rank + e_owned = o.blocks.interior.nElements[iblkC]; + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, iblkC, e); + } + else e_owned=0; + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + e_startg=1+*e_written; // start for the elements of this topology + char Ename[5]; + topoSwitch(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + *e_written=e_endg; + if(invC!=0) free(e); + // create the field data for this process + int* d = NULL; + if(invC!=0){ //nvC present on my rank +//KEN LEARN int* d = (int *)malloc(e_owned * sizeof(int)); + d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = part; + // write the solution field data in parallel + } + if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + cgp_error_exit(); 
+ if(invC!=0) free(d); + char UserDataName[11]; + snprintf(UserDataName, 11, "n%sOnRank", Ename); + // create Helper array for number of elements on part of a given topology + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write(UserDataName, CG_Integer, 1, &num_parts_cg, &Fs2)) cgp_error_exit(); - break; - case 8: - snprintf(Ename, 4, "Hex"); - if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &E)) + // create the field data for this process + int nIelVec=e_owned; + cgsize_t partP1=part+1; + printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) cgp_error_exit(); - break; - } - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+*e_written; // my parts global element start 1-based - e_end=e_start+e_owned-1; // my parts global element stop 1-based - // write the element connectivity in parallel - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) - cgp_error_exit(); - *e_written=e_endg; // update count of elements written if(1==1){ - printf("interior cnn %d, %ld, %ld \n", part, e_start, e_end); + printf("interior cnn %s %d %ld %ld \n", Ename,part, e_start, e_end); // for (int ne=0; ne 2) { int E,Fsb,Fsb2; const int num_parts = PCU_Comm_Peers(); const cgsize_t num_parts_cg=num_parts; @@ -613,97 +608,114 @@ void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfI cgsize_t e_startg,e_endg; cgsize_t eVolElm=*e_written; cgsize_t e_belWritten=0; - int triCount=0; - int quadCount=0; - int totOnRankBel=0; - for (int i = 0; i < nblkb; ++i) - totOnRankBel += o.blocks.boundary.nElements[i]; - - for (int i = 0; i < o.blocks.boundary.getSize(); ++i) { - BlockKey& k = o.blocks.boundary.keys[i]; - e_owned = o.blocks.boundary.nElements[i]; - int nvert = o.blocks.boundary.keys[i].nBoundaryFaceEdges; - cgsize_t* e = (cgsize_t *)malloc(nvert * e_owned * sizeof(cgsize_t)); - double* eCenx = (double *)malloc( e_owned * sizeof(double)); - double* eCeny = (double *)malloc( e_owned * sizeof(double)); - double* eCenz = (double *)malloc( e_owned * sizeof(double)); - getBoundaryConnectivityCGNS(o, i, e,eCenx,eCeny,eCenz); - e_startg=1+*e_written; // start for the elements of this topology - long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int - cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology - e_endg=*e_written + numBelTP; // end for the elements of this topology - if(nvert==3) triCount++; - if(nvert==4) quadCount++; - char Ename[7]; - switch(nvert){ - case 3: - snprintf(Ename, 5, "Tri%d",triCount); - if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; - case 4: - snprintf(Ename, 6, "Quad%d",quadCount); - if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &E)) - cgp_error_exit(); - break; + int nvMap[2] = {3,4}; + int iblkC[2]; + int estart[2]; + int nvC,nvert,nvAll,invC; + for (int j = 0; j < nblkb; ++j) { // check all blocks + BlockKey& k = o.blocks.boundary.keys[j]; + nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; + } + for (int i = 0; i < 2; ++i) { // check all topologies + nvAll=0; + nvC=nvMap[i]; + invC=0; + int icountB=0; + for (int j = 0; j < nblkb; ++j) { // check all blocks + BlockKey& k = o.blocks.boundary.keys[j]; + nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; + if(nvert==nvC) { + invC=1; + 
iblkC[icountB]=j; // mark the block numbers (could be more than one) that have current topology + icountB++; + } } - e_start=0; - auto type = getMpiType( cgsize_t() ); - MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); - e_start+=1+*e_written; // my parts global element start 1-based - e_end=e_start+e_owned-1; // my parts global element stop 1-based - // write the element connectivity in parallel - if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) - cgp_error_exit(); - printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; double* eCenx=NULL; double* eCeny=NULL; double* eCenz=NULL; + if(nvAll!=0) { //nvC present on at least 1 rank + e_owned=0; + if(invC!=0){ //nvC present on my rank + for (int j = 0; j < icountB; ++j) { // combine blocks + estart[j]=e_owned; + e_owned += o.blocks.boundary.nElements[iblkC[j]]; + } + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + eCenx = (double *)malloc( e_owned * sizeof(double)); + eCeny = (double *)malloc( e_owned * sizeof(double)); + eCenz = (double *)malloc( e_owned * sizeof(double)); + for (int j = 0; j < icountB; ++j) {// combine blocks + getBoundaryConnectivityCGNS(o, iblkC[j], &e[estart[j]], &eCenx[estart[j]], + &eCeny[estart[j]], &eCenz[estart[j]]); // stack repeated topologies + getNaturalBCCodesCGNS(o, iblkC[j], &srfID[e_belWritten+estart[j]]); // note e_owned counts all same topo + } + (*nStackedOnRank)++; // no longer have nblkb blocks so count them as you stack them + } + e_startg=1+*e_written; // start for the elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology + e_endg=*e_written + numBelTP; // end for the elements of this topology + char Ename[6]; + topoSwitchB(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + // write the element connectivity in parallel + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(1==0){ for (int ne=0; ne 2) { int nblkb = o.blocks.boundary.getSize(); - double** srfIDCen1 = new double*[nblkb]; + double** srfIDCen1 = new double*[nblkb]; // might not all be used double** srfIDCen2 = new double*[nblkb]; int totOnRankBel=0; for (int i = 0; i < nblkb; ++i) @@ -1166,17 +1187,19 @@ if(1==0){ int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); - writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, nblkb); + int nStackedOnRank; + writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); free(srfID); free(srfIDidx); 
free(srfID1OnBlk); free(srfID2OnBlk); free(startBelBlk); free(endBelBlk); - for (int i = 0; i < nblkb; ++i) delete [] srfIDCen1[i]; - for (int i = 0; i < nblkb; ++i) delete [] srfIDCen2[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; delete [] srfIDCen1; delete [] srfIDCen2; if(cgp_close(F)) cgp_error_exit(); double t1 = PCU_Time(); if (!PCU_Comm_Self()) lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); + } } } // namespace From 35426c66875d4db6b4157578ec6c15a9f4e42218 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Fri, 25 Aug 2023 11:42:02 -0600 Subject: [PATCH 55/68] Adding a text file to describe developments, current assumptions/limitations, and future paths to improvement --- phasta/CGNSFileWritingDev.txt | 76 +++++++++++++++++++++++++++++++++++ pumi-meshes | 2 +- 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 phasta/CGNSFileWritingDev.txt diff --git a/phasta/CGNSFileWritingDev.txt b/phasta/CGNSFileWritingDev.txt new file mode 100644 index 000000000..4441cf355 --- /dev/null +++ b/phasta/CGNSFileWritingDev.txt @@ -0,0 +1,76 @@ +CGNS output from Chef + + + +This document describes work done to get CGNS output from Chef. + + + +Before doing that, I am going to list EXPECTATIONS of CGNS and how they align or not with classic Chef/PHASTA vs. PETSc/CEED-PHASTA (definitions: rank = part = process; I will use part). + +I) CGNS expects global numbering for mesh nodes and elements and that numbering MUST start from 1 (not zero). + +II) The global numbering of elements is inclusive of both volume elements and boundary elements and also inclusive of all topologies, with the numbering-start determined by what order you write them to file (might not be a requirement but it is the simplest way when streaming). + +III) If using parallel writing (which we will have to do for any realistic size mesh), the ownership of the writer must be exclusive (write no data you don't own), continuous (no skipped global numbers), and linearly increasing with part number (e.g., rank0 starts from 1 and ends on nOwnedByRank0, rank1 starts from nOwnedByRank0+1 and ends on nOwnedByRank1+nOwnedByRank0, and so on). + + + + + +Going to a separate enumeration to discuss how that translated to our work on that now: + +Starting with the most basic, CGNS has the concept of a Base. We keep life simple and only have 1 base. +CGNS has the concept of a Zone. Someday if we get into overset grids (not likely) we might have more, but for now we only support 1 zone. +Within a Zone we will always be type Unstructured, and a few things must be described while others are optional. CGNS provides writer "functions" cg_ or cgp_, and these have a structure in which one function establishes the file-node in the file/database and then you are able to call a second function to write the data at that node (this is a little bit like PHASTAIO's notion of write/read header followed by write/read data). cg_ means all parallel processes must have identical data to write, while cgp_ allows each process to write its portion of the data and CGNS collects (interpret collect as MPIO collective operations) that data within an HDF5 file. +Chef was co-developed with PHASTA to avoid global numbering and instead number from 0 to n_entity-1 on each rank when parallel, and to have separate data structures which tracked which rank owned a given entity and which ranks had remote copies of those entities.
Chef created data structures for PHASTA to use to manage this partition-specific ownership. Thus, before we can write any parallel distributed data with the CGNS functions described in 3., we needed to create a map from PHASTA's numbering to a numbering that satisfies I)-III). Since that global node numbering is basically the same as PETSc's with a shift by 1, I copied code from PHASTA that did that for use with PETSc solvers (common/gen_ncorp.c) and modified it. That also needed the functionality of commuInt.f to communicate ownership on part boundaries back to all the replicas on other parts (which in turn required a chunk of code from ctypes.f), translated to C. All of this code makes use of the ilwork data structure that helps PHASTA know how to set up and efficiently perform peer-to-peer communication. At the end of this code insertion/translation, we have an ncorp array that maps from PHASTA/Chef numbering to CGNS numbering on each part, and thus we can now start to describe the arrays that are written.
+CGNS of course has to store coordinates. It does so as flat double lists one dimension at a time, so that means CoordinatesX, then CoordinatesY, then CoordinatesZ for us. To be clear, to use the cgp mid-level functions to write these in parallel, PHASTA/Chef's part coordinate list must be sifted down to just its owners using ncorp described in 4., and that compact ownership array (data satisfying I)-III)) is passed through the cgp_write functions (both file-node creation and parallel data write).
+Next in our output, though not absolutely required, is Solution. Similarly to step 5., CGNS has a function to create a file-node for Solution and then you add as many fields as needed to that (currently I have only coded Pressure, VelocityX, VelocityY, VelocityZ, and Temperature). Note CGNS is a standard and it mandates the names of these and any additional fields we might want to add, so read the docs. As with 5., these have to be sifted and mapped through ncorp to convert PHASTA/Chef's numbering to a compact array that can be written in parallel using the cgp writers. Note, as of 4.4.0, it looks to be possible to aggregate the writes described in 5. and 6. through cgp_coord_multi_* and cgp_field_multi_* respectively, but this has not been explored yet.
+Next in the file is some User Data that was a backdoor to writing some data in parallel and to support parallel read with less work; I may describe it more later, but it is not required by CGNS so I skip it for now.
+Next is a cell-centered solution file (that just means one value per 3D element or cell) that I put the RankOfWriter field in. This is likely what the PETSc reader will use to understand the partition that Chef used to write this file, and if that part-count matches the PETSc reader and solver, the file can be read and processed to derive all the parallel data structures PETSc/CEED-PHASTA needs. A CGNS issue was filed to determine if they have a standard for this and, if not, interest in developing one.
+Next is the first 3D element topology connectivity. Basically we create a separate node for each element topology and establish global numbering by rank (easy, as there are no replicas of elements and thus the ownership range was established definitively by the partitioner; the ownership range just jumps by the number of that element type on a given part). If multi-topology, this repeats for the rest of the 3D element topologies.
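+As an aside, the exclusive, contiguous ranges demanded by III) fall out of a single MPI_Exscan per topology. A minimal sketch of the range computation follows (an illustration, not the actual code; it assumes a 64-bit cgsize_t so MPI_LONG_LONG is the matching MPI type, and the variable names merely mirror phCGNSgbc.cc):
+
+    #include <mpi.h>
+    // Compute this part's inclusive, 1-based global element range for one
+    // topology, continuing after e_written elements from earlier sections.
+    void ownedRange(long long e_owned, long long e_written,
+                    long long* e_start, long long* e_end) {
+      long long below = 0; // sum of e_owned over all lower ranks
+      MPI_Exscan(&e_owned, &below, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
+      int self; MPI_Comm_rank(MPI_COMM_WORLD, &self);
+      if (self == 0) below = 0; // MPI_Exscan leaves rank 0's result undefined
+      *e_start = below + 1 + e_written;  // 1-based start for this part
+      *e_end   = *e_start + e_owned - 1; // inclusive stop for this part
+    }
+
+Rank 0 thus starts at 1+e_written, rank 1 continues immediately after rank 0's last element, and so on, which is exactly the ownership pattern III) requires.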
+Next is the first 2D boundary element topology, which follows the concepts of 9., as do the subsequent 2D boundary element topologies. At this time we have elected to write all the elements of a given topology in a single CGNS file-node even if they are distributed across multiple geometric model surfaces (not the only option). Note, since ALL cgp and cg writes are collective, all ranks, even those without boundary elements (or interior elements of a given topology), must participate. Obviously the same holds for the MPI_Exscan and PCU collectives.
+It was decided in the first pass to forgo writing ZonalBCs based on nodes in favor of writing them as mesh-sets (CGNS calls them PointLists, abstracting the face numbers to the non-existent point at the centroid of the mesh face), which are face numbers with a particular surfID set in the smd (GUI if Simmetrix model-based) or spj (flat text file if working with a dmg model, as we do with the MATLAB->MGEN-MNER or SIMMETRIX->{MDLCONVERT,CONVERT(withExtrude)} workflows) to get to chef inputs. PETSc will then parse these mesh-sets into DMLabels for the boundary of the mesh. Then, it will handle Dirichlet and Neumann boundary conditions as it normally does (based on yaml input as to what type of BC is on a particular surfID number). For now we have a rather rigid prototype code that is limited to processing and writing 6 distinct mesh sets (one for each of the 6 faces of our topological box). It should not be hard to extend and generalize this code, but we took this shortcut in the first version. CGNS clearly supports direct nodal/Dirichlet PointSet but we have CHOSEN not to pursue this in the first pass.
+Last but certainly not least is a file-node called ZonalGridConnectivity, which is how CGNS encodes periodic boundary conditions, as can be seen in the first/only leaf under that file-node, Periodic Connectivity. This has been set up rigidly to assume that the faces listed in PointList are ordered in the same way as the faces listed in PointListDonor and, further, that surfID=2 is the donor and surfID=1 is the periodic partner of the donor. This is again a shortcut or hardcoded link that assumes the spj file has put surfID=1 on the face that is the periodic match for the face that has surfID=2. These meshes obviously need to be matched meshes, and this creates an issue we still need to resolve (will describe soon). The code currently computes the translation between the two periodic planes. I found the documentation unclear but assumed that vector was FROM the donor TO the periodic plane. In the current inputs to the test codes the donor (surfID=2) is at zMax while the periodic plane (surfID=1) is at zMin, so this makes Translation[3]={0,0,-Lz}, but that might be backwards and would certainly be flipped if I got the FROM/TO flipped (here Lz is unsigned as it is the spanwise domain width). I made the code general by using the first surfID=2 element's centroid coordinates minus the first surfID=1 element's centroid coordinates (this picks up a y component of 1e-21 due to roundoff).
+
+While the above is functional, the already mentioned ambiguity and the following issues/limitations remain unresolved:
+
+If we feed the current code a matched mesh, ncorp will be computed incorrectly for every point that is on a part boundary that is also matched. The reason for this is that ilwork was set up for PHASTA's needs and capabilities.
As noted above, PHASTA has replica nodes as REAL nodes (nodes with local node ranges) that it uses for all on-rank work, and then the on-rank numbers do their parallel assembly with the true OWNER node, which in this case is not the node they physically share on the periodic plane; instead ilwork sets up a communication with the donor for that node. Consequently, if we use the ilwork data structure as it is made for PHASTA, ncorp will map that node to a global node number on the donor plane. Again, ilwork is only used in PHASTA for assembling equations, so this is right for PHASTA but will foul PETSc by providing a connectivity that gives global node numbers with coordinates on the donor plane.
+Currently ZonalBC does not support parallel BC writing (cg available but not cgp). James is working with the CGNS development group to develop cgp_ptset_* for reading/writing PointSet data (which is also used for ZonalGridConnectivity), but for now we are doing MPI_Allgather{v} operations so that cg is correct. Note it is Allgather and not Gather because CGNS does not let part 0 write in serial but instead requires all ranks to have the same data and all to call cg with that same data to have this work correctly. We are told by CGNS developers (and this seems like it has to be true) that only part=0 is actually writing, but we have observed that any non-matching data on part!=0 results in a failure of cg. This is a potential scalability issue but seems likely to be addressed through the development of cgp for ZonalBCs.
+
+Discussion of ISSUE 1) I just put this question to Jed and James in the GitHub PR, but in doing so I think it is clear that CGNS does need global node numbers for the periodic replicas of the owner/donor nodes. Thus we do need to do one of the following:
+
+A) Turn off matching (creates another conflict),
+
+B) suppress it during certain stages of chef's work, or similarly
+
+C) alter the code to not add this type of mapping in ilwork.
+
+The conflict with A) might be limited to Simmetrix wedge-tet meshes that won't match when there is an unstructured mesh region (like tets). TBH, on small meshes I can't get matching to work anyway. This is what we are currently doing, and it has forced us to re-discover matching through reordering the donor and periodic mesh sets to have their centroids match. To make this tractable, I did an MPI_Gather of the data to part 0 and did a serial sort there. This will eventually hit scalability issues (less of an issue for Q3 meshes as they are 3^d more coarse than Q1 meshes) but it is still not pretty. If we could keep matching on AND we were able to disable it from ilwork so that we build the global numbering that CGNS wants (periodic-nodes/replicas have a global node number), then we MIGHT be able to use matching to order the periodic-mesh set to match the order of the donor-mesh-set in parallel and avoid this serial bottleneck.
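+To make ISSUE 1 concrete, here is a toy illustration (hypothetical numbers, not from any real mesh): let periodic-plane node P be global node 1 at z=0 and its donor D be global node 2 at z=Lz on another part. A runnable sketch of the symptom:
+
+    #include <cstdio>
+    int main() {
+      const double Lz = 1.0;
+      double zOfGlobal[] = {0.0, Lz}; // z of global nodes 1 (P) and 2 (D)
+      int ncorpFromIlwork = 2; // matching-aware ilwork: P mapped to donor D
+      int ncorpCGNSwants  = 1; // CGNS: P keeps its own global number
+      // connectivity written through the matched ncorp hits the donor plane
+      std::printf("matched ncorp: corner at z=%g (donor plane, wrong)\n",
+                  zOfGlobal[ncorpFromIlwork - 1]);
+      std::printf("desired ncorp: corner at z=%g (periodic plane, right)\n",
+                  zOfGlobal[ncorpCGNSwants - 1]);
+      return 0;
+    }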
+
+CWS and KEJ discussed the following organization of options:
+
+I) Use SCOREC/core matching
+
+II) Order periodic faces without use of SCOREC/core matching information
+
+I) further breaks into the following steps and branches:
+
+To have matching available requires one of the following: a) replication of ilwork to an ilworkCGNS structure without accounting for periodic matching, so that it will build an ncorp as if matching were not present, or b) POSSIBLY, if the filterMatching flag is set, existing ilwork will create a PHASTA input set that is lacking periodicity and thus correct for CGNS, and yet the matching information is saved and can be restored for use in CGNS code to determine matching.
+The second aspect is how to do that matching, since inputs coming from the matchedNodeElement reader ONLY have VERTEX matching. Here again 3 options are possible: a) make the matching check for matches of nodes through face connectivity, b) make inputs to MNER richer to include face matching (likely available during mesh generation), c) develop code within MNER to elevate matching information to edges and faces (as is done with classification, though that is far easier because classification is to a geometric model simple enough to be on all ranks AND model entities are far fewer; this is currently limited to extrusions anyway, but so is periodicity, so not really a limitation).
+
+II) also breaks into at least 2 branches:
+
+distance of centroid, collected to rank0 and sorted (current approach), which we will likely use as long as the mesh size on the periodic plane does not make this intractable,
+OR a breadth-first search that starts with a single matched face (seed) and then adds neighboring faces to a list from which the next face in some order (could be centroidal distance or other) is chosen. If the mesh is matched, this ordering can proceed in parallel for both the donor and the periodic mesh set. When a face is added that touches a part boundary, existing part boundary adjacency information is used to continue the search on another rank.

diff --git a/pumi-meshes b/pumi-meshes
index 8b920cf7e..b7860281c 160000
--- a/pumi-meshes
+++ b/pumi-meshes
@@ -1 +1 @@
-Subproject commit 8b920cf7e0590befcce7a2af6e2c3f3ec6c89712
+Subproject commit b7860281c513fa44ee2047f7a3ad615b921d67cd

From 2931f6bfb69e7435a8e94c6a3ea48f5730a82101 Mon Sep 17 00:00:00 2001
From: "Kenneth E.
Jansen" Date: Sun, 27 Aug 2023 18:19:29 +0000 Subject: [PATCH 56/68] Enums neededed to be compatable with non-Spack builds --- phasta/phCGNSgbc.cc | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1ff65654a..b7d505a39 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -464,22 +464,22 @@ void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_star switch(nvert){ case 4: snprintf(Ename, 4, "Tet"); - if (cgp_section_write(F, B, Z, Ename, CG_TETRA_4, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TETRA_4), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 5: snprintf(Ename, 4, "Pyr"); - if (cgp_section_write(F, B, Z, Ename, CG_PYRA_5, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PYRA_5), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 6: snprintf(Ename, 4, "Wdg"); - if (cgp_section_write(F, B, Z, Ename, CG_PENTA_6, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PENTA_6), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 8: snprintf(Ename, 4, "Hex"); - if (cgp_section_write(F, B, Z, Ename, CG_HEXA_8, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(HEXA_8), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; } @@ -492,12 +492,12 @@ void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_sta switch(nvert){ case 3: snprintf(Ename, 4, "Tri"); - if (cgp_section_write(F, B, Z, Ename, CG_TRI_3, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TRI_3), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; case 4: snprintf(Ename, 5, "Quad"); - if (cgp_section_write(F, B, Z, Ename, CG_QUAD_4, e_startg, e_endg, 0, &Ep)) + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(QUAD_4), e_startg, e_endg, 0, &Ep)) cgp_error_exit(); break; } @@ -516,8 +516,8 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; // create a centered solution - if (cg_sol_write(F, B, Z, "RankOfWriter", CG_CellCenter, &S) || - cgp_field_write(F, B, Z, S, CG_Integer, "RankOfWriter", &Fs)) + if (cg_sol_write(F, B, Z, "RankOfWriter", CGNS_ENUMV(CellCenter), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "RankOfWriter", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); int nvMap[4] = {4,5,6,8}; @@ -574,7 +574,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) // create Helper array for number of elements on part of a given topology if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write(UserDataName, CG_Integer, 1, &num_parts_cg, &Fs2)) + cgp_array_write(UserDataName, CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) cgp_error_exit(); // create the field data for this process int nIelVec=e_owned; @@ -705,7 +705,7 @@ if(1==1){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icn char UserDataName[12]; snprintf(UserDataName, 13, "n%sOnRank", Ename); if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write(UserDataName, CG_Integer, 1, &num_parts_cg, &Fsb2)) + cgp_array_write(UserDataName, CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fsb2)) cgp_error_exit(); printf("Bndy %s, %ld, %d, %d \n", UserDataName, e_owned, part,Fsb2); cgsize_t partP1=part+1; @@ -722,7 +722,7 @@ void 
writeCGNS_UserData_srfID(int F,int B, int* srfID, int* startBelBlk, int *e // setup User Data for boundary faces if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("srfID", CG_Integer, 1,totBel, &Fsb)) + cgp_array_write("srfID", CGNS_ENUMV(Integer), 1,totBel, &Fsb)) cgp_error_exit(); // write the user data for this process int nvMap[2] = {3,4}; @@ -1038,24 +1038,24 @@ void CGNS_NodalSolution(int F,int B,int Z, Output& o) } } // write the solution field data in parallel - if (cg_sol_write(F, B, Z, "Solution", CG_Vertex, &S) || - cgp_field_write(F, B, Z, S, CG_RealDouble, "Pressure", &Q)) + if (cg_sol_write(F, B, Z, "Solution", CGNS_ENUMV(Vertex), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Pressure", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, p)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityX", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityX", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, u)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityY", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityY", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, v)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "VelocityZ", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityZ", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, w)) cgp_error_exit(); - if ( cgp_field_write(F, B, Z, S, CG_RealDouble, "Temperature", &Q)) + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Temperature", &Q)) cgp_error_exit(); if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, T)) cgp_error_exit(); @@ -1064,9 +1064,9 @@ void CGNS_NodalSolution(int F,int B,int Z, Output& o) void CGNS_Coordinates(int F,int B,int Z,Output& o) { int Cx,Cy,Cz; - if (cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateX", &Cx) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateY", &Cy) || - cgp_coord_write(F, B, Z, CG_RealDouble, "CoordinateZ", &Cz)) + if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateZ", &Cz)) cgp_error_exit(); // condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. @@ -1153,7 +1153,7 @@ if(1==0){ if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; if ( cgp_open(outfile, CG_MODE_WRITE, &F) || cg_base_write(F, "Base", 3, 3, &B) || - cg_zone_write(F, B, "Zone", sizes, CG_Unstructured, &Z)) + cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5); @@ -1163,7 +1163,7 @@ if(1==0){ if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || cg_gorel(F, "User Data", 0, NULL) || - cgp_array_write("nCoordsOnRank", CG_Integer, 1, &num_parts_cg, &Fs2)) + cgp_array_write("nCoordsOnRank", CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) cgp_error_exit(); // create the field data for this process int nCoordVec=o.iownnodes; From 0d96a635bf16d123c9c129ab8244130c19bf78cc Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sun, 27 Aug 2023 12:21:04 -0600 Subject: [PATCH 57/68] srfID for BEL only and thus index must be offset by eVolElm --- phasta/phCGNSgbc.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1ff65654a..2f5c78f7a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -595,7 +595,6 @@ if(1==1){ } // end if ANY rank has this topology } // end of loop over ALL topologies PCU_Barrier(); - printf("rank=%d reached end of BlockInterior\n",part); } void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { @@ -697,8 +696,8 @@ if(1==0){ free(eCenx); free(eCeny); free(eCenz); if(1==1){ printf("CentroidCounts %d %d %d %d %d %d %d %d\n",part,icnt1, icnt2, j1, j2, e_owned, srfID1OnBlk[i],srfID2OnBlk[i]);} for (int j = 0; j < (int) e_owned; ++j) srfIDidx[e_belWritten+j]=e_start+j; - startBelBlk[idx]=e_start; // provides start point for each block in srfID - endBelBlk[idx]=e_end; // provides end point for each block in srfID + startBelBlk[idx]=e_start-eVolElm; // provides start point for each block in srfID + endBelBlk[idx]=e_end-eVolElm; // provides end point for each block in srfID } *e_written=e_endg; e_belWritten+=e_owned; // this is tracking written by this rank as we unpack srfID later @@ -945,7 +944,6 @@ if(1==0){ printf("%d part srfID 2 zc ",part); for(int ip=0; ip< std::min(nDbgI, int* imapD2 = (int *)malloc( nmatchFace * sizeof(int)); if(part==0) sortID1andID2(srfID1GCen,srfID2GCen,nmatchFace, imapD1, imapD2); PCU_Barrier(); - printf("Barrier %d %d",part,nmatchFace); MPI_Bcast(imapD1,nmatchFace,type_i,0, MPI_COMM_WORLD); MPI_Bcast(imapD2,nmatchFace,type_i,0, MPI_COMM_WORLD); auto type_d = getMpiType( double() ); From 93d90329d9d7cbcba173e705076908837598c6e7 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Sun, 27 Aug 2023 14:06:02 -0600 Subject: [PATCH 58/68] all debugging out encased in 0==1 conditional --- phasta/phCGNSgbc.cc | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 2f2581ec2..b61b5531a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -483,7 +483,7 @@ void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_star cgp_error_exit(); break; } - printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); *E=Ep; } void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) @@ -501,7 +501,7 @@ void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_sta cgp_error_exit(); break; } - printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); *E=Ep; } @@ -579,11 +579,11 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) // create the field data for this process int nIelVec=e_owned; cgsize_t partP1=part+1; - printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); +if(0==1) printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) cgp_error_exit(); -if(1==1){ +if(0==1){ printf("interior cnn %s %d %ld %ld \n", Ename,part, e_start, e_end); // for (int ne=0; necount(0); -if(1==0){ // ilwork debugging +if(0==1){ // ilwork debugging for (int ipart=0; ipart Date: Sun, 27 Aug 2023 16:08:30 -0600 Subject: [PATCH 59/68] valgrind found some slopiness --- phasta/phCGNSgbc.cc | 68 ++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index b61b5531a..0e35aa4a6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -77,7 +77,7 @@ void pairsortDI(double a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } - delete pairt; + delete [] pairt; } // Function to sort integer array b[] @@ -103,7 +103,7 @@ void pairsort(int a[], int b[], int n) a[i] = pairt[i].first; b[i] = pairt[i].second; } - delete pairt; + delete [] pairt; } void pairDeal6sort(int a[], int b[], int n) { @@ -611,10 +611,6 @@ void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfI int iblkC[2]; int estart[2]; int nvC,nvert,nvAll,invC; - for (int j = 0; j < nblkb; ++j) { // check all blocks - BlockKey& k = o.blocks.boundary.keys[j]; - nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; - } for (int i = 0; i < 2; ++i) { // check all topologies nvAll=0; nvC=nvMap[i]; @@ -667,7 +663,8 @@ if(0==1) printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(1==0){ for (int ne=0; ne Date: Mon, 28 Aug 2023 15:09:30 -0600 Subject: [PATCH 60/68] added 13 tests --- test/testing.cmake | 84 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/test/testing.cmake b/test/testing.cmake index ed5c92888..ea870d0de 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -69,6 +69,90 @@ else() set(GXT dmg) endif() +if(ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-multitopology1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + add_test(NAME 
chef-CGNS-multitopology1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + + mpi_test(chef-CGNS-multitopology2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + add_test(NAME chef-CGNS-multitopology2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + + mpi_test(chef-CGNS-multitopology4 4 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) + add_test(NAME chef-CGNS-multitopology4-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) +endif() + +if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND AND ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-8hex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + add_test(NAME chef-CGNS-8hex1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-8hex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-8hex2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-smallTet1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-smallTet1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-smallTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-smallTet2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllHex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllHex1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllHex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllHex2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllTet 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllTet-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllTet2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllWedge1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllWedge1-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllWedge2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllWedge2-diff + COMMAND cgnsdiff chefOut.cgns correct.cgns + WORKING_DIRECTORY 
${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) +endif() + set(MDIR ${MESHES}/phasta/dg) if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND) From 71bf7b4ba638f3f3535345f30a2ed576734ed3da Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Mon, 28 Aug 2023 16:34:26 -0600 Subject: [PATCH 61/68] removed some dead code --- phasta/phCGNSgbc.cc | 64 +++++++++++++-------------------------------- pumi-meshes | 2 +- 2 files changed, 19 insertions(+), 47 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 0e35aa4a6..424cfab34 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -23,8 +23,8 @@ #endif typedef int lcorp_t; #define NCORP_MPI_T MPI_INTEGER -extern cgsize_t nDbgCG=50; -extern int nDbgI=50; +static cgsize_t nDbgCG=50; +static int nDbgI=50; namespace { @@ -288,7 +288,7 @@ void gen_ncorp(Output& o ) if(num_parts > 1) commuInt(o, o.arrays.ncorp); -if(1==0) { +if(0==1) { for (int ipart=0; ipart Date: Tue, 29 Aug 2023 06:22:29 -0600 Subject: [PATCH 62/68] fixed matchedNodeElmReader tests to eliminate unused argument 9 --- test/testing.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testing.cmake b/test/testing.cmake index ea870d0de..08e5ae7d8 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -297,7 +297,7 @@ mpi_test(matchedNodeElementReader_p1 1 "${MDIR}/1part/geom3D.fathr" "NULL" "${MDIR}/1part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") mpi_test(matchedNodeElementReader_p4 4 ./matchedNodeElmReader @@ -309,7 +309,7 @@ mpi_test(matchedNodeElementReader_p4 4 "${MDIR}/4part/geom3D.fathr" "NULL" "${MDIR}/4part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") set(MDIR ${MESHES}/gmsh) mpi_test(gmshv2TwoQuads 1 From 364895c4fe9bd0dc522cd423e344ba1389bdb908 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Tue, 29 Aug 2023 08:17:44 -0600 Subject: [PATCH 63/68] fixed swapDoubles-- needed a call to MPI_Finalize to pass with openMPI 4 --- test/swapDoubles.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/test/swapDoubles.cc b/test/swapDoubles.cc index beacc27d2..f85c2a531 100644 --- a/test/swapDoubles.cc +++ b/test/swapDoubles.cc @@ -25,5 +25,6 @@ int main(int argc, char** argv) { } delete [] d_orig; delete [] d; + MPI_Finalize(); return 0; } From 20b9a7d3acaba891d54f8ee97c4cc8a55e1f3514 Mon Sep 17 00:00:00 2001 From: "Kenneth E. 
Jansen" Date: Thu, 31 Aug 2023 20:00:05 -0600 Subject: [PATCH 64/68] Testing disables prior CGNS tests that are hardwired to 32 bit --- CMakeLists.txt | 9 +------ phasta/phCGNSgbc.cc | 66 ++++++++++++++++++++++----------------------- test/testing.cmake | 4 +-- 3 files changed, 35 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 395bc43d8..ab2af3869 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,14 +131,7 @@ if(ENABLE_OMEGA_H) endif() if(ENABLE_CGNS) - set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) - bob_public_dep(CGNS) - #CGNS does not provide cmake targets :( - include_directories(SYSTEM ${CGNS_INCLUDE_DIR}) - set(SCOREC_USE_HDF5_DEFAULT ${ENABLE_CGNS}) - bob_public_dep(HDF5) - add_definitions(-DHAVE_CGNS) -else() + option(ENABLE_CGNS_MULTI_BASE "Enable the CGNS Multi Base tests" OFF) set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) bob_public_dep(CGNS) #CGNS does not provide cmake targets :( diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 424cfab34..1d98a1a2a 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -597,23 +597,21 @@ if(0==1){ } void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { - int E,Fsb,Fsb2; + int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; const int num_parts = PCU_Comm_Peers(); const cgsize_t num_parts_cg=num_parts; const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; - cgsize_t e_owned, e_start,e_end; - cgsize_t e_startg,e_endg; + cgsize_t e_owned, e_start,e_end, e_startg,e_endg; cgsize_t eVolElm=*e_written; cgsize_t e_belWritten=0; int nvMap[2] = {3,4}; int iblkC[2]; int estart[2]; - int nvC,nvert,nvAll,invC; for (int i = 0; i < 2; ++i) { // check all topologies nvAll=0; - nvC=nvMap[i]; invC=0; + nvC=nvMap[i]; int icountB=0; for (int j = 0; j < nblkb; ++j) { // check all blocks BlockKey& k = o.blocks.boundary.keys[j]; @@ -662,8 +660,7 @@ if(0==1) printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); if(0==1){ for (int ne=0; ne 2) + writeBlocksCGNSinteror(F,B,Z,o,&e_written); if(o.writeCGNSFiles > 2) { - int nblkb = o.blocks.boundary.getSize(); - double** srfIDCen1 = new double*[nblkb]; // might not all be used - double** srfIDCen2 = new double*[nblkb]; - int totOnRankBel=0; - for (int i = 0; i < nblkb; ++i) - totOnRankBel += o.blocks.boundary.nElements[i]; - int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); - int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - int nStackedOnRank=0; - writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); - writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); - free(srfID); free(srfIDidx); - free(srfID1OnBlk); free(srfID2OnBlk); - free(startBelBlk); free(endBelBlk); - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; - delete [] srfIDCen1; delete [] srfIDCen2; - if(cgp_close(F)) cgp_error_exit(); - double 
t1 = PCU_Time(); - if (!PCU_Comm_Self()) - lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); + cgsize_t totBel; + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; // might not all be used + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int nStackedOnRank=0; + writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); + free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; + if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } } // namespace diff --git a/test/testing.cmake b/test/testing.cmake index 08e5ae7d8..8deea0a63 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -599,7 +599,7 @@ if(ENABLE_ZOLTAN) ) endif() -if(ENABLE_CGNS AND ENABLE_ZOLTAN) +if(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) # # sort of an arbitrary choice set(numProcs 4) @@ -684,7 +684,7 @@ mpi_test(cgns_bcs_3 ${numProcs} bcs3.smb additional) -endif(ENABLE_CGNS AND ENABLE_ZOLTAN) +endif(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) mpi_test(construct 4 ./construct From dd2b713711953fc9dbbcfcab425386a816aa4237 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sat, 2 Sep 2023 21:21:04 -0600 Subject: [PATCH 65/68] Valgrind leaks, added VertexRank, replaced RankOfWriter with CellRank, added a couple of boundary element diagnostic fields, replaced cgnsdiff with hdf5diff which returns 1 if different for testing. 
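Note on the new diagnostic fields below: they follow the two-step cgp pattern described in CGNSFileWritingDev.txt, i.e., every rank collectively creates the solution and field file-nodes with identical arguments, then each rank writes only its owned slice. A minimal sketch of that pattern for one vertex-centered integer field (F, B, Z, o.local_start_id, and o.iownnodes as in the diff; the std::vector is illustrative, the code itself uses malloc, and <vector> is assumed included):

// Sketch: collective creation of a vertex-centered integer field, then a
// parallel write of just this rank's owned node range (1-based, inclusive).
int S, Fs;
if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &S) ||
    cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "VertexRank", &Fs))
  cgp_error_exit();
cgsize_t start = o.local_start_id;         // first node this rank owns
cgsize_t end   = start + o.iownnodes - 1;  // last node this rank owns
std::vector<int> d(o.iownnodes, PCU_Comm_Self()); // field value = writer rank
if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, d.data()))
  cgp_error_exit();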
--- phasta/phCGNSgbc.cc | 85 +++++++++++++++++++++++++++++++++++++++++---- phasta/phOutput.cc | 1 + test/testing.cmake | 26 +++++++------- 3 files changed, 93 insertions(+), 19 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 1d98a1a2a..71d414cb6 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -220,7 +220,7 @@ void gen_ncorp(Output& o ) int i; lcorp_t nilwork = o.nlwork; int num_nodes=m->count(0); - o.arrays.ncorp = (cgsize_t *)malloc(num_nodes * sizeof(cgsize_t)); //FIXME where to deallocate + o.arrays.ncorp = new cgsize_t[num_nodes]; lcorp_t owned; lcorp_t local; lcorp_t* owner_counts; @@ -297,6 +297,7 @@ if(0==1) { PCU_Barrier(); } } +free(owner_counts); } static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) @@ -516,8 +517,8 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; // create a centered solution - if (cg_sol_write(F, B, Z, "RankOfWriter", CGNS_ENUMV(CellCenter), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "RankOfWriter", &Fs)) + if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "CellRank", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); int nvMap[4] = {4,5,6,8}; @@ -661,6 +662,68 @@ if(0==1){ for (int ne=0; ne=start && en<=end) { + dv[en-start]= part; + } + } + } + } + if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + cgp_error_exit(); +// more tricky to put srfID on nodes to see in PV (approximately) through vertex field + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &Fs)) + cgp_error_exit(); + // create the field data for this process + for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; + if(invC!=0) { + for (int ibel = 0; ibel < e_owned; ++ibel){ + for (int ilv=0; ilv < nvC; ilv++) { + en=e[ibel*nvC+ilv]; + if(en>=start && en<=end) { + dv[en-start]= srfID[ibel]; +// printf("%d %d %d %d %d %d %d\n ", part,ibel, ilv, en, en-start, dv[en-start], srfID[ibel]); + } + } + } + } + if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + cgp_error_exit(); + free(dv); if(invC!=0) { free(e); //moved above getNaturalBCCodesCGNS(o, iblkC[, &srfID[e_belWritten]); @@ -861,7 +924,7 @@ void GatherCentroid(double** srfIDCen,int* srfIDOnBlk, double** srfIDGCen, int * if(0==1){ printf("displs1 ");for(int ip=0; ip< num_parts; ++ip) printf("% ld ", displs[ip]); printf("\n"); } auto type_d = getMpiType( double() ); MPI_Gatherv(srfIDCenAllBlocks,ncon,type_d,*srfIDGCen,rcounts,displs,type_d,0, MPI_COMM_WORLD); - free(srfIDCenAllBlocks); + free(srfIDCenAllBlocks); free(rcounts); free(displs); } void Allgather2IntAndSort(int* srfID, int* srfIDidx,Output& o,int* srfIDG, int* srfIDGidx, int totOnRankBel) { @@ -907,7 +970,7 @@ void writeCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, int nmatchFace1,nmatchFace; GatherCentroid(srfIDCen1,srfID1OnBlk,&srfID1GCen,&nmatchFace1, nStackedOnRank); GatherCentroid(srfIDCen2,srfID2OnBlk,&srfID2GCen,&nmatchFace, nStackedOnRank); - if(part==0) printf("matchface %d, %d", nmatchFace1, nmatchFace); +if(0==1) if(part==0) printf("matchface %d, %d\n", nmatchFace1, nmatchFace); if(part==0) assert(nmatchFace1==nmatchFace); // compute the translation while we still have ordered centroids data Assuming Translation = donor minus periodic but documents unclear 
double TranslationD[3]; @@ -981,7 +1044,7 @@ if(0==1) { if (cg_conn_periodic_write(F, B, Z, cgconn, RotationCenter, RotationAngle, Translation)) cgp_error_exit(); free(imapD1); free(imapD2); - free(eBC); free(srfIDG); free(srfIDGidx); + free(eBC); free(srfIDG); free(srfIDGidx); free(donor2); free(periodic1); } void CGNS_NodalSolution(int F,int B,int Z, Output& o) { @@ -1073,6 +1136,16 @@ if(0==1) { if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); + int S2,Fs2; + const int part = PCU_Comm_Self() ; + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &S2) || + cgp_field_write(F, B, Z, S2, CGNS_ENUMV(Integer), "VertexRank", &Fs2)) + cgp_error_exit(); + int* d = (int *)malloc(o.iownnodes * sizeof(int)); + for (int inode = 0; inode < o.iownnodes; ++inode) d[inode]= part; + if (cgp_field_write_data(F, B, Z, S2, Fs2, &start, &end, d)) + cgp_error_exit(); + free(d); } void writeCGNS(Output& o, std::string path) { diff --git a/phasta/phOutput.cc b/phasta/phOutput.cc index d4b71028b..fd5e73a69 100644 --- a/phasta/phOutput.cc +++ b/phasta/phOutput.cc @@ -997,6 +997,7 @@ Output::~Output() //nOwnedNodes will still be zero. if(!nOwnedNodes) return; + delete [] arrays.ncorp; delete [] arrays.coordinates; delete [] arrays.ilwork; delete [] arrays.ilworkf; diff --git a/test/testing.cmake b/test/testing.cmake index 8deea0a63..d256c6b30 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -74,19 +74,19 @@ if(ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) mpi_test(chef-CGNS-multitopology1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) add_test(NAME chef-CGNS-multitopology1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) mpi_test(chef-CGNS-multitopology2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) add_test(NAME chef-CGNS-multitopology2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) mpi_test(chef-CGNS-multitopology4 4 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) add_test(NAME chef-CGNS-multitopology4-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) endif() @@ -95,61 +95,61 @@ if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND AND EN mpi_test(chef-CGNS-8hex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) add_test(NAME chef-CGNS-8hex1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) mpi_test(chef-CGNS-8hex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) add_test(NAME chef-CGNS-8hex2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) mpi_test(chef-CGNS-smallTet1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) add_test(NAME chef-CGNS-smallTet1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) mpi_test(chef-CGNS-smallTet2 2 
${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) add_test(NAME chef-CGNS-smallTet2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) mpi_test(chef-CGNS-AllHex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) add_test(NAME chef-CGNS-AllHex1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) mpi_test(chef-CGNS-AllHex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) add_test(NAME chef-CGNS-AllHex2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) mpi_test(chef-CGNS-AllTet 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) add_test(NAME chef-CGNS-AllTet-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) mpi_test(chef-CGNS-AllTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) add_test(NAME chef-CGNS-AllTet2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) mpi_test(chef-CGNS-AllWedge1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) add_test(NAME chef-CGNS-AllWedge1-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) mpi_test(chef-CGNS-AllWedge2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) add_test(NAME chef-CGNS-AllWedge2-diff - COMMAND cgnsdiff chefOut.cgns correct.cgns + COMMAND h5diff chefOut.cgns correct.cgns WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) endif() From bdc26b58f14cfbbf2c0af23aa467653f85974e8a Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 3 Sep 2023 12:02:45 -0600 Subject: [PATCH 66/68] checking this in with some extra commented code on my failure to get BoundaryCellRank to be a FaceCenter field. This branch also provides a hacky way to get around ParaView only being able to visualize the first nodal field in the CGNS file by circulating the file-node creation order. 
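The circulation below can be read as a rotation of the solution-node creation order (an assumption consistent with the diff: only the FIRST solution node matters to ParaView, and the relative order of the remaining nodes does not). A compact, runnable sketch of the idea with stand-in creation calls:

#include <cstdio>
#include <functional>
int main() {
  // Stand-ins for the cg_sol_write calls made in writeCGNS below.
  std::function<void()> createSol[4] = {
    []{ std::puts("create nodal Solution"); },
    []{ std::puts("create VertexRank"); },
    []{ std::puts("create BoundaryVertexRank"); },
    []{ std::puts("create BoundaryVertexSrfID"); }};
  int writeCGNSFiles = 3;           // flag in 2..5 selects which node leads
  int first = writeCGNSFiles - 2;
  for (int k = 0; k < 4; ++k)
    createSol[(first + k) % 4]();   // the chosen node is created first
  return 0;
}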
--- phasta/phCGNSgbc.cc | 143 ++++++++++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 50 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 71d414cb6..f66a219ef 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -507,9 +507,9 @@ if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); } // renamed and calling the renamed functions above with output writes now to CGNS -void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) +void writeBlocksCGNSinterior(int F,int B,int Z, int SCR, Output& o, cgsize_t *e_written) { - int E,S,Fs,Fs2,Fsb,Fsb2; + int E,Fs,Fs2,Fsb,Fsb2; cgsize_t e_owned, e_start,e_end; cgsize_t e_startg,e_endg; const int num_parts = PCU_Comm_Peers(); @@ -517,8 +517,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) const int part = PCU_Comm_Self() ; const cgsize_t part_cg=part; // create a centered solution - if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "CellRank", &Fs)) + if ( cgp_field_write(F, B, Z, SCR, CGNS_ENUMV(Integer), "CellRank", &Fs)) cgp_error_exit(); int nblki= o.blocks.interior.getSize(); int nvMap[4] = {4,5,6,8}; @@ -566,7 +565,7 @@ void writeBlocksCGNSinteror(int F,int B,int Z, Output& o, cgsize_t *e_written) d[n] = part; // write the solution field data in parallel } - if (cgp_field_write_data(F, B, Z, S, Fs, &e_start, &e_end, d)) + if (cgp_field_write_data(F, B, Z, SCR, Fs, &e_start, &e_end, d)) cgp_error_exit(); if(invC!=0) free(d); char UserDataName[11]; @@ -596,7 +595,7 @@ if(0==1){ } // end of loop over ALL topologies PCU_Barrier(); } -void writeBlocksCGNSboundary(int F,int B,int Z, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) +void writeBlocksCGNSboundary(int F,int B,int Z, int SBVR, int SBVS, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; const int num_parts = PCU_Comm_Peers(); @@ -663,7 +662,7 @@ if(0==1){ } int idx =((*nStackedOnRank) - 1); - int S, Fs; + int FsR, FsS; // if (cg_sol_write(F, B, Z, "BoundaryCellRank", CGNS_ENUMV(FaceCenter), &S) || // cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryCellRank", &Fs)) @@ -686,8 +685,7 @@ if(0==1){ int* dv = (int *)malloc(o.iownnodes * sizeof(int)); cgsize_t start=o.local_start_id; cgsize_t end=start+o.iownnodes-1; - if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryVertexRank", &Fs)) + if ( cgp_field_write(F, B, Z, SBVR, CGNS_ENUMV(Integer), "BoundaryVertexRank", &FsR)) cgp_error_exit(); // create the field data for this process for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; @@ -702,11 +700,10 @@ if(0==1){ } } } - if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) cgp_error_exit(); // more tricky to put srfID on nodes to see in PV (approximately) through vertex field - if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &S) || - cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &Fs)) + if ( 
cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) cgp_error_exit(); // create the field data for this process for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; @@ -721,7 +718,7 @@ if(0==1){ } } } - if (cgp_field_write_data(F, B, Z, S, Fs, &start, &end, dv)) + if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) cgp_error_exit(); free(dv); if(invC!=0) { @@ -1103,7 +1100,7 @@ if(0==1) printf("solution=%s",fieldName); cgp_error_exit(); free(p); free(u); free(v); free(w); free(T); free(data); } -void CGNS_Coordinates(int F,int B,int Z,Output& o) +void CGNS_Coordinates(int F,int B,int Z, Output& o) { int Cx,Cy,Cz; if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx) || @@ -1136,14 +1133,18 @@ if(0==1) { if(j==2) if(cgp_coord_write_data(F, B, Z, Cz, &start, &end, x)) cgp_error_exit(); } free (x); +} +void CGNS_VertexRank(int F,int B,int Z, int SVR, Output& o) +{ int S2,Fs2; const int part = PCU_Comm_Self() ; - if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &S2) || - cgp_field_write(F, B, Z, S2, CGNS_ENUMV(Integer), "VertexRank", &Fs2)) + cgsize_t start=o.local_start_id; + cgsize_t end=start+o.iownnodes-1; + if ( cgp_field_write(F, B, Z, SVR, CGNS_ENUMV(Integer), "VertexRank", &Fs2)) cgp_error_exit(); int* d = (int *)malloc(o.iownnodes * sizeof(int)); for (int inode = 0; inode < o.iownnodes; ++inode) d[inode]= part; - if (cgp_field_write_data(F, B, Z, S2, Fs2, &start, &end, d)) + if (cgp_field_write_data(F, B, Z, SVR, Fs2, &start, &end, d)) cgp_error_exit(); free(d); } @@ -1156,7 +1157,7 @@ void writeCGNS(Output& o, std::string path) const cgsize_t num_parts_cg=num_parts; std::string timestep_or_dat; static char outfile[] = "chefOut.cgns"; - int F, B, Z, E, S, Fs, Fs2, A, Cx, Cy, Cz; + int F, B, Z, E, S, SCR, SVR, SBVR, SBVS, Fs, Fs2, A, Cx, Cy, Cz; cgsize_t sizes[3],*e, start, end; int num_nodes=m->count(0); if(0==1){ // ilwork debugging @@ -1209,8 +1210,53 @@ if(0==1){ cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5); - CGNS_Coordinates(F,B,Z,o); - CGNS_NodalSolution(F,B,Z,o); + CGNS_Coordinates(F,B,Z, o); +// Paraview will only viz the first sol node created so control that with writeCGNSFiles flag + +// notes on FaceCenter Fails +// int ec0=cg_sol_write(F, B, Z, "BoundaryCellRank2", CGNS_ENUMV(CellCenter), &S); +// ec0 returns 0 GOOD and ec2 below is also 0 so CellCenter works +// int ec1=cg_sol_write(F, B, Z, "BoundaryCellRank", CGNS_ENUMV(FaceCenter), &S); +// ec1 returns 1 ERROR causing ec2 to also fail since S is junk +// int ec2= cgp_field_write(F, B, Z, S, CGNS_ENUMV(Integer), "BoundaryCellRank", &Fs); + + if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &SCR)) + cgp_error_exit(); + if(o.writeCGNSFiles == 2) { // Solution + CGNS_NodalSolution(F,B,Z,o); + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + }else if(o.writeCGNSFiles == 3) { // Vertex Rank + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + CGNS_NodalSolution(F,B,Z,o); + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + }else if(o.writeCGNSFiles == 4) { // Boundary 
Vertex Rank + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + CGNS_NodalSolution(F,B,Z,o); + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + }else if(o.writeCGNSFiles == 5) { // Boundary Vertex SrfID + if (cg_sol_write(F, B, Z, "BoundaryVertexSrfID", CGNS_ENUMV(Vertex), &SBVS) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "BoundaryVertexRank", CGNS_ENUMV(Vertex), &SBVR) ) + cgp_error_exit(); + if (cg_sol_write(F, B, Z, "VertexRank", CGNS_ENUMV(Vertex), &SVR) ) + cgp_error_exit(); + CGNS_NodalSolution(F,B,Z,o); + } + CGNS_VertexRank(F,B,Z,SVR, o); +// CGNS_NodalSolution(F,B,Z,o); // create Helper array for number of elements on rank if ( cg_goto(F, B, "Zone_t", 1, NULL) || cg_user_data_write("User Data") || @@ -1224,35 +1270,32 @@ if(0==1) printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) cgp_error_exit(); cgsize_t e_written=0; - if(o.writeCGNSFiles > 2) - writeBlocksCGNSinteror(F,B,Z,o,&e_written); - if(o.writeCGNSFiles > 2) { - cgsize_t totBel; - int nblkb = o.blocks.boundary.getSize(); - double** srfIDCen1 = new double*[nblkb]; // might not all be used - double** srfIDCen2 = new double*[nblkb]; - int totOnRankBel=0; - for (int i = 0; i < nblkb; ++i) - totOnRankBel += o.blocks.boundary.nElements[i]; - int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); - int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); - int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); - int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); - int nStackedOnRank=0; - writeBlocksCGNSboundary(F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); - writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); - free(srfID); free(srfIDidx); - free(srfID1OnBlk); free(srfID2OnBlk); - free(startBelBlk); free(endBelBlk); - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; - for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; - delete [] srfIDCen1; delete [] srfIDCen2; - if(cgp_close(F)) cgp_error_exit(); - double t1 = PCU_Time(); - if (!PCU_Comm_Self()) - lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); - } + writeBlocksCGNSinterior(F,B,Z,SCR,o,&e_written); + cgsize_t totBel; + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; // might not all be used + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int nStackedOnRank=0; + writeBlocksCGNSboundary(F,B,Z, SBVR, SBVS, o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, 
srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); + free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; + if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); } } // namespace From d24287582e5797db22cccabf52de3277199e7004 Mon Sep 17 00:00:00 2001 From: "Kenneth E. Jansen" Date: Sun, 3 Sep 2023 12:57:40 -0600 Subject: [PATCH 67/68] cleaned dead/commented code, valgrind check again, created more helpder functions to keep long functions under 105 lines. --- phasta/phCGNSgbc.cc | 195 +++++++++++++++++++------------------------- 1 file changed, 86 insertions(+), 109 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index f66a219ef..9e84bc6bd 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -157,7 +157,6 @@ void commuInt(Output& o, cgsize_t* global) } int itag, iacc, iother, isgbeg; - MPI_Datatype sevsegtype[numtask]; //first do what ctypes does for setup int* isbegin; int* lenseg; @@ -165,12 +164,15 @@ void commuInt(Output& o, cgsize_t* global) isbegin = (int*) malloc(sizeof(int) * maxseg); lenseg = (int*) malloc(sizeof(int) * maxseg); ioffset = (int*) malloc(sizeof(int) * maxseg); -// no VLA MPI_Request req[numtask]; +// no VLA but could not figure out how to malloc so maxtask FIXME/HELP MPI_Request req[numtask]; // no VLA MPI_Status stat[numtask]; +// no VLA MPI_Datatype sevsegtype[numtask]; int maxtask=1000; assert(maxtask>=numtask); MPI_Request req[maxtask]; MPI_Status stat[maxtask]; + MPI_Datatype sevsegtype[maxtask]; +// FIXME/HELP int maxfront=0; int lfront; itkbeg=0; @@ -256,7 +258,7 @@ void gen_ncorp(Output& o ) printf("\n"); #endif local_start_id=0; - for(i=0;i=start && en<=end) + dv[en-start]= part; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) + cgp_error_exit(); + // more tricky to put srfID on nodes to see in PV (approximately) through vertex field + if ( cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) + cgp_error_exit(); + // create the field data for this process + for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; + if(invC!=0) { + for (int ibel = 0; ibel < e_owned; ++ibel){ + for (int ilv=0; ilv < nvC; ilv++) { + en=e[ibel*nvC+ilv]; + if(en>=start && en<=end) + dv[en-start]= srfID[ibel]; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) + cgp_error_exit(); +} void writeBlocksCGNSboundary(int F,int B,int Z, int SBVR, int SBVS, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) { int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; @@ -661,69 +703,9 @@ if(0==1){ for (int ne=0; ne=start && en<=end) { - dv[en-start]= part; - } - } - } - } - if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) - cgp_error_exit(); -// more tricky to put srfID on nodes to see in PV (approximately) through vertex field - if ( cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) - cgp_error_exit(); - // create the field data for this process - for (int inode = 0; 
inode < o.iownnodes; ++inode) dv[inode]= -1; - if(invC!=0) { - for (int ibel = 0; ibel < e_owned; ++ibel){ - for (int ilv=0; ilv < nvC; ilv++) { - en=e[ibel*nvC+ilv]; - if(en>=start && en<=end) { - dv[en-start]= srfID[ibel]; -// printf("%d %d %d %d %d %d %d\n ", part,ibel, ilv, en, en-start, dv[en-start], srfID[ibel]); - } - } - } - } - if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) - cgp_error_exit(); - free(dv); + writeBoundaryVertexToSol(F,B,Z, SBVR, SBVS, o, srfID, part,invC, e_owned, nvC, e); if(invC!=0) { free(e); -//moved above getNaturalBCCodesCGNS(o, iblkC[, &srfID[e_belWritten]); int icnt1=0; int icnt2=0; for (int ne=0; ne Date: Thu, 6 Jun 2024 13:18:29 -0600 Subject: [PATCH 68/68] _rebase was a trainwreck getting things from there back onto this branch --- phasta/phCGNSgbc.cc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc index 9e84bc6bd..e86eaf227 100644 --- a/phasta/phCGNSgbc.cc +++ b/phasta/phCGNSgbc.cc @@ -164,15 +164,12 @@ void commuInt(Output& o, cgsize_t* global) isbegin = (int*) malloc(sizeof(int) * maxseg); lenseg = (int*) malloc(sizeof(int) * maxseg); ioffset = (int*) malloc(sizeof(int) * maxseg); -// no VLA but could not figure out how to malloc so maxtask FIXME/HELP MPI_Request req[numtask]; -// no VLA MPI_Status stat[numtask]; -// no VLA MPI_Datatype sevsegtype[numtask]; - int maxtask=1000; - assert(maxtask>=numtask); - MPI_Request req[maxtask]; - MPI_Status stat[maxtask]; - MPI_Datatype sevsegtype[maxtask]; -// FIXME/HELP + MPI_Request* req; + req = (MPI_Request*) malloc(sizeof(MPI_Request) * numtask); + MPI_Status* stat; + stat = (MPI_Status*) malloc(sizeof(MPI_Status) * numtask); + MPI_Datatype* sevsegtype; + sevsegtype = (MPI_Datatype*) malloc(sizeof(MPI_Datatype) * numtask); int maxfront=0; int lfront; itkbeg=0; @@ -1223,8 +1220,14 @@ if(0==1){ sizes[2]=0; if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; if ( cgp_open(outfile, CG_MODE_WRITE, &F) || - cg_base_write(F, "Base", 3, 3, &B) || - cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) + cg_base_write(F, "Base", 3, 3, &B) ) + cgp_error_exit(); + if ( cg_goto(F,B,"end")) + cgp_error_exit(); + if ( cg_dataclass_write(CGNS_ENUMV(Dimensional))) + cgp_error_exit(); + cg_units_write(CGNS_ENUMV(Kilogram),CGNS_ENUMV(Meter),CGNS_ENUMV(Second),CGNS_ENUMV(Kelvin),CGNS_ENUMV(Degree)); + if ( cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) cgp_error_exit(); // create data nodes for coordinates cg_set_file_type(CG_FILE_HDF5);
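A closing note on the commuInt allocations above: the heap-allocated request/status/datatype arrays need matching teardown, and any derived datatypes that were committed need MPI_Type_free before the holding array is freed. A sketch of the cleanup, assuming each sevsegtype[i] was committed with MPI_Type_commit (placement at the end of commuInt is illustrative):

// Sketch: release committed derived datatypes first, then the arrays.
for (int itask = 0; itask < numtask; ++itask)
  MPI_Type_free(&sevsegtype[itask]); // frees the derived type object itself
free(sevsegtype);
free(req);
free(stat);
free(isbegin); free(lenseg); free(ioffset);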