From 6aed9fb0ca7a59481102f5bd08d7407cdc41122a Mon Sep 17 00:00:00 2001 From: cosunae Date: Tue, 10 Nov 2020 22:07:21 +0100 Subject: [PATCH 01/24] just fix some confusing naming --- dawn/src/dawn/IIR/DependencyGraph.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dawn/src/dawn/IIR/DependencyGraph.h b/dawn/src/dawn/IIR/DependencyGraph.h index 633645bc9..b814b417b 100644 --- a/dawn/src/dawn/IIR/DependencyGraph.h +++ b/dawn/src/dawn/IIR/DependencyGraph.h @@ -68,7 +68,9 @@ class DependencyGraph { }; protected: + // map of Value (i.e. normally accessID to Vertex object std::unordered_map vertices_; + // adjacencyList for each vertex where the position within the vector is the vertexID std::vector adjacencyList_; public: @@ -96,8 +98,8 @@ class DependencyGraph { DependencyGraph() = default; /// @brief Insert a new node - Vertex& insertNode(int ID) { - auto [iter, inserted] = vertices_.emplace(ID, Vertex{adjacencyList_.size(), ID}); + Vertex& insertNode(int Value) { + auto [iter, inserted] = vertices_.emplace(Value, Vertex{adjacencyList_.size(), Value}); if(inserted) adjacencyList_.push_back(EdgeList()); return iter->second; From ebf86c6e142e76a553f97b897a9684e1aa7ff571 Mon Sep 17 00:00:00 2001 From: cosunae Date: Tue, 10 Nov 2020 22:10:04 +0100 Subject: [PATCH 02/24] adding check for in and outdegree --- dawn/src/dawn/IIR/DependencyGraphAccesses.cpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp b/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp index cb4445a5c..f418c7d53 100644 --- a/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp +++ b/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp @@ -245,6 +245,32 @@ bool DependencyGraphAccesses::isDAG() const { return true; } +bool DependencyGraphAccesses::hasZeroOutdegreeNodes() const { + auto partitions = partitionInSubGraphs(); + std::vector vertices; + + for(std::set& partition : partitions) { + getOutputVertexIDsImpl( + *this, partition, [](std::size_t VertexID) { return VertexID; }, vertices); + if(vertices.empty()) + return false; + } + return true; +} + +bool DependencyGraphAccesses::hasZeroIndegreeNodes() const { + auto partitions = partitionInSubGraphs(); + std::vector vertices; + + for(std::set& partition : partitions) { + getInputVertexIDsImpl( + *this, partition, [](std::size_t VertexID) { return VertexID; }, vertices); + if(vertices.empty()) + return false; + } + return true; +} + std::vector DependencyGraphAccesses::getOutputVertexIDs() const { std::vector outputVertexIDs; getOutputVertexIDsImpl( From 12363ea3247a4e443159fe8f0db2e25c6c9d2620 Mon Sep 17 00:00:00 2001 From: cosunae Date: Tue, 10 Nov 2020 22:10:11 +0100 Subject: [PATCH 03/24] adding check for in and outdegree --- dawn/src/dawn/IIR/DependencyGraphAccesses.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dawn/src/dawn/IIR/DependencyGraphAccesses.h b/dawn/src/dawn/IIR/DependencyGraphAccesses.h index fc0c8cc7b..08157491d 100644 --- a/dawn/src/dawn/IIR/DependencyGraphAccesses.h +++ b/dawn/src/dawn/IIR/DependencyGraphAccesses.h @@ -127,6 +127,11 @@ class DependencyGraphAccesses /// In our context, a DAG is defined as having a non-empty set of input as well as output nodes. bool isDAG() const; + /// @brief true if graph has nodes with indegree=0 + bool hasZeroIndegreeNodes() const; + /// @brief true if graph has nodes with outdegree=0 + bool hasZeroOutdegreeNodes() const; + /// @brief Get the VertexIDs of the pure `output` vertices /// /// Output vertices are vertices which do not have incoming edges from other vertices. From f9301101f5850e26e717515d7d5edd7fc027559b Mon Sep 17 00:00:00 2001 From: cosunae Date: Tue, 10 Nov 2020 22:15:43 +0100 Subject: [PATCH 04/24] remove attempt to fix graphs with no zero indegree or outdegree nodes --- dawn/src/dawn/Optimizer/PassFieldVersioning.cpp | 17 +---------------- .../dawn/Optimizer/ReadBeforeWriteConflict.cpp | 7 +++++++ 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp b/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp index 58deaaf70..c1ff71ce3 100644 --- a/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp +++ b/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp @@ -138,6 +138,7 @@ bool PassFieldVersioning::run( newGraph = oldGraph; newGraph.insertStatement(stmt); } + doMethod.update(iir::NodeUpdateType::level); } stage.update(iir::NodeUpdateType::level); @@ -211,22 +212,6 @@ PassFieldVersioning::RCKind PassFieldVersioning::fixRaceCondition( } } - // If we only have non-stencil SCCs and there are no input and output fields (i.e we don't have a - // DAG) we have to break (by renaming) one of the SCCs to get a DAG. For example: - // - // field_a = field_b; - // field_b = field_a; - // - // needs to be renamed to - // - // field_a = field_b_0; - // field_b = field_a; - // - // ... and then field_b_0 must be initialized from field_b. - if(stencilSCCs->empty() && !SCCs->empty() && !graph.isDAG()) { - stencilSCCs->emplace_back(std::move(SCCs->front())); - } - if(stencilSCCs->empty()) return RCKind::Nothing; diff --git a/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp b/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp index c5916bc38..d5072af6b 100644 --- a/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp +++ b/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp @@ -42,6 +42,10 @@ class ReadBeforeWriteConflictDetector { ReadBeforeWriteConflict check() const { std::vector nodesToVisit = graph_.getOutputVertexIDs(); + // if the graph does not have nodes with outdegree=0, there is no seed to start the algorithm + // in this case, there is at least one SCC, which means we can start from any node of the graph + if(nodesToVisit.empty() ) nodesToVisit.push_back(0); + DAWN_ASSERT_MSG(!nodesToVisit.empty(), "invalid graph (probably contains cycles!)"); ReadBeforeWriteConflict conflict; @@ -77,6 +81,9 @@ class ReadBeforeWriteConflictDetector { else visitedNodes.insert(curNode); + DAWN_ASSERT_MSG((adjacencyList.size() > curNode), "out of bounds access to adjacency list of graph"); + + // Follow edges of the current node if(!adjacencyList[curNode].empty()) { for(const auto& edge : adjacencyList[curNode]) { From 44770e627cfd09ed9ff511345249d6f8a8aef997 Mon Sep 17 00:00:00 2001 From: cosunae Date: Tue, 10 Nov 2020 23:17:35 +0100 Subject: [PATCH 05/24] update ref --- .../data/generate_versioned_field_ref.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp index f6d395776..b8283423e 100644 --- a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp +++ b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp @@ -21,11 +21,10 @@ class generate_versioned_field { ::dawn::edge_field_t& m_c; ::dawn::edge_field_t& m_d; ::dawn::edge_field_t& m_e; - ::dawn::edge_field_t& m_c_0; ::dawn::unstructured_domain m_unstructured_domain ; public: - stencil_37(::dawn::mesh_t const &mesh, int k_size, ::dawn::edge_field_t&a, ::dawn::edge_field_t&b, ::dawn::edge_field_t&c, ::dawn::edge_field_t&d, ::dawn::edge_field_t&e, ::dawn::edge_field_t&c_0) : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e), m_c_0(c_0){} + stencil_37(::dawn::mesh_t const &mesh, int k_size, ::dawn::edge_field_t&a, ::dawn::edge_field_t&b, ::dawn::edge_field_t&c, ::dawn::edge_field_t&d, ::dawn::edge_field_t&e) : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e){} ~stencil_37() { } @@ -37,18 +36,14 @@ class generate_versioned_field { static constexpr ::dawn::driver::unstructured_extent c_extent = {false, 0,0}; static constexpr ::dawn::driver::unstructured_extent d_extent = {false, 0,0}; static constexpr ::dawn::driver::unstructured_extent e_extent = {false, 0,0}; - static constexpr ::dawn::driver::unstructured_extent c_0_extent = {false, 0,0}; void run() { using ::dawn::deref; { for(int k = 0+0; k <= ( m_k_size == 0 ? 0 : (m_k_size - 1)) + 0+0; ++k) { for(auto const& loc : getEdges(LibTag{}, m_mesh)) { -m_c_0(deref(LibTag{}, loc), (k + 0)) = m_c(deref(LibTag{}, loc), (k + 0)); - } }}{ - for(int k = 0+0; k <= ( m_k_size == 0 ? 0 : (m_k_size - 1)) + 0+0; ++k) { - for(auto const& loc : getEdges(LibTag{}, m_mesh)) { -m_a(deref(LibTag{}, loc), (k + 0)) = ((m_b(deref(LibTag{}, loc), (k + 0)) / m_c_0(deref(LibTag{}, loc), (k + 0))) + (::dawn::float_type) 5); +m_a(deref(LibTag{}, loc), (k + 0)) = ((m_b(deref(LibTag{}, loc), (k + 0)) / m_c(deref(LibTag{}, loc), (k + 0))) + (::dawn::float_type) 5); + } for(auto const& loc : getEdges(LibTag{}, m_mesh)) { if(m_d(deref(LibTag{}, loc), (k + 0))) { m_a(deref(LibTag{}, loc), (k + 0)) = m_b(deref(LibTag{}, loc), (k + 0)); @@ -70,13 +65,12 @@ else generate_versioned_field(const generate_versioned_field&) = delete; // Members - ::dawn::edge_field_t m_c_0; void set_splitter_index(::dawn::LocationType loc, ::dawn::UnstructuredIterationSpace space, int offset, int index) { m_stencil_37.m_unstructured_domain.set_splitter_index({loc, space, offset}, index); } - generate_versioned_field(const ::dawn::mesh_t &mesh, int k_size, ::dawn::edge_field_t& a, ::dawn::edge_field_t& b, ::dawn::edge_field_t& c, ::dawn::edge_field_t& d, ::dawn::edge_field_t& e) : m_stencil_37(mesh, k_size,a,b,c,d,e,m_c_0), m_c_0(allocateFieldLike(LibTag{}, c)){} + generate_versioned_field(const ::dawn::mesh_t &mesh, int k_size, ::dawn::edge_field_t& a, ::dawn::edge_field_t& b, ::dawn::edge_field_t& c, ::dawn::edge_field_t& d, ::dawn::edge_field_t& e) : m_stencil_37(mesh, k_size,a,b,c,d,e){} void run() { m_stencil_37.run(); From 949d86d07cd851f6150b26de65048ed89b0afcb4 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Wed, 8 Dec 2021 09:24:59 +0100 Subject: [PATCH 06/24] some hot fixes for the psy2dawn project --- dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp | 13 +++++++++++-- dawn/src/driver-includes/math.hpp | 5 +++++ gtclang/src/gtclang_dsl_defs/math.hpp | 3 +++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp index 84f682818..58e39c37f 100644 --- a/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp @@ -726,7 +726,7 @@ void MSCodeGen::generateCudaKernelCode() { } if(globalNames_.find("checkOffset") == globalNames_.end()) { - MemberFunction offsetFunc("__device__ bool", "checkOffset", ss_); + MemberFunction offsetFunc("__device__ static bool", "checkOffset", ss_); offsetFunc.addArg("unsigned int min"); offsetFunc.addArg("unsigned int max"); offsetFunc.addArg("unsigned int val"); @@ -1082,9 +1082,18 @@ void MSCodeGen::generateCudaKernelCode() { for(auto [index, interval] : enumerate(stage->getIterationSpace())) { if(interval.has_value()) { std::string arrName = prefix + iterators.at(index) + "Indices"; + std::string offset; + if (iterators[index] == 'I') { + offset = "blockIdx.x * " + std::to_string(ntx) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; + } + + if (iterators[index] == 'J') { + offset = "blockIdx.y * " + std::to_string(nty) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; + } + guard += " && checkOffset(" + arrName + "_[0], " + arrName + "_[1], globalOffsets_[" + std::to_string(index) + "] + " + - (char)std::tolower(iterators.at(index)) + "block)"; + "(" + offset +"))"; } } } diff --git a/dawn/src/driver-includes/math.hpp b/dawn/src/driver-includes/math.hpp index ce3518a1b..dfc9f2e80 100644 --- a/dawn/src/driver-includes/math.hpp +++ b/dawn/src/driver-includes/math.hpp @@ -253,6 +253,11 @@ GT_FUNCTION T isnan(const T x) { return std::isnan(x); } + +template +GT_FUNCTION T sign(const T val) { + return (T(0) < val) - (val < T(0));; +} /** @} */ } // namespace math } // namespace dawn diff --git a/gtclang/src/gtclang_dsl_defs/math.hpp b/gtclang/src/gtclang_dsl_defs/math.hpp index a56427efa..60e7061ad 100644 --- a/gtclang/src/gtclang_dsl_defs/math.hpp +++ b/gtclang/src/gtclang_dsl_defs/math.hpp @@ -134,6 +134,9 @@ T exp(const T arg); template T log(const T x); +template +T sign(const T x); + /** @} */ } // namespace math } // namespace dsl From b81f35f94ef22cb476cb97ea0500fcc5ec1eb6a9 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 10 Dec 2021 11:44:25 +0100 Subject: [PATCH 07/24] add test, update old test --- .../dawn/CodeGen/reference/global_indexing.cu | 4 +- .../integration-test/CodeGen/CMakeLists.txt | 1 + .../CodeGen/copy_stencil_benchmark.cpp | 2 +- .../CodeGen/iteration_space_stencil.cpp | 32 ++++++++++ .../iteration_space_stencil_benchmark.cpp | 61 +++++++++++++++++++ .../iteration_space_stencil_benchmark.cu | 1 + 6 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil.cpp create mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cpp create mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu diff --git a/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu b/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu index 3ea53ecfc..8352d6e3c 100644 --- a/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu +++ b/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu @@ -41,7 +41,7 @@ namespace dawn_generated { namespace cuda { __constant__ int stage14GlobalJIndices_[2]; __constant__ unsigned globalOffsets_[2]; -__device__ bool checkOffset(unsigned int min, unsigned int max, unsigned int val) { +__device__ static bool checkOffset(unsigned int min, unsigned int max, unsigned int val) { return (min <= val && val < max); } __global__ void __launch_bounds__(128) @@ -108,7 +108,7 @@ __global__ void __launch_bounds__(128) if(iblock >= 0 && iblock <= block_size_i - 1 + 0 && jblock >= 0 && jblock <= block_size_j - 1 + 0 && checkOffset(stage14GlobalJIndices_[0], stage14GlobalJIndices_[1], - globalOffsets_[1] + jblock)) { + globalOffsets_[1] + (blockIdx.y * 4 + jblock))) { { out_field[idx111] = (int)10; } diff --git a/gtclang/test/integration-test/CodeGen/CMakeLists.txt b/gtclang/test/integration-test/CodeGen/CMakeLists.txt index 97d3f7e38..05dd16a65 100644 --- a/gtclang/test/integration-test/CodeGen/CMakeLists.txt +++ b/gtclang/test/integration-test/CodeGen/CMakeLists.txt @@ -196,3 +196,4 @@ add_codegen_test(TEST kcache_fill_kparallel PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_fill_backward PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_flush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_epflush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) +add_codegen_test(TEST iteration_space_stencil FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp b/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp index 9d79d8002..5f3485453 100644 --- a/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp +++ b/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp @@ -46,7 +46,7 @@ TEST(copy_stencil, test) { verifier verif(dom); - meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize() + 1); + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); storage_t in(meta_data, "in"), out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); verif.fillMath(8.0, 2.0, 1.5, 1.5, 2.0, 4.0, in); diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil.cpp new file mode 100644 index 000000000..197ad0dc6 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil.cpp @@ -0,0 +1,32 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// + +#include "gtclang_dsl_defs/gtclang_dsl.hpp" +using namespace gtclang::dsl; + +stencil iteration_space_stencil { + storage out; + + Do { + vertical_region(k_start, k_end) { + out = 0; + } + + iteration_space(i_start + 1, i_end-1, j_start + 1, j_end - 1, k_start + 1, k_end - 1) { + out = 1; + } + } +}; diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cpp new file mode 100644 index 000000000..10392a8e9 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cpp @@ -0,0 +1,61 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// +#define DAWN_GENERATED 1 +#define GRIDTOOLS_DAWN_HALO_EXTENT 0 +#define GT_VECTOR_LIMIT_SIZE 30 + +#undef FUSION_MAX_VECTOR_SIZE +#undef FUSION_MAX_MAP_SIZE +#define FUSION_MAX_VECTOR_SIZE GT_VECTOR_LIMIT_SIZE +#define FUSION_MAX_MAP_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_LIMIT_VECTOR_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS + +#include +#include "test/integration-test/CodeGen/Macros.hpp" +#include "driver-includes/verify.hpp" +#include "test/integration-test/CodeGen/Options.hpp" +#include "test/integration-test/CodeGen/generated/iteration_space_stencil_c++-naive.cpp" + +#ifndef OPTBACKEND +#define OPTBACKEND gt +#endif + +// clang-format off +#include INCLUDE_FILE(test/integration-test/CodeGen/generated/iteration_space_stencil_,OPTBACKEND.cpp) +// clang-format on + +using namespace dawn; +TEST(iteration_space_stencil, test) { + domain dom(Options::getInstance().m_size[0], Options::getInstance().m_size[1], + Options::getInstance().m_size[2]); + dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); + + verifier verif(dom); + + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize() + 1); + storage_t out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); + + verif.fill(-1.0, out_gt, out_naive); + + dawn_generated::OPTBACKEND::iteration_space_stencil iteration_space_gt(dom); + dawn_generated::cxxnaive::iteration_space_stencil iteration_space_naive(dom); + + iteration_space_gt.run(out_gt); + iteration_space_naive.run(out_naive); + + ASSERT_TRUE(verif.verify(out_gt, out_naive)); +} diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu new file mode 100644 index 000000000..5930037eb --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu @@ -0,0 +1 @@ +#include "iteration_space_stencil_benchmark.cpp" \ No newline at end of file From 4ade9037c536b682fc5f018875b1563855c2e60b Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 10 Dec 2021 11:46:08 +0100 Subject: [PATCH 08/24] revert copy stencil benchmark --- .../test/integration-test/CodeGen/copy_stencil_benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp b/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp index 5f3485453..9d79d8002 100644 --- a/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp +++ b/gtclang/test/integration-test/CodeGen/copy_stencil_benchmark.cpp @@ -46,7 +46,7 @@ TEST(copy_stencil, test) { verifier verif(dom); - meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize() + 1); storage_t in(meta_data, "in"), out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); verif.fillMath(8.0, 2.0, 1.5, 1.5, 2.0, 4.0, in); From 5aed526c49c912e7b0a8d9822d537663304bee3f Mon Sep 17 00:00:00 2001 From: mroethlin Date: Mon, 13 Dec 2021 11:58:10 +0100 Subject: [PATCH 09/24] fix codegen bug, add test --- dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp | 36 ++++++----- .../integration-test/CodeGen/CMakeLists.txt | 3 +- ...cil.cpp => iteration_space_stencil_01.cpp} | 2 +- ... iteration_space_stencil_01_benchmark.cpp} | 10 +-- .../iteration_space_stencil_01_benchmark.cu | 1 + .../CodeGen/iteration_space_stencil_02.cpp | 36 +++++++++++ .../iteration_space_stencil_02_benchmark.cpp | 61 +++++++++++++++++++ .../iteration_space_stencil_02_benchmark.cu | 1 + .../iteration_space_stencil_benchmark.cu | 1 - 9 files changed, 124 insertions(+), 27 deletions(-) rename gtclang/test/integration-test/CodeGen/{iteration_space_stencil.cpp => iteration_space_stencil_01.cpp} (96%) rename gtclang/test/integration-test/CodeGen/{iteration_space_stencil_benchmark.cpp => iteration_space_stencil_01_benchmark.cpp} (87%) create mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cu create mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil_02.cpp create mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cpp create mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cu delete mode 100644 gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu diff --git a/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp index 58e39c37f..1518b26db 100644 --- a/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp @@ -1076,27 +1076,25 @@ void MSCodeGen::generateCudaKernelCode() { if(std::any_of(stage.getIterationSpace().cbegin(), stage.getIterationSpace().cend(), [](const auto& p) -> bool { return p.has_value(); })) { - std::string iterators = "IJ"; - for(const auto& stage : iterateIIROver(*(stencilInstantiation_->getIIR()))) { - std::string prefix = "stage" + std::to_string(stage->getStageID()) + "Global"; - for(auto [index, interval] : enumerate(stage->getIterationSpace())) { - if(interval.has_value()) { - std::string arrName = prefix + iterators.at(index) + "Indices"; - std::string offset; - if (iterators[index] == 'I') { - offset = "blockIdx.x * " + std::to_string(ntx) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; - } - - if (iterators[index] == 'J') { - offset = "blockIdx.y * " + std::to_string(nty) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; - } - - guard += " && checkOffset(" + arrName + "_[0], " + arrName + - "_[1], globalOffsets_[" + std::to_string(index) + "] + " + - "(" + offset +"))"; + std::string iterators = "IJ"; + std::string prefix = "stage" + std::to_string(stage.getStageID()) + "Global"; + for(auto [index, interval] : enumerate(stage.getIterationSpace())) { + if(interval.has_value()) { + std::string arrName = prefix + iterators.at(index) + "Indices"; + std::string offset; + if (iterators[index] == 'I') { + offset = "blockIdx.x * " + std::to_string(ntx) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; } + + if (iterators[index] == 'J') { + offset = "blockIdx.y * " + std::to_string(nty) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; + } + + guard += " && checkOffset(" + arrName + "_[0], " + arrName + + "_[1], globalOffsets_[" + std::to_string(index) + "] + " + + "(" + offset +"))"; } - } + } } guard += ")"; diff --git a/gtclang/test/integration-test/CodeGen/CMakeLists.txt b/gtclang/test/integration-test/CodeGen/CMakeLists.txt index 05dd16a65..469a2bb9b 100644 --- a/gtclang/test/integration-test/CodeGen/CMakeLists.txt +++ b/gtclang/test/integration-test/CodeGen/CMakeLists.txt @@ -196,4 +196,5 @@ add_codegen_test(TEST kcache_fill_kparallel PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_fill_backward PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_flush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_epflush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) -add_codegen_test(TEST iteration_space_stencil FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) \ No newline at end of file +add_codegen_test(TEST iteration_space_stencil_01 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) +add_codegen_test(TEST iteration_space_stencil_02 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01.cpp similarity index 96% rename from gtclang/test/integration-test/CodeGen/iteration_space_stencil.cpp rename to gtclang/test/integration-test/CodeGen/iteration_space_stencil_01.cpp index 197ad0dc6..74163d65f 100644 --- a/gtclang/test/integration-test/CodeGen/iteration_space_stencil.cpp +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01.cpp @@ -17,7 +17,7 @@ #include "gtclang_dsl_defs/gtclang_dsl.hpp" using namespace gtclang::dsl; -stencil iteration_space_stencil { +stencil iteration_space_stencil_01 { storage out; Do { diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cpp similarity index 87% rename from gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cpp rename to gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cpp index 10392a8e9..9bc5d7f74 100644 --- a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cpp +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cpp @@ -28,14 +28,14 @@ #include "test/integration-test/CodeGen/Macros.hpp" #include "driver-includes/verify.hpp" #include "test/integration-test/CodeGen/Options.hpp" -#include "test/integration-test/CodeGen/generated/iteration_space_stencil_c++-naive.cpp" +#include "test/integration-test/CodeGen/generated/iteration_space_stencil_01_c++-naive.cpp" #ifndef OPTBACKEND #define OPTBACKEND gt #endif // clang-format off -#include INCLUDE_FILE(test/integration-test/CodeGen/generated/iteration_space_stencil_,OPTBACKEND.cpp) +#include INCLUDE_FILE(test/integration-test/CodeGen/generated/iteration_space_stencil_01_,OPTBACKEND.cpp) // clang-format on using namespace dawn; @@ -46,13 +46,13 @@ TEST(iteration_space_stencil, test) { verifier verif(dom); - meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize() + 1); + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); storage_t out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); verif.fill(-1.0, out_gt, out_naive); - dawn_generated::OPTBACKEND::iteration_space_stencil iteration_space_gt(dom); - dawn_generated::cxxnaive::iteration_space_stencil iteration_space_naive(dom); + dawn_generated::OPTBACKEND::iteration_space_stencil_01 iteration_space_gt(dom); + dawn_generated::cxxnaive::iteration_space_stencil_01 iteration_space_naive(dom); iteration_space_gt.run(out_gt); iteration_space_naive.run(out_naive); diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cu b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cu new file mode 100644 index 000000000..b9bc8b31a --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cu @@ -0,0 +1 @@ +#include "iteration_space_stencil_01_benchmark.cpp" \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02.cpp new file mode 100644 index 000000000..e2ad63bf4 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02.cpp @@ -0,0 +1,36 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// + +#include "gtclang_dsl_defs/gtclang_dsl.hpp" +using namespace gtclang::dsl; + +stencil iteration_space_stencil_02 { + storage out; + + Do { + vertical_region(k_start, k_end) { + out = 0; + } + + iteration_space(i_start + 1, i_end-1, j_start + 1, j_end - 1, k_start + 1, k_end - 1) { + out = 1; + } + + iteration_space(j_start + 1, j_end - 1, k_start + 1, k_end - 1) { + out = 2; + } + } +}; diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cpp new file mode 100644 index 000000000..7530d3215 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cpp @@ -0,0 +1,61 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// +#define DAWN_GENERATED 1 +#define GRIDTOOLS_DAWN_HALO_EXTENT 0 +#define GT_VECTOR_LIMIT_SIZE 30 + +#undef FUSION_MAX_VECTOR_SIZE +#undef FUSION_MAX_MAP_SIZE +#define FUSION_MAX_VECTOR_SIZE GT_VECTOR_LIMIT_SIZE +#define FUSION_MAX_MAP_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_LIMIT_VECTOR_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS + +#include +#include "test/integration-test/CodeGen/Macros.hpp" +#include "driver-includes/verify.hpp" +#include "test/integration-test/CodeGen/Options.hpp" +#include "test/integration-test/CodeGen/generated/iteration_space_stencil_02_c++-naive.cpp" + +#ifndef OPTBACKEND +#define OPTBACKEND gt +#endif + +// clang-format off +#include INCLUDE_FILE(test/integration-test/CodeGen/generated/iteration_space_stencil_02_,OPTBACKEND.cpp) +// clang-format on + +using namespace dawn; +TEST(iteration_space_stencil, test) { + domain dom(Options::getInstance().m_size[0], Options::getInstance().m_size[1], + Options::getInstance().m_size[2]); + dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); + + verifier verif(dom); + + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); + storage_t out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); + + verif.fill(-1.0, out_gt, out_naive); + + dawn_generated::OPTBACKEND::iteration_space_stencil_02 iteration_space_gt(dom); + dawn_generated::cxxnaive::iteration_space_stencil_02 iteration_space_naive(dom); + + iteration_space_gt.run(out_gt); + iteration_space_naive.run(out_naive); + + ASSERT_TRUE(verif.verify(out_gt, out_naive)); +} diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cu b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cu new file mode 100644 index 000000000..c7bea8dc5 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cu @@ -0,0 +1 @@ +#include "iteration_space_stencil_02_benchmark.cpp" \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu deleted file mode 100644 index 5930037eb..000000000 --- a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_benchmark.cu +++ /dev/null @@ -1 +0,0 @@ -#include "iteration_space_stencil_benchmark.cpp" \ No newline at end of file From 5921cef6786b38cb5390da087706d7d5321e955b Mon Sep 17 00:00:00 2001 From: mroethlin Date: Mon, 20 Dec 2021 11:16:43 +0100 Subject: [PATCH 10/24] add curiously failing gtclang test --- .../integration-test/CodeGen/CMakeLists.txt | 3 +- .../integration-test/CodeGen/var_stencil.cpp | 33 ++++++++++ .../CodeGen/var_stencil_benchmark.cpp | 62 +++++++++++++++++++ .../CodeGen/var_stencil_benchmark.cu | 1 + 4 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 gtclang/test/integration-test/CodeGen/var_stencil.cpp create mode 100644 gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp create mode 100644 gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cu diff --git a/gtclang/test/integration-test/CodeGen/CMakeLists.txt b/gtclang/test/integration-test/CodeGen/CMakeLists.txt index 469a2bb9b..40b69610e 100644 --- a/gtclang/test/integration-test/CodeGen/CMakeLists.txt +++ b/gtclang/test/integration-test/CodeGen/CMakeLists.txt @@ -197,4 +197,5 @@ add_codegen_test(TEST kcache_fill_backward PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_flush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_epflush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) add_codegen_test(TEST iteration_space_stencil_01 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) -add_codegen_test(TEST iteration_space_stencil_02 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) \ No newline at end of file +add_codegen_test(TEST iteration_space_stencil_02 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) +add_codegen_test(TEST var_stencil FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/var_stencil.cpp b/gtclang/test/integration-test/CodeGen/var_stencil.cpp new file mode 100644 index 000000000..a189b2760 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/var_stencil.cpp @@ -0,0 +1,33 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// + +#include "gtclang_dsl_defs/gtclang_dsl.hpp" +using namespace gtclang::dsl; + +stencil var_stencil { + storage out; + var tmp; + + Do { + vertical_region(k_start, k_end) { + tmp = 1; + } + + iteration_space(k_start, k_end) { + out = 1 + tmp; + } + } +}; diff --git a/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp new file mode 100644 index 000000000..8f0005aae --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp @@ -0,0 +1,62 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// +#define DAWN_GENERATED 1 +#define GRIDTOOLS_DAWN_HALO_EXTENT 3 +#define GT_VECTOR_LIMIT_SIZE 30 + +#undef FUSION_MAX_VECTOR_SIZE +#undef FUSION_MAX_MAP_SIZE +#define FUSION_MAX_VECTOR_SIZE GT_VECTOR_LIMIT_SIZE +#define FUSION_MAX_MAP_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_LIMIT_VECTOR_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS + +#include +#include "test/integration-test/CodeGen/Macros.hpp" +#include "driver-includes/verify.hpp" +#include "test/integration-test/CodeGen/Options.hpp" +#include "test/integration-test/CodeGen/generated/var_stencil_c++-naive.cpp" + +#ifndef OPTBACKEND +#define OPTBACKEND gt +#endif + +// clang-format off +#include INCLUDE_FILE(test/integration-test/CodeGen/generated/var_stencil_,OPTBACKEND.cpp) +// clang-format on + +using namespace dawn; +TEST(var_stencil, test) { + domain dom(Options::getInstance().m_size[0], Options::getInstance().m_size[1], + Options::getInstance().m_size[2]); + dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); + + verifier verif(dom); + + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize() + 1); + storage_t in(meta_data, "in"), out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); + + verif.fillMath(8.0, 2.0, 1.5, 1.5, 2.0, 4.0, in); + verif.fill(-1.0, out_gt, out_naive); + + dawn_generated::OPTBACKEND::var_stencil copy_gt(dom); + dawn_generated::cxxnaive::var_stencil copy_naive(dom); + + copy_gt.run(out_gt); + copy_naive.run(out_naive); + + ASSERT_TRUE(verif.verify(out_gt, out_naive)); +} diff --git a/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cu b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cu new file mode 100644 index 000000000..583dbbc7d --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cu @@ -0,0 +1 @@ +#include "var_stencil_benchmark.cpp" \ No newline at end of file From 1fbf65839ec921032c905b6a3292ba891ecc7860 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Mon, 3 Jan 2022 14:45:07 +0100 Subject: [PATCH 11/24] fixed bug that for MS involving temp fields only --- .../dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp | 45 ++++++++++++++++++- .../data/generate_versioned_field_ref.cpp | 43 ------------------ .../integration-test/CodeGen/CMakeLists.txt | 2 +- .../integration-test/CodeGen/var_stencil.cpp | 4 +- .../CodeGen/var_stencil_benchmark.cpp | 13 +++--- 5 files changed, 54 insertions(+), 53 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp b/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp index 78bf2cbec..fa3442a68 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp @@ -99,7 +99,50 @@ std::vector CodeGeneratorHelper::generateStrideArguments( } } } - if(!tempFields.empty()) { + if(!tempFields.empty() && nonTempFields.empty()) { + const auto& firstTmpField = *(tempFields.begin()); + std::string fieldName = metadata.getFieldNameFromAccessID(firstTmpField.second.getAccessID()); + if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Caller) { + strides.push_back("m_" + fieldName + ".strides()[3]," + "m_" + fieldName + ".strides()[4]," + "m_" + fieldName + ".get_storage_info_ptr()->template begin<0>()," + "m_" + + fieldName + ".get_storage_info_ptr()->template begin<1>()," + "m_" + + fieldName + ".get_storage_info_ptr()->template stride<1>()," + "m_" + + fieldName + ".get_storage_info_ptr()->template stride<4>()"); + } + + Array3i dims{-1, -1, -1}; + for(const auto& fieldInfo : ms->getParent()->getFields()) { + if(fieldInfo.second.field.getAccessID() == firstTmpField.second.getAccessID()) { + DAWN_ASSERT_MSG( + dawn::ast::dimension_isa( + fieldInfo.second.field.getFieldDimensions().getHorizontalFieldDimension()), + "Field has non cartesian horizontal dimension"); + auto const& dimCartesian = + dawn::ast::dimension_cast( + fieldInfo.second.field.getFieldDimensions().getHorizontalFieldDimension()); + dims[0] = dimCartesian.I() == 1; + dims[1] = dimCartesian.J() == 1; + dims[2] = fieldInfo.second.field.getFieldDimensions().K() == 1; + break; + } + } + + int usedDim = 0; + for(int i = 0; i < dims.size(); ++i) { + if(!dims[i]) + continue; + if(!(usedDim++)) + continue; + if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Callee) { + strides.push_back("const int stride_" + CodeGeneratorHelper::indexIteratorName(dims) + "_" + + std::to_string(i)); + } + } + if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Callee) { + strides.push_back("const int tmpBeginIIndex, const int tmpBeginJIndex, const int " + "jstride_tmp, const int kstride_tmp"); + } + } + else if(!tempFields.empty()) { const auto& firstTmpField = *(tempFields.begin()); std::string fieldName = metadata.getFieldNameFromAccessID(firstTmpField.second.getAccessID()); if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Caller) { diff --git a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp index 8040080f0..5dfb6e25b 100644 --- a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp +++ b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp @@ -19,13 +19,9 @@ class generate_versioned_field { ::dawn::edge_field_t& m_c; ::dawn::edge_field_t& m_d; ::dawn::edge_field_t& m_e; -<<<<<<< HEAD ::dawn::edge_field_t& m_c_0; ::dawn::unstructured_domain m_unstructured_domain; -======= - ::dawn::unstructured_domain m_unstructured_domain ; ->>>>>>> origin/fix_fieldversion2 public: stencil_37(::dawn::mesh_t const& mesh, int k_size, ::dawn::edge_field_t& a, @@ -36,7 +32,6 @@ class generate_versioned_field { ::dawn::edge_field_t& c_0) : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e), m_c_0(c_0) {} -<<<<<<< HEAD ~stencil_37() {} void sync_storages() {} @@ -74,40 +69,6 @@ class generate_versioned_field { } } sync_storages(); -======= - stencil_37(::dawn::mesh_t const &mesh, int k_size, ::dawn::edge_field_t&a, ::dawn::edge_field_t&b, ::dawn::edge_field_t&c, ::dawn::edge_field_t&d, ::dawn::edge_field_t&e) : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e){} - - ~stencil_37() { - } - - void sync_storages() { - } - static constexpr ::dawn::driver::unstructured_extent a_extent = {false, 0,0}; - static constexpr ::dawn::driver::unstructured_extent b_extent = {false, 0,0}; - static constexpr ::dawn::driver::unstructured_extent c_extent = {false, 0,0}; - static constexpr ::dawn::driver::unstructured_extent d_extent = {false, 0,0}; - static constexpr ::dawn::driver::unstructured_extent e_extent = {false, 0,0}; - - void run() { - using ::dawn::deref; -{ - for(int k = 0+0; k <= ( m_k_size == 0 ? 0 : (m_k_size - 1)) + 0+0; ++k) { - for(auto const& loc : getEdges(LibTag{}, m_mesh)) { -m_a(deref(LibTag{}, loc), (k + 0)) = ((m_b(deref(LibTag{}, loc), (k + 0)) / m_c(deref(LibTag{}, loc), (k + 0))) + (::dawn::float_type) 5); - } for(auto const& loc : getEdges(LibTag{}, m_mesh)) { -if(m_d(deref(LibTag{}, loc), (k + 0))) -{ - m_a(deref(LibTag{}, loc), (k + 0)) = m_b(deref(LibTag{}, loc), (k + 0)); -} -else -{ - if(m_e(deref(LibTag{}, loc), (k + 0))) - { - m_c(deref(LibTag{}, loc), (k + 0)) = (m_a(deref(LibTag{}, loc), (k + 0)) + (::dawn::float_type) 1); - } -} - } }} sync_storages(); ->>>>>>> origin/fix_fieldversion2 } }; static constexpr const char* s_name = "generate_versioned_field"; @@ -123,7 +84,6 @@ else m_stencil_37.m_unstructured_domain.set_splitter_index({loc, subdomain, offset}, index); } -<<<<<<< HEAD generate_versioned_field(const ::dawn::mesh_t& mesh, int k_size, ::dawn::edge_field_t& a, ::dawn::edge_field_t& b, @@ -132,9 +92,6 @@ else ::dawn::edge_field_t& e) : m_stencil_37(mesh, k_size, a, b, c, d, e, m_c_0), m_c_0(allocateField(LibTag{}, numEdges(LibTag{}, mesh), k_size)) {} -======= - generate_versioned_field(const ::dawn::mesh_t &mesh, int k_size, ::dawn::edge_field_t& a, ::dawn::edge_field_t& b, ::dawn::edge_field_t& c, ::dawn::edge_field_t& d, ::dawn::edge_field_t& e) : m_stencil_37(mesh, k_size,a,b,c,d,e){} ->>>>>>> origin/fix_fieldversion2 void run() { m_stencil_37.run(); diff --git a/gtclang/test/integration-test/CodeGen/CMakeLists.txt b/gtclang/test/integration-test/CodeGen/CMakeLists.txt index 40b69610e..97f8531dd 100644 --- a/gtclang/test/integration-test/CodeGen/CMakeLists.txt +++ b/gtclang/test/integration-test/CodeGen/CMakeLists.txt @@ -198,4 +198,4 @@ add_codegen_test(TEST kcache_flush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_epflush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) add_codegen_test(TEST iteration_space_stencil_01 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) add_codegen_test(TEST iteration_space_stencil_02 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) -add_codegen_test(TEST var_stencil FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) \ No newline at end of file +add_codegen_test(TEST var_stencil PLAIN_CUDA_ONLY) \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/var_stencil.cpp b/gtclang/test/integration-test/CodeGen/var_stencil.cpp index a189b2760..5371592ca 100644 --- a/gtclang/test/integration-test/CodeGen/var_stencil.cpp +++ b/gtclang/test/integration-test/CodeGen/var_stencil.cpp @@ -18,7 +18,7 @@ using namespace gtclang::dsl; stencil var_stencil { - storage out; + storage out, in; var tmp; Do { @@ -27,7 +27,7 @@ stencil var_stencil { } iteration_space(k_start, k_end) { - out = 1 + tmp; + out = in + tmp; } } }; diff --git a/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp index 8f0005aae..60d61a52b 100644 --- a/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp +++ b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp @@ -41,12 +41,13 @@ using namespace dawn; TEST(var_stencil, test) { domain dom(Options::getInstance().m_size[0], Options::getInstance().m_size[1], - Options::getInstance().m_size[2]); - dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); + Options::getInstance().m_size[2]); + + dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); verifier verif(dom); - meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize() + 1); + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); storage_t in(meta_data, "in"), out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); verif.fillMath(8.0, 2.0, 1.5, 1.5, 2.0, 4.0, in); @@ -55,8 +56,8 @@ TEST(var_stencil, test) { dawn_generated::OPTBACKEND::var_stencil copy_gt(dom); dawn_generated::cxxnaive::var_stencil copy_naive(dom); - copy_gt.run(out_gt); - copy_naive.run(out_naive); - + copy_gt.run(out_gt, in); + copy_naive.run(out_naive, in); + ASSERT_TRUE(verif.verify(out_gt, out_naive)); } From e01f4f106cc3dbb0d9c009a126aa93d3d07dd834 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Mon, 3 Jan 2022 15:01:28 +0100 Subject: [PATCH 12/24] update ref (accidental revert?) --- .../data/generate_versioned_field_ref.cpp | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp index 5dfb6e25b..68e85d821 100644 --- a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp +++ b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp @@ -19,7 +19,6 @@ class generate_versioned_field { ::dawn::edge_field_t& m_c; ::dawn::edge_field_t& m_d; ::dawn::edge_field_t& m_e; - ::dawn::edge_field_t& m_c_0; ::dawn::unstructured_domain m_unstructured_domain; public: @@ -28,9 +27,8 @@ class generate_versioned_field { ::dawn::edge_field_t& b, ::dawn::edge_field_t& c, ::dawn::edge_field_t& d, - ::dawn::edge_field_t& e, - ::dawn::edge_field_t& c_0) - : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e), m_c_0(c_0) {} + ::dawn::edge_field_t& e) + : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e) {} ~stencil_37() {} @@ -40,23 +38,17 @@ class generate_versioned_field { static constexpr ::dawn::driver::unstructured_extent c_extent = {false, 0, 0}; static constexpr ::dawn::driver::unstructured_extent d_extent = {false, 0, 0}; static constexpr ::dawn::driver::unstructured_extent e_extent = {false, 0, 0}; - static constexpr ::dawn::driver::unstructured_extent c_0_extent = {false, 0, 0}; void run() { using ::dawn::deref; - { - for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size)) + 0 - 1 + 0; ++k) { - for(auto const& loc : getEdges(LibTag{}, m_mesh)) { - m_c_0(deref(LibTag{}, loc), (k + 0)) = m_c(deref(LibTag{}, loc), (k + 0)); - } - } - } { for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size)) + 0 - 1 + 0; ++k) { for(auto const& loc : getEdges(LibTag{}, m_mesh)) { m_a(deref(LibTag{}, loc), (k + 0)) = - ((m_b(deref(LibTag{}, loc), (k + 0)) / m_c_0(deref(LibTag{}, loc), (k + 0))) + + ((m_b(deref(LibTag{}, loc), (k + 0)) / m_c(deref(LibTag{}, loc), (k + 0))) + (::dawn::float_type)5); + } + for(auto const& loc : getEdges(LibTag{}, m_mesh)) { if(m_d(deref(LibTag{}, loc), (k + 0))) { m_a(deref(LibTag{}, loc), (k + 0)) = m_b(deref(LibTag{}, loc), (k + 0)); } else { @@ -90,8 +82,7 @@ class generate_versioned_field { ::dawn::edge_field_t& c, ::dawn::edge_field_t& d, ::dawn::edge_field_t& e) - : m_stencil_37(mesh, k_size, a, b, c, d, e, m_c_0), - m_c_0(allocateField(LibTag{}, numEdges(LibTag{}, mesh), k_size)) {} + : m_stencil_37(mesh, k_size, a, b, c, d, e) {} void run() { m_stencil_37.run(); From c234f1cc5acd171654c597ef2fca0bd15f0dcb90 Mon Sep 17 00:00:00 2001 From: Matthias Roethlin Date: Fri, 18 Mar 2022 11:01:35 +0100 Subject: [PATCH 13/24] rough implementation of a c interface compatible structured cuda codegen. breaks naive cpp bindgen --- dawn/src/dawn/CodeGen/CodeGen.cpp | 10 +- .../dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp | 8 +- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 486 ++++++++++++++++-- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h | 34 +- 4 files changed, 487 insertions(+), 51 deletions(-) diff --git a/dawn/src/dawn/CodeGen/CodeGen.cpp b/dawn/src/dawn/CodeGen/CodeGen.cpp index 8c2d20d56..87e0a0c83 100644 --- a/dawn/src/dawn/CodeGen/CodeGen.cpp +++ b/dawn/src/dawn/CodeGen/CodeGen.cpp @@ -338,10 +338,10 @@ void CodeGen::addTmpStorageDeclaration( Structure& stencilClass, IndexRange>& tempFields) const { if(!(tempFields.empty())) { - stencilClass.addMember(tmpMetadataTypename_, tmpMetadataName_); + stencilClass.addMember("static " + tmpMetadataTypename_, tmpMetadataName_); for(const auto& field : tempFields) { - stencilClass.addMember(tmpStorageTypename_, "m_" + field.second.Name); + stencilClass.addMember("static " + tmpStorageTypename_, "m_" + field.second.Name); } } } @@ -448,16 +448,16 @@ void CodeGen::generateGlobalIndices(const iir::Stencil& stencil, Structure& sten bool genCheckOffset) const { for(auto& stage : iterateIIROver(stencil)) { if(stage->getIterationSpace()[0].has_value()) { - stencilClass.addMember("std::array", + stencilClass.addMember("static std::array", "stage" + std::to_string(stage->getStageID()) + "GlobalIIndices"); } if(stage->getIterationSpace()[1].has_value()) { - stencilClass.addMember("std::array", + stencilClass.addMember("static std::array", "stage" + std::to_string(stage->getStageID()) + "GlobalJIndices"); } } - stencilClass.addMember("std::array", "globalOffsets"); + stencilClass.addMember("static std::array", "globalOffsets"); auto globalOffsetFunc = stencilClass.addMemberFunction("static std::array", "computeGlobalOffsets"); globalOffsetFunc.addArg("int rank, const " + c_dgt + "domain& dom, int xcols, int ycols"); diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index 33e757369..b3ee3ab61 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -1704,7 +1704,8 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, runWrapper.addACCLine("host_data use_device( &"); auto fieldArgs = getFieldArgs(/*includeSavedState*/ true); for(int i = 0; i < fieldArgs.size(); ++i) { - runWrapper.addACCLine(fortranIndent + fieldArgs[i] + (i == (fieldArgs.size() - 1) ? " &" : ", &")); + runWrapper.addACCLine(fortranIndent + fieldArgs[i] + + (i == (fieldArgs.size() - 1) ? " &" : ", &")); } runWrapper.addACCLine(")"); runWrapper.addBodyLine("#ifdef __DSL_VERIFY", /*withIndentation*/ false); @@ -1722,7 +1723,7 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, for(auto fieldID : getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, dawn::iir::Field::IntendKind::InputOutput})) { verticalBoundNames.push_back(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + - "_kvert_max"); + "_kvert_max"); } // memory management functions for production interface @@ -1787,7 +1788,8 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, setupWrapper.addBodyLine(fortranIndent + verticalBoundNames[i] + ", &"); } - setupWrapper.addBodyLine(fortranIndent + verticalBoundNames[verticalBoundNames.size() - 1] + " &"); + setupWrapper.addBodyLine(fortranIndent + verticalBoundNames[verticalBoundNames.size() - 1] + + " &"); setupWrapper.addBodyLine(")"); diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index 46b146371..d57c4d9a2 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -19,11 +19,13 @@ #include "dawn/CodeGen/Cuda/CacheProperties.h" #include "dawn/CodeGen/Cuda/CodeGeneratorHelper.h" #include "dawn/CodeGen/Cuda/MSCodeGen.h" +#include "dawn/CodeGen/F90Util.h" #include "dawn/IIR/IIRNodeIterator.h" #include "dawn/IIR/StencilInstantiation.h" #include "dawn/SIR/SIR.h" #include "dawn/Support/Array.h" #include "dawn/Support/Assert.h" +#include "dawn/Support/FileSystem.h" #include "dawn/Support/Iterator.h" #include "dawn/Support/Logger.h" #include "dawn/Support/StringUtil.h" @@ -57,14 +59,19 @@ run(const std::map>& const Options& options) { const Array3i domain_size{options.DomainSizeI, options.DomainSizeJ, options.DomainSizeK}; CudaCodeGen CG(stencilInstantiationMap, options.MaxHaloSize, options.nsms, options.MaxBlocksPerSM, - domain_size, options.RunWithSync); + domain_size, options.OutputCHeader, options.OutputFortranInterface, + options.RunWithSync); return CG.generateCode(); } CudaCodeGen::CudaCodeGen(const StencilInstantiationContext& ctx, int maxHaloPoints, int nsms, - int maxBlocksPerSM, const Array3i& domainSize, bool runWithSync) - : CodeGen(ctx, maxHaloPoints), codeGenOptions_{nsms, maxBlocksPerSM, domainSize, runWithSync} {} + int maxBlocksPerSM, const Array3i& domainSize, + std::optional outputCHeader, + std::optional outputFortranInterface, bool runWithSync) + : CodeGen(ctx, maxHaloPoints), codeGenOptions_{nsms, maxBlocksPerSM, + domainSize, runWithSync, + outputCHeader, outputFortranInterface} {} CudaCodeGen::~CudaCodeGen() {} @@ -80,6 +87,141 @@ void CudaCodeGen::generateAllCudaKernels( } } +void CudaCodeGen::generateAPIRunFunctions( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl) const { + const auto& stencils = stencilInstantiation->getStencils(); + + // generate the code for each of the stencils + for(const auto& stencilPtr : stencils) { + const auto& stencil = *stencilPtr; + + std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); + + std::string fullyQualitfiedName = + "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; + + MemberFunction runFun("void", "run_" + stencilInstantiation->getName(), ssSW, 0, onlyDecl); + + const auto stencilFields = stencil.getOrderedFields(); + + auto nonTempFields = + makeRange(stencilFields, [](std::pair const& p) { + return !p.second.IsTemporary; + }); + + for(auto field : nonTempFields) { + runFun.addArg("double *" + field.second.Name + "_ptr"); + } + runFun.finishArgs(); + + if(!onlyDecl) { + runFun.addStatement(fullyQualitfiedName + "::meta_data_t meta_data(" + fullyQualitfiedName + + "::dom.isize(), " + fullyQualitfiedName + "::dom.jsize(), " + + fullyQualitfiedName + "::dom.ksize())"); + for(auto field : nonTempFields) { + runFun.addStatement(fullyQualitfiedName + "::storage_t " + field.second.Name + + "(meta_data, " + field.second.Name + "_ptr)"); + } + + { + std::string fields; + std::string sep = ""; + for(auto field : nonTempFields) { + fields += sep + field.second.Name; + sep = ", "; + } + runFun.addStatement(fullyQualitfiedName + "::run(" + fields + ")"); + } + } + runFun.commit(); + } +} + +void CudaCodeGen::generateSetupFunctions( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl) const { + const auto& stencils = stencilInstantiation->getStencils(); + + // generate the code for each of the stencils + for(const auto& stencilPtr : stencils) { + const auto& stencil = *stencilPtr; + + std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); + + std::string fullyQualitfiedName = + "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; + MemberFunction setupFun("void", "setup_" + stencilInstantiation->getName(), ssSW, 0, onlyDecl); + setupFun.addArg("int i"); + setupFun.addArg("int j"); + setupFun.addArg("int k"); + setupFun.finishArgs(); + if(!onlyDecl) { + setupFun.addStatement(fullyQualitfiedName + + "::setup(gridtools::dawn::domain(i, j, k), 1, 1, 1)"); + } + setupFun.commit(); + } +} + +void CudaCodeGen::generateStaticMembersTrailer( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties) const { + + const auto& stencils = stencilInstantiation->getStencils(); + + // generate the code for each of the stencils + for(const auto& stencilPtr : stencils) { + const auto& stencil = *stencilPtr; + + std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); + + std::string fullyQualitfiedName = + "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; + + ssSW << "gridtools::dawn::domain " + fullyQualitfiedName + + "::m_dom = gridtools::dawn::domain(-1, -1, -1);"; + ssSW << fullyQualitfiedName + "::tmp_meta_data_t " + fullyQualitfiedName + + "::m_tmp_meta_data(-1, -1, -1, -1,-1);"; + + if(stencil.isEmpty()) + continue; + + // fields used in the stencil + const auto stencilFields = stencil.getOrderedFields(); + + auto tempFields = + makeRange(stencilFields, [](std::pair const& p) { + return p.second.IsTemporary; + }); + + if(!(tempFields.empty())) { + for(const auto& fieldPair : tempFields) { + ssSW << fullyQualitfiedName + << "::tmp_storage_t " + fullyQualitfiedName + "::" + fieldPair.second.Name + ";"; + } + } + + std::string iterators = "ij"; + for(auto& stage : iterateIIROver(stencil)) { + int index = 0; + for(const auto& interval : stage->getIterationSpace()) { + if(interval.has_value()) { + std::string iterator = iterators.substr(index, 1); + std::string arrName = "stage" + std::to_string(stage->getStageID()) + "Global" + + (char)std::toupper(iterator.at(0)) + "Indices"; + ssSW << "std::array " << fullyQualitfiedName + "::" + arrName + ";"; + index += 1; + } + } + } + + if(iterationSpaceSet_) { + ssSW << "std::array " << fullyQualitfiedName + "::globalOffsets;"; + } + } +} + std::string CudaCodeGen::generateStencilInstantiation( const std::shared_ptr& stencilInstantiation) { using namespace codegen; @@ -138,6 +280,12 @@ std::string CudaCodeGen::generateStencilInstantiation( cudaNamespace.commit(); dawnNamespace.commit(); + ssSW << "extern \"C\" {\n"; + generateAPIRunFunctions(ssSW, stencilInstantiation, codeGenProperties); + generateSetupFunctions(ssSW, stencilInstantiation, codeGenProperties); + ssSW << "}\n"; + generateStaticMembersTrailer(ssSW, stencilInstantiation, codeGenProperties); + return ssSW.str(); } @@ -227,6 +375,9 @@ void CudaCodeGen::generateStencilClasses( generateStencilClassCtr(stencilClass, stencil, globalsMap, nonTempFields, tempFields, stencilProperties); + generateStencilSetupMethod(stencilClass, stencil, globalsMap, nonTempFields, tempFields, + stencilProperties); + // accumulated extents of API fields generateFieldExtentsInfo(stencilClass, nonTempFields, ast::GridType::Cartesian); @@ -256,7 +407,7 @@ void CudaCodeGen::generateStencilClassMembers( stencilClass.addMember("globals&", "m_globals"); } - stencilClass.addMember("const " + c_dgt + "domain", "m_dom"); + stencilClass.addMember("static " + c_dgt + "domain", "m_dom"); if(!tempFields.empty()) { stencilClass.addComment("temporary storage declarations"); @@ -281,10 +432,10 @@ void CudaCodeGen::generateStencilClassCtr( stencilClassCtr.addArg("int ycols"); stencilClassCtr.addInit("sbase(\"" + stencilClass.getName() + "\")"); - stencilClassCtr.addInit("m_dom(dom_)"); + stencilClassCtr.addStatement("m_dom =dom_"); if(!globalsMap.empty()) { - stencilClassCtr.addInit("m_globals(globals_)"); + stencilClassCtr.addStatement("m_globals = globals_"); } std::string iterators = "ij"; @@ -295,20 +446,34 @@ void CudaCodeGen::generateStencilClassCtr( std::string iterator = iterators.substr(index, 1); std::string arrName = "stage" + std::to_string(stage->getStageID()) + "Global" + (char)std::toupper(iterator.at(0)) + "Indices"; - stencilClassCtr.addInit(arrName + "({" + - makeIntervalBoundExplicit(iterator, interval.value(), - iir::Interval::Bound::lower, "dom_") + - " , " + - makeIntervalBoundExplicit(iterator, interval.value(), - iir::Interval::Bound::upper, "dom_") + - "})"); + stencilClassCtr.addStatement( + arrName + " = {" + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::lower, + "dom_") + + " , " + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::upper, + "dom_") + + "}"); } index += 1; } } if(iterationSpaceSet_) { - stencilClassCtr.addInit("globalOffsets({computeGlobalOffsets(rank, m_dom, xcols, ycols)})"); + stencilClassCtr.addStatement( + "globalOffsets = {computeGlobalOffsets(rank, m_dom, xcols, ycols)}"); + + std::string iterators = "IJ"; + for(auto& stage : iterateIIROver(stencil)) { + for(auto [index, interval] : enumerate(stage->getIterationSpace())) { + if(interval.has_value()) { + std::string hostName = "stage" + std::to_string(stage->getStageID()) + "Global" + + iterators.at(index) + "Indices"; + addCudaCopySymbol(stencilClassCtr, hostName, "int"); + } + } + } + addCudaCopySymbol(stencilClassCtr, "globalOffsets", "unsigned"); } addTmpStorageInit(stencilClassCtr, stencil, tempFields); @@ -457,13 +622,82 @@ void CudaCodeGen::addCudaCopySymbol(MemberFunction& runMethod, const std::string dataType + ") * " + arrName + ".size())"); } +void CudaCodeGen::generateStencilSetupMethod( + Structure& stencilClass, const iir::Stencil& stencil, const ast::GlobalVariableMap& globalsMap, + IndexRange>& nonTempFields, + IndexRange>& tempFields, + std::shared_ptr stencilProperties) const { + + auto stencilClassSetup = stencilClass.addMemberFunction("static void", "setup"); + + stencilClassSetup.addArg("const " + c_dgt + "domain& dom_"); + if(!globalsMap.empty()) { + stencilClassSetup.addArg("globals& globals_"); + } + stencilClassSetup.addArg("int rank"); + stencilClassSetup.addArg("int xcols"); + stencilClassSetup.addArg("int ycols"); + + stencilClassSetup.addStatement("m_dom =dom_"); + + if(!globalsMap.empty()) { + stencilClassSetup.addStatement("m_globals = globals_"); + } + + std::string iterators = "ij"; + for(auto& stage : iterateIIROver(stencil)) { + int index = 0; + for(const auto& interval : stage->getIterationSpace()) { + if(interval.has_value()) { + std::string iterator = iterators.substr(index, 1); + std::string arrName = "stage" + std::to_string(stage->getStageID()) + "Global" + + (char)std::toupper(iterator.at(0)) + "Indices"; + stencilClassSetup.addStatement( + arrName + " = {" + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::lower, + "dom_") + + " , " + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::upper, + "dom_") + + "}"); + } + index += 1; + } + } + + if(iterationSpaceSet_) { + stencilClassSetup.addStatement( + "globalOffsets = {computeGlobalOffsets(rank, m_dom, xcols, ycols)}"); + } + + if(iterationSpaceSet_) { + stencilClassSetup.addStatement( + "globalOffsets = {computeGlobalOffsets(rank, m_dom, xcols, ycols)}"); + + std::string iterators = "IJ"; + for(auto& stage : iterateIIROver(stencil)) { + for(auto [index, interval] : enumerate(stage->getIterationSpace())) { + if(interval.has_value()) { + std::string hostName = "stage" + std::to_string(stage->getStageID()) + "Global" + + iterators.at(index) + "Indices"; + addCudaCopySymbol(stencilClassSetup, hostName, "int"); + } + } + } + addCudaCopySymbol(stencilClassSetup, "globalOffsets", "unsigned"); + } + + addTmpStorageInit(stencilClassSetup, stencil, tempFields); + stencilClassSetup.commit(); +} + void CudaCodeGen::generateStencilRunMethod( Structure& stencilClass, const iir::Stencil& stencil, const std::shared_ptr& stencilProperties, const std::shared_ptr& stencilInstantiation, const std::unordered_map& paramNameToType, const ast::GlobalVariableMap& globalsMap) const { - MemberFunction stencilRunMethod = stencilClass.addMemberFunction("void", "run", ""); + MemberFunction stencilRunMethod = stencilClass.addMemberFunction("static void", "run", ""); const auto& metadata = stencilInstantiation->getMetaData(); // fields used in the stencil @@ -482,7 +716,7 @@ void CudaCodeGen::generateStencilRunMethod( stencilRunMethod.startBody(); stencilRunMethod.addComment("starting timers"); - stencilRunMethod.addStatement("start()"); + stencilRunMethod.addComment("start()"); for(const auto& multiStagePtr : stencil.getChildren()) { const iir::MultiStage& multiStage = *multiStagePtr; @@ -564,20 +798,6 @@ void CudaCodeGen::generateStencilRunMethod( stencilRunMethod.addStatement("const unsigned int nbz = 1"); } - if(iterationSpaceSet_) { - std::string iterators = "IJ"; - for(auto& stage : iterateIIROver(stencil)) { - for(auto [index, interval] : enumerate(stage->getIterationSpace())) { - if(interval.has_value()) { - std::string hostName = "stage" + std::to_string(stage->getStageID()) + "Global" + - iterators.at(index) + "Indices"; - addCudaCopySymbol(stencilRunMethod, hostName, "int"); - } - } - } - addCudaCopySymbol(stencilRunMethod, "globalOffsets", "unsigned"); - } - stencilRunMethod.addStatement("dim3 blocks(nbx, nby, nbz)"); std::string kernelCall = CodeGeneratorHelper::buildCudaKernelName(stencilInstantiation, multiStagePtr) + @@ -614,8 +834,8 @@ void CudaCodeGen::generateStencilRunMethod( idx = 0; for(const auto& fieldPair : tempMSFieldsNonLocalCached) { // in some cases (where there are no horizontal extents) we dont use the special tmp index - // iterator, but rather a normal 3d field index iterator. In that case we pass temporaries in - // the same manner as normal fields + // iterator, but rather a normal 3d field index iterator. In that case we pass temporaries + // in the same manner as normal fields if(idx > 0) args += ","; if(!CodeGeneratorHelper::useTemporaries(multiStagePtr->getParent(), metadata)) { @@ -645,7 +865,7 @@ void CudaCodeGen::generateStencilRunMethod( } stencilRunMethod.addComment("stopping timers"); - stencilRunMethod.addStatement("pause()"); + stencilRunMethod.addComment("pause()"); stencilRunMethod.commit(); } @@ -679,23 +899,205 @@ void CudaCodeGen::addTmpStorageInit( if(!(tempFields.empty())) { auto const& hMaxExtents = iir::extent_cast(maxExtents.horizontalExtent()); - ctr.addInit(tmpMetadataName_ + "(" + std::to_string(blockSize[0]) + "+" + - std::to_string(-hMaxExtents.iMinus() + hMaxExtents.iPlus()) + ", " + - std::to_string(blockSize[1]) + "+" + - std::to_string(-hMaxExtents.jMinus() + hMaxExtents.jPlus()) + ", (dom_.isize()+ " + - std::to_string(blockSize[0]) + " - 1) / " + std::to_string(blockSize[0]) + - ", (dom_.jsize()+ " + std::to_string(blockSize[1]) + " - 1) / " + - std::to_string(blockSize[1]) + ", dom_.ksize() + 2 * " + - std::to_string(getVerticalTmpHaloSize(stencil)) + ")"); + ctr.addStatement(tmpMetadataName_ + " = tmp_meta_data_t(" + std::to_string(blockSize[0]) + "+" + + std::to_string(-hMaxExtents.iMinus() + hMaxExtents.iPlus()) + ", " + + std::to_string(blockSize[1]) + "+" + + std::to_string(-hMaxExtents.jMinus() + hMaxExtents.jPlus()) + + ", (dom_.isize()+ " + std::to_string(blockSize[0]) + " - 1) / " + + std::to_string(blockSize[0]) + ", (dom_.jsize()+ " + + std::to_string(blockSize[1]) + " - 1) / " + std::to_string(blockSize[1]) + + ", dom_.ksize() + 2 * " + std::to_string(getVerticalTmpHaloSize(stencil)) + + ")"); for(const auto& fieldPair : tempFields) { - ctr.addInit("m_" + fieldPair.second.Name + "(" + tmpMetadataName_ + ")"); + ctr.addStatement("m_" + fieldPair.second.Name + " = tmp_storage_t(" + tmpMetadataName_ + ")"); + } + } +} + +void CudaCodeGen::generateCHeaderSI( + std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation) const { + using namespace codegen; + + CodeGenProperties codeGenProperties = computeCodeGenProperties(stencilInstantiation.get()); + + ssSW << "extern \"C\" {\n"; + generateAPIRunFunctions(ssSW, stencilInstantiation, codeGenProperties, /*onlyDecl=*/true); + generateSetupFunctions(ssSW, stencilInstantiation, codeGenProperties, /*onlyDecl=*/true); + ssSW << "}\n"; +} + +std::string CudaCodeGen::generateCHeader() const { + std::stringstream ssSW; + ssSW << "#pragma once\n"; + ssSW << "#include \"driver-includes/defs.hpp\"\n"; + ssSW << "#include \"driver-includes/cuda_utils.hpp\"\n"; + + for(const auto& nameStencilCtxPair : context_) { + std::shared_ptr stencilInstantiation = nameStencilCtxPair.second; + generateCHeaderSI(ssSW, stencilInstantiation); + } + + return ssSW.str(); +} + +std::vector getUsedFields(const dawn::iir::Stencil& stencil, + std::unordered_set intend = { + dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput, + dawn::iir::Field::IntendKind::Input}) { + const auto& APIFields = stencil.getMetadata().getAPIFields(); + const auto& stenFields = stencil.getOrderedFields(); + auto usedAPIFields = + dawn::makeRange(APIFields, [&stenFields](int f) { return stenFields.count(f); }); + + std::vector res; + for(auto fieldID : usedAPIFields) { + auto field = stenFields.at(fieldID); + if(intend.count(field.field.getIntend())) { + res.push_back(fieldID); + } + } + + return res; +} +std::vector getGlobalsNames(const dawn::ast::GlobalVariableMap& globalsMap) { + std::vector globalsNames; + for(const auto& global : globalsMap) { + globalsNames.push_back(global.first); + } + return globalsNames; +} + +static void +generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, + const std::shared_ptr& stencilInstantiation) { + const auto& stencils = stencilInstantiation->getStencils(); + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + auto globalTypeToFortType = [](const ast::Global& global) { + switch(global.getType()) { + case ast::Value::Kind::Boolean: + return FortranAPI::InterfaceType::BOOLEAN; + case ast::Value::Kind::Double: + return FortranAPI::InterfaceType::DOUBLE; + case ast::Value::Kind::Float: + return FortranAPI::InterfaceType::FLOAT; + case ast::Value::Kind::Integer: + return FortranAPI::InterfaceType::INTEGER; + case ast::Value::Kind::String: + default: + throw std::runtime_error("string globals not supported in cuda ico backend"); + } + }; + + // The following assert is needed because we have only one (user-defined) name for a stencil + // instantiation (stencilInstantiation->getName()). We could compute a per-stencil name ( + // codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil.getStencilID()) ) + // however the interface would not be very useful if the name is generated. + DAWN_ASSERT_MSG(stencils.size() <= 1, + "Unable to generate interface. More than one stencil in stencil instantiation."); + const auto& stencil = *stencils[0]; + + std::vector interfaces = { + FortranInterfaceAPI("run_" + stencilInstantiation->getName())}; + + auto addArgsToAPI = [&](FortranAPI& api, bool includeSavedState, bool optThresholds) { + for(const auto& global : globalsMap) { + api.addArg(global.first, globalTypeToFortType(global.second)); + } + for(auto fieldID : stencilInstantiation->getMetaData().getAPIFields()) { + api.addArg( + stencilInstantiation->getMetaData().getNameFromAccessID(fieldID), + FortranAPI::InterfaceType::DOUBLE /* Unfortunately we need to know at codegen + time whether we have fields in SP/DP */ + , + stencilInstantiation->getMetaData().getFieldDimensions(fieldID).rank()); } + if(includeSavedState) { + for(auto fieldID : getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput})) { + api.addArg( + stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + "_before", + FortranAPI::InterfaceType::DOUBLE /* Unfortunately we need to know at codegen + time whether we have fields in SP/DP */ + , + stencilInstantiation->getMetaData().getFieldDimensions(fieldID).rank()); + } + + for(auto fieldID : getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput})) { + if(optThresholds) { + api.addOptArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + + "_rel_tol", + FortranAPI::InterfaceType::DOUBLE); + api.addOptArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + + "_abs_tol", + FortranAPI::InterfaceType::DOUBLE); + } else { + api.addArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + "_rel_tol", + FortranAPI::InterfaceType::DOUBLE); + api.addArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + "_abs_tol", + FortranAPI::InterfaceType::DOUBLE); + } + } + } + }; + + addArgsToAPI(interfaces[0], /*includeSavedState*/ false, false); + fimGen.addInterfaceAPI(std::move(interfaces[0])); + std::string fortranIndent = " "; + + // memory management functions for production interface + FortranInterfaceAPI setup("setup_" + stencilInstantiation->getName()); + setup.addArg("i", FortranAPI::InterfaceType::INTEGER); + setup.addArg("j", FortranAPI::InterfaceType::INTEGER); + setup.addArg("k", FortranAPI::InterfaceType::INTEGER); + fimGen.addInterfaceAPI(std::move(setup)); +} + +std::string CudaCodeGen::generateF90Interface(std::string moduleName) const { + std::stringstream ss; + IndentedStringStream iss(ss); + + FortranInterfaceModuleGen fimGen(iss, moduleName); + + for(const auto& nameStencilCtxPair : context_) { + std::shared_ptr stencilInstantiation = nameStencilCtxPair.second; + generateF90InterfaceSI(fimGen, stencilInstantiation); } + + fimGen.commit(); + + return iss.str(); } std::unique_ptr CudaCodeGen::generateCode() { DAWN_LOG(INFO) << "Starting code generation for GTClang ..."; + if(codeGenOptions_.OutputCHeader) { + fs::path filePath = *codeGenOptions_.OutputCHeader; + std::ofstream headerFile; + headerFile.open(filePath); + if(headerFile) { + headerFile << generateCHeader(); + headerFile.close(); + } else { + throw std::runtime_error("Error writing to " + filePath.string() + ": " + strerror(errno)); + } + } + if(codeGenOptions_.OutputFortranInterface) { + fs::path filePath = *codeGenOptions_.OutputFortranInterface; + std::string moduleName = filePath.filename().replace_extension("").string(); + std::ofstream interfaceFile; + interfaceFile.open(filePath); + if(interfaceFile) { + interfaceFile << generateF90Interface(moduleName); + interfaceFile.close(); + } else { + throw std::runtime_error("Error writing to " + filePath.string() + ": " + strerror(errno)); + } + } + // Generate code for StencilInstantiations std::map stencils; for(const auto& nameStencilCtxPair : context_) { diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h index a79073708..46800d22e 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h @@ -44,7 +44,9 @@ class CudaCodeGen : public CodeGen { public: ///@brief constructor CudaCodeGen(const StencilInstantiationContext& ctx, int maxHaloPoints, int nsms, - int maxBlocksPerSM, const Array3i& domainSize, bool runWithSync = true); + int maxBlocksPerSM, const Array3i& domainSize, + std::optional outputCHeader, + std::optional OutputFortranInterface, bool runWithSync = true); virtual ~CudaCodeGen(); virtual std::unique_ptr generateCode() override; @@ -53,6 +55,8 @@ class CudaCodeGen : public CodeGen { int maxBlocksPerSM; Array3i domainSize; bool runWithSync; + std::optional OutputCHeader; + std::optional OutputFortranInterface; }; private: @@ -113,6 +117,13 @@ class CudaCodeGen : public CodeGen { IndexRange>& tempFields, std::shared_ptr stencilProperties) const; + void generateStencilSetupMethod( + Structure& stencilClass, const iir::Stencil& stencil, + const ast::GlobalVariableMap& globalsMap, + IndexRange>& nonTempFields, + IndexRange>& tempFields, + std::shared_ptr stencilProperties) const; + void generateStencilClassMembers( Structure& stencilClass, const iir::Stencil& stencil, const ast::GlobalVariableMap& globalsMap, @@ -123,6 +134,27 @@ class CudaCodeGen : public CodeGen { std::string generateStencilInstantiation( const std::shared_ptr& stencilInstantiation); + void + generateCHeaderSI(std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation) const; + + std::string generateCHeader() const; + std::string generateF90Interface(std::string moduleName) const; + + void + generateAPIRunFunctions(std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl = false) const; + void + generateSetupFunctions(std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl = false) const; + + void generateStaticMembersTrailer( + std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties) const; + CudaCodeGenOptions codeGenOptions_; bool iterationSpaceSet_; }; From 79b328734163d3ddf27c6830b016edc077bfccc7 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 18 Mar 2022 14:47:22 +0100 Subject: [PATCH 14/24] milestone 0: new dawn ifaces compiling with nemo_gtc --- dawn/src/dawn/CodeGen/CodeGen.cpp | 6 +++--- dawn/src/dawn/CodeGen/CodeGen.h | 2 +- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 25 +++++++++++++--------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/dawn/src/dawn/CodeGen/CodeGen.cpp b/dawn/src/dawn/CodeGen/CodeGen.cpp index 87e0a0c83..439ad5ab2 100644 --- a/dawn/src/dawn/CodeGen/CodeGen.cpp +++ b/dawn/src/dawn/CodeGen/CodeGen.cpp @@ -298,7 +298,7 @@ void CodeGen::generateStencilWrapperSyncMethod(Class& stencilWrapperClass) const syncStoragesMethod.commit(); } -std::string CodeGen::getStorageType(const ast::FieldDimensions& dimensions) { +std::string CodeGen::getStorageType(const ast::FieldDimensions& dimensions, std::string prefix, std::string suffix) { DAWN_ASSERT_MSG( ast::dimension_isa(dimensions.getHorizontalFieldDimension()), "Storage type requested for a non cartesian horizontal dimension"); @@ -306,11 +306,11 @@ std::string CodeGen::getStorageType(const ast::FieldDimensions& dimensions) { dawn::ast::dimension_cast( dimensions.getHorizontalFieldDimension()); - std::string storageType = "storage_"; + std::string storageType = prefix == "" ? "" : prefix + "_"; storageType += cartesianDimensions.I() ? "i" : ""; storageType += cartesianDimensions.J() ? "j" : ""; storageType += dimensions.K() ? "k" : ""; - storageType += "_t"; + storageType += suffix; return storageType; } diff --git a/dawn/src/dawn/CodeGen/CodeGen.h b/dawn/src/dawn/CodeGen/CodeGen.h index c50889fb5..fb08fd23f 100644 --- a/dawn/src/dawn/CodeGen/CodeGen.h +++ b/dawn/src/dawn/CodeGen/CodeGen.h @@ -84,7 +84,7 @@ class CodeGen { static std::string getStorageType(const sir::Field& field); static std::string getStorageType(const iir::Stencil::FieldInfo& field); - static std::string getStorageType(const ast::FieldDimensions& dimensions); + static std::string getStorageType(const ast::FieldDimensions& dimensions, std::string prefix="storage", std::string suffix="_t"); void generateBoundaryConditionFunctions( Class& stencilWrapperClass, diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index d57c4d9a2..cdcb2fbc8 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -94,9 +94,11 @@ void CudaCodeGen::generateAPIRunFunctions( // generate the code for each of the stencils for(const auto& stencilPtr : stencils) { - const auto& stencil = *stencilPtr; + const auto& stencil = *stencilPtr; std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); + auto stencilProperties = + codeGenProperties.getStencilProperties(StencilContext::SC_Stencil, stencilName); std::string fullyQualitfiedName = "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; @@ -116,14 +118,16 @@ void CudaCodeGen::generateAPIRunFunctions( runFun.finishArgs(); if(!onlyDecl) { - runFun.addStatement(fullyQualitfiedName + "::meta_data_t meta_data(" + fullyQualitfiedName + - "::dom.isize(), " + fullyQualitfiedName + "::dom.jsize(), " + - fullyQualitfiedName + "::dom.ksize())"); + runFun.addStatement("meta_data_t meta_data_ijk(" + fullyQualitfiedName + + "::m_dom.isize(), " + fullyQualitfiedName + "::m_dom.jsize(), " + + fullyQualitfiedName + "::m_dom.ksize())"); + runFun.addStatement("meta_data_ij_t meta_data_ij(" + fullyQualitfiedName + + "::m_dom.isize(), " + fullyQualitfiedName + "::m_dom.jsize(), 0)"); + runFun.addStatement("meta_data_k_t meta_data_k(0, 0, " + fullyQualitfiedName + "::m_dom.ksize())"); for(auto field : nonTempFields) { - runFun.addStatement(fullyQualitfiedName + "::storage_t " + field.second.Name + - "(meta_data, " + field.second.Name + "_ptr)"); + runFun.addStatement(stencilProperties->paramNameToType_.at(field.second.Name) + " " + field.second.Name + + "(meta_data_" + getStorageType(field.second.field.getFieldDimensions(), "", "") + ", " + field.second.Name + "_ptr)"); } - { std::string fields; std::string sep = ""; @@ -181,8 +185,6 @@ void CudaCodeGen::generateStaticMembersTrailer( ssSW << "gridtools::dawn::domain " + fullyQualitfiedName + "::m_dom = gridtools::dawn::domain(-1, -1, -1);"; - ssSW << fullyQualitfiedName + "::tmp_meta_data_t " + fullyQualitfiedName + - "::m_tmp_meta_data(-1, -1, -1, -1,-1);"; if(stencil.isEmpty()) continue; @@ -196,9 +198,11 @@ void CudaCodeGen::generateStaticMembersTrailer( }); if(!(tempFields.empty())) { + ssSW << fullyQualitfiedName + "::tmp_meta_data_t " + fullyQualitfiedName + + "::m_tmp_meta_data(-1, -1, -1, -1,-1);"; for(const auto& fieldPair : tempFields) { ssSW << fullyQualitfiedName - << "::tmp_storage_t " + fullyQualitfiedName + "::" + fieldPair.second.Name + ";"; + << "::tmp_storage_t " + fullyQualitfiedName + "::" + "m_" + fieldPair.second.Name + ";"; } } @@ -1130,6 +1134,7 @@ std::unique_ptr CudaCodeGen::generateCode() { // [https://github.com/MeteoSwiss-APN/gtclang/issues/32] //==============------------------------------------------------------------------------------=== CodeGen::addMplIfdefs(ppDefines, 30); + ppDefines.push_back("#include "); ppDefines.push_back("#include "); ppDefines.push_back("using namespace gridtools::dawn;"); From 5f27bef1d7bfa4aae0a1f2e11a172a88e6ed0abe Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 22 Apr 2022 16:40:47 +0200 Subject: [PATCH 15/24] pointer steal wip --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index cdcb2fbc8..e06fefd93 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -118,15 +118,17 @@ void CudaCodeGen::generateAPIRunFunctions( runFun.finishArgs(); if(!onlyDecl) { - runFun.addStatement("meta_data_t meta_data_ijk(" + fullyQualitfiedName + - "::m_dom.isize(), " + fullyQualitfiedName + "::m_dom.jsize(), " + - fullyQualitfiedName + "::m_dom.ksize())"); - runFun.addStatement("meta_data_ij_t meta_data_ij(" + fullyQualitfiedName + - "::m_dom.isize(), " + fullyQualitfiedName + "::m_dom.jsize(), 0)"); - runFun.addStatement("meta_data_k_t meta_data_k(0, 0, " + fullyQualitfiedName + "::m_dom.ksize())"); + runFun.addStatement("int ni = " + fullyQualitfiedName + "::m_dom.isize()"); + runFun.addStatement("int nj = " + fullyQualitfiedName + "::m_dom.jsize()"); + runFun.addStatement("int nk = " + fullyQualitfiedName + "::m_dom.ksize()"); + + runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {nj*nk, nk, 1})"); + runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {nk, 1, 0})"); + runFun.addStatement("meta_data_k_t meta_data_k({1, 1, nk}, {1, 0, 0})"); + for(auto field : nonTempFields) { runFun.addStatement(stencilProperties->paramNameToType_.at(field.second.Name) + " " + field.second.Name + - "(meta_data_" + getStorageType(field.second.field.getFieldDimensions(), "", "") + ", " + field.second.Name + "_ptr)"); + "(meta_data_" + getStorageType(field.second.field.getFieldDimensions(), "", "") + ", " + field.second.Name + "_ptr, gridtools::ownership::external_gpu)"); } { std::string fields; @@ -184,7 +186,7 @@ void CudaCodeGen::generateStaticMembersTrailer( "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; ssSW << "gridtools::dawn::domain " + fullyQualitfiedName + - "::m_dom = gridtools::dawn::domain(-1, -1, -1);"; + "::m_dom = gridtools::dawn::domain(1, 1, 1);"; if(stencil.isEmpty()) continue; @@ -199,7 +201,7 @@ void CudaCodeGen::generateStaticMembersTrailer( if(!(tempFields.empty())) { ssSW << fullyQualitfiedName + "::tmp_meta_data_t " + fullyQualitfiedName + - "::m_tmp_meta_data(-1, -1, -1, -1,-1);"; + "::m_tmp_meta_data(1, 1, 1, 1, 1);"; for(const auto& fieldPair : tempFields) { ssSW << fullyQualitfiedName << "::tmp_storage_t " + fullyQualitfiedName + "::" + "m_" + fieldPair.second.Name + ";"; From 1b09266d6ae3ab9a4c8060b15796dbcbd85b6bab Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 20 May 2022 14:00:09 +0200 Subject: [PATCH 16/24] add results serialization --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 67 +++++++++++++--------- dawn/src/driver-includes/serialize.hpp | 37 ++++++++++++ 2 files changed, 76 insertions(+), 28 deletions(-) create mode 100644 dawn/src/driver-includes/serialize.hpp diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index e06fefd93..fae7cfde2 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -65,6 +65,34 @@ run(const std::map>& return CG.generateCode(); } +std::vector getUsedFields(const dawn::iir::Stencil& stencil, + std::unordered_set intend = { + dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput, + dawn::iir::Field::IntendKind::Input}) { + const auto& APIFields = stencil.getMetadata().getAPIFields(); + const auto& stenFields = stencil.getOrderedFields(); + auto usedAPIFields = + dawn::makeRange(APIFields, [&stenFields](int f) { return stenFields.count(f); }); + + std::vector res; + for(auto fieldID : usedAPIFields) { + auto field = stenFields.at(fieldID); + if(intend.count(field.field.getIntend())) { + res.push_back(fieldID); + } + } + + return res; +} +std::vector getGlobalsNames(const dawn::ast::GlobalVariableMap& globalsMap) { + std::vector globalsNames; + for(const auto& global : globalsMap) { + globalsNames.push_back(global.first); + } + return globalsNames; +} + CudaCodeGen::CudaCodeGen(const StencilInstantiationContext& ctx, int maxHaloPoints, int nsms, int maxBlocksPerSM, const Array3i& domainSize, std::optional outputCHeader, @@ -91,6 +119,7 @@ void CudaCodeGen::generateAPIRunFunctions( std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, CodeGenProperties& codeGenProperties, bool onlyDecl) const { const auto& stencils = stencilInstantiation->getStencils(); + const auto& metadata = stencilInstantiation->getMetaData(); // generate the code for each of the stencils for(const auto& stencilPtr : stencils) { @@ -118,6 +147,7 @@ void CudaCodeGen::generateAPIRunFunctions( runFun.finishArgs(); if(!onlyDecl) { + runFun.addStatement("static int iter = 0"); runFun.addStatement("int ni = " + fullyQualitfiedName + "::m_dom.isize()"); runFun.addStatement("int nj = " + fullyQualitfiedName + "::m_dom.jsize()"); runFun.addStatement("int nk = " + fullyQualitfiedName + "::m_dom.ksize()"); @@ -138,6 +168,14 @@ void CudaCodeGen::generateAPIRunFunctions( sep = ", "; } runFun.addStatement(fullyQualitfiedName + "::run(" + fields + ")"); + runFun.addPreprocessorDirective("ifdef __DSL_SERIALIZE"); + auto outFields = getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, dawn::iir::Field::IntendKind::InputOutput}); + for (auto outField : outFields) { + auto fname = metadata.getFieldNameFromAccessID(outField); + runFun.addStatement("serialize_gpu(" + fname + ", \"" + stencilName + "_" + fname + "\", iter, ni, nj, nk)"); + } + runFun.addPreprocessorDirective("endif"); + runFun.addStatement("iter++"); } } runFun.commit(); @@ -947,34 +985,6 @@ std::string CudaCodeGen::generateCHeader() const { return ssSW.str(); } -std::vector getUsedFields(const dawn::iir::Stencil& stencil, - std::unordered_set intend = { - dawn::iir::Field::IntendKind::Output, - dawn::iir::Field::IntendKind::InputOutput, - dawn::iir::Field::IntendKind::Input}) { - const auto& APIFields = stencil.getMetadata().getAPIFields(); - const auto& stenFields = stencil.getOrderedFields(); - auto usedAPIFields = - dawn::makeRange(APIFields, [&stenFields](int f) { return stenFields.count(f); }); - - std::vector res; - for(auto fieldID : usedAPIFields) { - auto field = stenFields.at(fieldID); - if(intend.count(field.field.getIntend())) { - res.push_back(fieldID); - } - } - - return res; -} -std::vector getGlobalsNames(const dawn::ast::GlobalVariableMap& globalsMap) { - std::vector globalsNames; - for(const auto& global : globalsMap) { - globalsNames.push_back(global.first); - } - return globalsNames; -} - static void generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, const std::shared_ptr& stencilInstantiation) { @@ -1138,6 +1148,7 @@ std::unique_ptr CudaCodeGen::generateCode() { CodeGen::addMplIfdefs(ppDefines, 30); ppDefines.push_back("#include "); ppDefines.push_back("#include "); + ppDefines.push_back("#include "); ppDefines.push_back("using namespace gridtools::dawn;"); generateBCHeaders(ppDefines); diff --git a/dawn/src/driver-includes/serialize.hpp b/dawn/src/driver-includes/serialize.hpp new file mode 100644 index 000000000..5e0b8f5cb --- /dev/null +++ b/dawn/src/driver-includes/serialize.hpp @@ -0,0 +1,37 @@ +#include + +template +void serialize(const storage_type &field, std::string &&fname, int iter, int isize, int jsize, int ksize) { + field.sync(); + gridtools::data_view field_view = gridtools::make_host_view(field); + char buf[128]; + sprintf(buf, "_%02d.txt", iter); + FILE *fp = fopen(("results/" + fname + buf).c_str(), "w+"); + for (int i = 0; i < isize; i++) { + for (int j = 0; j < jsize; j++) { + for (int k = 0; k < ksize; k++) { + fprintf(fp, "%.14g\n", field_view(i,j,k)); + } + } + } + fclose(fp); + field.sync(); +} + +template +void serialize_gpu(const storage_type &field, std::string &&fname, int iter, int isize, int jsize, int ksize) { + field.sync(); + gridtools::data_view field_view = gridtools::make_host_view(field); + char buf[128]; + sprintf(buf, "_%02d.txt", iter); + FILE *fp = fopen(("results/" + fname + buf).c_str(), "w+"); + for (int i = 0; i < isize; i++) { + for (int k = 0; k < ksize; k++) { + for (int j = 0; j < jsize; j++) { + fprintf(fp, "%.14g\n", field_view(i,j,k)); + } + } + } + fclose(fp); + field.sync(); +} From 74c1b7d7a3d719787e5ffcf7a9c284dfd5b4044d Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 20 May 2022 18:00:27 +0200 Subject: [PATCH 17/24] hopefully fixing strides --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index fae7cfde2..c53aef44a 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -152,8 +152,8 @@ void CudaCodeGen::generateAPIRunFunctions( runFun.addStatement("int nj = " + fullyQualitfiedName + "::m_dom.jsize()"); runFun.addStatement("int nk = " + fullyQualitfiedName + "::m_dom.ksize()"); - runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {nj*nk, nk, 1})"); - runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {nk, 1, 0})"); + runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {1, ni, ni*nj})"); + runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {ni, 1, 0})"); runFun.addStatement("meta_data_k_t meta_data_k({1, 1, nk}, {1, 0, 0})"); for(auto field : nonTempFields) { From 1728e521317ee20d4285610cfc938f3df0d92b36 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 20 May 2022 18:29:01 +0200 Subject: [PATCH 18/24] striding looks right now for cubic domains --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 2 +- dawn/src/driver-includes/serialize.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index c53aef44a..88c3ec388 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -153,7 +153,7 @@ void CudaCodeGen::generateAPIRunFunctions( runFun.addStatement("int nk = " + fullyQualitfiedName + "::m_dom.ksize()"); runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {1, ni, ni*nj})"); - runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {ni, 1, 0})"); + runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {1, ni, 0})"); runFun.addStatement("meta_data_k_t meta_data_k({1, 1, nk}, {1, 0, 0})"); for(auto field : nonTempFields) { diff --git a/dawn/src/driver-includes/serialize.hpp b/dawn/src/driver-includes/serialize.hpp index 5e0b8f5cb..ba2745d73 100644 --- a/dawn/src/driver-includes/serialize.hpp +++ b/dawn/src/driver-includes/serialize.hpp @@ -26,8 +26,8 @@ void serialize_gpu(const storage_type &field, std::string &&fname, int iter, int sprintf(buf, "_%02d.txt", iter); FILE *fp = fopen(("results/" + fname + buf).c_str(), "w+"); for (int i = 0; i < isize; i++) { - for (int k = 0; k < ksize; k++) { - for (int j = 0; j < jsize; j++) { + for (int j = 0; j < jsize; j++) { + for (int k = 0; k < ksize; k++) { fprintf(fp, "%.14g\n", field_view(i,j,k)); } } From d1b28fd52b5fcdcde706ab80417af1e1bc5b9cc6 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Mon, 23 May 2022 09:25:36 +0200 Subject: [PATCH 19/24] use human readable version of stencil name --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index 88c3ec388..63fc39e01 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -172,7 +172,7 @@ void CudaCodeGen::generateAPIRunFunctions( auto outFields = getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, dawn::iir::Field::IntendKind::InputOutput}); for (auto outField : outFields) { auto fname = metadata.getFieldNameFromAccessID(outField); - runFun.addStatement("serialize_gpu(" + fname + ", \"" + stencilName + "_" + fname + "\", iter, ni, nj, nk)"); + runFun.addStatement("serialize_gpu(" + fname + ", \"" + stencilInstantiation->getName() + "_" + fname + "\", iter, ni, nj, nk)"); } runFun.addPreprocessorDirective("endif"); runFun.addStatement("iter++"); From e2ff27fa696dd1f6cbc6e9cb15b41dc37f5727a4 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Thu, 9 Jun 2022 09:40:48 +0200 Subject: [PATCH 20/24] strides finally correct --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index 63fc39e01..930601078 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -152,9 +152,9 @@ void CudaCodeGen::generateAPIRunFunctions( runFun.addStatement("int nj = " + fullyQualitfiedName + "::m_dom.jsize()"); runFun.addStatement("int nk = " + fullyQualitfiedName + "::m_dom.ksize()"); - runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {1, ni, ni*nj})"); + runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {1, ni, ni*nj});"); runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {1, ni, 0})"); - runFun.addStatement("meta_data_k_t meta_data_k({1, 1, nk}, {1, 0, 0})"); + runFun.addStatement("meta_data_k_t meta_data_k({nk, 1, 1}, {1, 0, 0})"); for(auto field : nonTempFields) { runFun.addStatement(stencilProperties->paramNameToType_.at(field.second.Name) + " " + field.second.Name + From 94843f9068140264dd07cb8d16ee1a94ead895f9 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Wed, 10 Aug 2022 12:01:04 +0200 Subject: [PATCH 21/24] correct handling of globals in the c and fortran interfaces --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 47 +++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index 930601078..d759e7421 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -120,14 +120,15 @@ void CudaCodeGen::generateAPIRunFunctions( CodeGenProperties& codeGenProperties, bool onlyDecl) const { const auto& stencils = stencilInstantiation->getStencils(); const auto& metadata = stencilInstantiation->getMetaData(); + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); // generate the code for each of the stencils for(const auto& stencilPtr : stencils) { - const auto& stencil = *stencilPtr; + const auto& stencil = *stencilPtr; std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); auto stencilProperties = - codeGenProperties.getStencilProperties(StencilContext::SC_Stencil, stencilName); + codeGenProperties.getStencilProperties(StencilContext::SC_Stencil, stencilName); std::string fullyQualitfiedName = "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; @@ -141,6 +142,11 @@ void CudaCodeGen::generateAPIRunFunctions( return !p.second.IsTemporary; }); + for(const auto& globalProp : globalsMap) { + const auto& globalValue = globalProp.second; + runFun.addArg(std::string(ast::Value::typeToString(globalValue.getType())) + " " + + globalProp.first); + } for(auto field : nonTempFields) { runFun.addArg("double *" + field.second.Name + "_ptr"); } @@ -155,10 +161,17 @@ void CudaCodeGen::generateAPIRunFunctions( runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {1, ni, ni*nj});"); runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {1, ni, 0})"); runFun.addStatement("meta_data_k_t meta_data_k({nk, 1, 1}, {1, 0, 0})"); - + + for(const auto& globalProp : globalsMap) { + const auto& globalValue = globalProp.second; + runFun.addStatement(fullyQualitfiedName + "::m_globals." + globalProp.first + " = " + globalProp.first); + } + for(auto field : nonTempFields) { - runFun.addStatement(stencilProperties->paramNameToType_.at(field.second.Name) + " " + field.second.Name + - "(meta_data_" + getStorageType(field.second.field.getFieldDimensions(), "", "") + ", " + field.second.Name + "_ptr, gridtools::ownership::external_gpu)"); + runFun.addStatement(stencilProperties->paramNameToType_.at(field.second.Name) + " " + + field.second.Name + "(meta_data_" + + getStorageType(field.second.field.getFieldDimensions(), "", "") + ", " + + field.second.Name + "_ptr, gridtools::ownership::external_gpu)"); } { std::string fields; @@ -169,10 +182,12 @@ void CudaCodeGen::generateAPIRunFunctions( } runFun.addStatement(fullyQualitfiedName + "::run(" + fields + ")"); runFun.addPreprocessorDirective("ifdef __DSL_SERIALIZE"); - auto outFields = getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, dawn::iir::Field::IntendKind::InputOutput}); - for (auto outField : outFields) { + auto outFields = getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput}); + for(auto outField : outFields) { auto fname = metadata.getFieldNameFromAccessID(outField); - runFun.addStatement("serialize_gpu(" + fname + ", \"" + stencilInstantiation->getName() + "_" + fname + "\", iter, ni, nj, nk)"); + runFun.addStatement("serialize_gpu(" + fname + ", \"" + stencilInstantiation->getName() + + "_" + fname + "\", iter, ni, nj, nk)"); } runFun.addPreprocessorDirective("endif"); runFun.addStatement("iter++"); @@ -186,6 +201,7 @@ void CudaCodeGen::generateSetupFunctions( std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, CodeGenProperties& codeGenProperties, bool onlyDecl) const { const auto& stencils = stencilInstantiation->getStencils(); + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); // generate the code for each of the stencils for(const auto& stencilPtr : stencils) { @@ -193,7 +209,7 @@ void CudaCodeGen::generateSetupFunctions( std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); - std::string fullyQualitfiedName = + std::string fullyQualifiedName = "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; MemberFunction setupFun("void", "setup_" + stencilInstantiation->getName(), ssSW, 0, onlyDecl); setupFun.addArg("int i"); @@ -201,8 +217,8 @@ void CudaCodeGen::generateSetupFunctions( setupFun.addArg("int k"); setupFun.finishArgs(); if(!onlyDecl) { - setupFun.addStatement(fullyQualitfiedName + - "::setup(gridtools::dawn::domain(i, j, k), 1, 1, 1)"); + setupFun.addStatement(fullyQualifiedName + + "::setup(gridtools::dawn::domain(i, j, k), " + (!globalsMap.empty() ? fullyQualifiedName + "::m_globals, " : "") + "1, 1, 1)"); } setupFun.commit(); } @@ -225,6 +241,8 @@ void CudaCodeGen::generateStaticMembersTrailer( ssSW << "gridtools::dawn::domain " + fullyQualitfiedName + "::m_dom = gridtools::dawn::domain(1, 1, 1);"; + ssSW << "dawn_generated::cuda::globals " + fullyQualitfiedName + + "::m_globals;"; if(stencil.isEmpty()) continue; @@ -239,10 +257,11 @@ void CudaCodeGen::generateStaticMembersTrailer( if(!(tempFields.empty())) { ssSW << fullyQualitfiedName + "::tmp_meta_data_t " + fullyQualitfiedName + - "::m_tmp_meta_data(1, 1, 1, 1, 1);"; + "::m_tmp_meta_data(1, 1, 1, 1, 1);"; for(const auto& fieldPair : tempFields) { ssSW << fullyQualitfiedName - << "::tmp_storage_t " + fullyQualitfiedName + "::" + "m_" + fieldPair.second.Name + ";"; + << "::tmp_storage_t " + fullyQualitfiedName + "::" + "m_" + fieldPair.second.Name + + ";"; } } @@ -448,7 +467,7 @@ void CudaCodeGen::generateStencilClassMembers( addTempStorageTypedef(stencilClass, stencil); if(!globalsMap.empty()) { - stencilClass.addMember("globals&", "m_globals"); + stencilClass.addMember("static globals", "m_globals"); } stencilClass.addMember("static " + c_dgt + "domain", "m_dom"); From 8043e971040458088054a6f4b2183717782c2059 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Tue, 18 Oct 2022 14:06:28 +0200 Subject: [PATCH 22/24] only emit globals if globals are present --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index d759e7421..5d27cb5a1 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -241,8 +241,12 @@ void CudaCodeGen::generateStaticMembersTrailer( ssSW << "gridtools::dawn::domain " + fullyQualitfiedName + "::m_dom = gridtools::dawn::domain(1, 1, 1);"; - ssSW << "dawn_generated::cuda::globals " + fullyQualitfiedName + - "::m_globals;"; + + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + if (!globalsMap.empty()) { + ssSW << "dawn_generated::cuda::globals " + fullyQualitfiedName + + "::m_globals;"; + } if(stencil.isEmpty()) continue; From da9e02f1b69f28fc8ef3f2dcf32d26cbaf6fe70a Mon Sep 17 00:00:00 2001 From: mroethlin Date: Tue, 25 Oct 2022 11:17:35 +0200 Subject: [PATCH 23/24] fix globals --- dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index 5d27cb5a1..cccdb2ab2 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -186,7 +186,7 @@ void CudaCodeGen::generateAPIRunFunctions( dawn::iir::Field::IntendKind::InputOutput}); for(auto outField : outFields) { auto fname = metadata.getFieldNameFromAccessID(outField); - runFun.addStatement("serialize_gpu(" + fname + ", \"" + stencilInstantiation->getName() + + runFun.addStatement("serialize_gpu(" + fname + ", \"gtc_" + stencilInstantiation->getName() + "_" + fname + "\", iter, ni, nj, nk)"); } runFun.addPreprocessorDirective("endif"); From e58b303e77055307f1eef78b39dc4721c2c0419c Mon Sep 17 00:00:00 2001 From: mroethlin Date: Tue, 8 Nov 2022 11:49:25 +0100 Subject: [PATCH 24/24] variadic min/max funs --- dawn/src/driver-includes/math.hpp | 48 ++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/dawn/src/driver-includes/math.hpp b/dawn/src/driver-includes/math.hpp index 990dfda15..9ad5d363d 100644 --- a/dawn/src/driver-includes/math.hpp +++ b/dawn/src/driver-includes/math.hpp @@ -18,6 +18,8 @@ #include "storage.hpp" #include #include +#include +#include #ifndef GT_FUNCTION #define GT_FUNCTION @@ -122,24 +124,38 @@ GT_FUNCTION T sqrt(const T x) { return ::sqrt(x); } -/** - * @brief Returns the smaller value of @c x and @c y - * - * @see http://en.cppreference.com/w/cpp/algorithm/min - */ -template -GT_FUNCTION auto min(const T x, const U y) -> decltype(x + y) { - return x < y ? x : y; +template +GT_FUNCTION T min(T&&t) +{ + return std::forward(t); } -/** - * @brief Returns the greater value of @c x and @c y - * - * @see http://en.cppreference.com/w/cpp/algorithm/max - */ -template -GT_FUNCTION auto max(const T x, const U y) -> decltype(x + y) { - return x > y ? x : y; +template +GT_FUNCTION typename std::common_type< + T0, T1, Ts... +>::type min(T0&& val1, T1&& val2, Ts&&... vs) +{ + if (val2 < val1) + return min(val2, std::forward(vs)...); + else + return min(val1, std::forward(vs)...); +} + +template +GT_FUNCTION T max(T&&t) +{ + return std::forward(t); +} + +template +GT_FUNCTION typename std::common_type< + T0, T1, Ts... +>::type max(T0&& val1, T1&& val2, Ts&&... vs) +{ + if (val2 < val1) + return max(val2, std::forward(vs)...); + else + return max(val1, std::forward(vs)...); } /**