diff --git a/dawn/src/dawn/CodeGen/CodeGen.cpp b/dawn/src/dawn/CodeGen/CodeGen.cpp index 8c2d20d56..439ad5ab2 100644 --- a/dawn/src/dawn/CodeGen/CodeGen.cpp +++ b/dawn/src/dawn/CodeGen/CodeGen.cpp @@ -298,7 +298,7 @@ void CodeGen::generateStencilWrapperSyncMethod(Class& stencilWrapperClass) const syncStoragesMethod.commit(); } -std::string CodeGen::getStorageType(const ast::FieldDimensions& dimensions) { +std::string CodeGen::getStorageType(const ast::FieldDimensions& dimensions, std::string prefix, std::string suffix) { DAWN_ASSERT_MSG( ast::dimension_isa(dimensions.getHorizontalFieldDimension()), "Storage type requested for a non cartesian horizontal dimension"); @@ -306,11 +306,11 @@ std::string CodeGen::getStorageType(const ast::FieldDimensions& dimensions) { dawn::ast::dimension_cast( dimensions.getHorizontalFieldDimension()); - std::string storageType = "storage_"; + std::string storageType = prefix == "" ? "" : prefix + "_"; storageType += cartesianDimensions.I() ? "i" : ""; storageType += cartesianDimensions.J() ? "j" : ""; storageType += dimensions.K() ? "k" : ""; - storageType += "_t"; + storageType += suffix; return storageType; } @@ -338,10 +338,10 @@ void CodeGen::addTmpStorageDeclaration( Structure& stencilClass, IndexRange>& tempFields) const { if(!(tempFields.empty())) { - stencilClass.addMember(tmpMetadataTypename_, tmpMetadataName_); + stencilClass.addMember("static " + tmpMetadataTypename_, tmpMetadataName_); for(const auto& field : tempFields) { - stencilClass.addMember(tmpStorageTypename_, "m_" + field.second.Name); + stencilClass.addMember("static " + tmpStorageTypename_, "m_" + field.second.Name); } } } @@ -448,16 +448,16 @@ void CodeGen::generateGlobalIndices(const iir::Stencil& stencil, Structure& sten bool genCheckOffset) const { for(auto& stage : iterateIIROver(stencil)) { if(stage->getIterationSpace()[0].has_value()) { - stencilClass.addMember("std::array", + stencilClass.addMember("static std::array", "stage" + std::to_string(stage->getStageID()) + "GlobalIIndices"); } if(stage->getIterationSpace()[1].has_value()) { - stencilClass.addMember("std::array", + stencilClass.addMember("static std::array", "stage" + std::to_string(stage->getStageID()) + "GlobalJIndices"); } } - stencilClass.addMember("std::array", "globalOffsets"); + stencilClass.addMember("static std::array", "globalOffsets"); auto globalOffsetFunc = stencilClass.addMemberFunction("static std::array", "computeGlobalOffsets"); globalOffsetFunc.addArg("int rank, const " + c_dgt + "domain& dom, int xcols, int ycols"); diff --git a/dawn/src/dawn/CodeGen/CodeGen.h b/dawn/src/dawn/CodeGen/CodeGen.h index c50889fb5..fb08fd23f 100644 --- a/dawn/src/dawn/CodeGen/CodeGen.h +++ b/dawn/src/dawn/CodeGen/CodeGen.h @@ -84,7 +84,7 @@ class CodeGen { static std::string getStorageType(const sir::Field& field); static std::string getStorageType(const iir::Stencil::FieldInfo& field); - static std::string getStorageType(const ast::FieldDimensions& dimensions); + static std::string getStorageType(const ast::FieldDimensions& dimensions, std::string prefix="storage", std::string suffix="_t"); void generateBoundaryConditionFunctions( Class& stencilWrapperClass, diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index 33e757369..b3ee3ab61 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -1704,7 +1704,8 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, runWrapper.addACCLine("host_data use_device( &"); auto fieldArgs = getFieldArgs(/*includeSavedState*/ true); for(int i = 0; i < fieldArgs.size(); ++i) { - runWrapper.addACCLine(fortranIndent + fieldArgs[i] + (i == (fieldArgs.size() - 1) ? " &" : ", &")); + runWrapper.addACCLine(fortranIndent + fieldArgs[i] + + (i == (fieldArgs.size() - 1) ? " &" : ", &")); } runWrapper.addACCLine(")"); runWrapper.addBodyLine("#ifdef __DSL_VERIFY", /*withIndentation*/ false); @@ -1722,7 +1723,7 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, for(auto fieldID : getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, dawn::iir::Field::IntendKind::InputOutput})) { verticalBoundNames.push_back(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + - "_kvert_max"); + "_kvert_max"); } // memory management functions for production interface @@ -1787,7 +1788,8 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, setupWrapper.addBodyLine(fortranIndent + verticalBoundNames[i] + ", &"); } - setupWrapper.addBodyLine(fortranIndent + verticalBoundNames[verticalBoundNames.size() - 1] + " &"); + setupWrapper.addBodyLine(fortranIndent + verticalBoundNames[verticalBoundNames.size() - 1] + + " &"); setupWrapper.addBodyLine(")"); diff --git a/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp b/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp index 78bf2cbec..fa3442a68 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp @@ -99,7 +99,50 @@ std::vector CodeGeneratorHelper::generateStrideArguments( } } } - if(!tempFields.empty()) { + if(!tempFields.empty() && nonTempFields.empty()) { + const auto& firstTmpField = *(tempFields.begin()); + std::string fieldName = metadata.getFieldNameFromAccessID(firstTmpField.second.getAccessID()); + if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Caller) { + strides.push_back("m_" + fieldName + ".strides()[3]," + "m_" + fieldName + ".strides()[4]," + "m_" + fieldName + ".get_storage_info_ptr()->template begin<0>()," + "m_" + + fieldName + ".get_storage_info_ptr()->template begin<1>()," + "m_" + + fieldName + ".get_storage_info_ptr()->template stride<1>()," + "m_" + + fieldName + ".get_storage_info_ptr()->template stride<4>()"); + } + + Array3i dims{-1, -1, -1}; + for(const auto& fieldInfo : ms->getParent()->getFields()) { + if(fieldInfo.second.field.getAccessID() == firstTmpField.second.getAccessID()) { + DAWN_ASSERT_MSG( + dawn::ast::dimension_isa( + fieldInfo.second.field.getFieldDimensions().getHorizontalFieldDimension()), + "Field has non cartesian horizontal dimension"); + auto const& dimCartesian = + dawn::ast::dimension_cast( + fieldInfo.second.field.getFieldDimensions().getHorizontalFieldDimension()); + dims[0] = dimCartesian.I() == 1; + dims[1] = dimCartesian.J() == 1; + dims[2] = fieldInfo.second.field.getFieldDimensions().K() == 1; + break; + } + } + + int usedDim = 0; + for(int i = 0; i < dims.size(); ++i) { + if(!dims[i]) + continue; + if(!(usedDim++)) + continue; + if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Callee) { + strides.push_back("const int stride_" + CodeGeneratorHelper::indexIteratorName(dims) + "_" + + std::to_string(i)); + } + } + if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Callee) { + strides.push_back("const int tmpBeginIIndex, const int tmpBeginJIndex, const int " + "jstride_tmp, const int kstride_tmp"); + } + } + else if(!tempFields.empty()) { const auto& firstTmpField = *(tempFields.begin()); std::string fieldName = metadata.getFieldNameFromAccessID(firstTmpField.second.getAccessID()); if(funArg == CodeGeneratorHelper::FunctionArgType::FT_Caller) { diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index 46b146371..cccdb2ab2 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -19,11 +19,13 @@ #include "dawn/CodeGen/Cuda/CacheProperties.h" #include "dawn/CodeGen/Cuda/CodeGeneratorHelper.h" #include "dawn/CodeGen/Cuda/MSCodeGen.h" +#include "dawn/CodeGen/F90Util.h" #include "dawn/IIR/IIRNodeIterator.h" #include "dawn/IIR/StencilInstantiation.h" #include "dawn/SIR/SIR.h" #include "dawn/Support/Array.h" #include "dawn/Support/Assert.h" +#include "dawn/Support/FileSystem.h" #include "dawn/Support/Iterator.h" #include "dawn/Support/Logger.h" #include "dawn/Support/StringUtil.h" @@ -57,14 +59,47 @@ run(const std::map>& const Options& options) { const Array3i domain_size{options.DomainSizeI, options.DomainSizeJ, options.DomainSizeK}; CudaCodeGen CG(stencilInstantiationMap, options.MaxHaloSize, options.nsms, options.MaxBlocksPerSM, - domain_size, options.RunWithSync); + domain_size, options.OutputCHeader, options.OutputFortranInterface, + options.RunWithSync); return CG.generateCode(); } +std::vector getUsedFields(const dawn::iir::Stencil& stencil, + std::unordered_set intend = { + dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput, + dawn::iir::Field::IntendKind::Input}) { + const auto& APIFields = stencil.getMetadata().getAPIFields(); + const auto& stenFields = stencil.getOrderedFields(); + auto usedAPIFields = + dawn::makeRange(APIFields, [&stenFields](int f) { return stenFields.count(f); }); + + std::vector res; + for(auto fieldID : usedAPIFields) { + auto field = stenFields.at(fieldID); + if(intend.count(field.field.getIntend())) { + res.push_back(fieldID); + } + } + + return res; +} +std::vector getGlobalsNames(const dawn::ast::GlobalVariableMap& globalsMap) { + std::vector globalsNames; + for(const auto& global : globalsMap) { + globalsNames.push_back(global.first); + } + return globalsNames; +} + CudaCodeGen::CudaCodeGen(const StencilInstantiationContext& ctx, int maxHaloPoints, int nsms, - int maxBlocksPerSM, const Array3i& domainSize, bool runWithSync) - : CodeGen(ctx, maxHaloPoints), codeGenOptions_{nsms, maxBlocksPerSM, domainSize, runWithSync} {} + int maxBlocksPerSM, const Array3i& domainSize, + std::optional outputCHeader, + std::optional outputFortranInterface, bool runWithSync) + : CodeGen(ctx, maxHaloPoints), codeGenOptions_{nsms, maxBlocksPerSM, + domainSize, runWithSync, + outputCHeader, outputFortranInterface} {} CudaCodeGen::~CudaCodeGen() {} @@ -80,6 +115,180 @@ void CudaCodeGen::generateAllCudaKernels( } } +void CudaCodeGen::generateAPIRunFunctions( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl) const { + const auto& stencils = stencilInstantiation->getStencils(); + const auto& metadata = stencilInstantiation->getMetaData(); + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + + // generate the code for each of the stencils + for(const auto& stencilPtr : stencils) { + const auto& stencil = *stencilPtr; + + std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); + auto stencilProperties = + codeGenProperties.getStencilProperties(StencilContext::SC_Stencil, stencilName); + + std::string fullyQualitfiedName = + "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; + + MemberFunction runFun("void", "run_" + stencilInstantiation->getName(), ssSW, 0, onlyDecl); + + const auto stencilFields = stencil.getOrderedFields(); + + auto nonTempFields = + makeRange(stencilFields, [](std::pair const& p) { + return !p.second.IsTemporary; + }); + + for(const auto& globalProp : globalsMap) { + const auto& globalValue = globalProp.second; + runFun.addArg(std::string(ast::Value::typeToString(globalValue.getType())) + " " + + globalProp.first); + } + for(auto field : nonTempFields) { + runFun.addArg("double *" + field.second.Name + "_ptr"); + } + runFun.finishArgs(); + + if(!onlyDecl) { + runFun.addStatement("static int iter = 0"); + runFun.addStatement("int ni = " + fullyQualitfiedName + "::m_dom.isize()"); + runFun.addStatement("int nj = " + fullyQualitfiedName + "::m_dom.jsize()"); + runFun.addStatement("int nk = " + fullyQualitfiedName + "::m_dom.ksize()"); + + runFun.addStatement("meta_data_t meta_data_ijk({ni, nj, nk}, {1, ni, ni*nj});"); + runFun.addStatement("meta_data_ij_t meta_data_ij({ni, nj, 1}, {1, ni, 0})"); + runFun.addStatement("meta_data_k_t meta_data_k({nk, 1, 1}, {1, 0, 0})"); + + for(const auto& globalProp : globalsMap) { + const auto& globalValue = globalProp.second; + runFun.addStatement(fullyQualitfiedName + "::m_globals." + globalProp.first + " = " + globalProp.first); + } + + for(auto field : nonTempFields) { + runFun.addStatement(stencilProperties->paramNameToType_.at(field.second.Name) + " " + + field.second.Name + "(meta_data_" + + getStorageType(field.second.field.getFieldDimensions(), "", "") + ", " + + field.second.Name + "_ptr, gridtools::ownership::external_gpu)"); + } + { + std::string fields; + std::string sep = ""; + for(auto field : nonTempFields) { + fields += sep + field.second.Name; + sep = ", "; + } + runFun.addStatement(fullyQualitfiedName + "::run(" + fields + ")"); + runFun.addPreprocessorDirective("ifdef __DSL_SERIALIZE"); + auto outFields = getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput}); + for(auto outField : outFields) { + auto fname = metadata.getFieldNameFromAccessID(outField); + runFun.addStatement("serialize_gpu(" + fname + ", \"gtc_" + stencilInstantiation->getName() + + "_" + fname + "\", iter, ni, nj, nk)"); + } + runFun.addPreprocessorDirective("endif"); + runFun.addStatement("iter++"); + } + } + runFun.commit(); + } +} + +void CudaCodeGen::generateSetupFunctions( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl) const { + const auto& stencils = stencilInstantiation->getStencils(); + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + + // generate the code for each of the stencils + for(const auto& stencilPtr : stencils) { + const auto& stencil = *stencilPtr; + + std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); + + std::string fullyQualifiedName = + "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; + MemberFunction setupFun("void", "setup_" + stencilInstantiation->getName(), ssSW, 0, onlyDecl); + setupFun.addArg("int i"); + setupFun.addArg("int j"); + setupFun.addArg("int k"); + setupFun.finishArgs(); + if(!onlyDecl) { + setupFun.addStatement(fullyQualifiedName + + "::setup(gridtools::dawn::domain(i, j, k), " + (!globalsMap.empty() ? fullyQualifiedName + "::m_globals, " : "") + "1, 1, 1)"); + } + setupFun.commit(); + } +} + +void CudaCodeGen::generateStaticMembersTrailer( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties) const { + + const auto& stencils = stencilInstantiation->getStencils(); + + // generate the code for each of the stencils + for(const auto& stencilPtr : stencils) { + const auto& stencil = *stencilPtr; + + std::string stencilName = "stencil_" + std::to_string(stencil.getStencilID()); + + std::string fullyQualitfiedName = + "dawn_generated::cuda::" + stencilInstantiation->getName() + "::" + stencilName; + + ssSW << "gridtools::dawn::domain " + fullyQualitfiedName + + "::m_dom = gridtools::dawn::domain(1, 1, 1);"; + + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + if (!globalsMap.empty()) { + ssSW << "dawn_generated::cuda::globals " + fullyQualitfiedName + + "::m_globals;"; + } + + if(stencil.isEmpty()) + continue; + + // fields used in the stencil + const auto stencilFields = stencil.getOrderedFields(); + + auto tempFields = + makeRange(stencilFields, [](std::pair const& p) { + return p.second.IsTemporary; + }); + + if(!(tempFields.empty())) { + ssSW << fullyQualitfiedName + "::tmp_meta_data_t " + fullyQualitfiedName + + "::m_tmp_meta_data(1, 1, 1, 1, 1);"; + for(const auto& fieldPair : tempFields) { + ssSW << fullyQualitfiedName + << "::tmp_storage_t " + fullyQualitfiedName + "::" + "m_" + fieldPair.second.Name + + ";"; + } + } + + std::string iterators = "ij"; + for(auto& stage : iterateIIROver(stencil)) { + int index = 0; + for(const auto& interval : stage->getIterationSpace()) { + if(interval.has_value()) { + std::string iterator = iterators.substr(index, 1); + std::string arrName = "stage" + std::to_string(stage->getStageID()) + "Global" + + (char)std::toupper(iterator.at(0)) + "Indices"; + ssSW << "std::array " << fullyQualitfiedName + "::" + arrName + ";"; + index += 1; + } + } + } + + if(iterationSpaceSet_) { + ssSW << "std::array " << fullyQualitfiedName + "::globalOffsets;"; + } + } +} + std::string CudaCodeGen::generateStencilInstantiation( const std::shared_ptr& stencilInstantiation) { using namespace codegen; @@ -138,6 +347,12 @@ std::string CudaCodeGen::generateStencilInstantiation( cudaNamespace.commit(); dawnNamespace.commit(); + ssSW << "extern \"C\" {\n"; + generateAPIRunFunctions(ssSW, stencilInstantiation, codeGenProperties); + generateSetupFunctions(ssSW, stencilInstantiation, codeGenProperties); + ssSW << "}\n"; + generateStaticMembersTrailer(ssSW, stencilInstantiation, codeGenProperties); + return ssSW.str(); } @@ -227,6 +442,9 @@ void CudaCodeGen::generateStencilClasses( generateStencilClassCtr(stencilClass, stencil, globalsMap, nonTempFields, tempFields, stencilProperties); + generateStencilSetupMethod(stencilClass, stencil, globalsMap, nonTempFields, tempFields, + stencilProperties); + // accumulated extents of API fields generateFieldExtentsInfo(stencilClass, nonTempFields, ast::GridType::Cartesian); @@ -253,10 +471,10 @@ void CudaCodeGen::generateStencilClassMembers( addTempStorageTypedef(stencilClass, stencil); if(!globalsMap.empty()) { - stencilClass.addMember("globals&", "m_globals"); + stencilClass.addMember("static globals", "m_globals"); } - stencilClass.addMember("const " + c_dgt + "domain", "m_dom"); + stencilClass.addMember("static " + c_dgt + "domain", "m_dom"); if(!tempFields.empty()) { stencilClass.addComment("temporary storage declarations"); @@ -281,10 +499,10 @@ void CudaCodeGen::generateStencilClassCtr( stencilClassCtr.addArg("int ycols"); stencilClassCtr.addInit("sbase(\"" + stencilClass.getName() + "\")"); - stencilClassCtr.addInit("m_dom(dom_)"); + stencilClassCtr.addStatement("m_dom =dom_"); if(!globalsMap.empty()) { - stencilClassCtr.addInit("m_globals(globals_)"); + stencilClassCtr.addStatement("m_globals = globals_"); } std::string iterators = "ij"; @@ -295,20 +513,34 @@ void CudaCodeGen::generateStencilClassCtr( std::string iterator = iterators.substr(index, 1); std::string arrName = "stage" + std::to_string(stage->getStageID()) + "Global" + (char)std::toupper(iterator.at(0)) + "Indices"; - stencilClassCtr.addInit(arrName + "({" + - makeIntervalBoundExplicit(iterator, interval.value(), - iir::Interval::Bound::lower, "dom_") + - " , " + - makeIntervalBoundExplicit(iterator, interval.value(), - iir::Interval::Bound::upper, "dom_") + - "})"); + stencilClassCtr.addStatement( + arrName + " = {" + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::lower, + "dom_") + + " , " + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::upper, + "dom_") + + "}"); } index += 1; } } if(iterationSpaceSet_) { - stencilClassCtr.addInit("globalOffsets({computeGlobalOffsets(rank, m_dom, xcols, ycols)})"); + stencilClassCtr.addStatement( + "globalOffsets = {computeGlobalOffsets(rank, m_dom, xcols, ycols)}"); + + std::string iterators = "IJ"; + for(auto& stage : iterateIIROver(stencil)) { + for(auto [index, interval] : enumerate(stage->getIterationSpace())) { + if(interval.has_value()) { + std::string hostName = "stage" + std::to_string(stage->getStageID()) + "Global" + + iterators.at(index) + "Indices"; + addCudaCopySymbol(stencilClassCtr, hostName, "int"); + } + } + } + addCudaCopySymbol(stencilClassCtr, "globalOffsets", "unsigned"); } addTmpStorageInit(stencilClassCtr, stencil, tempFields); @@ -457,13 +689,82 @@ void CudaCodeGen::addCudaCopySymbol(MemberFunction& runMethod, const std::string dataType + ") * " + arrName + ".size())"); } +void CudaCodeGen::generateStencilSetupMethod( + Structure& stencilClass, const iir::Stencil& stencil, const ast::GlobalVariableMap& globalsMap, + IndexRange>& nonTempFields, + IndexRange>& tempFields, + std::shared_ptr stencilProperties) const { + + auto stencilClassSetup = stencilClass.addMemberFunction("static void", "setup"); + + stencilClassSetup.addArg("const " + c_dgt + "domain& dom_"); + if(!globalsMap.empty()) { + stencilClassSetup.addArg("globals& globals_"); + } + stencilClassSetup.addArg("int rank"); + stencilClassSetup.addArg("int xcols"); + stencilClassSetup.addArg("int ycols"); + + stencilClassSetup.addStatement("m_dom =dom_"); + + if(!globalsMap.empty()) { + stencilClassSetup.addStatement("m_globals = globals_"); + } + + std::string iterators = "ij"; + for(auto& stage : iterateIIROver(stencil)) { + int index = 0; + for(const auto& interval : stage->getIterationSpace()) { + if(interval.has_value()) { + std::string iterator = iterators.substr(index, 1); + std::string arrName = "stage" + std::to_string(stage->getStageID()) + "Global" + + (char)std::toupper(iterator.at(0)) + "Indices"; + stencilClassSetup.addStatement( + arrName + " = {" + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::lower, + "dom_") + + " , " + + makeIntervalBoundExplicit(iterator, interval.value(), iir::Interval::Bound::upper, + "dom_") + + "}"); + } + index += 1; + } + } + + if(iterationSpaceSet_) { + stencilClassSetup.addStatement( + "globalOffsets = {computeGlobalOffsets(rank, m_dom, xcols, ycols)}"); + } + + if(iterationSpaceSet_) { + stencilClassSetup.addStatement( + "globalOffsets = {computeGlobalOffsets(rank, m_dom, xcols, ycols)}"); + + std::string iterators = "IJ"; + for(auto& stage : iterateIIROver(stencil)) { + for(auto [index, interval] : enumerate(stage->getIterationSpace())) { + if(interval.has_value()) { + std::string hostName = "stage" + std::to_string(stage->getStageID()) + "Global" + + iterators.at(index) + "Indices"; + addCudaCopySymbol(stencilClassSetup, hostName, "int"); + } + } + } + addCudaCopySymbol(stencilClassSetup, "globalOffsets", "unsigned"); + } + + addTmpStorageInit(stencilClassSetup, stencil, tempFields); + stencilClassSetup.commit(); +} + void CudaCodeGen::generateStencilRunMethod( Structure& stencilClass, const iir::Stencil& stencil, const std::shared_ptr& stencilProperties, const std::shared_ptr& stencilInstantiation, const std::unordered_map& paramNameToType, const ast::GlobalVariableMap& globalsMap) const { - MemberFunction stencilRunMethod = stencilClass.addMemberFunction("void", "run", ""); + MemberFunction stencilRunMethod = stencilClass.addMemberFunction("static void", "run", ""); const auto& metadata = stencilInstantiation->getMetaData(); // fields used in the stencil @@ -482,7 +783,7 @@ void CudaCodeGen::generateStencilRunMethod( stencilRunMethod.startBody(); stencilRunMethod.addComment("starting timers"); - stencilRunMethod.addStatement("start()"); + stencilRunMethod.addComment("start()"); for(const auto& multiStagePtr : stencil.getChildren()) { const iir::MultiStage& multiStage = *multiStagePtr; @@ -564,20 +865,6 @@ void CudaCodeGen::generateStencilRunMethod( stencilRunMethod.addStatement("const unsigned int nbz = 1"); } - if(iterationSpaceSet_) { - std::string iterators = "IJ"; - for(auto& stage : iterateIIROver(stencil)) { - for(auto [index, interval] : enumerate(stage->getIterationSpace())) { - if(interval.has_value()) { - std::string hostName = "stage" + std::to_string(stage->getStageID()) + "Global" + - iterators.at(index) + "Indices"; - addCudaCopySymbol(stencilRunMethod, hostName, "int"); - } - } - } - addCudaCopySymbol(stencilRunMethod, "globalOffsets", "unsigned"); - } - stencilRunMethod.addStatement("dim3 blocks(nbx, nby, nbz)"); std::string kernelCall = CodeGeneratorHelper::buildCudaKernelName(stencilInstantiation, multiStagePtr) + @@ -614,8 +901,8 @@ void CudaCodeGen::generateStencilRunMethod( idx = 0; for(const auto& fieldPair : tempMSFieldsNonLocalCached) { // in some cases (where there are no horizontal extents) we dont use the special tmp index - // iterator, but rather a normal 3d field index iterator. In that case we pass temporaries in - // the same manner as normal fields + // iterator, but rather a normal 3d field index iterator. In that case we pass temporaries + // in the same manner as normal fields if(idx > 0) args += ","; if(!CodeGeneratorHelper::useTemporaries(multiStagePtr->getParent(), metadata)) { @@ -645,7 +932,7 @@ void CudaCodeGen::generateStencilRunMethod( } stencilRunMethod.addComment("stopping timers"); - stencilRunMethod.addStatement("pause()"); + stencilRunMethod.addComment("pause()"); stencilRunMethod.commit(); } @@ -679,23 +966,177 @@ void CudaCodeGen::addTmpStorageInit( if(!(tempFields.empty())) { auto const& hMaxExtents = iir::extent_cast(maxExtents.horizontalExtent()); - ctr.addInit(tmpMetadataName_ + "(" + std::to_string(blockSize[0]) + "+" + - std::to_string(-hMaxExtents.iMinus() + hMaxExtents.iPlus()) + ", " + - std::to_string(blockSize[1]) + "+" + - std::to_string(-hMaxExtents.jMinus() + hMaxExtents.jPlus()) + ", (dom_.isize()+ " + - std::to_string(blockSize[0]) + " - 1) / " + std::to_string(blockSize[0]) + - ", (dom_.jsize()+ " + std::to_string(blockSize[1]) + " - 1) / " + - std::to_string(blockSize[1]) + ", dom_.ksize() + 2 * " + - std::to_string(getVerticalTmpHaloSize(stencil)) + ")"); + ctr.addStatement(tmpMetadataName_ + " = tmp_meta_data_t(" + std::to_string(blockSize[0]) + "+" + + std::to_string(-hMaxExtents.iMinus() + hMaxExtents.iPlus()) + ", " + + std::to_string(blockSize[1]) + "+" + + std::to_string(-hMaxExtents.jMinus() + hMaxExtents.jPlus()) + + ", (dom_.isize()+ " + std::to_string(blockSize[0]) + " - 1) / " + + std::to_string(blockSize[0]) + ", (dom_.jsize()+ " + + std::to_string(blockSize[1]) + " - 1) / " + std::to_string(blockSize[1]) + + ", dom_.ksize() + 2 * " + std::to_string(getVerticalTmpHaloSize(stencil)) + + ")"); for(const auto& fieldPair : tempFields) { - ctr.addInit("m_" + fieldPair.second.Name + "(" + tmpMetadataName_ + ")"); + ctr.addStatement("m_" + fieldPair.second.Name + " = tmp_storage_t(" + tmpMetadataName_ + ")"); + } + } +} + +void CudaCodeGen::generateCHeaderSI( + std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation) const { + using namespace codegen; + + CodeGenProperties codeGenProperties = computeCodeGenProperties(stencilInstantiation.get()); + + ssSW << "extern \"C\" {\n"; + generateAPIRunFunctions(ssSW, stencilInstantiation, codeGenProperties, /*onlyDecl=*/true); + generateSetupFunctions(ssSW, stencilInstantiation, codeGenProperties, /*onlyDecl=*/true); + ssSW << "}\n"; +} + +std::string CudaCodeGen::generateCHeader() const { + std::stringstream ssSW; + ssSW << "#pragma once\n"; + ssSW << "#include \"driver-includes/defs.hpp\"\n"; + ssSW << "#include \"driver-includes/cuda_utils.hpp\"\n"; + + for(const auto& nameStencilCtxPair : context_) { + std::shared_ptr stencilInstantiation = nameStencilCtxPair.second; + generateCHeaderSI(ssSW, stencilInstantiation); + } + + return ssSW.str(); +} + +static void +generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, + const std::shared_ptr& stencilInstantiation) { + const auto& stencils = stencilInstantiation->getStencils(); + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + auto globalTypeToFortType = [](const ast::Global& global) { + switch(global.getType()) { + case ast::Value::Kind::Boolean: + return FortranAPI::InterfaceType::BOOLEAN; + case ast::Value::Kind::Double: + return FortranAPI::InterfaceType::DOUBLE; + case ast::Value::Kind::Float: + return FortranAPI::InterfaceType::FLOAT; + case ast::Value::Kind::Integer: + return FortranAPI::InterfaceType::INTEGER; + case ast::Value::Kind::String: + default: + throw std::runtime_error("string globals not supported in cuda ico backend"); + } + }; + + // The following assert is needed because we have only one (user-defined) name for a stencil + // instantiation (stencilInstantiation->getName()). We could compute a per-stencil name ( + // codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil.getStencilID()) ) + // however the interface would not be very useful if the name is generated. + DAWN_ASSERT_MSG(stencils.size() <= 1, + "Unable to generate interface. More than one stencil in stencil instantiation."); + const auto& stencil = *stencils[0]; + + std::vector interfaces = { + FortranInterfaceAPI("run_" + stencilInstantiation->getName())}; + + auto addArgsToAPI = [&](FortranAPI& api, bool includeSavedState, bool optThresholds) { + for(const auto& global : globalsMap) { + api.addArg(global.first, globalTypeToFortType(global.second)); } + for(auto fieldID : stencilInstantiation->getMetaData().getAPIFields()) { + api.addArg( + stencilInstantiation->getMetaData().getNameFromAccessID(fieldID), + FortranAPI::InterfaceType::DOUBLE /* Unfortunately we need to know at codegen + time whether we have fields in SP/DP */ + , + stencilInstantiation->getMetaData().getFieldDimensions(fieldID).rank()); + } + if(includeSavedState) { + for(auto fieldID : getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput})) { + api.addArg( + stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + "_before", + FortranAPI::InterfaceType::DOUBLE /* Unfortunately we need to know at codegen + time whether we have fields in SP/DP */ + , + stencilInstantiation->getMetaData().getFieldDimensions(fieldID).rank()); + } + + for(auto fieldID : getUsedFields(stencil, {dawn::iir::Field::IntendKind::Output, + dawn::iir::Field::IntendKind::InputOutput})) { + if(optThresholds) { + api.addOptArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + + "_rel_tol", + FortranAPI::InterfaceType::DOUBLE); + api.addOptArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + + "_abs_tol", + FortranAPI::InterfaceType::DOUBLE); + } else { + api.addArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + "_rel_tol", + FortranAPI::InterfaceType::DOUBLE); + api.addArg(stencilInstantiation->getMetaData().getNameFromAccessID(fieldID) + "_abs_tol", + FortranAPI::InterfaceType::DOUBLE); + } + } + } + }; + + addArgsToAPI(interfaces[0], /*includeSavedState*/ false, false); + fimGen.addInterfaceAPI(std::move(interfaces[0])); + std::string fortranIndent = " "; + + // memory management functions for production interface + FortranInterfaceAPI setup("setup_" + stencilInstantiation->getName()); + setup.addArg("i", FortranAPI::InterfaceType::INTEGER); + setup.addArg("j", FortranAPI::InterfaceType::INTEGER); + setup.addArg("k", FortranAPI::InterfaceType::INTEGER); + fimGen.addInterfaceAPI(std::move(setup)); +} + +std::string CudaCodeGen::generateF90Interface(std::string moduleName) const { + std::stringstream ss; + IndentedStringStream iss(ss); + + FortranInterfaceModuleGen fimGen(iss, moduleName); + + for(const auto& nameStencilCtxPair : context_) { + std::shared_ptr stencilInstantiation = nameStencilCtxPair.second; + generateF90InterfaceSI(fimGen, stencilInstantiation); } + + fimGen.commit(); + + return iss.str(); } std::unique_ptr CudaCodeGen::generateCode() { DAWN_LOG(INFO) << "Starting code generation for GTClang ..."; + if(codeGenOptions_.OutputCHeader) { + fs::path filePath = *codeGenOptions_.OutputCHeader; + std::ofstream headerFile; + headerFile.open(filePath); + if(headerFile) { + headerFile << generateCHeader(); + headerFile.close(); + } else { + throw std::runtime_error("Error writing to " + filePath.string() + ": " + strerror(errno)); + } + } + if(codeGenOptions_.OutputFortranInterface) { + fs::path filePath = *codeGenOptions_.OutputFortranInterface; + std::string moduleName = filePath.filename().replace_extension("").string(); + std::ofstream interfaceFile; + interfaceFile.open(filePath); + if(interfaceFile) { + interfaceFile << generateF90Interface(moduleName); + interfaceFile.close(); + } else { + throw std::runtime_error("Error writing to " + filePath.string() + ": " + strerror(errno)); + } + } + // Generate code for StencilInstantiations std::map stencils; for(const auto& nameStencilCtxPair : context_) { @@ -728,7 +1169,9 @@ std::unique_ptr CudaCodeGen::generateCode() { // [https://github.com/MeteoSwiss-APN/gtclang/issues/32] //==============------------------------------------------------------------------------------=== CodeGen::addMplIfdefs(ppDefines, 30); + ppDefines.push_back("#include "); ppDefines.push_back("#include "); + ppDefines.push_back("#include "); ppDefines.push_back("using namespace gridtools::dawn;"); generateBCHeaders(ppDefines); diff --git a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h index a79073708..46800d22e 100644 --- a/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h +++ b/dawn/src/dawn/CodeGen/Cuda/CudaCodeGen.h @@ -44,7 +44,9 @@ class CudaCodeGen : public CodeGen { public: ///@brief constructor CudaCodeGen(const StencilInstantiationContext& ctx, int maxHaloPoints, int nsms, - int maxBlocksPerSM, const Array3i& domainSize, bool runWithSync = true); + int maxBlocksPerSM, const Array3i& domainSize, + std::optional outputCHeader, + std::optional OutputFortranInterface, bool runWithSync = true); virtual ~CudaCodeGen(); virtual std::unique_ptr generateCode() override; @@ -53,6 +55,8 @@ class CudaCodeGen : public CodeGen { int maxBlocksPerSM; Array3i domainSize; bool runWithSync; + std::optional OutputCHeader; + std::optional OutputFortranInterface; }; private: @@ -113,6 +117,13 @@ class CudaCodeGen : public CodeGen { IndexRange>& tempFields, std::shared_ptr stencilProperties) const; + void generateStencilSetupMethod( + Structure& stencilClass, const iir::Stencil& stencil, + const ast::GlobalVariableMap& globalsMap, + IndexRange>& nonTempFields, + IndexRange>& tempFields, + std::shared_ptr stencilProperties) const; + void generateStencilClassMembers( Structure& stencilClass, const iir::Stencil& stencil, const ast::GlobalVariableMap& globalsMap, @@ -123,6 +134,27 @@ class CudaCodeGen : public CodeGen { std::string generateStencilInstantiation( const std::shared_ptr& stencilInstantiation); + void + generateCHeaderSI(std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation) const; + + std::string generateCHeader() const; + std::string generateF90Interface(std::string moduleName) const; + + void + generateAPIRunFunctions(std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl = false) const; + void + generateSetupFunctions(std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl = false) const; + + void generateStaticMembersTrailer( + std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties) const; + CudaCodeGenOptions codeGenOptions_; bool iterationSpaceSet_; }; diff --git a/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp index 84f682818..1518b26db 100644 --- a/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda/MSCodeGen.cpp @@ -726,7 +726,7 @@ void MSCodeGen::generateCudaKernelCode() { } if(globalNames_.find("checkOffset") == globalNames_.end()) { - MemberFunction offsetFunc("__device__ bool", "checkOffset", ss_); + MemberFunction offsetFunc("__device__ static bool", "checkOffset", ss_); offsetFunc.addArg("unsigned int min"); offsetFunc.addArg("unsigned int max"); offsetFunc.addArg("unsigned int val"); @@ -1076,18 +1076,25 @@ void MSCodeGen::generateCudaKernelCode() { if(std::any_of(stage.getIterationSpace().cbegin(), stage.getIterationSpace().cend(), [](const auto& p) -> bool { return p.has_value(); })) { - std::string iterators = "IJ"; - for(const auto& stage : iterateIIROver(*(stencilInstantiation_->getIIR()))) { - std::string prefix = "stage" + std::to_string(stage->getStageID()) + "Global"; - for(auto [index, interval] : enumerate(stage->getIterationSpace())) { - if(interval.has_value()) { - std::string arrName = prefix + iterators.at(index) + "Indices"; - guard += " && checkOffset(" + arrName + "_[0], " + arrName + - "_[1], globalOffsets_[" + std::to_string(index) + "] + " + - (char)std::tolower(iterators.at(index)) + "block)"; + std::string iterators = "IJ"; + std::string prefix = "stage" + std::to_string(stage.getStageID()) + "Global"; + for(auto [index, interval] : enumerate(stage.getIterationSpace())) { + if(interval.has_value()) { + std::string arrName = prefix + iterators.at(index) + "Indices"; + std::string offset; + if (iterators[index] == 'I') { + offset = "blockIdx.x * " + std::to_string(ntx) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; } + + if (iterators[index] == 'J') { + offset = "blockIdx.y * " + std::to_string(nty) + " + " + std::string{(char) std::tolower(iterators.at(index))} + "block"; + } + + guard += " && checkOffset(" + arrName + "_[0], " + arrName + + "_[1], globalOffsets_[" + std::to_string(index) + "] + " + + "(" + offset +"))"; } - } + } } guard += ")"; diff --git a/dawn/src/dawn/IIR/DependencyGraph.h b/dawn/src/dawn/IIR/DependencyGraph.h index 633645bc9..b814b417b 100644 --- a/dawn/src/dawn/IIR/DependencyGraph.h +++ b/dawn/src/dawn/IIR/DependencyGraph.h @@ -68,7 +68,9 @@ class DependencyGraph { }; protected: + // map of Value (i.e. normally accessID to Vertex object std::unordered_map vertices_; + // adjacencyList for each vertex where the position within the vector is the vertexID std::vector adjacencyList_; public: @@ -96,8 +98,8 @@ class DependencyGraph { DependencyGraph() = default; /// @brief Insert a new node - Vertex& insertNode(int ID) { - auto [iter, inserted] = vertices_.emplace(ID, Vertex{adjacencyList_.size(), ID}); + Vertex& insertNode(int Value) { + auto [iter, inserted] = vertices_.emplace(Value, Vertex{adjacencyList_.size(), Value}); if(inserted) adjacencyList_.push_back(EdgeList()); return iter->second; diff --git a/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp b/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp index fc7c516e5..b980dd3ea 100644 --- a/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp +++ b/dawn/src/dawn/IIR/DependencyGraphAccesses.cpp @@ -245,6 +245,32 @@ bool DependencyGraphAccesses::isDAG() const { return true; } +bool DependencyGraphAccesses::hasZeroOutdegreeNodes() const { + auto partitions = partitionInSubGraphs(); + std::vector vertices; + + for(std::set& partition : partitions) { + getOutputVertexIDsImpl( + *this, partition, [](std::size_t VertexID) { return VertexID; }, vertices); + if(vertices.empty()) + return false; + } + return true; +} + +bool DependencyGraphAccesses::hasZeroIndegreeNodes() const { + auto partitions = partitionInSubGraphs(); + std::vector vertices; + + for(std::set& partition : partitions) { + getInputVertexIDsImpl( + *this, partition, [](std::size_t VertexID) { return VertexID; }, vertices); + if(vertices.empty()) + return false; + } + return true; +} + std::vector DependencyGraphAccesses::getOutputVertexIDs() const { std::vector outputVertexIDs; getOutputVertexIDsImpl( diff --git a/dawn/src/dawn/IIR/DependencyGraphAccesses.h b/dawn/src/dawn/IIR/DependencyGraphAccesses.h index c5ea557f6..912c198a2 100644 --- a/dawn/src/dawn/IIR/DependencyGraphAccesses.h +++ b/dawn/src/dawn/IIR/DependencyGraphAccesses.h @@ -127,6 +127,11 @@ class DependencyGraphAccesses /// In our context, a DAG is defined as having a non-empty set of input as well as output nodes. bool isDAG() const; + /// @brief true if graph has nodes with indegree=0 + bool hasZeroIndegreeNodes() const; + /// @brief true if graph has nodes with outdegree=0 + bool hasZeroOutdegreeNodes() const; + /// @brief Get the VertexIDs of the pure `output` vertices /// /// Output vertices are vertices which do not have incoming edges from other vertices. diff --git a/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp b/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp index 119ba0b5e..ed0dadfa0 100644 --- a/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp +++ b/dawn/src/dawn/Optimizer/PassFieldVersioning.cpp @@ -137,6 +137,7 @@ bool PassFieldVersioning::run( newGraph = oldGraph; newGraph.insertStatement(stmt); } + doMethod.update(iir::NodeUpdateType::level); } stage.update(iir::NodeUpdateType::level); @@ -210,22 +211,6 @@ PassFieldVersioning::RCKind PassFieldVersioning::fixRaceCondition( } } - // If we only have non-stencil SCCs and there are no input and output fields (i.e we don't have a - // DAG) we have to break (by renaming) one of the SCCs to get a DAG. For example: - // - // field_a = field_b; - // field_b = field_a; - // - // needs to be renamed to - // - // field_a = field_b_0; - // field_b = field_a; - // - // ... and then field_b_0 must be initialized from field_b. - if(stencilSCCs->empty() && !SCCs->empty() && !graph.isDAG()) { - stencilSCCs->emplace_back(std::move(SCCs->front())); - } - if(stencilSCCs->empty()) return RCKind::Nothing; diff --git a/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp b/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp index c5916bc38..d5072af6b 100644 --- a/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp +++ b/dawn/src/dawn/Optimizer/ReadBeforeWriteConflict.cpp @@ -42,6 +42,10 @@ class ReadBeforeWriteConflictDetector { ReadBeforeWriteConflict check() const { std::vector nodesToVisit = graph_.getOutputVertexIDs(); + // if the graph does not have nodes with outdegree=0, there is no seed to start the algorithm + // in this case, there is at least one SCC, which means we can start from any node of the graph + if(nodesToVisit.empty() ) nodesToVisit.push_back(0); + DAWN_ASSERT_MSG(!nodesToVisit.empty(), "invalid graph (probably contains cycles!)"); ReadBeforeWriteConflict conflict; @@ -77,6 +81,9 @@ class ReadBeforeWriteConflictDetector { else visitedNodes.insert(curNode); + DAWN_ASSERT_MSG((adjacencyList.size() > curNode), "out of bounds access to adjacency list of graph"); + + // Follow edges of the current node if(!adjacencyList[curNode].empty()) { for(const auto& edge : adjacencyList[curNode]) { diff --git a/dawn/src/driver-includes/math.hpp b/dawn/src/driver-includes/math.hpp index ce3518a1b..9ad5d363d 100644 --- a/dawn/src/driver-includes/math.hpp +++ b/dawn/src/driver-includes/math.hpp @@ -18,6 +18,8 @@ #include "storage.hpp" #include #include +#include +#include #ifndef GT_FUNCTION #define GT_FUNCTION @@ -122,24 +124,38 @@ GT_FUNCTION T sqrt(const T x) { return ::sqrt(x); } -/** - * @brief Returns the smaller value of @c x and @c y - * - * @see http://en.cppreference.com/w/cpp/algorithm/min - */ -template -GT_FUNCTION auto min(const T x, const U y) -> decltype(x + y) { - return x < y ? x : y; +template +GT_FUNCTION T min(T&&t) +{ + return std::forward(t); } -/** - * @brief Returns the greater value of @c x and @c y - * - * @see http://en.cppreference.com/w/cpp/algorithm/max - */ -template -GT_FUNCTION auto max(const T x, const U y) -> decltype(x + y) { - return x > y ? x : y; +template +GT_FUNCTION typename std::common_type< + T0, T1, Ts... +>::type min(T0&& val1, T1&& val2, Ts&&... vs) +{ + if (val2 < val1) + return min(val2, std::forward(vs)...); + else + return min(val1, std::forward(vs)...); +} + +template +GT_FUNCTION T max(T&&t) +{ + return std::forward(t); +} + +template +GT_FUNCTION typename std::common_type< + T0, T1, Ts... +>::type max(T0&& val1, T1&& val2, Ts&&... vs) +{ + if (val2 < val1) + return max(val2, std::forward(vs)...); + else + return max(val1, std::forward(vs)...); } /** @@ -253,6 +269,11 @@ GT_FUNCTION T isnan(const T x) { return std::isnan(x); } + +template +GT_FUNCTION T sign(const T val) { + return (T(0) <= val) - (val < T(0));; +} /** @} */ } // namespace math } // namespace dawn diff --git a/dawn/src/driver-includes/serialize.hpp b/dawn/src/driver-includes/serialize.hpp new file mode 100644 index 000000000..ba2745d73 --- /dev/null +++ b/dawn/src/driver-includes/serialize.hpp @@ -0,0 +1,37 @@ +#include + +template +void serialize(const storage_type &field, std::string &&fname, int iter, int isize, int jsize, int ksize) { + field.sync(); + gridtools::data_view field_view = gridtools::make_host_view(field); + char buf[128]; + sprintf(buf, "_%02d.txt", iter); + FILE *fp = fopen(("results/" + fname + buf).c_str(), "w+"); + for (int i = 0; i < isize; i++) { + for (int j = 0; j < jsize; j++) { + for (int k = 0; k < ksize; k++) { + fprintf(fp, "%.14g\n", field_view(i,j,k)); + } + } + } + fclose(fp); + field.sync(); +} + +template +void serialize_gpu(const storage_type &field, std::string &&fname, int iter, int isize, int jsize, int ksize) { + field.sync(); + gridtools::data_view field_view = gridtools::make_host_view(field); + char buf[128]; + sprintf(buf, "_%02d.txt", iter); + FILE *fp = fopen(("results/" + fname + buf).c_str(), "w+"); + for (int i = 0; i < isize; i++) { + for (int j = 0; j < jsize; j++) { + for (int k = 0; k < ksize; k++) { + fprintf(fp, "%.14g\n", field_view(i,j,k)); + } + } + } + fclose(fp); + field.sync(); +} diff --git a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp index 1ad5952d9..68e85d821 100644 --- a/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp +++ b/dawn/test/integration-test/dawn4py-tests/data/generate_versioned_field_ref.cpp @@ -19,7 +19,6 @@ class generate_versioned_field { ::dawn::edge_field_t& m_c; ::dawn::edge_field_t& m_d; ::dawn::edge_field_t& m_e; - ::dawn::edge_field_t& m_c_0; ::dawn::unstructured_domain m_unstructured_domain; public: @@ -28,9 +27,8 @@ class generate_versioned_field { ::dawn::edge_field_t& b, ::dawn::edge_field_t& c, ::dawn::edge_field_t& d, - ::dawn::edge_field_t& e, - ::dawn::edge_field_t& c_0) - : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e), m_c_0(c_0) {} + ::dawn::edge_field_t& e) + : m_mesh(mesh), m_k_size(k_size), m_a(a), m_b(b), m_c(c), m_d(d), m_e(e) {} ~stencil_37() {} @@ -40,23 +38,17 @@ class generate_versioned_field { static constexpr ::dawn::driver::unstructured_extent c_extent = {false, 0, 0}; static constexpr ::dawn::driver::unstructured_extent d_extent = {false, 0, 0}; static constexpr ::dawn::driver::unstructured_extent e_extent = {false, 0, 0}; - static constexpr ::dawn::driver::unstructured_extent c_0_extent = {false, 0, 0}; void run() { using ::dawn::deref; - { - for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size)) + 0 - 1 + 0; ++k) { - for(auto const& loc : getEdges(LibTag{}, m_mesh)) { - m_c_0(deref(LibTag{}, loc), (k + 0)) = m_c(deref(LibTag{}, loc), (k + 0)); - } - } - } { for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size)) + 0 - 1 + 0; ++k) { for(auto const& loc : getEdges(LibTag{}, m_mesh)) { m_a(deref(LibTag{}, loc), (k + 0)) = - ((m_b(deref(LibTag{}, loc), (k + 0)) / m_c_0(deref(LibTag{}, loc), (k + 0))) + + ((m_b(deref(LibTag{}, loc), (k + 0)) / m_c(deref(LibTag{}, loc), (k + 0))) + (::dawn::float_type)5); + } + for(auto const& loc : getEdges(LibTag{}, m_mesh)) { if(m_d(deref(LibTag{}, loc), (k + 0))) { m_a(deref(LibTag{}, loc), (k + 0)) = m_b(deref(LibTag{}, loc), (k + 0)); } else { @@ -78,7 +70,6 @@ class generate_versioned_field { generate_versioned_field(const generate_versioned_field&) = delete; // Members - ::dawn::edge_field_t m_c_0; void set_splitter_index(::dawn::LocationType loc, ::dawn::UnstructuredSubdomain subdomain, int offset, int index) { @@ -91,8 +82,7 @@ class generate_versioned_field { ::dawn::edge_field_t& c, ::dawn::edge_field_t& d, ::dawn::edge_field_t& e) - : m_stencil_37(mesh, k_size, a, b, c, d, e, m_c_0), - m_c_0(allocateField(LibTag{}, numEdges(LibTag{}, mesh), k_size)) {} + : m_stencil_37(mesh, k_size, a, b, c, d, e) {} void run() { m_stencil_37.run(); diff --git a/dawn/test/integration-test/dawn4py-tests/global_var.py b/dawn/test/integration-test/dawn4py-tests/global_var.py index 36b7d3008..3f59ca14e 100644 --- a/dawn/test/integration-test/dawn4py-tests/global_var.py +++ b/dawn/test/integration-test/dawn4py-tests/global_var.py @@ -67,8 +67,8 @@ def main(args: argparse.Namespace): ) # print the SIR - if args.verbose: - print(MessageToJson(sir)) + # if args.verbose: + print(MessageToJson(sir)) # compile code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CXXNaive) diff --git a/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu b/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu index 3ea53ecfc..8352d6e3c 100644 --- a/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu +++ b/dawn/test/unit-test/dawn/CodeGen/reference/global_indexing.cu @@ -41,7 +41,7 @@ namespace dawn_generated { namespace cuda { __constant__ int stage14GlobalJIndices_[2]; __constant__ unsigned globalOffsets_[2]; -__device__ bool checkOffset(unsigned int min, unsigned int max, unsigned int val) { +__device__ static bool checkOffset(unsigned int min, unsigned int max, unsigned int val) { return (min <= val && val < max); } __global__ void __launch_bounds__(128) @@ -108,7 +108,7 @@ __global__ void __launch_bounds__(128) if(iblock >= 0 && iblock <= block_size_i - 1 + 0 && jblock >= 0 && jblock <= block_size_j - 1 + 0 && checkOffset(stage14GlobalJIndices_[0], stage14GlobalJIndices_[1], - globalOffsets_[1] + jblock)) { + globalOffsets_[1] + (blockIdx.y * 4 + jblock))) { { out_field[idx111] = (int)10; } diff --git a/gtclang/src/gtclang_dsl_defs/math.hpp b/gtclang/src/gtclang_dsl_defs/math.hpp index a56427efa..60e7061ad 100644 --- a/gtclang/src/gtclang_dsl_defs/math.hpp +++ b/gtclang/src/gtclang_dsl_defs/math.hpp @@ -134,6 +134,9 @@ T exp(const T arg); template T log(const T x); +template +T sign(const T x); + /** @} */ } // namespace math } // namespace dsl diff --git a/gtclang/test/integration-test/CodeGen/CMakeLists.txt b/gtclang/test/integration-test/CodeGen/CMakeLists.txt index 97d3f7e38..97f8531dd 100644 --- a/gtclang/test/integration-test/CodeGen/CMakeLists.txt +++ b/gtclang/test/integration-test/CodeGen/CMakeLists.txt @@ -196,3 +196,6 @@ add_codegen_test(TEST kcache_fill_kparallel PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_fill_backward PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_flush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) add_codegen_test(TEST kcache_epflush FLAGS -fmultistage-merger PLAIN_CUDA_ONLY) +add_codegen_test(TEST iteration_space_stencil_01 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) +add_codegen_test(TEST iteration_space_stencil_02 FLAGS -max-halo-size=0 PLAIN_CUDA_ONLY) +add_codegen_test(TEST var_stencil PLAIN_CUDA_ONLY) \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01.cpp new file mode 100644 index 000000000..74163d65f --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01.cpp @@ -0,0 +1,32 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// + +#include "gtclang_dsl_defs/gtclang_dsl.hpp" +using namespace gtclang::dsl; + +stencil iteration_space_stencil_01 { + storage out; + + Do { + vertical_region(k_start, k_end) { + out = 0; + } + + iteration_space(i_start + 1, i_end-1, j_start + 1, j_end - 1, k_start + 1, k_end - 1) { + out = 1; + } + } +}; diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cpp new file mode 100644 index 000000000..9bc5d7f74 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cpp @@ -0,0 +1,61 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// +#define DAWN_GENERATED 1 +#define GRIDTOOLS_DAWN_HALO_EXTENT 0 +#define GT_VECTOR_LIMIT_SIZE 30 + +#undef FUSION_MAX_VECTOR_SIZE +#undef FUSION_MAX_MAP_SIZE +#define FUSION_MAX_VECTOR_SIZE GT_VECTOR_LIMIT_SIZE +#define FUSION_MAX_MAP_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_LIMIT_VECTOR_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS + +#include +#include "test/integration-test/CodeGen/Macros.hpp" +#include "driver-includes/verify.hpp" +#include "test/integration-test/CodeGen/Options.hpp" +#include "test/integration-test/CodeGen/generated/iteration_space_stencil_01_c++-naive.cpp" + +#ifndef OPTBACKEND +#define OPTBACKEND gt +#endif + +// clang-format off +#include INCLUDE_FILE(test/integration-test/CodeGen/generated/iteration_space_stencil_01_,OPTBACKEND.cpp) +// clang-format on + +using namespace dawn; +TEST(iteration_space_stencil, test) { + domain dom(Options::getInstance().m_size[0], Options::getInstance().m_size[1], + Options::getInstance().m_size[2]); + dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); + + verifier verif(dom); + + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); + storage_t out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); + + verif.fill(-1.0, out_gt, out_naive); + + dawn_generated::OPTBACKEND::iteration_space_stencil_01 iteration_space_gt(dom); + dawn_generated::cxxnaive::iteration_space_stencil_01 iteration_space_naive(dom); + + iteration_space_gt.run(out_gt); + iteration_space_naive.run(out_naive); + + ASSERT_TRUE(verif.verify(out_gt, out_naive)); +} diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cu b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cu new file mode 100644 index 000000000..b9bc8b31a --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_01_benchmark.cu @@ -0,0 +1 @@ +#include "iteration_space_stencil_01_benchmark.cpp" \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02.cpp new file mode 100644 index 000000000..e2ad63bf4 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02.cpp @@ -0,0 +1,36 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// + +#include "gtclang_dsl_defs/gtclang_dsl.hpp" +using namespace gtclang::dsl; + +stencil iteration_space_stencil_02 { + storage out; + + Do { + vertical_region(k_start, k_end) { + out = 0; + } + + iteration_space(i_start + 1, i_end-1, j_start + 1, j_end - 1, k_start + 1, k_end - 1) { + out = 1; + } + + iteration_space(j_start + 1, j_end - 1, k_start + 1, k_end - 1) { + out = 2; + } + } +}; diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cpp b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cpp new file mode 100644 index 000000000..7530d3215 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cpp @@ -0,0 +1,61 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// +#define DAWN_GENERATED 1 +#define GRIDTOOLS_DAWN_HALO_EXTENT 0 +#define GT_VECTOR_LIMIT_SIZE 30 + +#undef FUSION_MAX_VECTOR_SIZE +#undef FUSION_MAX_MAP_SIZE +#define FUSION_MAX_VECTOR_SIZE GT_VECTOR_LIMIT_SIZE +#define FUSION_MAX_MAP_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_LIMIT_VECTOR_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS + +#include +#include "test/integration-test/CodeGen/Macros.hpp" +#include "driver-includes/verify.hpp" +#include "test/integration-test/CodeGen/Options.hpp" +#include "test/integration-test/CodeGen/generated/iteration_space_stencil_02_c++-naive.cpp" + +#ifndef OPTBACKEND +#define OPTBACKEND gt +#endif + +// clang-format off +#include INCLUDE_FILE(test/integration-test/CodeGen/generated/iteration_space_stencil_02_,OPTBACKEND.cpp) +// clang-format on + +using namespace dawn; +TEST(iteration_space_stencil, test) { + domain dom(Options::getInstance().m_size[0], Options::getInstance().m_size[1], + Options::getInstance().m_size[2]); + dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); + + verifier verif(dom); + + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); + storage_t out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); + + verif.fill(-1.0, out_gt, out_naive); + + dawn_generated::OPTBACKEND::iteration_space_stencil_02 iteration_space_gt(dom); + dawn_generated::cxxnaive::iteration_space_stencil_02 iteration_space_naive(dom); + + iteration_space_gt.run(out_gt); + iteration_space_naive.run(out_naive); + + ASSERT_TRUE(verif.verify(out_gt, out_naive)); +} diff --git a/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cu b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cu new file mode 100644 index 000000000..c7bea8dc5 --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/iteration_space_stencil_02_benchmark.cu @@ -0,0 +1 @@ +#include "iteration_space_stencil_02_benchmark.cpp" \ No newline at end of file diff --git a/gtclang/test/integration-test/CodeGen/var_stencil.cpp b/gtclang/test/integration-test/CodeGen/var_stencil.cpp new file mode 100644 index 000000000..5371592ca --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/var_stencil.cpp @@ -0,0 +1,33 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// + +#include "gtclang_dsl_defs/gtclang_dsl.hpp" +using namespace gtclang::dsl; + +stencil var_stencil { + storage out, in; + var tmp; + + Do { + vertical_region(k_start, k_end) { + tmp = 1; + } + + iteration_space(k_start, k_end) { + out = in + tmp; + } + } +}; diff --git a/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp new file mode 100644 index 000000000..60d61a52b --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cpp @@ -0,0 +1,63 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ _ +// | | | | +// __ _| |_ ___| | __ _ _ __ __ _ +// / _` | __/ __| |/ _` | '_ \ / _` | +// | (_| | || (__| | (_| | | | | (_| | +// \__, |\__\___|_|\__,_|_| |_|\__, | - GridTools Clang DSL +// __/ | __/ | +// |___/ |___/ +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// +#define DAWN_GENERATED 1 +#define GRIDTOOLS_DAWN_HALO_EXTENT 3 +#define GT_VECTOR_LIMIT_SIZE 30 + +#undef FUSION_MAX_VECTOR_SIZE +#undef FUSION_MAX_MAP_SIZE +#define FUSION_MAX_VECTOR_SIZE GT_VECTOR_LIMIT_SIZE +#define FUSION_MAX_MAP_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_LIMIT_VECTOR_SIZE FUSION_MAX_VECTOR_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS + +#include +#include "test/integration-test/CodeGen/Macros.hpp" +#include "driver-includes/verify.hpp" +#include "test/integration-test/CodeGen/Options.hpp" +#include "test/integration-test/CodeGen/generated/var_stencil_c++-naive.cpp" + +#ifndef OPTBACKEND +#define OPTBACKEND gt +#endif + +// clang-format off +#include INCLUDE_FILE(test/integration-test/CodeGen/generated/var_stencil_,OPTBACKEND.cpp) +// clang-format on + +using namespace dawn; +TEST(var_stencil, test) { + domain dom(Options::getInstance().m_size[0], Options::getInstance().m_size[1], + Options::getInstance().m_size[2]); + + dom.set_halos(halo::value, halo::value, halo::value, halo::value, 0, 0); + + verifier verif(dom); + + meta_data_t meta_data(dom.isize(), dom.jsize(), dom.ksize()); + storage_t in(meta_data, "in"), out_gt(meta_data, "out-gt"), out_naive(meta_data, "out-naive"); + + verif.fillMath(8.0, 2.0, 1.5, 1.5, 2.0, 4.0, in); + verif.fill(-1.0, out_gt, out_naive); + + dawn_generated::OPTBACKEND::var_stencil copy_gt(dom); + dawn_generated::cxxnaive::var_stencil copy_naive(dom); + + copy_gt.run(out_gt, in); + copy_naive.run(out_naive, in); + + ASSERT_TRUE(verif.verify(out_gt, out_naive)); +} diff --git a/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cu b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cu new file mode 100644 index 000000000..583dbbc7d --- /dev/null +++ b/gtclang/test/integration-test/CodeGen/var_stencil_benchmark.cu @@ -0,0 +1 @@ +#include "var_stencil_benchmark.cpp" \ No newline at end of file