diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp index ecacf42be196..b5b7ebddfd4f 100644 --- a/src/CodeGen_Internal.cpp +++ b/src/CodeGen_Internal.cpp @@ -612,7 +612,9 @@ void get_target_options(const llvm::Module &module, llvm::TargetOptions &options options = llvm::TargetOptions(); options.AllowFPOpFusion = per_instruction_fast_math_flags ? llvm::FPOpFusion::Strict : llvm::FPOpFusion::Fast; +#if LLVM_VERSION < 210 options.UnsafeFPMath = !per_instruction_fast_math_flags; +#endif options.NoInfsFPMath = !per_instruction_fast_math_flags; options.NoNaNsFPMath = !per_instruction_fast_math_flags; options.HonorSignDependentRoundingFPMathOption = !per_instruction_fast_math_flags; diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index dae26c5d6681..9aef62a49188 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -614,7 +614,9 @@ vector CodeGen_PTX_Dev::compile_to_src() { TargetOptions options; options.AllowFPOpFusion = FPOpFusion::Fast; +#if LLVM_VERSION < 210 options.UnsafeFPMath = true; +#endif options.NoInfsFPMath = true; options.NoNaNsFPMath = true; options.HonorSignDependentRoundingFPMathOption = false; diff --git a/src/Lower.cpp b/src/Lower.cpp index 605311113681..c6db1adfa33c 100644 --- a/src/Lower.cpp +++ b/src/Lower.cpp @@ -331,6 +331,10 @@ void lower_impl(const vector &output_funcs, debug(1) << "Selecting a GPU API for extern stages...\n"; s = select_gpu_api(s, t); log("Lowering after selecting a GPU API for extern stages:", s); + } else { + debug(1) << "Injecting host-dirty marking...\n"; + s = inject_host_dev_buffer_copies(s, t); + log("Lowering after injecting host-dirty marking:", s); } debug(1) << "Simplifying...\n";