diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp
index ecacf42be196..b5b7ebddfd4f 100644
--- a/src/CodeGen_Internal.cpp
+++ b/src/CodeGen_Internal.cpp
@@ -612,7 +612,9 @@ void get_target_options(const llvm::Module &module, llvm::TargetOptions &options
 
     options = llvm::TargetOptions();
     options.AllowFPOpFusion = per_instruction_fast_math_flags ? llvm::FPOpFusion::Strict : llvm::FPOpFusion::Fast;
+#if LLVM_VERSION < 210
     options.UnsafeFPMath = !per_instruction_fast_math_flags;
+#endif
     options.NoInfsFPMath = !per_instruction_fast_math_flags;
     options.NoNaNsFPMath = !per_instruction_fast_math_flags;
     options.HonorSignDependentRoundingFPMathOption = !per_instruction_fast_math_flags;
diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp
index dae26c5d6681..9aef62a49188 100644
--- a/src/CodeGen_PTX_Dev.cpp
+++ b/src/CodeGen_PTX_Dev.cpp
@@ -614,7 +614,9 @@ vector<char> CodeGen_PTX_Dev::compile_to_src() {
 
     TargetOptions options;
     options.AllowFPOpFusion = FPOpFusion::Fast;
+#if LLVM_VERSION < 210
     options.UnsafeFPMath = true;
+#endif
     options.NoInfsFPMath = true;
     options.NoNaNsFPMath = true;
     options.HonorSignDependentRoundingFPMathOption = false;
diff --git a/src/Lower.cpp b/src/Lower.cpp
index 605311113681..c6db1adfa33c 100644
--- a/src/Lower.cpp
+++ b/src/Lower.cpp
@@ -331,6 +331,10 @@ void lower_impl(const vector<Function> &output_funcs,
         debug(1) << "Selecting a GPU API for extern stages...\n";
         s = select_gpu_api(s, t);
         log("Lowering after selecting a GPU API for extern stages:", s);
+    } else {
+        debug(1) << "Injecting host-dirty marking...\n";
+        s = inject_host_dev_buffer_copies(s, t);
+        log("Lowering after injecting host-dirty marking:", s);
     }
 
     debug(1) << "Simplifying...\n";