diff --git a/CMakeLists.txt b/CMakeLists.txt
index dc8f105d6..0ac7ee2d7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,10 @@ ecbuild_add_option( FEATURE ACC
                     DESCRIPTION "Support for using GPUs with OpenACC"
                     REQUIRED_PACKAGES "OpenACC COMPONENTS Fortran" )
 
+ecbuild_add_option( FEATURE GPU_HOST
+                    DEFAULT OFF
+                    DESCRIPTION "Use host backend for GPU version" )
+
 ecbuild_add_option( FEATURE DOUBLE_PRECISION
                     DEFAULT ON
                     DESCRIPTION "Support for Double Precision" )
@@ -99,19 +103,21 @@ ecbuild_add_option( FEATURE TRANSI
 
 # Search for available GPU runtimes, searching for CUDA first and, if not found,
 # attempt to find HIP
-if( ECTRANS_ENABLE_GPU OR (NOT DEFINED ECTRANS_ENABLE_GPU AND ENABLE_GPU))
-  set(HAVE_CUDA 0)
-  set(HAVE_HIP 0)
-  ectrans_find_cuda() # sets "HAVE_CUDA"
-  if( NOT HAVE_CUDA )
-    ectrans_find_hip() # sets "HAVE_HIP"
+set(HAVE_CUDA 0)
+set(HAVE_HIP 0)
+if( NOT HAVE_GPU_HOST )
+  if( ECTRANS_ENABLE_GPU OR (NOT DEFINED ECTRANS_ENABLE_GPU AND ENABLE_GPU))
+    ectrans_find_cuda() # sets "HAVE_CUDA"
+    if( NOT HAVE_CUDA )
+      ectrans_find_hip() # sets "HAVE_HIP"
+    endif()
   endif()
 endif()
 
 ecbuild_add_option( FEATURE GPU
                     DEFAULT OFF
                     DESCRIPTION "Compile GPU version of ectrans (Requires OpenACC or sufficient OpenMP offloading support)"
-                    CONDITION (HAVE_HIP OR HAVE_CUDA) AND (HAVE_ACC OR HAVE_OMP) )
+                    CONDITION ((HAVE_HIP OR HAVE_CUDA) AND (HAVE_ACC OR HAVE_OMP)) OR HAVE_GPU_HOST )
 
 # Check CPU or GPU is enabled, and if not, abort
 if( (NOT HAVE_CPU) AND (NOT HAVE_GPU) )
@@ -123,14 +129,13 @@ if( HAVE_GPU )
     set( GPU_OFFLOAD "ACC" )
   elseif( HAVE_OMP )
     set( GPU_OFFLOAD "OMP" )
-  else()
-    ecbuild_error("Could not enable GPU as OMP or ACC were not enabled")
   endif()
 endif()
 
 ecbuild_add_option( FEATURE CUTLASS
                     DEFAULT OFF
                     CONDITION HAVE_GPU AND HAVE_CUDA AND CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC"
+                      AND NOT HAVE_GPU_HOST
                     DESCRIPTION "Support for Cutlass BLAS operations"
                     REQUIRED_PACKAGES "NvidiaCutlass VERSION 2.11" )
 
@@ -142,18 +147,18 @@ ecbuild_add_option( FEATURE CUTLASS_3XTF32
 
 ecbuild_add_option( FEATURE GPU_AWARE_MPI
                     DEFAULT ON
-                    CONDITION HAVE_GPU AND HAVE_MPI
+                    CONDITION HAVE_GPU AND HAVE_MPI AND NOT HAVE_GPU_HOST
                     REQUIRED_PACKAGES "MPI COMPONENTS Fortran"
                     DESCRIPTION "Enable GPU-aware MPI" )
 
 ecbuild_add_option( FEATURE GPU_GRAPHS_GEMM
                     DEFAULT ON
-                    CONDITION HAVE_GPU
+                    CONDITION HAVE_GPU AND NOT HAVE_GPU_HOST
                     DESCRIPTION "Enable graph-based optimisation of Legendre transform GEMM kernel" )
 
 ecbuild_add_option( FEATURE GPU_GRAPHS_FFT
                     DEFAULT ON
-                    CONDITION HAVE_GPU
+                    CONDITION HAVE_GPU AND NOT HAVE_GPU_HOST
                     DESCRIPTION "Enable graph-based optimisation of FFT kernels" )
 
 if( BUILD_SHARED_LIBS )
diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt
index 0a19b4b3b..4c67d4aaa 100644
--- a/src/trans/gpu/CMakeLists.txt
+++ b/src/trans/gpu/CMakeLists.txt
@@ -10,7 +10,6 @@
 
 list( APPEND trans_gpu_common_src
   algor/ext_acc.F90
-  algor/c_hipmemgetinfo.cpp
   algor/buffered_allocator_mod.F90
   algor/device_mod.F90
   algor/growing_allocator_mod.F90
@@ -24,6 +23,7 @@ if( HAVE_HIP )
     algor/*.hip.cpp
   )
   list( APPEND trans_gpu_common_src
+    algor/c_hipmemgetinfo.cpp
     algor/hicblas_gemm.hip.cpp
     algor/hicfft.hip.cpp
   )
@@ -32,12 +32,17 @@ elseif( HAVE_CUDA )
   set( GPU_RUNTIME "CUDA" )
   set( ECTRANS_GPU_HIP_LIBRARIES CUDA::cufft CUDA::cublas nvhpcwrapnvtx CUDA::cudart )
   list( APPEND trans_gpu_common_src
+    algor/c_hipmemgetinfo.cpp
     algor/hicblas_gemm.cuda.cu
     algor/hicfft.cuda.cu
   )
   ecbuild_info("warn: IN_PLACE_FFT defined for cuFFT")
 else()
-  ecbuild_info("warn: HIP and CUDA not found")
+  set( GPU_RUNTIME "HOST" )
+  list( APPEND trans_gpu_common_src
+    algor/fft.host.cpp
+    algor/gemm.host.cpp
+  )
 endif()
 
 
@@ -67,6 +72,12 @@ ecbuild_add_library(
                        $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT>
 )
 
+if( HAVE_GPU_HOST )
+  target_link_libraries( ectrans_gpu_common PRIVATE ${LAPACK_LIBRARIES} )
+  target_link_libraries( ectrans_gpu_common PRIVATE ${FFTW_LIBRARIES} )
+  target_include_directories( ectrans_gpu_common PRIVATE ${FFTW_INCLUDE_DIRS} )
+endif()
+
 ectrans_target_fortran_module_directory(
   TARGET            ectrans_gpu_common
   MODULE_DIRECTORY  ${PROJECT_BINARY_DIR}/module/ectrans
@@ -169,7 +180,7 @@ foreach( prec dp sp )
         endif()
 
         # cuFFT can do in-place FFT, hipFFT cannot
-        if( HAVE_CUDA )
+        if( HAVE_CUDA OR (GPU_RUNTIME STREQUAL "HOST"))
           target_compile_definitions( ectrans_gpu_${prec} PRIVATE IN_PLACE_FFT )
         endif()
 
diff --git a/src/trans/gpu/algor/ext_acc.F90 b/src/trans/gpu/algor/ext_acc.F90
index 2cf79c98c..7216e3c44 100644
--- a/src/trans/gpu/algor/ext_acc.F90
+++ b/src/trans/gpu/algor/ext_acc.F90
@@ -27,12 +27,10 @@ module openacc_ext
   implicit none
 
   private
-  public :: ext_acc_pass, ext_acc_create, ext_acc_copyin, ext_acc_copyout, &
+  public :: ext_acc_pass, ext_acc_create, ext_acc_copyin, ext_acc_copyout, ext_acc_delete
+  public :: ext_acc_arr_desc
 #ifdef ACCGPU
-       & ext_acc_delete, ext_acc_arr_desc, acc_handle_kind
-#endif
-#ifdef OMPGPU
-       & ext_acc_delete, ext_acc_arr_desc
+  public :: acc_handle_kind
 #endif
 
   type common_pointer_descr
@@ -271,9 +269,8 @@ subroutine ext_acc_create(ptrs, stream)
     type(ext_acc_arr_desc), intent(in) :: ptrs(:)
 #ifdef ACCGPU
     integer(acc_handle_kind), optional :: stream
-#endif
-#ifdef OMPGPU
-    integer(kind=int32), optional :: stream
+#else
+    integer(kind=int32), optional :: stream ! not used by other runtimes than OpenACC
 #endif
 
     type(common_pointer_descr), allocatable :: common_ptrs(:)
@@ -310,9 +307,8 @@ subroutine ext_acc_copyin(ptrs, stream)
     type(ext_acc_arr_desc), intent(in) :: ptrs(:)
 #ifdef ACCGPU
     integer(acc_handle_kind), optional :: stream
-#endif
-#ifdef OMPGPU
-    integer(kind=int32), optional :: stream
+#else
+    integer(kind=int32), optional :: stream  ! not used by other runtimes than OpenACC
 #endif
 
     type(common_pointer_descr), allocatable :: common_ptrs(:)
@@ -349,9 +345,8 @@ subroutine ext_acc_copyout(ptrs, stream)
     type(ext_acc_arr_desc), intent(in) :: ptrs(:)
 #ifdef ACCGPU
     integer(acc_handle_kind), optional :: stream
-#endif
-#ifdef OMPGPU
-    integer(kind=int32), optional :: stream
+#else
+    integer(kind=int32), optional :: stream  ! not used by other runtimes than OpenACC
 #endif
     type(common_pointer_descr), allocatable :: common_ptrs(:)
 
@@ -388,7 +383,7 @@ subroutine ext_acc_delete(ptrs, stream)
 #ifdef ACCGPU
     integer(acc_handle_kind), optional :: stream
 #else
-    integer(kind=int32), optional :: stream
+    integer(kind=int32), optional :: stream ! not used by other runtimes than OpenACC
 #endif
     type(common_pointer_descr), allocatable :: common_ptrs(:)
 
diff --git a/src/trans/gpu/algor/fft.host.cpp b/src/trans/gpu/algor/fft.host.cpp
new file mode 100644
index 000000000..6059ab539
--- /dev/null
+++ b/src/trans/gpu/algor/fft.host.cpp
@@ -0,0 +1,201 @@
+#include <unordered_map>
+#include <vector>
+#include <algorithm>
+#include <memory>
+#include <fftw3.h>
+
+typedef enum fftType_t {
+  FFT_R2C = 0x2a,  // Real to complex (interleaved)
+  FFT_C2R = 0x2c,  // Complex (interleaved) to real
+  FFT_D2Z = 0x6a,  // Double to double-complex (interleaved)
+  FFT_Z2D = 0x6c,  // Double-complex (interleaved) to double
+} fftType;
+
+namespace {
+struct Double {
+  using real = double;
+  using cmplx = fftw_complex;
+  using plan_type = fftw_plan;
+};
+struct Float {
+  using real = float;
+  using cmplx = fftwf_complex;
+  using plan_type = fftwf_plan;
+};
+
+template <class Type, fftType Direction> class fft_plan {
+  using real = typename Type::real;
+  using cmplx = typename Type::cmplx;
+  using plan_type = typename Type::plan_type;
+
+public:
+  void exec(real *data_real, cmplx *data_complex) const {
+    real *data_real_l = &data_real[offset];
+    cmplx *data_complex_l = &data_complex[offset / 2];
+    if constexpr (Direction == FFT_R2C)
+      fftwf_execute_dft_r2c(*plan_ptr, data_real_l, data_complex_l);
+    else if constexpr (Direction == FFT_C2R)
+      fftwf_execute_dft_c2r(*plan_ptr, data_complex_l, data_real_l);
+    else if constexpr (Direction == FFT_D2Z)
+      fftw_execute_dft_r2c(*plan_ptr, data_real_l, data_complex_l);
+    else if constexpr (Direction == FFT_Z2D)
+      fftw_execute_dft_c2r(*plan_ptr, data_complex_l, data_real_l);
+  }
+  fft_plan(plan_type plan_, int64_t offset_)
+      : plan_ptr(new plan_type{plan_},
+                   [](auto ptr) {
+                     if constexpr (Direction == FFT_R2C || Direction == FFT_C2R)
+                       fftwf_destroy_plan(*ptr);
+                     else if constexpr (Direction == FFT_D2Z || Direction == FFT_Z2D)
+                       fftw_destroy_plan(*ptr);
+                     delete ptr;
+                   }),
+        offset(offset_) {}
+
+private:
+  std::shared_ptr<plan_type> plan_ptr;
+  int64_t offset;
+};
+
+struct cache_key {
+  int resol_id;
+  int kfield;
+  bool operator==(const cache_key &other) const {
+    return resol_id == other.resol_id && kfield == other.kfield;
+  }
+  cache_key(int resol_id_, int kfield_)
+      : resol_id(resol_id_), kfield(kfield_) {}
+};
+} // namespace
+
+template <> struct std::hash<cache_key> {
+  std::size_t operator()(const cache_key &k) const {
+    return k.resol_id * 10000 + k.kfield;
+  }
+};
+
+namespace {
+
+// kfield -> handles
+template <class Type, fftType Direction> auto &get_fft_plan_cache() {
+  static std::unordered_map<cache_key,
+                            std::vector<fft_plan<Type, Direction>>>
+      fftPlansCache;
+  return fftPlansCache;
+}
+
+// // kfield -> ptrs
+// template <class Type, hipfftType Direction> auto &get_ptr_cache() {
+//   using real = typename Type::real;
+//   using cmplx = typename Type::cmplx;
+//   static std::unordered_map<cache_key, std::pair<real *, cmplx *>> ptrCache;
+//   return ptrCache;
+// }
+
+template <typename Cache>
+void erase_resol_from_cache(Cache &cache, int resol_id) {
+  // Note that in C++20 this could also be std::erase_if
+  int erased = 0;
+  for (auto it = cache.begin(); it != cache.end();) {
+    if (it->first.resol_id == resol_id) {
+      it = cache.erase(it);
+      ++erased;
+    } else
+      ++it;
+  }
+}
+template <class Type, fftType Direction>
+void erase_from_caches(int resol_id) {
+  erase_resol_from_cache(get_fft_plan_cache<Type, Direction>(), resol_id);
+  // erase_resol_from_cache(get_ptr_cache<Type, Direction>(), resol_id);
+}
+
+template <class Type, fftType Direction>
+std::vector<fft_plan<Type, Direction>> plan_all(int resol_id, int kfield, int *loens,
+                                                   int nfft, int64_t *offsets) {
+  using cmplx = typename Type::cmplx;
+  using plan_type = typename Type::plan_type;
+  int flags = FFTW_ESTIMATE | FFTW_NO_SIMD;
+
+  auto key = cache_key{resol_id, kfield};
+  auto &fftPlansCache = get_fft_plan_cache<Type, Direction>();
+  auto fftPlans = fftPlansCache.find(key);
+  if (fftPlans == fftPlansCache.end()) {
+    // the fft plans do not exist yet
+    cmplx *dummy;
+    if constexpr (Direction == FFT_R2C || Direction == FFT_C2R)
+      dummy = fftwf_alloc_complex((size_t)1);
+    else
+      dummy = fftw_alloc_complex((size_t)1);
+
+    std::vector<fft_plan<Type, Direction>> newPlans;
+    newPlans.reserve(nfft);
+    for (int i = 0; i < nfft; ++i) {
+      int nloen = loens[i];
+
+      plan_type plan;
+      int dist = offsets[i + 1] - offsets[i];
+      int embed[] = {1};
+
+      if constexpr (Direction == FFT_R2C)
+        plan = fftwf_plan_many_dft_r2c(
+          1, &nloen, kfield, (float*)dummy, embed, 1, dist, dummy, embed, 1, dist / 2, flags
+        );
+      else if constexpr (Direction == FFT_C2R)
+        plan = fftwf_plan_many_dft_c2r(
+          1, &nloen, kfield, dummy, embed, 1, dist / 2, (float*)dummy, embed, 1, dist, flags
+        );
+      else if constexpr (Direction == FFT_D2Z)
+        plan = fftw_plan_many_dft_r2c(
+          1, &nloen, kfield, (double*)dummy, embed, 1, dist, dummy, embed, 1, dist / 2, flags
+        );
+      else if constexpr (Direction == FFT_Z2D)
+        plan = fftw_plan_many_dft_c2r(
+          1, &nloen, kfield, dummy, embed, 1, dist / 2, (double*)dummy, embed, 1, dist, flags
+        );
+      newPlans.emplace_back(plan, kfield * offsets[i]);
+    }
+    fftPlansCache.insert({key, newPlans});
+  }
+  return fftPlansCache.find(key)->second;
+}
+} // namespace
+
+extern "C" {
+void execute_dir_fft_float(float *data_real, fftwf_complex *data_complex,
+                           int resol_id, int kfield, int *loens, int64_t *offsets, int nfft,
+                           void *growing_allocator) {
+  auto plans = plan_all<Float, FFT_R2C>(resol_id, kfield, loens, nfft, offsets);
+  for (auto &plan : plans)
+    plan.exec(data_real, data_complex);
+}
+void execute_inv_fft_float(fftwf_complex *data_complex, float *data_real,
+                           int resol_id, int kfield, int *loens, int64_t *offsets, int nfft,
+                           void *growing_allocator) {
+  auto plans = plan_all<Float, FFT_C2R>(resol_id, kfield, loens, nfft, offsets);
+  for (auto &plan : plans)
+    plan.exec(data_real, data_complex);
+}
+void execute_dir_fft_double(double *data_real,
+                            fftw_complex *data_complex, int resol_id, int kfield,
+                            int *loens, int64_t *offsets, int nfft,
+                            void *growing_allocator) {
+  auto plans = plan_all<Double, FFT_D2Z>(resol_id, kfield, loens, nfft, offsets);
+  for (auto &plan : plans)
+    plan.exec(data_real, data_complex);
+}
+void execute_inv_fft_double(fftw_complex *data_complex,
+                            double *data_real, int resol_id, int kfield, int *loens,
+                            int64_t *offsets, int nfft, void *growing_allocator) {
+  auto plans = plan_all<Double, FFT_Z2D>(resol_id, kfield, loens, nfft, offsets);
+  for (auto &plan : plans)
+    plan.exec(data_real, data_complex);
+}
+
+void clean_fft(int resol_id) {
+  erase_from_caches<Float, FFT_R2C>(resol_id);
+  erase_from_caches<Float, FFT_C2R>(resol_id);
+  erase_from_caches<Double, FFT_D2Z>(resol_id);
+  erase_from_caches<Double, FFT_Z2D>(resol_id);
+}
+}
diff --git a/src/trans/gpu/algor/gemm.host.cpp b/src/trans/gpu/algor/gemm.host.cpp
new file mode 100644
index 000000000..b343d2b36
--- /dev/null
+++ b/src/trans/gpu/algor/gemm.host.cpp
@@ -0,0 +1,85 @@
+// (C) Copyright 2000- ECMWF.
+// (C) Copyright 2024- NVIDIA.
+//
+// This software is licensed under the terms of the Apache Licence Version 2.0
+// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+// In applying this licence, ECMWF does not waive the privileges and immunities
+// granted to it by virtue of its status as an intergovernmental organisation
+// nor does it submit to any jurisdiction.
+
+
+#include <stdlib.h>
+
+extern "C" void dgemm_(
+  const char *transa, const char *transb,
+  const int *m, const int *n, const int *k,
+  double *alpha,
+  const double *A, const int *lda,
+  const double *B, const int *ldb,
+  double *beta,
+  double *C, const int *ldc
+);
+
+extern "C" void sgemm_(
+  const char *transa, const char *transb,
+  const int *m, const int *n, const int *k,
+  float *alpha,
+  const float *A, const int *lda,
+  const float *B, const int *ldb,
+  float *beta,
+  float *C, const int *ldc
+);
+
+extern "C" {
+void hipblas_dgemm_wrapper(char transa, char transb, int m, int n, int k,
+                           double alpha, const double *A, int lda, int tda,
+                           const double *B, int ldb, int tdb, double beta,
+                           double *C, int ldc, int tdc, int batchCount,
+                           size_t stream, void *growing_allocator) {
+  dgemm_(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc);
+}
+
+void hipblas_sgemm_wrapper(char transa, char transb, int m, int n, int k,
+                           float alpha, const float *A, int lda, int tda,
+                           const float *B, int ldb, int tdb, float beta,
+                           float *C, int ldc, int tdc, int batchCount,
+                           void *growing_allocator) {
+  sgemm_(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc);
+}
+
+void hipblas_sgemm_wrapper_grouped(
+    int resol_id, int blas_id, char transa, char transb, int m, const int *n,
+    const int *k, float alpha, const float *A, int lda, const int64_t *offsetsA,
+    const float *B, const int *ldb, const int64_t *offsetsB, float beta,
+    float *C, int ldc, const int64_t *offsetsC, int batchCount, size_t stream,
+    void *growing_allocator) {
+
+  for (int i = 0; i < batchCount; ++i) {
+    if (m == 0 || n[i] == 0 || k[i] == 0)
+      continue;
+    sgemm_(&transa, &transb, &m, &n[i], &k[i], &alpha, A + offsetsA[i], &lda, B + offsetsB[i],
+         &ldb[i], &beta, C + offsetsC[i], &ldc);
+  }
+}
+
+void hipblas_dgemm_wrapper_grouped(int resol_id, int blas_id, char transa,
+                                   char transb, int m, const int *n,
+                                   const int *k, double alpha, const double *A,
+                                   int lda, const int64_t *offsetsA,
+                                   const double *B, const int *ldb,
+                                   const int64_t *offsetsB, double beta,
+                                   double *C, int ldc, const int64_t *offsetsC,
+                                   int batchCount, size_t stream,
+                                   void *growing_allocator) {
+  for (int i = 0; i < batchCount; ++i) {
+    if (m == 0 || n[i] == 0 || k[i] == 0)
+      continue;
+    dgemm_(&transa, &transb, &m, &n[i], &k[i], &alpha, A + offsetsA[i], &lda, B + offsetsB[i],
+         &ldb[i], &beta, C + offsetsC[i], &ldc);
+  }
+}
+
+void clean_gemm(int resol_id) {
+  // Just here to satisfy compilation -> there's nothing to clean on the host version
+}
+}
diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90
index 88a16b56f..526136931 100755
--- a/src/trans/gpu/internal/trgtol_mod.F90
+++ b/src/trans/gpu/internal/trgtol_mod.F90
@@ -382,12 +382,11 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
       ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B)
     ENDIF
 
-    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
 #ifdef ACCGPU
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
          & STREAM=1_ACC_HANDLE_KIND)
-#endif
-#ifdef OMPGPU
-         & STREAM=1)
+#else
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT))
 #endif
 
 #ifdef ACCGPU
@@ -845,12 +844,11 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
     !$ACC END DATA !PGP
 #endif
 
-    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
 #ifdef ACCGPU
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
          & STREAM=1_ACC_HANDLE_KIND)
-#endif
-#ifdef OMPGPU
-         & STREAM=1)
+#else
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT))
 #endif
 
     ! Free this now
diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90
index 4fcc11496..6e80aa73b 100755
--- a/src/trans/gpu/internal/trltog_mod.F90
+++ b/src/trans/gpu/internal/trltog_mod.F90
@@ -519,12 +519,11 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
       ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B)
     ENDIF
 
-    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
 #ifdef ACCGPU
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
          & STREAM=1_ACC_HANDLE_KIND)
-#endif
-#ifdef OMPGPU
-         & STREAM=1)
+#else
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT))
 #endif
 
 #ifdef OMPGPU
@@ -993,12 +992,11 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
       !$ACC UPDATE HOST(PGP3B) ASYNC(1)
 #endif
     ENDIF
-    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
 #ifdef ACCGPU
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), &
          & STREAM=1_ACC_HANDLE_KIND)
-#endif
-#ifdef OMPGPU
-         & STREAM=1)
+#else
+    IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT))
 #endif
 
     IF (LSYNC_TRANS) THEN