UoB-HPC
diff --git a/‎DefaultCPU/sp_gemm.hh‎
Lines changed: 55 additions & 0 deletions b/‎DefaultCPU/sp_gemm.hh‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎DefaultGPU/sp_gemm.hh‎
Lines changed: 54 additions & 0 deletions b/‎DefaultGPU/sp_gemm.hh‎
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,55 @@
+#pragma once
+
+#if defined CPU_DEFAULT
+
+#include "../include/kernels/CPU/sp_gemm.hh"
+#include "../include/utilities.hh"
+
+namespace cpu {
+/** Default CPU GEMM kernel, compiled only when CPU_DEFAULT is defined.
+ * No BLAS library is called: the product is computed by a naive triple loop
+ * over the buffers inherited from sp_gemm<T>. */
+template <typename T>
+class sp_gemm_cpu : public sp_gemm<T> {
+ public:
+  using sp_gemm<T>::sp_gemm;
+  using sp_gemm<T>::callConsume;
+  using sp_gemm<T>::m_;
+  using sp_gemm<T>::n_;
+  using sp_gemm<T>::k_;
+  using sp_gemm<T>::A_;
+  using sp_gemm<T>::B_;
+  using sp_gemm<T>::C_;
+
+ private:
+  /** Perform the GEMM kernel.
+   *
+   * Naive column-major GEMM with alpha fixed to 1 and beta fixed to 0, i.e.
+   * C[M,N] = A[M,K] * B[K,N]. Element (r, c) of a matrix with R rows lives at
+   * flat index c * R + r, so A_ and C_ use m_ as leading dimension and B_
+   * uses k_. Every element of C_ is overwritten; its previous contents are
+   * never read. */
+  void callGemm() override {
+    // Widen the dimensions once so that every flat index below is computed in
+    // 64-bit arithmetic. With plain `int` the products (e.g. z * m_) overflow,
+    // which is undefined behaviour, as soon as a matrix holds more than
+    // INT_MAX elements.
+    const long long rows = static_cast<long long>(m_);
+    const long long cols = static_cast<long long>(n_);
+    const long long inner = static_cast<long long>(k_);
+
+    // Loop nest order (row, column, inner) and the order in which terms are
+    // accumulated are deliberately kept as in the reference implementation so
+    // that results and timing characteristics of the baseline do not change.
+    for (long long row = 0; row < rows; row++) {
+      for (long long col = 0; col < cols; col++) {
+        T acc = 0.0;
+        for (long long p = 0; p < inner; p++) {
+          acc += A_[p * rows + row] * B_[col * inner + p];
+        }
+        C_[col * rows + row] = acc;
+      }
+    }
+    // Ensure compiler doesn't optimise away the work being done
+    callConsume();
+  }
+
+  /** Perform any required steps before calling the GEMM kernel that should
+   * be timed. Nothing is needed for the naive CPU kernel. */
+  void preLoopRequirements() override {}
+
+  /** Perform any required steps after calling the GEMM kernel that should
+   * be timed. Nothing is needed for the naive CPU kernel. */
+  void postLoopRequirements() override {}
+};
+
+}  // namespace cpu
+#endif
@@ -0,0 +1,54 @@
+#pragma once
+
+#if defined GPU_DEFAULT
+
+#include <cmath>
+
+#include "../include/kernels/GPU/sp_gemm.hh"
+#include "../include/utilities.hh"
+
+namespace gpu {
+/** Placeholder GEMM GPU kernel, compiled only when GPU_DEFAULT is defined.
+ * Every member is a stub: no data is allocated and no kernel is launched. */
+template <typename T>
+class sp_gemm_gpu : public sp_gemm<T> {
+ public:
+  using sp_gemm<T>::sp_gemm;
+
+  /** Stub replacement for the benchmark's compute step.
+   * Runs no kernel at all and returns the fixed sentinel
+   * {INFINITY, INFINITY, 0.0}.
+   * NOTE(review): the sentinel is presumably read as "no valid runtime /
+   * checksum, zero GFLOP/s" - confirm against the definition of
+   * time_checksum_gflop. */
+  time_checksum_gflop compute() {
+    // Intended to replace compute() from the base `kernel` class, as
+    // DefaultGPU should do nothing.
+    // NOTE(review): not marked `override`; if the base function is
+    // non-virtual this only hides it - confirm callers use the derived type.
+    return {INFINITY, INFINITY, 0.0};
+  }
+
+  /** Initialise the required data structures.
+   * Default GPU implementation - all arguments are ignored. Parameter names
+   * are commented out to avoid unused-parameter warnings. */
+  void initialise(gpuOffloadType /*offload*/, int /*m*/, int /*n*/,
+                  int /*k*/) override {
+    // Default GPU implementation - do nothing.
+  }
+
+ private:
+  /** Make a call to the BLAS Library Kernel (no-op in this stub). */
+  void callGemm() override {
+    // Default GPU implementation - do nothing.
+  }
+
+  /** Perform any required steps before calling the GEMM kernel that should
+   * be timed (no-op in this stub). */
+  void preLoopRequirements() override {
+    // Default GPU implementation - do nothing.
+  }
+
+  /** Perform any required steps after calling the GEMM kernel that should
+   * be timed (no-op in this stub). */
+  void postLoopRequirements() override {
+    // Default GPU implementation - do nothing.
+  }
+
+  /** Do any necessary cleanup (free pointers, close library handles, etc.)
+   * after Kernel has been called (no-op in this stub). */
+  void postCallKernelCleanup() override {
+    // Default GPU implementation - do nothing.
+  }
+};
+}  // namespace gpu
+#endif