diff --git a/benchmarks/scampBench.cpp b/benchmarks/scampBench.cpp
new file mode 100644
index 0000000..645fdc9
--- /dev/null
+++ b/benchmarks/scampBench.cpp
@@ -0,0 +1,176 @@
+// Copyright (c) 2019 Shapelets.io
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <benchmark/benchmark.h>
+#include <khiva/internal/matrixInternal.h>
+#include <khiva/matrix.h>
+
+#include <algorithm>
+#include <cmath>
+#include <iterator>
+
+#include "khivaBenchmark.h"
+
+template <af::Backend BE, int D>
+void MatrixProfile(benchmark::State &state) {
+    af::setBackend(BE);
+    af::setDevice(D);
+
+    auto n = state.range(0);
+    auto m = state.range(1);
+
+    auto tss = af::randu(n, f64);
+
+    af::array profile;
+    af::array index;
+
+    af::sync();
+    while (state.KeepRunning()) {
+        khiva::matrix::matrixProfile(tss, m, profile, index);
+        profile.eval();
+        index.eval();
+        af::sync();
+    }
+
+    addMemoryCounters(state);
+}
+
+template <af::Backend BE, int D>
+void MatrixProfile2(benchmark::State &state) {
+    af::setBackend(BE);
+    af::setDevice(D);
+
+    auto n = state.range(0);
+    auto m = state.range(1);
+
+    auto ta = af::randu(n, f64);
+    auto tb = af::randu(n, f64);
+
+    af::array profile;
+    af::array index;
+
+    af::sync();
+    while (state.KeepRunning()) {
+        khiva::matrix::matrixProfile(ta, tb, m, profile, index);
+        profile.eval();
+        index.eval();
+        af::sync();
+    }
+
+    addMemoryCounters(state);
+}
+
+template <af::Backend BE, int D>
+void MatrixProfileThresh(benchmark::State &state) {
+    af::setBackend(BE);
+    af::setDevice(D);
+
+    auto n = state.range(0);
+    auto m = state.range(1);
+    double threshold = 0.5;
+
+    auto tss = af::randu(n, f64);
+
+    af::array sumCorrelation;
+
+    af::sync();
+    while (state.KeepRunning()) {
+        khiva::matrix::matrixProfileThresh(tss, m, threshold, sumCorrelation);
+        sumCorrelation.eval();
+        af::sync();
+    }
+
+    addMemoryCounters(state);
+}
+
+template <af::Backend BE, int D>
+void MatrixProfileThresh2(benchmark::State &state) {
+    af::setBackend(BE);
+    af::setDevice(D);
+
+    auto n = state.range(0);
+    auto m = state.range(1);
+    double threshold = 0.5;
+
+    auto ta = af::randu(n, f64);
+    auto tb = af::randu(n, f64);
+
+    af::array sumCorrelation;
+
+    af::sync();
+    while (state.KeepRunning()) {
+        khiva::matrix::matrixProfileThresh(ta, tb, m, threshold, sumCorrelation);
+        sumCorrelation.eval();
+        af::sync();
+    }
+
+    addMemoryCounters(state);
+}
+
+template <af::Backend BE, int D>
+void MatrixProfileLR(benchmark::State &state) {
+    af::setBackend(BE);
+    af::setDevice(D);
+
+    auto n = state.range(0);
+    auto m = state.range(1);
+
+    auto tss = af::randu(n, f64);
+
+    af::array profileLeft;
+    af::array profileRight;
+    af::array indexLeft;
+    af::array indexRight;
+
+    af::sync();
+    while (state.KeepRunning()) {
+        khiva::matrix::matrixProfileLR(tss, m, profileLeft, indexLeft, profileRight, indexRight);
+        profileLeft.eval();
+        profileRight.eval();
+        indexLeft.eval();
+        indexRight.eval();
+        af::sync();
+    }
+
+    addMemoryCounters(state);
+}
+
+void cudaBenchmarks() {
+    // Empty cuda benchmarks because we want to benchmark only on cpu
+}
+
+void openclBenchmarks() {
+    // Empty opencl benchmarks because we want to benchmark only on cpu
+}
+
+void cpuBenchmarks() {
+    BENCHMARK_TEMPLATE(MatrixProfile, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
+        ->RangeMultiplier(2)
+        ->Ranges({{16 << 10, 128 << 10}, {16, 512}})
+        ->Unit(benchmark::TimeUnit::kMicrosecond);
+
+    BENCHMARK_TEMPLATE(MatrixProfile2, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
+        ->RangeMultiplier(2)
+        ->Ranges({{16 << 10, 128 << 10}, {16, 512}})
+        ->Unit(benchmark::TimeUnit::kMicrosecond);
+
+    BENCHMARK_TEMPLATE(MatrixProfileLR, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
+        ->RangeMultiplier(2)
+        ->Ranges({{16 << 10, 128 << 10}, {16, 512}})
+        ->Unit(benchmark::TimeUnit::kMicrosecond);
+
+    BENCHMARK_TEMPLATE(MatrixProfileThresh, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
+        ->RangeMultiplier(2)
+        ->Ranges({{16 << 10, 128 << 10}, {16, 512}})
+        ->Unit(benchmark::TimeUnit::kMicrosecond);
+
+    BENCHMARK_TEMPLATE(MatrixProfileThresh2, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
+        ->RangeMultiplier(2)
+        ->Ranges({{16 << 10, 128 << 10}, {16, 512}})
+        ->Unit(benchmark::TimeUnit::kMicrosecond);
+}
+
+KHIVA_BENCHMARK_MAIN(cudaBenchmarks, openclBenchmarks, cpuBenchmarks)
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
new file mode 100644
index 0000000..4660529
--- /dev/null
+++ b/docker/cuda/Dockerfile
@@ -0,0 +1,90 @@
+FROM nvidia/cuda:10.1-devel-ubuntu18.04 AS khiva-cuda-builder-base
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    build-essential \
+    cmake \
+    git \
+    libboost-all-dev \
+    libfftw3-dev \
+    libfreeimage-dev \
+    liblapack-dev \
+    liblapacke-dev \
+    libopenblas-dev \
+    openjdk-8-jdk \
+    python3-pip && \
+    pip3 install setuptools && \
+    pip3 install conan --upgrade && \
+    conan profile new --detect default && \
+    conan profile update settings.compiler.libcxx=libstdc++11 default && \
+    rm -rf /var/lib/apt/lists/* /var/tmp/* /tmp/*
+
+# AF_DISABLE_GRAPHICS - Environment variable to disable graphics at
+# runtime due to lack of graphics support by docker - visit
+# http://arrayfire.org/docs/configuring_environment.htm#af_disable_graphics
+# for more information
+# For build instructions: https://github.com/arrayfire/arrayfire/wiki/Build-Instructions-for-Linux
+ENV CXX g++-7
+ENV CC gcc-7
+WORKDIR /root
+
+COPY *.patch ./
+FROM khiva-cuda-builder-base AS arrayfire-cuda-builder
+ENV AF_DISABLE_GRAPHICS=1
+RUN git clone -b v3.6.2 --depth 1 --recurse-submodules -j 8 https://github.com/arrayfire/arrayfire.git && \
+    cd arrayfire && \
+    mkdir build && cd build && \
+    cmake .. -DCMAKE_INSTALL_PREFIX=/opt/arrayfire \
+             -DCMAKE_BUILD_TYPE=Release \
+             -DAF_BUILD_CPU=ON \
+             -DAF_BUILD_CUDA=ON \
+             -DAF_BUILD_OPENCL=OFF \
+             -DCUDA_cublas_device_LIBRARY=/usr/local/cuda/lib64 \
+             -DAF_BUILD_UNIFIED=ON \
+             -DAF_WITH_GRAPHICS=OFF \
+             -DAF_WITH_NONFREE=OFF \
+             -DAF_BUILD_EXAMPLES=OFF \
+             -DBUILD_TESTING=OFF \
+             -DAF_BUILD_DOCS=OFF \
+             -DAF_WITH_LOGGING=OFF \
+             -DAF_WITH_STATIC_FREEIMAGE=OFF && \
+    cmake --build . --target install -- -j8
+
+FROM khiva-cuda-builder-base AS khiva-cuda-builder
+ARG KHIVA_BRANCH=v0.5.0
+COPY --from=arrayfire-cuda-builder /opt/arrayfire /opt/arrayfire
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
+RUN echo /opt/arrayfire/lib > /etc/ld.so.conf.d/arrayfire.conf && \
+    ldconfig && \
+    git clone -b ${KHIVA_BRANCH} --depth 1  --recurse-submodules -j 8 https://github.com/shapelets/khiva.git && \
+    cd khiva && \
+    mkdir build && \
+    cd build && \
+    conan install .. --build missing && \
+    cmake .. \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_INSTALL_PREFIX=/opt/khiva \
+        -DKHIVA_BUILD_TESTS=ON \
+        -DKHIVA_BUILD_BENCHMARKS=ON \
+        -DKHIVA_BUILD_EXAMPLES=OFF \
+        -DKHIVA_BUILD_DOCUMENTATION=OFF \
+        -DKHIVA_USE_CONAN=ON \
+        -DKHIVA_BUILD_C_BINDINGS=ON \
+        -DKHIVA_BUILD_JNI_BINDINGS=ON && \
+    cmake --build . --target install -- -j8
+
+FROM nvidia/cuda:10.1-runtime-ubuntu18.04 AS arrayfire-cuda
+COPY --from=arrayfire-cuda-builder /opt/arrayfire /opt/arrayfire
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    libopenblas-base \
+    libfftw3-3 \
+    liblapacke && \
+    echo /opt/arrayfire/lib > /etc/ld.so.conf.d/arrayfire.conf && \
+    ldconfig && \
+    rm -rf /var/lib/apt/lists/* /var/tmp/* /tmp/*
+
+FROM arrayfire-cuda AS khiva-cuda
+ENV LD_LIBRARY_PATH="/opt/khiva/lib:${LD_LIBRARY_PATH}"
+COPY --from=khiva-cuda-builder /opt/khiva /opt/khiva
+RUN echo /opt/khiva/lib > /etc/ld.so.conf.d/khiva.conf && \
+    ldconfig
diff --git a/docker/cuda/docker-build.sh b/docker/cuda/docker-build.sh
new file mode 100755
index 0000000..73eb326
--- /dev/null
+++ b/docker/cuda/docker-build.sh
@@ -0,0 +1,12 @@
+#! /usr/bin/env bash
+docker build --target khiva-cuda-builder-base -t $PUSH_REGISTRY/shapelets/khiva-cuda-builder-base:0.5.0 .
+docker build --target arrayfire-cuda-builder -t $PUSH_REGISTRY/shapelets/arrayfire-cuda-builder:3.6.2 .
+docker build --target khiva-cuda-builder --build-arg KHIVA_BRANCH=v0.5.0 -t $PUSH_REGISTRY/shapelets/khiva-cuda-builder:0.5.0 .
+docker run --gpus all --workdir /root/khiva/build $PUSH_REGISTRY/shapelets/khiva-cuda-builder:0.5.0 ctest --output-on-failure -j8
+docker build --target arrayfire-cuda -t $PUSH_REGISTRY/shapelets/arrayfire-cuda:3.6.2 .
+docker build --target khiva-cuda -t $PUSH_REGISTRY/shapelets/khiva-cuda:0.5.0 .
+docker push $PUSH_REGISTRY/shapelets/khiva-cuda-builder-base:0.5.0
+docker push $PUSH_REGISTRY/shapelets/arrayfire-cuda-builder:3.6.2
+docker push $PUSH_REGISTRY/shapelets/khiva-cuda-builder:0.5.0
+docker push $PUSH_REGISTRY/shapelets/arrayfire-cuda:3.6.2
+docker push $PUSH_REGISTRY/shapelets/khiva-cuda:0.5.0
diff --git a/docker/opencl/Dockerfile b/docker/opencl/Dockerfile
new file mode 100644
index 0000000..f47a685
--- /dev/null
+++ b/docker/opencl/Dockerfile
@@ -0,0 +1,90 @@
+FROM intelopencl/intel-opencl:ubuntu-18.04-ppa AS khiva-opencl-builder-base
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    cmake \
+    git \
+    libboost-math-dev \
+    libfftw3-dev \
+    libfreeimage-dev \
+    liblapack-dev \
+    liblapacke-dev \
+    libopenblas-dev \
+    openjdk-8-jdk \
+    ocl-icd-opencl-dev \
+    python3-pip \
+    software-properties-common && \
+    pip3 install setuptools && \
+    pip3 install conan --upgrade && \
+    conan profile new --detect default && \
+    conan profile update settings.compiler.libcxx=libstdc++11 default && \
+    rm -rf /var/lib/apt/lists/* /var/tmp/* /tmp/*
+
+ENV CXX g++-7
+ENV CC gcc-7
+WORKDIR /root
+COPY *.patch ./
+
+# AF_DISABLE_GRAPHICS - Environment variable to disable graphics at
+# runtime due to lack of graphics support by docker - visit
+# http://arrayfire.org/docs/configuring_environment.htm#af_disable_graphics
+# for more information
+# For build instructions: https://github.com/arrayfire/arrayfire/wiki/Build-Instructions-for-Linux
+FROM khiva-opencl-builder-base AS arrayfire-opencl-builder
+ENV AF_DISABLE_GRAPHICS=1
+RUN git clone -b v3.6.2 --depth 1 --recurse-submodules -j 8 https://github.com/arrayfire/arrayfire.git && \
+    cd arrayfire && \
+    mkdir build && cd build && \
+    cmake .. -DCMAKE_INSTALL_PREFIX=/opt/arrayfire \
+             -DCMAKE_BUILD_TYPE=Release \
+             -DAF_BUILD_CPU=ON \
+             -DAF_BUILD_CUDA=OFF \
+             -DAF_BUILD_OPENCL=ON \
+             -DAF_BUILD_UNIFIED=ON \
+             -DAF_WITH_GRAPHICS=OFF \
+             -DAF_WITH_NONFREE=OFF \
+             -DAF_BUILD_EXAMPLES=OFF \
+             -DBUILD_TESTING=OFF \
+             -DAF_BUILD_DOCS=OFF \
+             -DAF_WITH_LOGGING=OFF \
+             -DAF_WITH_STATIC_FREEIMAGE=OFF && \
+    cmake --build . --target install -- -j8
+
+FROM khiva-opencl-builder-base AS khiva-builder
+ARG KHIVA_BRANCH=v0.5.0
+WORKDIR /root
+COPY --from=arrayfire-opencl-builder /opt/arrayfire /opt/arrayfire
+RUN echo /opt/arrayfire/lib > /etc/ld.so.conf.d/arrayfire.conf && \
+    ldconfig && \
+    git clone -b ${KHIVA_BRANCH} --depth 1 --recurse-submodules -j 8 https://github.com/shapelets/khiva.git && \
+    cd khiva && \
+    mkdir build && \
+    cd build && \
+    conan install .. --build missing && \
+    cmake .. -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_INSTALL_PREFIX=/opt/khiva \
+        -DKHIVA_BUILD_TESTS=ON \
+        -DKHIVA_BUILD_BENCHMARKS=ON \
+        -DKHIVA_BUILD_EXAMPLES=OFF \
+        -DKHIVA_BUILD_DOCUMENTATION=OFF \
+        -DKHIVA_USE_CONAN=ON \
+        -DKHIVA_BUILD_C_BINDINGS=ON \
+        -DKHIVA_BUILD_JNI_BINDINGS=ON && \
+    cmake --build . --target install -- -j8
+
+FROM intelopencl/intel-opencl:ubuntu-18.04-ppa AS arrayfire-intel-opencl
+COPY --from=arrayfire-opencl-builder /opt/arrayfire /opt/arrayfire
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    libopenblas-base \
+    libfftw3-3 \
+    liblapacke && \
+    echo /opt/arrayfire/lib > /etc/ld.so.conf.d/arrayfire.conf && \
+    ldconfig && \
+    rm -rf /var/lib/apt/lists/* /var/tmp/* /tmp/*
+
+FROM arrayfire-intel-opencl as khiva-intel-opencl
+ENV LD_LIBRARY_PATH="/opt/khiva/lib:${LD_LIBRARY_PATH}"
+COPY --from=khiva-builder /opt/khiva /opt/khiva
+RUN echo /opt/khiva/lib > /etc/ld.so.conf.d/khiva.conf && \
+    ldconfig
diff --git a/docker/opencl/docker-build.sh b/docker/opencl/docker-build.sh
new file mode 100755
index 0000000..9a73145
--- /dev/null
+++ b/docker/opencl/docker-build.sh
@@ -0,0 +1,11 @@
+#! /usr/bin/env bash
+docker build --target khiva-opencl-builder-base -t $PUSH_REGISTRY/shapelets/khiva-opencl-builder-base:0.5.0 .
+docker build --target arrayfire-opencl-builder -t $PUSH_REGISTRY/shapelets/arrayfire-opencl-builder:3.6.2 .
+docker build --target khiva-builder --build-arg KHIVA_BRANCH=v0.5.0 -t $PUSH_REGISTRY/shapelets/khiva-builder:0.5.0 .
+docker build --target arrayfire-intel-opencl -t $PUSH_REGISTRY/shapelets/arrayfire-intel-opencl:3.6.2 .
+docker build --target khiva-intel-opencl -t $PUSH_REGISTRY/shapelets/khiva:0.5.0-opencl .
+docker push $PUSH_REGISTRY/shapelets/khiva-opencl-builder-base:0.5.0
+docker push $PUSH_REGISTRY/shapelets/arrayfire-opencl-builder:3.6.2
+docker push $PUSH_REGISTRY/shapelets/khiva-builder:0.5.0
+docker push $PUSH_REGISTRY/shapelets/arrayfire-intel-opencl:3.6.2
+docker push $PUSH_REGISTRY/shapelets/khiva:0.5.0-opencl
diff --git a/include/khiva/internal/matrixInternal.h b/include/khiva/internal/matrixInternal.h
index f9afa1b..43fb873 100644
--- a/include/khiva/internal/matrixInternal.h
+++ b/include/khiva/internal/matrixInternal.h
@@ -27,6 +27,7 @@ using MatrixProfilePair = std::pair<DistancesVector, IndexesVector>;
 using LeftRightProfilePair = std::pair<MatrixProfilePair, MatrixProfilePair>;
 using Chain = std::vector<unsigned int>;
 using ChainVector = std::vector<Chain>;
+using SumCorrelationsVector = std::vector<double>;
 
 /**
  * @brief Calculates the sliding dot product of the time series 'q' against t.
@@ -189,6 +190,10 @@ KHIVAAPI LeftRightProfilePair scampLR(std::vector<double> &&ta, long m);
 KHIVAAPI void scampLR(af::array tss, long m, af::array &profileLeft, af::array &indexLeft, af::array &profileRight,
                       af::array &indexRight);
 
+KHIVAAPI void scampThresh(af::array tss, long m, double threshold, af::array &sumCorrelation);
+
+KHIVAAPI void scampThresh(af::array ta, af::array tb, long m, double threshold, af::array &sumCorrelation);
+
 }  // namespace internal
 }  // namespace matrix
 }  // namespace khiva
diff --git a/include/khiva/matrix.h b/include/khiva/matrix.h
index 23eb0ab..ef27237 100644
--- a/include/khiva/matrix.h
+++ b/include/khiva/matrix.h
@@ -130,17 +130,17 @@ KHIVAAPI void stomp(const af::array &ta, const af::array &tb, long m, af::array
 KHIVAAPI void stomp(const af::array &t, long m, af::array &profile, af::array &index);
 
 /**
- * @brief Calculates the matrix profile between 't' and itself using a subsequence length of 'm'.
+ * @brief Calculates the matrix profile between 'tss' and itself using a subsequence length of 'm'.
  * This method filters the trivial matches.
  *
  * [1] Yan Zhu, Zachary Zimmerman, Nader Shakibay Senobari, Chin-Chia Michael Yeh, Gareth Funning, Abdullah Mueen,
  * Philip Brisk and Eamonn Keogh (2016). Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one
  * Hundred Million Barrier for Time Series Motifs and Joins. IEEE ICDM 2016.
  *
- * @param tss Query time series.
+ * @param tss Query and reference time series.
  * @param m Subsequence length.
- * @param profile The matrix profile, which reflects the distance to the closer element of the subsequence from 'ta'
- * in 'tb'.
+ * @param profile The matrix profile, which reflects the distance to the closest element of the subsequence from 'tss'
+ * in a different location of itself.
  * @param index The matrix profile index, which points to where the aforementioned minimum is located.
  */
 KHIVAAPI void matrixProfile(const af::array &tss, long m, af::array &profile, af::array &index);
@@ -152,11 +152,12 @@ KHIVAAPI void matrixProfile(const af::array &tss, long m, af::array &profile, af
  * Philip Brisk and Eamonn Keogh (2016). Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one
  * Hundred Million Barrier for Time Series Motifs and Joins. IEEE ICDM 2016.
  *
- * @param ta Query and reference time series.
- * @param tb Query and reference time series.
+ * @param ta Query time series.
+ * @param tb Reference time series.
+
  * @param m Subsequence length.
- * @param profile The matrix profile, which reflects the distance to the closer element of the subsequence from 't' in a
- * different location of itself.
+ * @param profile The matrix profile, which reflects the distance to the closest element of the subsequence from 'ta'
+ * in 'tb'.
  * @param index The matrix profile index, which points to where the aforementioned minimum is located.
  */
 KHIVAAPI void matrixProfile(const af::array &ta, const af::array &tb, long m, af::array &profile, af::array &index);
@@ -200,6 +201,36 @@ KHIVAAPI void matrixProfileLR(const af::array &tss, long m, af::array &profileLe
  */
 KHIVAAPI void getChains(const af::array &tss, long m, af::array &chains);
 
+/**
+ * @brief Calculates the sum of correlations above a threshold between 'tss' and itself using a subsequence
+ * length of 'm' at each location in 'tss'.
+ *
+ * [1] Yan Zhu, Zachary Zimmerman, Nader Shakibay Senobari, Chin-Chia Michael Yeh, Gareth Funning, Abdullah Mueen,
+ * Philip Brisk and Eamonn Keogh (2016). Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one
+ * Hundred Million Barrier for Time Series Motifs and Joins. IEEE ICDM 2016.
+ *
+ * @param tss Query and reference time series.
+ * @param m Subsequence length.
+ * @param sumCorrelation The sum of correlations above a threshold between 'tss' and itself using a subsequence length
+ * of 'm' at each location in 'tss'.
+ */
+KHIVAAPI void matrixProfileThresh(af::array tss, long m, double threshold, af::array &sumCorrelation);
+
+/**
+ * @brief Calculates the sum of correlations above a threshold between 'ta' and 'tb' using a subsequence length of 'm'.
+ *
+ * [1] Yan Zhu, Zachary Zimmerman, Nader Shakibay Senobari, Chin-Chia Michael Yeh, Gareth Funning, Abdullah Mueen,
+ * Philip Brisk and Eamonn Keogh (2016). Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one
+ * Hundred Million Barrier for Time Series Motifs and Joins. IEEE ICDM 2016.
+ *
+ * @param ta Query time series.
+ * @param tb Reference time series.
+ * @param m Subsequence length.
+ * @param sumCorrelation The sum of correlations above a threshold between 'ta' and 'tb' using a subsequence length of
+ * 'm' at each location in 'ta'.
+ */
+KHIVAAPI void matrixProfileThresh(af::array ta, af::array tb, long m, double threshold, af::array &sumCorrelation);
+
 }  // namespace matrix
 }  // namespace khiva
 
diff --git a/src/khiva/matrix.cpp b/src/khiva/matrix.cpp
index 394888d..01eb7dc 100644
--- a/src/khiva/matrix.cpp
+++ b/src/khiva/matrix.cpp
@@ -113,5 +113,13 @@ void matrixProfileLR(const af::array &tss, long m, af::array &profileLeft, af::a
 
 void getChains(const af::array &tss, long m, af::array &chains) { internal::getChains(tss, m, chains); }
 
+void matrixProfileThresh(af::array tss, long m, double threshold, af::array &sumCorrelation) {
+    internal::scampThresh(tss, m, threshold, sumCorrelation);
+}
+
+void matrixProfileThresh(af::array ta, af::array tb, long m, double threshold, af::array &sumCorrelation) {
+    internal::scampThresh(ta, tb, m, threshold, sumCorrelation);
+}
+
 }  // namespace matrix
 }  // namespace khiva
diff --git a/src/khiva/matrixInternal.cpp b/src/khiva/matrixInternal.cpp
index 22d7dc5..b119028 100644
--- a/src/khiva/matrixInternal.cpp
+++ b/src/khiva/matrixInternal.cpp
@@ -71,15 +71,15 @@ void InitProfileMemory(SCAMP::SCAMPArgs &args) {
             }
             break;
         }
-        // case SCAMP::PROFILE_TYPE_SUM_THRESH: {
-        //    args.profile_a.data.emplace_back();
-        //    args.profile_a.data[0].double_value.resize(args.timeseries_a.size() - args.window + 1, 0);
-        //    if (args.has_b) {
-        //        args.profile_b.data.emplace_back();
-        //        args.profile_b.data[0].double_value.resize(args.timeseries_b.size() - args.window + 1, 0);
-        //    }
-        //    break;
-        //}
+        case SCAMP::PROFILE_TYPE_SUM_THRESH: {
+            args.profile_a.data.emplace_back();
+            args.profile_a.data[0].double_value.resize(args.timeseries_a.size() - args.window + 1, 0);
+            if (args.has_b) {
+                args.profile_b.data.emplace_back();
+                args.profile_b.data[0].double_value.resize(args.timeseries_b.size() - args.window + 1, 0);
+            }
+            break;
+        }
         default:
             break;
     }
@@ -128,6 +128,20 @@ MatrixProfilePair getProfileOutput(const SCAMP::Profile &p, uint64_t window) {
     return std::make_pair(std::move(distances), std::move(indexes));
 }
 
+SumCorrelationsVector getThreshProfileOutput(const SCAMP::Profile &p, uint64_t window) {
+    SumCorrelationsVector sumCorrelations;
+
+    const auto &arr = p.data[0].double_value;
+    sumCorrelations.resize(arr.size());
+
+    for (int i = 0; i < arr.size(); ++i) {
+        sumCorrelations[i] = arr[i];
+        // distances[i] = static_cast<double>(convertToEuclidean(e.floats[0], window));
+        // indexes[i] = (e.floats[0] < -1) ? -1 : e.ints[1];
+    }
+    return sumCorrelations;
+}
+
 void runScamp(SCAMP::SCAMPArgs &args) {
     std::vector<int> devices;
 #ifdef _HAS_CUDA_
@@ -176,6 +190,37 @@ MatrixProfilePair scamp(std::vector<double> &&ta, std::vector<double> &&tb, long
     return getProfileOutput(args.profile_a, args.window);
 }
 
+SumCorrelationsVector scampThresh(std::vector<double> &&tss, long m, double threshold) {
+    auto args = getDefaultArgs();
+    args.window = m;
+    args.has_b = false;
+    args.timeseries_a = std::move(tss);
+
+    args.distance_threshold = threshold;
+    args.profile_a.type = SCAMP::PROFILE_TYPE_SUM_THRESH;
+    args.profile_b.type = SCAMP::PROFILE_TYPE_SUM_THRESH;
+    args.precision_type = SCAMP::PRECISION_DOUBLE;
+    args.profile_type = SCAMP::PROFILE_TYPE_SUM_THRESH;
+    runScamp(args);
+    return getThreshProfileOutput(args.profile_a, args.window);
+}
+
+SumCorrelationsVector scampThresh(std::vector<double> &&ta, std::vector<double> &&tb, long m, double threshold) {
+    auto args = getDefaultArgs();
+    args.window = m;
+    args.has_b = true;
+    args.timeseries_a = std::move(ta);
+    args.timeseries_b = std::move(tb);
+
+    args.distance_threshold = threshold;
+    args.profile_a.type = SCAMP::PROFILE_TYPE_SUM_THRESH;
+    args.profile_b.type = SCAMP::PROFILE_TYPE_SUM_THRESH;
+    args.precision_type = SCAMP::PRECISION_DOUBLE;
+    args.profile_type = SCAMP::PROFILE_TYPE_SUM_THRESH;
+    runScamp(args);
+    return getThreshProfileOutput(args.profile_a, args.window);
+}
+
 void sortChains(ChainVector &chains) {
     chains.erase(std::remove_if(chains.begin(), chains.end(), [](const Chain &currChain) { return currChain.empty(); }),
                  chains.end());
@@ -457,6 +502,41 @@ void scamp(af::array ta, af::array tb, long m, af::array &profile, af::array &in
     }
 }
 
+void scampThresh(af::array tss, long m, double threshold, af::array &sumCorrelation) {
+    if (tss.dims(2) > 1 || tss.dims(3) > 1) {
+        throw std::invalid_argument("Dimension 2 o dimension 3 is bigger than 1");
+    }
+
+    sumCorrelation = af::array(tss.dims(0) - m + 1, tss.dims(1), f64);
+
+    tss = tss.as(f64);
+    for (dim_t tssIdx = 0; tssIdx < tss.dims(1); ++tssIdx) {
+        auto vect = khiva::vectorutil::get<double>(tss(af::span, tssIdx));
+        auto res = ::scampThresh(std::move(vect), m, threshold);
+        sumCorrelation(af::span, tssIdx) = khiva::vectorutil::createArray<double>(res);
+    }
+}
+
+void scampThresh(af::array ta, af::array tb, long m, double threshold, af::array &sumCorrelation) {
+    if (ta.dims(2) > 1 || ta.dims(3) > 1 || tb.dims(2) > 1 || tb.dims(3) > 1) {
+        throw std::invalid_argument("Dimension 2 o dimension 3 is bigger than 1");
+    }
+
+    sumCorrelation = af::array(tb.dims(0) - m + 1, ta.dims(1), tb.dims(1), f64);
+
+    ta = ta.as(f64);
+    tb = tb.as(f64);
+
+    for (dim_t tbIdx = 0; tbIdx < tb.dims(1); ++tbIdx) {
+        for (dim_t taIdx = 0; taIdx < ta.dims(1); ++taIdx) {
+            auto vectA = khiva::vectorutil::get<double>(ta(af::span, taIdx));
+            auto vectB = khiva::vectorutil::get<double>(tb(af::span, tbIdx));
+            auto res = ::scampThresh(std::move(vectB), std::move(vectA), m, threshold);
+            sumCorrelation(af::span, taIdx, tbIdx) = khiva::vectorutil::createArray<double>(res);
+        }
+    }
+}
+
 LeftRightProfilePair scampLR(std::vector<double> &&ta, long m) {
     auto args = getDefaultArgs();
     args.window = m;
diff --git a/src/scamp/CMakeLists.txt b/src/scamp/CMakeLists.txt
index 50ae443..ad48564 100644
--- a/src/scamp/CMakeLists.txt
+++ b/src/scamp/CMakeLists.txt
@@ -37,48 +37,62 @@ endif()
 
 # ----------------------------------------------------------------------------------------
 #                              ===== Build targets =====
+if(CMAKE_CUDA_COMPILER)
+  enable_language(CUDA)
+    # For CUFFT libraries
+    find_package(CUDA ${CMAKE_CUDA_VERSION} REQUIRED)
+    message("CUDA_LIBRARIES = ${CUDA_LIBRARIES}")
+    message("CUDA_CUFFT_LIBRARIES = ${CUDA_CUFFT_LIBRARIES}")
+    message("CUDA_cudart_LIBRARY = ${CUDA_cudart_LIBRARY}")
+endif()
 
 add_library(common SCAMP/src/common.cpp)
 
+if (CMAKE_CUDA_COMPILER) 
+  target_link_libraries(common ${CUDA_LIBRARIES})
+endif()
+
+add_library(qt_helper SCAMP/src/qt_helper.cpp)
+
 if (CMAKE_CUDA_COMPILER)
   add_library(qt_kernels SCAMP/src/qt_kernels.cu)
+  target_link_libraries(qt_helper ${CUDA_CUFFT_LIBRARIES} qt_kernels common)
   set_target_properties(qt_kernels
                       PROPERTIES
                       POSITION_INDEPENDENT_CODE ON
-					  COMPILE_FLAGS "${COMPILE_FLAGS} ${CUDA_GENCODE_FLAGS}") 
+					            COMPILE_FLAGS "${COMPILE_FLAGS} ${CUDA_GENCODE_FLAGS}")
+else()
+  target_link_libraries(qt_helper common)
+endif()
 
+add_library(kernel_common SCAMP/src/kernel_common.cpp)
 
+if (CMAKE_CUDA_COMPILER)
+  add_library(gpu_utils SCAMP/src/kernel_gpu_utils.cu)
+  target_link_libraries(gpu_utils kernel_common ${CUDA_LIBRARIES})
   add_library(gpu_kernels SCAMP/src/kernels.cu)
-  target_link_libraries(gpu_kernels common)
+  target_link_libraries(gpu_kernels gpu_utils kernel_common common)
   set_target_properties(gpu_kernels
                       PROPERTIES
                       POSITION_INDEPENDENT_CODE ON
 					  COMPILE_FLAGS "${COMPILE_FLAGS} ${CUDA_GENCODE_FLAGS}") 
-
 endif()
 
-add_library(qt_helper SCAMP/src/qt_helper.cpp) 
-target_link_libraries(qt_helper 
-					common
-					$<$<BOOL:${CMAKE_CUDA_COMPILER}>:cufft>
-					$<$<BOOL:${CMAKE_CUDA_COMPILER}>:qt_kernels>)
-
-
 add_library(cpu_stats SCAMP/src/cpu_stats.cpp)
 target_link_libraries(cpu_stats common)
 
 add_library(cpu_kernels SCAMP/src/cpu_kernels.cpp)
-target_link_libraries(cpu_kernels common)
+target_link_libraries(cpu_kernels kernel_common common)
 
 add_library(tile SCAMP/src/tile.cpp) 
 target_link_libraries(tile 
+          common
 					$<$<BOOL:${CMAKE_CUDA_COMPILER}>:gpu_kernels>
 					cpu_kernels 
 					qt_helper) 
 
-
 add_library(scamp SCAMP/src/SCAMP.cpp) 
-target_link_libraries(scamp PRIVATE tile cpu_stats)
+target_link_libraries(scamp PRIVATE tile cpu_stats common qt_helper)
 
 
 set_target_properties(scamp
@@ -95,7 +109,15 @@ set_target_properties(qt_helper
 
 set_target_properties(cpu_stats
                       PROPERTIES
-                      POSITION_INDEPENDENT_CODE ON) 
+                      POSITION_INDEPENDENT_CODE ON)
+
+set_target_properties(kernel_common
+                      PROPERTIES
+                      POSITION_INDEPENDENT_CODE ON)
+
+set_target_properties(gpu_utils
+                      PROPERTIES
+                      POSITION_INDEPENDENT_CODE ON)
 
 set_target_properties(cpu_kernels
                       PROPERTIES
diff --git a/src/scamp/SCAMP b/src/scamp/SCAMP
index 34a4981..978a58f 160000
--- a/src/scamp/SCAMP
+++ b/src/scamp/SCAMP
@@ -1 +1 @@
-Subproject commit 34a498197d1fc36d74408936866ffb634d43be65
+Subproject commit 978a58f7c4ee0a5d6346e0697177c7eb3a6424a0
diff --git a/test/matrixTest.cpp b/test/matrixTest.cpp
index 4696028..d7db7df 100644
--- a/test/matrixTest.cpp
+++ b/test/matrixTest.cpp
@@ -1251,6 +1251,61 @@ void findBestDiscordsException() {
     }
 }
 
+void matrixSumCorrelation() {
+    auto ta = khiva::vectorutil::createArray(
+        {-0.9247, 0.1808,  2.5441, 0.3516,  -0.3452, 0.2191, -0.7687, 0.2413, -1.1948, 0.8927, -0.5378, 0.2270,
+         0.9354,  -0.7613, 0.5787, -0.6174, 0.5889,  0.7897, -0.0645, 0.9520, -1.1411, 0.8281, -0.7363, -0.7446},
+        8, 3);
+
+    auto tb = khiva::vectorutil::createArray({0.2512, 0.6436, -2.3651, -0.7734, -0.0511, 1.6693, 1.9453, -1.9047,
+                                              0.8149, -0.1831, -0.1542, -1.3490, 1.2285, -1.0472, 0.3911, -0.0637},
+                                             8, 2);
+    long m = 3;
+    double threshold = 0.5;
+
+    af::array sumCorrelation;
+
+    khiva::matrix::matrixProfileThresh(ta, tb, m, threshold, sumCorrelation);
+
+    ASSERT_EQ(sumCorrelation.dims(0), 6);
+    ASSERT_EQ(sumCorrelation.dims(1), 3);
+    ASSERT_EQ(sumCorrelation.dims(2), 2);
+    ASSERT_EQ(sumCorrelation.dims(3), 1);
+
+    auto sumCorrelationVect = khiva::vectorutil::get<double>(sumCorrelation);
+    ASSERT_NEAR(1.9584, sumCorrelationVect[7], 1e-3);
+    ASSERT_NEAR(1.8904, sumCorrelationVect[17], 1e-3);
+    ASSERT_NEAR(2.1383, sumCorrelationVect[18], 1e-3);
+    ASSERT_NEAR(2.7268, sumCorrelationVect[27], 1e-3);
+    ASSERT_NEAR(0.8593, sumCorrelationVect[35], 1e-3);
+}
+
+void matrixSumCorrelationSelfJoin() {
+    auto ta = khiva::vectorutil::createArray(
+        {0.6010, 0.0278, 0.9806, 0.2126, 0.0655, 0.5497, 0.2864, 0.3410, 0.7509, 0.4105, 0.1583, 0.3712,
+         0.3543, 0.6450, 0.9675, 0.3636, 0.4165, 0.5814, 0.8962, 0.3712, 0.6755, 0.6105, 0.5232, 0.5567,
+         0.7896, 0.8966, 0.0536, 0.5775, 0.2908, 0.9941, 0.5143, 0.3670, 0.3336, 0.0363, 0.5349, 0.0123,
+         0.3988, 0.9787, 0.2308, 0.6244, 0.7917, 0.1654, 0.8657, 0.3766, 0.7331, 0.2522, 0.9644, 0.4711},
+        16, 3);
+
+    long m = 6;
+    double threshold = 0.5;
+
+    af::array sumCorrelation;
+
+    khiva::matrix::matrixProfileThresh(ta, m, threshold, sumCorrelation);
+
+    ASSERT_EQ(sumCorrelation.dims(0), 11);
+    ASSERT_EQ(sumCorrelation.dims(1), 3);
+    ASSERT_EQ(sumCorrelation.dims(2), 1);
+    ASSERT_EQ(sumCorrelation.dims(3), 1);
+
+    auto sumCorrelationVect = khiva::vectorutil::get<double>(sumCorrelation);
+    ASSERT_NEAR(0.8752, sumCorrelationVect[7], 1e-3);
+    ASSERT_NEAR(0.0, sumCorrelationVect[21], 1e-3);
+    ASSERT_NEAR(1.3062, sumCorrelationVect[25], 1e-3);
+}
+
 KHIVA_TEST(MatrixTests, SlidingDotProduct, slidingDotProduct)
 KHIVA_TEST(MatrixTests, MeanStdev, meanStdev)
 KHIVA_TEST(MatrixTests, MeanStdevMEqualsLength, meanStdevMEqualsLength)
@@ -1289,3 +1344,5 @@ KHIVA_TEST(MatrixTests, FindBestDiscordsMirror, findBestDiscordsMirror)
 KHIVA_TEST(MatrixTests, FindBestDiscordsConsecutive, findBestDiscordsConsecutive)
 KHIVA_TEST(MatrixTests, FindBestDiscordsMirrorException, findBestDiscordsMirrorException)
 KHIVA_TEST(MatrixTests, FindBestDiscordsException, findBestDiscordsException)
+KHIVA_TEST(MatrixTests, MatrixSumCorrelation, matrixSumCorrelation)
+KHIVA_TEST(MatrixTests, MatrixSumCorrelationSelfJoin, matrixSumCorrelationSelfJoin)