diff --git a/cpp/.clang-format b/cpp/.clang-format
new file mode 100644
index 00000000..1bad48e6
--- /dev/null
+++ b/cpp/.clang-format
@@ -0,0 +1,5 @@
+---
+BasedOnStyle: LLVM
+IndentWidth: 2
+ColumnLimit: 100
+---
diff --git a/cpp/.gitignore b/cpp/.gitignore
new file mode 100644
index 00000000..5a0f77a7
--- /dev/null
+++ b/cpp/.gitignore
@@ -0,0 +1,4 @@
+*.png
+libtorch/
+*build*/
+*.zip
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
new file mode 100644
index 00000000..9a72dbd8
--- /dev/null
+++ b/cpp/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Builds the SuperGlue C++ demo executable against libtorch and OpenCV.
+# CXX_STANDARD 17 is only understood by CMake 3.8 and newer.
+cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
+project(superglue)
+
+if (NOT CMAKE_BUILD_TYPE)
+    message(STATUS "No build type selected, default to Release")
+    set(CMAKE_BUILD_TYPE "Release")
+endif()
+
+find_package(Torch REQUIRED)
+find_package(OpenCV REQUIRED)
+
+add_executable(superglue superglue.cpp)
+target_link_libraries(superglue ${TORCH_LIBRARIES} ${OpenCV_LIBS})
+include_directories(${OpenCV_INCLUDE_DIRS} include)
+
+# superglue.cpp uses std::filesystem, so fail at configure time rather than silently falling back
+# to an older standard when the compiler lacks C++17 support.
+set_property(TARGET superglue PROPERTY CXX_STANDARD 17)
+set_property(TARGET superglue PROPERTY CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_FLAGS "-Wall -Wextra ${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
+set(CMAKE_CXX_FLAGS_DEBUG "-ggdb3")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+
+# The demo looks for the TorchScript archives next to the executable, so copy them into the
+# build directory at configure time.
+file(COPY
+  ${CMAKE_CURRENT_SOURCE_DIR}/SuperPoint.zip
+  ${CMAKE_CURRENT_SOURCE_DIR}/SuperGlue.zip
+  DESTINATION ${CMAKE_BINARY_DIR}
+)
diff --git a/cpp/README.md b/cpp/README.md
new file mode 100644
index 00000000..c29725b8
--- /dev/null
+++ b/cpp/README.md
@@ -0,0 +1,31 @@
+## SuperGlue C++ Demo
+
+### Building
+
+First, generate [TorchScript](https://pytorch.org/tutorials/advanced/cpp_export.html) module files
+of SuperPoint and SuperGlue by JIT-ing the annotated model definitions.
+
+```bash
+$ python3 ../jit.py
+```
+
+This should output `SuperPoint.zip` and `SuperGlue.zip`.
+
+Building the demo project requires `libtorch` and OpenCV 3+. Follow the instructions in
+[*Installing C++ Distributions of PyTorch*](https://pytorch.org/cppdocs/installing.html) for `libtorch` setup.
+
+Create a build directory and configure CMake.
+
+```bash
+$ mkdir build && cd build
+$ cmake .. -DCMAKE_PREFIX_PATH=<libtorch path>
+$ make
+```
+
+### Usage
+
+```bash
+$ ./superglue <image0> <image1> <downscaled_width>
+```
+
+This measures the average FPS over 50 iterations and writes `matches.png`, a visualization of the detected keypoints and matches.
\ No newline at end of file
diff --git a/cpp/io.h b/cpp/io.h
new file mode 100644
index 00000000..4dcc1888
--- /dev/null
+++ b/cpp/io.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <torch/torch.h>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <stdexcept>
+#include <string>
+
+/// Loads the image at `path` as grayscale, scales its intensities by 1/255 into floats and
+/// resizes it to `target_width` columns while preserving the aspect ratio.
+///
+/// @param path         Image file handed to cv::imread.
+/// @param target_width Width in pixels of the returned image; must be positive.
+/// @return A float32 tensor of shape {1, 1, height, width} that owns a copy of the pixel data.
+/// @throws std::invalid_argument if `target_width` is not positive.
+/// @throws std::runtime_error if the image cannot be read.
+inline torch::Tensor read_image(const std::string &path, int target_width) {
+  if (target_width <= 0) {
+    throw std::invalid_argument("read_image: target_width must be positive");
+  }
+  cv::Mat image = cv::imread(path, cv::IMREAD_GRAYSCALE);
+  if (image.empty()) {
+    // cv::imread signals failure with an empty matrix; stop before dividing by image.cols.
+    throw std::runtime_error("read_image: could not read image '" + path + "'");
+  }
+  const float scale = static_cast<float>(target_width) / image.cols;
+  const int target_height = static_cast<int>(std::lround(scale * image.rows));
+  image.convertTo(image, CV_32F, 1.0f / 255.0f);
+  cv::resize(image, image, {target_width, target_height});
+
+  // from_blob only borrows the cv::Mat buffer, so clone before `image` goes out of scope.
+  torch::Tensor tensor = torch::from_blob(image.data, {1, 1, image.rows, image.cols},
+                                          torch::TensorOptions().dtype(torch::kFloat32));
+  return tensor.clone();
+}
+
+/// Copies a tensor holding a single 2-D plane into a single-channel CV_32F matrix.
+///
+/// The tensor is first moved to the CPU, converted to float32 and made contiguous. Its last two
+/// dimensions become the rows and columns of the result.
+///
+/// @throws std::invalid_argument if the tensor has fewer than two dimensions or if its element
+///         count differs from rows * cols (copying it would overrun the matrix buffer).
+inline cv::Mat tensor2mat(torch::Tensor tensor) {
+  if (tensor.dim() < 2) {
+    throw std::invalid_argument("tensor2mat: expected a tensor with at least 2 dimensions");
+  }
+  tensor = tensor.to(torch::kCPU).to(torch::kFloat32).contiguous();
+  const int rows = static_cast<int>(tensor.size(-2));
+  const int cols = static_cast<int>(tensor.size(-1));
+  if (tensor.numel() != static_cast<std::int64_t>(rows) * cols) {
+    throw std::invalid_argument("tensor2mat: tensor must hold exactly one rows x cols plane");
+  }
+  cv::Mat mat(rows, cols, CV_32F);
+  if (tensor.numel() > 0) {
+    std::memcpy(mat.data, tensor.data_ptr<float>(),
+                sizeof(float) * static_cast<std::size_t>(tensor.numel()));
+  }
+  return mat;
+}
\ No newline at end of file
diff --git a/cpp/superglue.cpp b/cpp/superglue.cpp
new file mode 100644
index 00000000..3f5ecd6c
--- /dev/null
+++ b/cpp/superglue.cpp
@@ -0,0 +1,136 @@
+#include <torch/script.h>
+#include <torch/torch.h>
+
+#include <chrono>
+#include <exception>
+#include <filesystem>
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <utility>
+
+#include "io.h"
+#include "viz.h"
+
+using namespace torch;
+using namespace torch::indexing;
+namespace fs = std::filesystem;
+
+/// Pulls the first tensor out of the "keypoints", "scores" and "descriptors" lists of a module
+/// output dictionary and returns them in that order.
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> unpack_result(const IValue &result) {
+  auto dict = result.toGenericDict();
+  return {dict.at("keypoints").toTensorVector()[0], //
+          dict.at("scores").toTensorVector()[0],    //
+          dict.at("descriptors").toTensorVector()[0]};
+}
+
+/// Reinterprets a generic-dict IValue as a typed string -> Tensor dictionary.
+torch::Dict<std::string, Tensor> toTensorDict(const torch::IValue &value) {
+  return c10::impl::toTypedDict<std::string, Tensor>(value.toGenericDict());
+}
+
+/// Runs SuperPoint and SuperGlue on two images, prints timing and match statistics and writes
+/// a visualization of the matches to matches.png.
+///
+/// Usage: <executable> <image0> <image1> <downscaled_width>
+/// @return 0 on success; 1 on bad arguments, a missing module file or a failed image write.
+int main(int argc, const char *argv[]) {
+  if (argc <= 3) {
+    std::cerr << "Usage:" << std::endl;
+    std::cerr << argv[0] << " <image0> <image1> <downscaled_width>" << std::endl;
+    return 1;
+  }
+
+  // std::stoi throws on non-numeric or out-of-range input; report that as a usage error.
+  int target_width = 0;
+  try {
+    target_width = std::stoi(argv[3]);
+  } catch (const std::exception &) {
+    target_width = 0;
+  }
+  if (target_width <= 0) {
+    std::cerr << "Invalid <downscaled_width>: " << argv[3] << std::endl;
+    return 1;
+  }
+
+  torch::manual_seed(1);
+  torch::autograd::GradMode::set_enabled(false);
+
+  torch::Device device(torch::kCPU);
+  if (torch::cuda::is_available()) {
+    std::cout << "CUDA is available! Running on GPU." << std::endl;
+    device = torch::Device(torch::kCUDA);
+  }
+
+  Tensor image0 = read_image(std::string(argv[1]), target_width).to(device);
+  Tensor image1 = read_image(std::string(argv[2]), target_width).to(device);
+
+  // Look for the TorchScript module files in the executable directory
+  auto executable_dir = fs::weakly_canonical(fs::path(argv[0])).parent_path();
+  auto module_path = executable_dir / "SuperPoint.zip";
+  if (!fs::exists(module_path)) {
+    std::cerr << "Could not find the TorchScript module file " << module_path << std::endl;
+    return 1;
+  }
+  // fs::path converts to std::string implicitly only where its native character is char.
+  torch::jit::script::Module superpoint = torch::jit::load(module_path.string());
+  superpoint.eval();
+  superpoint.to(device);
+
+  module_path = executable_dir / "SuperGlue.zip";
+  if (!fs::exists(module_path)) {
+    std::cerr << "Could not find the TorchScript module file " << module_path << std::endl;
+    return 1;
+  }
+  torch::jit::script::Module superglue = torch::jit::load(module_path.string());
+  superglue.eval();
+  superglue.to(device);
+
+  // Time kIterations full SuperPoint + SuperGlue passes and report the average.
+  constexpr int kIterations = 50;
+  using namespace std::chrono;
+  auto t0 = high_resolution_clock::now();
+  Tensor keypoints0, scores0, descriptors0;
+  Tensor keypoints1, scores1, descriptors1;
+  torch::Dict<std::string, Tensor> pred;
+  for (int i = 0; i < kIterations; ++i) {
+    std::tie(keypoints0, scores0, descriptors0) = unpack_result(superpoint.forward({image0}));
+    std::tie(keypoints1, scores1, descriptors1) = unpack_result(superpoint.forward({image1}));
+
+    torch::Dict<std::string, Tensor> input;
+    input.insert("image0", image0);
+    input.insert("image1", image1);
+    input.insert("keypoints0", keypoints0.unsqueeze(0));
+    input.insert("keypoints1", keypoints1.unsqueeze(0));
+    input.insert("scores0", scores0.unsqueeze(0));
+    input.insert("scores1", scores1.unsqueeze(0));
+    input.insert("descriptors0", descriptors0.unsqueeze(0));
+    input.insert("descriptors1", descriptors1.unsqueeze(0));
+    pred = toTensorDict(superglue.forward({input}));
+  }
+  double period =
+      duration_cast<duration<double>>(high_resolution_clock::now() - t0).count() / kIterations;
+  std::cout << period * 1e3 << " ms, FPS: " << 1 / period << std::endl;
+
+  auto matches = pred.at("matches0")[0];
+  // For the 1-D `matches` expected here, nonzero() yields a {K, 1} tensor. squeeze(1) keeps the
+  // index 1-D even when K == 1, whereas a bare squeeze() would collapse it to 0-dim and make
+  // valid.size(0) below throw.
+  auto valid = at::nonzero(matches > -1).squeeze(1);
+  auto mkpts0 = keypoints0.index_select(0, valid);
+  auto mkpts1 = keypoints1.index_select(0, matches.index_select(0, valid));
+  auto confidence = pred.at("matching_scores0")[0].index_select(0, valid);
+
+  std::cout << "Image #0 keypoints: " << keypoints0.size(0) << std::endl;
+  std::cout << "Image #1 keypoints: " << keypoints1.size(0) << std::endl;
+  std::cout << "Valid match count: " << valid.size(0) << std::endl;
+
+  cv::Mat plot =
+      make_matching_plot_fast(image0, image1, keypoints0, keypoints1, mkpts0, mkpts1, confidence);
+  if (!cv::imwrite("matches.png", plot)) {
+    std::cerr << "Failed to write matches.png" << std::endl;
+    return 1;
+  }
+  std::cout << "Done! Created matches.png for visualization." << std::endl;
+  return 0;
+}
diff --git a/cpp/viz.h b/cpp/viz.h
new file mode 100644
index 00000000..f13436ad
--- /dev/null
+++ b/cpp/viz.h
@@ -0,0 +1,118 @@
+#pragma once
+
+#include <torch/torch.h>
+
+#include <opencv2/core.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+
+#include "io.h"
+
+namespace viz_detail {
+
+/// Rounds a keypoint tensor (element 0 = x, element 1 = y) to integer pixel coordinates.
+inline cv::Point to_point(const torch::Tensor &kp) {
+  return {static_cast<int>(std::lround(kp[0].item<float>())),
+          static_cast<int>(std::lround(kp[1].item<float>()))};
+}
+
+/// Marks every row of `keypoints` on `img` as a black dot of radius 2 with a white center.
+inline void draw_dots(cv::Mat &img, const torch::Tensor &keypoints) {
+  const cv::Scalar white(255, 255, 255);
+  const cv::Scalar black(0, 0, 0);
+  // Copy to the CPU once so the per-element reads below do not each hit the device.
+  const torch::Tensor kpts = keypoints.to(torch::kCPU);
+  for (std::int64_t i = 0; i < kpts.size(0); ++i) {
+    const cv::Point pt = to_point(kpts[i]);
+    cv::circle(img, pt, 2, black, -1, cv::LINE_AA);
+    cv::circle(img, pt, 1, white, -1, cv::LINE_AA);
+  }
+}
+
+} // namespace viz_detail
+
+/// Converts `img` to an 8-bit BGR image and marks each keypoint with a filled dot.
+///
+/// @param img       Image tensor accepted by tensor2mat; values are scaled by 255.
+/// @param keypoints Tensor indexed as keypoints[i][0] (x) and keypoints[i][1] (y).
+/// @return The annotated BGR image.
+inline cv::Mat draw_keypoints(const torch::Tensor &img, const torch::Tensor &keypoints) {
+  cv::Mat out = tensor2mat(img);
+  out.convertTo(out, CV_8U, 255.0f);
+  cv::cvtColor(out, out, cv::COLOR_GRAY2BGR);
+  const torch::Tensor kpts = keypoints.to(torch::kCPU);
+  for (std::int64_t i = 0; i < kpts.size(0); ++i) {
+    // std::lround (via to_point) matches the rounding used by make_matching_plot_fast.
+    cv::circle(out, viz_detail::to_point(kpts[i]), 2, {0, 0, 255}, -1, cv::LINE_AA);
+  }
+  return out;
+}
+
+/// Builds a side-by-side visualization of two images with their matches drawn as colored lines.
+///
+/// @param image0, image1 Image tensors accepted by tensor2mat; values are scaled by 255.
+/// @param kpts0, kpts1   All detected keypoints of each image, drawn when `show_keypoints`.
+/// @param mkpts0, mkpts1 Matched keypoints; row i of one is connected to row i of the other.
+/// @param confidence     One value per match, scaled by 255 and mapped through COLORMAP_JET
+///                       (assumes values in [0, 1] -- TODO confirm against the caller).
+/// @param show_keypoints Whether to draw `kpts0` / `kpts1` on the images.
+/// @param margin         Width in pixels of the white gap between the two images.
+/// @return The composed BGR image.
+inline cv::Mat make_matching_plot_fast(const torch::Tensor &image0, const torch::Tensor &image1,
+                                       const torch::Tensor &kpts0, const torch::Tensor &kpts1,
+                                       const torch::Tensor &mkpts0, const torch::Tensor &mkpts1,
+                                       const torch::Tensor &confidence,
+                                       bool show_keypoints = true, int margin = 10) {
+  cv::Mat imgmat0 = tensor2mat(image0);
+  imgmat0.convertTo(imgmat0, CV_8U, 255.0f);
+  cv::Mat imgmat1 = tensor2mat(image1);
+  imgmat1.convertTo(imgmat1, CV_8U, 255.0f);
+
+  if (show_keypoints) {
+    viz_detail::draw_dots(imgmat0, kpts0);
+    viz_detail::draw_dots(imgmat1, kpts1);
+  }
+
+  const int H0 = imgmat0.rows, W0 = imgmat0.cols;
+  const int H1 = imgmat1.rows, W1 = imgmat1.cols;
+  const int H = std::max(H0, H1), W = W0 + W1 + margin;
+
+  // White canvas: image0 on the left, image1 on the right, `margin` columns in between.
+  cv::Mat out = 255 * cv::Mat::ones(H, W, CV_8U);
+  imgmat0.copyTo(out.rowRange(0, H0).colRange(0, W0));
+  imgmat1.copyTo(out.rowRange(0, H1).colRange(W0 + margin, W));
+  cv::cvtColor(out, out, cv::COLOR_GRAY2BGR);
+
+  // Never draw more matches than there are confidences to color them with.
+  const std::int64_t n = std::min({mkpts0.size(0), mkpts1.size(0), confidence.size(0)});
+  if (n <= 0) {
+    // Return early so an empty confidence matrix is never handed to cv::applyColorMap.
+    return out;
+  }
+
+  // Apply colormap to confidences
+  cv::Mat conf_mat = tensor2mat(confidence.unsqueeze(0));
+  conf_mat.convertTo(conf_mat, CV_8U, 255.0f);
+  cv::Mat colors;
+  cv::applyColorMap(conf_mat, colors, cv::COLORMAP_JET);
+
+  const torch::Tensor m0 = mkpts0.to(torch::kCPU);
+  const torch::Tensor m1 = mkpts1.to(torch::kCPU);
+  for (int i = 0; i < static_cast<int>(n); ++i) {
+    const cv::Point pt0 = viz_detail::to_point(m0[i]);
+    cv::Point pt1 = viz_detail::to_point(m1[i]);
+    pt1.x += margin + W0; // shift into the right-hand image
+    const auto c = colors.at<cv::Vec3b>(0, i); // conf_mat is 1 x N: row 0, column i
+    cv::line(out, pt0, pt1, c, 1, cv::LINE_AA);
+    // display line end-points as circles
+    cv::circle(out, pt0, 2, c, -1, cv::LINE_AA);
+    cv::circle(out, pt1, 2, c, -1, cv::LINE_AA);
+  }
+
+  return out;
+}
diff --git a/jit.py b/jit.py
index 5735d15a..2346e9f6 100644
--- a/jit.py
+++ b/jit.py
@@ -1,6 +1,9 @@
-from models.superpoint import SuperPoint
-from models.superglue import SuperGlue
 import torch
 
-torch.jit.save(SuperPoint({}), 'SuperPoint.zip')
-torch.jit.save(SuperGlue({'weights': 'outdoor'}), 'SuperGlue.zip')
+from models.superglue import SuperGlue
+from models.superpoint import SuperPoint
+
+superpoint = SuperPoint({}).eval()  # eval() puts the module in inference mode before export
+superglue = SuperGlue({'weights': 'outdoor'}).eval()  # built with the 'outdoor' weights setting
+torch.jit.save(superpoint, 'SuperPoint.zip')  # archive name the C++ demo looks for
+torch.jit.save(superglue, 'SuperGlue.zip')  # NOTE(review): assumes both are ScriptModules - confirm