
XPU and MPS take 3 #276

Merged: 43 commits, Feb 20, 2025
Commits (43; diff shown from 29 commits)
- f3a1914 Add MacOS GPU device option (ElliottKasoar, Apr 3, 2024)
- 42bcd61 Add XPU device option (ElliottKasoar, May 6, 2024)
- 7039084 Update C++ XPU interface to handle multiple devices indices. (jatkinson1000, Oct 11, 2024)
- 2a30f6a Update ftorch.F90 for XPU support (ma595, Dec 20, 2024)
- f58fa92 Make device enums consistent with PyTorch (jwallwork23, Feb 6, 2025)
- 0e5e56e Add ENABLE_XPU option for CMakeLists (jwallwork23, Feb 6, 2025)
- 32a2b42 Move towards generalising device type in MultiGPU example (jwallwork23, Feb 6, 2025)
- 1c47839 Accept command line arguments in MultiGPU example (jwallwork23, Feb 6, 2025)
- 9adf424 Account for MPS (jwallwork23, Feb 6, 2025)
- 1cb8cfd Account for XPU in MultiGPU README (jwallwork23, Feb 6, 2025)
- 999e1c8 Lint (jwallwork23, Feb 6, 2025)
- 47e4276 Fix argparse syntax (jwallwork23, Feb 7, 2025)
- a422df3 Pre-processing for different GPU devices (jwallwork23, Feb 7, 2025)
- 017f36e Add mps to options for MultiGPU simplenet (jwallwork23, Feb 10, 2025)
- 00e6a36 Introduce GPU_DEVICE preprocessor option (jwallwork23, Feb 10, 2025)
- ba99847 CMake lint (jwallwork23, Feb 10, 2025)
- 978a2ad GPU_DEVICE OFF implies NONE (jwallwork23, Feb 10, 2025)
- 240c3cc Better handling of GPU device (jwallwork23, Feb 10, 2025)
- 5be69f0 CMake lint (jwallwork23, Feb 10, 2025)
- ece46ca Use GPU_DEVICE in unit tests, too (jwallwork23, Feb 10, 2025)
- f4205ca Add lint ignores for ftorch.F90 (jwallwork23, Feb 10, 2025)
- 5873eb7 Update pt2ts scripts; use argparse over argv (jwallwork23, Feb 10, 2025)
- 61e054e Update GPU docs (jwallwork23, Feb 10, 2025)
- 7de01ec Update READMEs (jwallwork23, Feb 10, 2025)
- 50bd953 Add subsection on command line args to utils README (jwallwork23, Feb 12, 2025)
- 838f3ae Add explanation of GPU device codes in dev docs (jwallwork23, Feb 12, 2025)
- c863cc7 Tweak example 3 to run on Mps devices. Bugfix in README for device ar… (jatkinson1000, Feb 13, 2025)
- 85c4723 Typographical changes from @jwallwork23's review (jatkinson1000, Feb 13, 2025)
- 8ff6be2 Fix build path static analysis (jwallwork23, Feb 17, 2025)
- 0897b20 Respond to @ElliotKasoar review (jwallwork23, Feb 18, 2025)
- 9b3db7d Apply suggestions from @jatkinson1000 code review (jwallwork23, Feb 19, 2025)
- b3ad229 Drop intel-specific compiler flag (jwallwork23, Feb 19, 2025)
- d87766b Make README footnote more general (jwallwork23, Feb 19, 2025)
- 1f4816f CMake lint (jwallwork23, Feb 19, 2025)
- 0cb0208 Add note on passing device codes via pre-processor (jwallwork23, Feb 19, 2025)
- cf80689 Make CMake docs footnote more general (jwallwork23, Feb 19, 2025)
- 0ce326d Add filepath arg for multigpu_infer_python [skip ci] (jwallwork23, Feb 19, 2025)
- e5962af Asserts for MultiGPU example (jwallwork23, Feb 19, 2025)
- 03054de MPS tests; consistent args ordering (jwallwork23, Feb 19, 2025)
- 69a4f45 Python lint (jwallwork23, Feb 19, 2025)
- 7d1f098 Test fixes; drop unnecessary imports (jwallwork23, Feb 19, 2025)
- 29fc909 Add missing filepath pass [skip ci] (jwallwork23, Feb 19, 2025)
- ac00fb3 Fix expected value in Python, too [skip ci] (jwallwork23, Feb 19, 2025)
7 changes: 4 additions & 3 deletions .github/workflows/static_analysis.yml
@@ -98,8 +98,9 @@ jobs:
         if: always()
         run: |
           cd ${{ github.workspace }}
-          . ftorch_venv/bin/activate # Uses .clang-tidy config file if present
-          fortitude check src/
+          . ftorch_venv/bin/activate
+          fortitude check --ignore=E001,T041 src/ftorch.F90
+          fortitude check src/ftorch_test_utils.f90

       # Apply C++ and C linter and formatter, clang
       # Configurable using the .clang-format and .clang-tidy config files if present
@@ -113,7 +114,7 @@
           style: 'file'
           tidy-checks: ''
           # Use the compile_commands.json from CMake to locate headers
-          database: ${{ github.workspace }}/src/build
+          database: ${{ github.workspace }}/build
           # only 'update' a single comment in a pull request thread.
           thread-comments: ${{ github.event_name == 'pull_request' && 'update' }}
       - name: Fail fast?!
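The same two fortitude commands can be run locally before pushing. A minimal sketch, assuming the fortitude linter is installed in the active Python environment (e.g. via `pip install fortitude-lint`):

```sh
# Mirror the CI Fortran lint step from the repository root
fortitude check --ignore=E001,T041 src/ftorch.F90
fortitude check src/ftorch_test_utils.f90
```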
42 changes: 37 additions & 5 deletions CMakeLists.txt
@@ -22,15 +22,41 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)

+# Set GPU device type using consistent numbering as in PyTorch
+# https://github.com/pytorch/pytorch/blob/main/c10/core/DeviceType.h
+set(GPU_DEVICE_NONE 0)
+set(GPU_DEVICE_CUDA 1)
+set(GPU_DEVICE_XPU 12)
+set(GPU_DEVICE_MPS 13)
+option(GPU_DEVICE "Set the GPU device (NONE [default], CUDA, XPU, or MPS)" NONE)
+if("${GPU_DEVICE}" STREQUAL "OFF")
+  set(GPU_DEVICE NONE)
+endif()
+if("${GPU_DEVICE}" STREQUAL "NONE")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_NONE})
+elseif("${GPU_DEVICE}" STREQUAL "CUDA")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_CUDA})
+elseif("${GPU_DEVICE}" STREQUAL "XPU")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_XPU})
+elseif("${GPU_DEVICE}" STREQUAL "MPS")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_MPS})
+else()
+  message(SEND_ERROR "GPU_DEVICE '${GPU_DEVICE}' not recognised")
+endif()
+
 # Other GPU specific setup
 include(CheckLanguage)
-if(ENABLE_CUDA)
+if("${GPU_DEVICE}" STREQUAL "CUDA")
   check_language(CUDA)
   if(CMAKE_CUDA_COMPILER)
     enable_language(CUDA)
   else()
     message(WARNING "No CUDA support")
   endif()
 endif()
+if("${GPU_DEVICE}" STREQUAL "XPU")
+  set(CMAKE_Fortran_FLAGS "-fpscomp logicals ${CMAKE_Fortran_FLAGS}")
+endif()

 # Set RPATH behaviour
 set(CMAKE_SKIP_RPATH FALSE)
@@ -57,11 +83,17 @@ find_package(Torch REQUIRED)
 add_library(${LIB_NAME} SHARED src/ctorch.cpp src/ftorch.F90
             src/ftorch_test_utils.f90)

-if(UNIX)
-  if(NOT APPLE) # only add definition for linux (not apple which is also unix)
-    target_compile_definitions(${LIB_NAME} PRIVATE UNIX)
-  endif()
+# Define compile definitions, including GPU devices
+set(COMPILE_DEFS "")
+if(UNIX AND NOT APPLE)
+  # only add UNIX definition for linux (not apple which is also unix)
+  set(COMPILE_DEFS UNIX)
 endif()
+target_compile_definitions(
+  ${LIB_NAME}
+  PRIVATE ${COMPILE_DEFS} GPU_DEVICE=${GPU_DEVICE_CODE}
+          GPU_DEVICE_NONE=${GPU_DEVICE_NONE} GPU_DEVICE_CUDA=${GPU_DEVICE_CUDA}
+          GPU_DEVICE_XPU=${GPU_DEVICE_XPU} GPU_DEVICE_MPS=${GPU_DEVICE_MPS})

 # Add an alias FTorch::ftorch for the library
 add_library(${PROJECT_NAME}::${LIB_NAME} ALIAS ${LIB_NAME})
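Taken together, these changes replace the boolean `ENABLE_CUDA` flag: `GPU_DEVICE` selects the target backend, and its PyTorch device code is forwarded to the C++ and Fortran sources as preprocessor definitions. A minimal configure sketch, assuming an out-of-source `build` directory (the `GPU_DEVICE` values come from this PR; the other settings follow the conda README below):

```sh
# Configure and build FTorch targeting Intel XPUs; CUDA, MPS, or NONE work the same way
cmake -B build \
  -DCMAKE_PREFIX_PATH=$(python -c 'import torch; print(torch.utils.cmake_prefix_path)') \
  -DCMAKE_BUILD_TYPE=Release \
  -DGPU_DEVICE=XPU
cmake --build build --target install
```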
2 changes: 1 addition & 1 deletion README.md
@@ -165,7 +165,7 @@ To build and install the library:
 | [`CMAKE_INSTALL_PREFIX`](https://cmake.org/cmake/help/latest/variable/CMAKE_INSTALL_PREFIX.html) | `</path/to/install/lib/at/>` | Location at which the library files should be installed. By default this is `/usr/local` |
 | [`CMAKE_BUILD_TYPE`](https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html) | `Release` / `Debug` | Specifies build type. The default is `Debug`, use `Release` for production code|
 | `CMAKE_BUILD_TESTS` | `TRUE` / `FALSE` | Specifies whether to compile FTorch's [test suite](https://cambridge-iccs.github.io/FTorch/page/testing.html) as part of the build. |
-| `ENABLE_CUDA` | `TRUE` / `FALSE` | Specifies whether to check for and enable CUDA<sup>3</sup> |
+| `GPU_DEVICE` | `NONE` / `CUDA` / `XPU` / `MPS` | Specifies the target GPU architecture (if any) <sup>3</sup> |

 <sup>1</sup> _On Windows this may need to be the full path to the compiler if CMake cannot locate it by default._
4 changes: 2 additions & 2 deletions conda/README.md
Review comment (Member): "I'll need to update the mac conda to include this in #284"
@@ -38,7 +38,7 @@ cmake \
   -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \
   -DCMAKE_PREFIX_PATH=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)') \
   -DCMAKE_BUILD_TYPE=Release \
-  -DENABLE_CUDA=FALSE \
+  -DGPU_DEVICE=NONE \
   ..
 cmake --build . --target install
 ```
@@ -65,7 +65,7 @@ cmake \
   -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \
   -DCMAKE_PREFIX_PATH=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)') \
   -DCMAKE_BUILD_TYPE=Release \
-  -DENABLE_CUDA=TRUE \
+  -DGPU_DEVICE=CUDA \
   -DCUDA_TOOLKIT_ROOT_DIR=$CONDA_PREFIX/targets/x86_64-linux \
   -Dnvtx3_dir=$CONDA_PREFIX/targets/x86_64-linux/include/nvtx3 \
   ..
8 changes: 3 additions & 5 deletions examples/1_SimpleNet/CMakeLists.txt
@@ -33,18 +33,16 @@ if(CMAKE_BUILD_TESTS)
   # pt2ts.py script
   add_test(
     NAME pt2ts
-    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py
-            ${PROJECT_BINARY_DIR} # Command line argument: filepath for saving
-                                  # the model
+    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py --filepath
+            ${PROJECT_BINARY_DIR}
     WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

   # 3. Check the model can be loaded from file and run in Python and that its
   # outputs meet expectations
   add_test(
     NAME simplenet_infer_python
     COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/simplenet_infer_python.py
-            ${PROJECT_BINARY_DIR} # Command line argument: filepath to find the
-                                  # model
+            --filepath ${PROJECT_BINARY_DIR}
     WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

   # 4. Check the model can be loaded from file and run in Fortran and that its
41 changes: 30 additions & 11 deletions examples/1_SimpleNet/pt2ts.py
@@ -72,6 +72,28 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Module:


 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--device_type",
+        help="Device type to run the inference on",
+        type=str,
+        choices=["cpu", "cuda", "xpu", "mps"],
+        default="cpu",
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    device_type = parsed_args.device_type
+    filepath = parsed_args.filepath
+
     # =====================================================
     # Load model and prepare for saving
     # =====================================================
@@ -97,12 +119,12 @@
     # This example assumes one input of size (5)
     trained_model_dummy_input = torch.ones(5)

-    # FPTLIB-TODO
-    # Uncomment the following lines to save for inference on GPU (rather than CPU):
-    # device = torch.device('cuda')
-    # trained_model = trained_model.to(device)
-    # trained_model.eval()
-    # trained_model_dummy_input = trained_model_dummy_input.to(device)
+    # Transfer the model and inputs to GPU device, if appropriate
+    if device_type != "cpu":
+        device = torch.device(device_type)
+        trained_model = trained_model.to(device)
+        trained_model.eval()
+        trained_model_dummy_input = trained_model_dummy_input.to(device)

     # FPTLIB-TODO
     # Run model for dummy inputs
@@ -117,7 +139,7 @@
     # FPTLIB-TODO
     # Set the name of the file you want to save the torchscript model to:
-    saved_ts_filename = "saved_simplenet_model_cpu.pt"
+    saved_ts_filename = f"saved_simplenet_model_{device_type}.pt"
     # A filepath may also be provided. To do this, pass the filepath as an argument to
     # this script when it is run from the command line, i.e. `./pt2ts.py path/to/model`.
@@ -141,9 +163,7 @@
     # Check model saved OK
     # =====================================================

-    # Load torchscript and run model as a test
-    # FPTLIB-TODO
-    # Scale inputs as above and, if required, move inputs and mode to GPU
+    # Load torchscript and run model as a test, scaling inputs as above
     trained_model_dummy_input = 2.0 * trained_model_dummy_input
     trained_model_testing_outputs = trained_model(
         trained_model_dummy_input,
@@ -169,7 +189,6 @@
         raise RuntimeError(model_error)

     # Check that the model file is created
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
     if not os.path.exists(os.path.join(filepath, saved_ts_filename)):
         torchscript_file_error = (
             f"Saved TorchScript file {os.path.join(filepath, saved_ts_filename)} "
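With argparse in place, the target device and output directory are chosen on the command line rather than by editing the script. A usage sketch (`/tmp/simplenet` is a placeholder path):

```sh
# Save a CPU TorchScript model, then load and run it in Python from the same location
python3 pt2ts.py --device_type cpu --filepath /tmp/simplenet
python3 simplenet_infer_python.py --filepath /tmp/simplenet
```

Note that `simplenet_infer_python.py` (below) still loads `saved_simplenet_model_cpu.pt`, so it pairs with the default `--device_type cpu`.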
14 changes: 13 additions & 1 deletion examples/1_SimpleNet/simplenet_infer_python.py
@@ -49,7 +49,19 @@ def deploy(saved_model: str, device: str, batch_size: int = 1) -> torch.Tensor:


 if __name__ == "__main__":
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    filepath = parsed_args.filepath
     saved_model_file = os.path.join(filepath, "saved_simplenet_model_cpu.pt")

     device_to_run = "cpu"
5 changes: 2 additions & 3 deletions examples/2_ResNet18/CMakeLists.txt
@@ -35,9 +35,8 @@ if(CMAKE_BUILD_TESTS)
   # pt2ts.py script
   add_test(
     NAME pt2ts
-    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py
-            ${PROJECT_BINARY_DIR} # Command line argument: filepath for saving
-                                  # the model
+    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py --filepath
+            ${PROJECT_BINARY_DIR}
     WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

   # 3. Check the model can be loaded from file and run in Fortran and that its
41 changes: 30 additions & 11 deletions examples/2_ResNet18/pt2ts.py
@@ -75,6 +75,28 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Module:


 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--device_type",
+        help="Device type to run the inference on",
+        type=str,
+        choices=["cpu", "cuda", "xpu", "mps"],
+        default="cpu",
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    device_type = parsed_args.device_type
+    filepath = parsed_args.filepath
+
     # =====================================================
     # Load model and prepare for saving
     # =====================================================
@@ -103,12 +125,12 @@
     # of resolution 244x244 in a batch size of 1.
     trained_model_dummy_input = torch.ones(1, 3, 224, 224)

-    # FPTLIB-TODO
-    # Uncomment the following lines to save for inference on GPU (rather than CPU):
-    # device = torch.device('cuda')
-    # trained_model = trained_model.to(device)
-    # trained_model.eval()
-    # trained_model_dummy_input = trained_model_dummy_input.to(device)
+    # Transfer the model and inputs to GPU device, if appropriate
+    if device_type != "cpu":
+        device = torch.device(device_type)
+        trained_model = trained_model.to(device)
+        trained_model.eval()
+        trained_model_dummy_input = trained_model_dummy_input.to(device)

     # FPTLIB-TODO
     # Run model for dummy inputs
@@ -123,7 +145,7 @@
     # FPTLIB-TODO
     # Set the name of the file you want to save the torchscript model to:
-    saved_ts_filename = "saved_resnet18_model_cpu.pt"
+    saved_ts_filename = f"saved_resnet18_model_{device_type}.pt"
     # A filepath may also be provided. To do this, pass the filepath as an argument to
     # this script when it is run from the command line, i.e. `./pt2ts.py path/to/model`.
@@ -147,9 +169,7 @@
     # Check model saved OK
     # =====================================================

-    # Load torchscript and run model as a test
-    # FPTLIB-TODO
-    # Scale inputs as above and, if required, move inputs and mode to GPU
+    # Load torchscript and run model as a test, scaling inputs as above
     trained_model_dummy_input = 2.0 * trained_model_dummy_input
     trained_model_testing_outputs = trained_model(
         trained_model_dummy_input,
@@ -175,7 +195,6 @@
         raise RuntimeError(model_error)

     # Check that the model file is created
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
     if not os.path.exists(os.path.join(filepath, saved_ts_filename)):
         torchscript_file_error = (
             f"Saved TorchScript file {os.path.join(filepath, saved_ts_filename)} "
15 changes: 13 additions & 2 deletions examples/2_ResNet18/resnet_infer_python.py
@@ -78,11 +78,22 @@ def check_results(output: torch.Tensor) -> None:


 if __name__ == "__main__":
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    filepath = parsed_args.filepath
     saved_model_file = os.path.join(filepath, "saved_resnet18_model_cpu.pt")

     device_to_run = "cpu"
-    # device_to_run = "cuda"

     batch_size_to_run = 1
Loading