
XPU and MPS take 3 #276

Merged: 43 commits, Feb 20, 2025
Commits (43; diff shown from 29 commits)
- f3a1914 Add MacOS GPU device option (ElliottKasoar, Apr 3, 2024)
- 42bcd61 Add XPU device option (ElliottKasoar, May 6, 2024)
- 7039084 Update C++ XPU interface to handle multiple devices indices. (jatkinson1000, Oct 11, 2024)
- 2a30f6a Update ftorch.F90 for XPU support (ma595, Dec 20, 2024)
- f58fa92 Make device enums consistent with PyTorch (jwallwork23, Feb 6, 2025)
- 0e5e56e Add ENABLE_XPU option for CMakeLists (jwallwork23, Feb 6, 2025)
- 32a2b42 Move towards generalising device type in MultiGPU example (jwallwork23, Feb 6, 2025)
- 1c47839 Accept command line arguments in MultiGPU example (jwallwork23, Feb 6, 2025)
- 9adf424 Account for MPS (jwallwork23, Feb 6, 2025)
- 1cb8cfd Account for XPU in MultiGPU README (jwallwork23, Feb 6, 2025)
- 999e1c8 Lint (jwallwork23, Feb 6, 2025)
- 47e4276 Fix argparse syntax (jwallwork23, Feb 7, 2025)
- a422df3 Pre-processing for different GPU devices (jwallwork23, Feb 7, 2025)
- 017f36e Add mps to options for MultiGPU simplenet (jwallwork23, Feb 10, 2025)
- 00e6a36 Introduce GPU_DEVICE preprocessor option (jwallwork23, Feb 10, 2025)
- ba99847 CMake lint (jwallwork23, Feb 10, 2025)
- 978a2ad GPU_DEVICE OFF implies NONE (jwallwork23, Feb 10, 2025)
- 240c3cc Better handling of GPU device (jwallwork23, Feb 10, 2025)
- 5be69f0 CMake lint (jwallwork23, Feb 10, 2025)
- ece46ca Use GPU_DEVICE in unit tests, too (jwallwork23, Feb 10, 2025)
- f4205ca Add lint ignores for ftorch.F90 (jwallwork23, Feb 10, 2025)
- 5873eb7 Update pt2ts scripts; use argparse over argv (jwallwork23, Feb 10, 2025)
- 61e054e Update GPU docs (jwallwork23, Feb 10, 2025)
- 7de01ec Update READMEs (jwallwork23, Feb 10, 2025)
- 50bd953 Add subsection on command line args to utils README (jwallwork23, Feb 12, 2025)
- 838f3ae Add explanation of GPU device codes in dev docs (jwallwork23, Feb 12, 2025)
- c863cc7 Tweak example 3 to run on Mps devices. Bugfix in README for device ar… (jatkinson1000, Feb 13, 2025)
- 85c4723 Typographical changes from @jwallwork23's review (jatkinson1000, Feb 13, 2025)
- 8ff6be2 Fix build path static analysis (jwallwork23, Feb 17, 2025)
- 0897b20 Respond to @ElliotKasoar review (jwallwork23, Feb 18, 2025)
- 9b3db7d Apply suggestions from @jatkinson1000 code review (jwallwork23, Feb 19, 2025)
- b3ad229 Drop intel-specific compiler flag (jwallwork23, Feb 19, 2025)
- d87766b Make README footnote more general (jwallwork23, Feb 19, 2025)
- 1f4816f CMake lint (jwallwork23, Feb 19, 2025)
- 0cb0208 Add note on passing device codes via pre-processor (jwallwork23, Feb 19, 2025)
- cf80689 Make CMake docs footnote more general (jwallwork23, Feb 19, 2025)
- 0ce326d Add filepath arg for multigpu_infer_python [skip ci] (jwallwork23, Feb 19, 2025)
- e5962af Asserts for MultiGPU example (jwallwork23, Feb 19, 2025)
- 03054de MPS tests; consistent args ordering (jwallwork23, Feb 19, 2025)
- 69a4f45 Python lint (jwallwork23, Feb 19, 2025)
- 7d1f098 Test fixes; drop unnecessary imports (jwallwork23, Feb 19, 2025)
- 29fc909 Add missing filepath pass [skip ci] (jwallwork23, Feb 19, 2025)
- ac00fb3 Fix expected value in Python, too [skip ci] (jwallwork23, Feb 19, 2025)
7 changes: 4 additions & 3 deletions .github/workflows/static_analysis.yml
@@ -98,8 +98,9 @@ jobs:
         if: always()
         run: |
           cd ${{ github.workspace }}
-          . ftorch_venv/bin/activate # Uses .clang-tidy config file if present
-          fortitude check src/
+          . ftorch_venv/bin/activate
+          fortitude check --ignore=E001,T041 src/ftorch.F90
+          fortitude check src/ftorch_test_utils.f90

       # Apply C++ and C linter and formatter, clang
       # Configurable using the .clang-format and .clang-tidy config files if present
@@ -113,7 +114,7 @@
           style: 'file'
           tidy-checks: ''
           # Use the compile_commands.json from CMake to locate headers
-          database: ${{ github.workspace }}/src/build
+          database: ${{ github.workspace }}/build
           # only 'update' a single comment in a pull request thread.
           thread-comments: ${{ github.event_name == 'pull_request' && 'update' }}
       - name: Fail fast?!
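The same two fortitude commands can be run locally before pushing. A minimal sketch, assuming the fortitude linter is installed in the active Python environment (e.g. via `pip install fortitude-lint`):

```sh
# Mirror the CI Fortran lint step from the repository root
fortitude check --ignore=E001,T041 src/ftorch.F90
fortitude check src/ftorch_test_utils.f90
```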
42 changes: 37 additions & 5 deletions CMakeLists.txt
@@ -22,15 +22,41 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)

+# Set GPU device type using consistent numbering as in PyTorch
+# https://github.com/pytorch/pytorch/blob/main/c10/core/DeviceType.h
+set(GPU_DEVICE_NONE 0)
+set(GPU_DEVICE_CUDA 1)
+set(GPU_DEVICE_XPU 12)
+set(GPU_DEVICE_MPS 13)
+option(GPU_DEVICE "Set the GPU device (NONE [default], CUDA, XPU, or MPS)" NONE)
+if("${GPU_DEVICE}" STREQUAL "OFF")
+  set(GPU_DEVICE NONE)
+endif()
+if("${GPU_DEVICE}" STREQUAL "NONE")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_NONE})
+elseif("${GPU_DEVICE}" STREQUAL "CUDA")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_CUDA})
+elseif("${GPU_DEVICE}" STREQUAL "XPU")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_XPU})
+elseif("${GPU_DEVICE}" STREQUAL "MPS")
+  set(GPU_DEVICE_CODE ${GPU_DEVICE_MPS})
+else()
+  message(SEND_ERROR "GPU_DEVICE '${GPU_DEVICE}' not recognised")
+endif()
+
 # Other GPU specific setup
 include(CheckLanguage)
-if(ENABLE_CUDA)
+if("${GPU_DEVICE}" STREQUAL "CUDA")
   check_language(CUDA)
   if(CMAKE_CUDA_COMPILER)
     enable_language(CUDA)
   else()
     message(WARNING "No CUDA support")
   endif()
 endif()
+if("${GPU_DEVICE}" STREQUAL "XPU")
+  set(CMAKE_Fortran_FLAGS "-fpscomp logicals ${CMAKE_Fortran_FLAGS}")
+endif()

 # Set RPATH behaviour
 set(CMAKE_SKIP_RPATH FALSE)
@@ -57,11 +83,17 @@ find_package(Torch REQUIRED)
 add_library(${LIB_NAME} SHARED src/ctorch.cpp src/ftorch.F90
             src/ftorch_test_utils.f90)

-if(UNIX)
-  if(NOT APPLE) # only add definition for linux (not apple which is also unix)
-    target_compile_definitions(${LIB_NAME} PRIVATE UNIX)
-  endif()
+# Define compile definitions, including GPU devices
+set(COMPILE_DEFS "")
+if(UNIX AND NOT APPLE)
+  # only add UNIX definition for linux (not apple which is also unix)
+  set(COMPILE_DEFS UNIX)
 endif()
+target_compile_definitions(
+  ${LIB_NAME}
+  PRIVATE ${COMPILE_DEFS} GPU_DEVICE=${GPU_DEVICE_CODE}
+          GPU_DEVICE_NONE=${GPU_DEVICE_NONE} GPU_DEVICE_CUDA=${GPU_DEVICE_CUDA}
+          GPU_DEVICE_XPU=${GPU_DEVICE_XPU} GPU_DEVICE_MPS=${GPU_DEVICE_MPS})

 # Add an alias FTorch::ftorch for the library
 add_library(${PROJECT_NAME}::${LIB_NAME} ALIAS ${LIB_NAME})
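Taken together, these changes replace the boolean `ENABLE_CUDA` flag: `GPU_DEVICE` selects the target backend, and its PyTorch device code is forwarded to the C++ and Fortran sources as preprocessor definitions. A minimal configure sketch, assuming an out-of-source `build` directory (the `GPU_DEVICE` values come from this PR; the other settings follow the conda README below):

```sh
# Configure and build FTorch targeting Intel XPUs; CUDA, MPS, or NONE work the same way
cmake -B build \
  -DCMAKE_PREFIX_PATH=$(python -c 'import torch; print(torch.utils.cmake_prefix_path)') \
  -DCMAKE_BUILD_TYPE=Release \
  -DGPU_DEVICE=XPU
cmake --build build --target install
```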
2 changes: 1 addition & 1 deletion README.md
@@ -165,7 +165,7 @@ To build and install the library:
 | [`CMAKE_INSTALL_PREFIX`](https://cmake.org/cmake/help/latest/variable/CMAKE_INSTALL_PREFIX.html) | `</path/to/install/lib/at/>` | Location at which the library files should be installed. By default this is `/usr/local` |
 | [`CMAKE_BUILD_TYPE`](https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html) | `Release` / `Debug` | Specifies build type. The default is `Debug`, use `Release` for production code|
 | `CMAKE_BUILD_TESTS` | `TRUE` / `FALSE` | Specifies whether to compile FTorch's [test suite](https://cambridge-iccs.github.io/FTorch/page/testing.html) as part of the build. |
-| `ENABLE_CUDA` | `TRUE` / `FALSE` | Specifies whether to check for and enable CUDA<sup>3</sup> |
+| `GPU_DEVICE` | `NONE` / `CUDA` / `XPU` / `MPS` | Specifies the target GPU architecture (if any) <sup>3</sup> |

 <sup>1</sup> _On Windows this may need to be the full path to the compiler if CMake cannot locate it by default._
4 changes: 2 additions & 2 deletions conda/README.md
Review comment (Member): "I'll need to update the mac conda to include this in #284"
@@ -38,7 +38,7 @@ cmake \
   -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \
   -DCMAKE_PREFIX_PATH=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)') \
   -DCMAKE_BUILD_TYPE=Release \
-  -DENABLE_CUDA=FALSE \
+  -DGPU_DEVICE=NONE \
   ..
 cmake --build . --target install
 ```
@@ -65,7 +65,7 @@ cmake \
   -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \
   -DCMAKE_PREFIX_PATH=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)') \
   -DCMAKE_BUILD_TYPE=Release \
-  -DENABLE_CUDA=TRUE \
+  -DGPU_DEVICE=CUDA \
   -DCUDA_TOOLKIT_ROOT_DIR=$CONDA_PREFIX/targets/x86_64-linux \
   -Dnvtx3_dir=$CONDA_PREFIX/targets/x86_64-linux/include/nvtx3 \
   ..
8 changes: 3 additions & 5 deletions examples/1_SimpleNet/CMakeLists.txt
@@ -33,18 +33,16 @@ if(CMAKE_BUILD_TESTS)
   # pt2ts.py script
   add_test(
     NAME pt2ts
-    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py
-            ${PROJECT_BINARY_DIR} # Command line argument: filepath for saving
-                                  # the model
+    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py --filepath
+            ${PROJECT_BINARY_DIR}
     WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

   # 3. Check the model can be loaded from file and run in Python and that its
   # outputs meet expectations
   add_test(
     NAME simplenet_infer_python
     COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/simplenet_infer_python.py
-            ${PROJECT_BINARY_DIR} # Command line argument: filepath to find the
-                                  # model
+            --filepath ${PROJECT_BINARY_DIR}
     WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

   # 4. Check the model can be loaded from file and run in Fortran and that its
41 changes: 30 additions & 11 deletions examples/1_SimpleNet/pt2ts.py
@@ -72,6 +72,28 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Module:


 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--device_type",
+        help="Device type to run the inference on",
+        type=str,
+        choices=["cpu", "cuda", "xpu", "mps"],
+        default="cpu",
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    device_type = parsed_args.device_type
+    filepath = parsed_args.filepath
+
     # =====================================================
     # Load model and prepare for saving
     # =====================================================
@@ -97,12 +119,12 @@
     # This example assumes one input of size (5)
     trained_model_dummy_input = torch.ones(5)

-    # FPTLIB-TODO
-    # Uncomment the following lines to save for inference on GPU (rather than CPU):
-    # device = torch.device('cuda')
-    # trained_model = trained_model.to(device)
-    # trained_model.eval()
-    # trained_model_dummy_input = trained_model_dummy_input.to(device)
+    # Transfer the model and inputs to GPU device, if appropriate
+    if device_type != "cpu":
+        device = torch.device(device_type)
+        trained_model = trained_model.to(device)
+        trained_model.eval()
+        trained_model_dummy_input = trained_model_dummy_input.to(device)

     # FPTLIB-TODO
     # Run model for dummy inputs
@@ -117,7 +139,7 @@
     # FPTLIB-TODO
     # Set the name of the file you want to save the torchscript model to:
-    saved_ts_filename = "saved_simplenet_model_cpu.pt"
+    saved_ts_filename = f"saved_simplenet_model_{device_type}.pt"
     # A filepath may also be provided. To do this, pass the filepath as an argument to
     # this script when it is run from the command line, i.e. `./pt2ts.py path/to/model`.
@@ -141,9 +163,7 @@
     # Check model saved OK
     # =====================================================

-    # Load torchscript and run model as a test
-    # FPTLIB-TODO
-    # Scale inputs as above and, if required, move inputs and mode to GPU
+    # Load torchscript and run model as a test, scaling inputs as above
     trained_model_dummy_input = 2.0 * trained_model_dummy_input
     trained_model_testing_outputs = trained_model(
         trained_model_dummy_input,
@@ -169,7 +189,6 @@
         raise RuntimeError(model_error)

     # Check that the model file is created
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
     if not os.path.exists(os.path.join(filepath, saved_ts_filename)):
         torchscript_file_error = (
             f"Saved TorchScript file {os.path.join(filepath, saved_ts_filename)} "
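With argparse in place, the target device and output directory are chosen on the command line rather than by editing the script. A usage sketch (`/tmp/simplenet` is a placeholder path):

```sh
# Save a CPU TorchScript model, then load and run it in Python from the same location
python3 pt2ts.py --device_type cpu --filepath /tmp/simplenet
python3 simplenet_infer_python.py --filepath /tmp/simplenet
```

Note that `simplenet_infer_python.py` (below) still loads `saved_simplenet_model_cpu.pt`, so it pairs with the default `--device_type cpu`.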
14 changes: 13 additions & 1 deletion examples/1_SimpleNet/simplenet_infer_python.py
@@ -49,7 +49,19 @@ def deploy(saved_model: str, device: str, batch_size: int = 1) -> torch.Tensor:


 if __name__ == "__main__":
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    filepath = parsed_args.filepath
     saved_model_file = os.path.join(filepath, "saved_simplenet_model_cpu.pt")

     device_to_run = "cpu"
5 changes: 2 additions & 3 deletions examples/2_ResNet18/CMakeLists.txt
@@ -35,9 +35,8 @@ if(CMAKE_BUILD_TESTS)
   # pt2ts.py script
   add_test(
     NAME pt2ts
-    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py
-            ${PROJECT_BINARY_DIR} # Command line argument: filepath for saving
-                                  # the model
+    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py --filepath
+            ${PROJECT_BINARY_DIR}
     WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

   # 3. Check the model can be loaded from file and run in Fortran and that its
41 changes: 30 additions & 11 deletions examples/2_ResNet18/pt2ts.py
@@ -75,6 +75,28 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Module:


 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--device_type",
+        help="Device type to run the inference on",
+        type=str,
+        choices=["cpu", "cuda", "xpu", "mps"],
+        default="cpu",
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    device_type = parsed_args.device_type
+    filepath = parsed_args.filepath
+
     # =====================================================
     # Load model and prepare for saving
     # =====================================================
@@ -103,12 +125,12 @@
     # of resolution 244x244 in a batch size of 1.
     trained_model_dummy_input = torch.ones(1, 3, 224, 224)

-    # FPTLIB-TODO
-    # Uncomment the following lines to save for inference on GPU (rather than CPU):
-    # device = torch.device('cuda')
-    # trained_model = trained_model.to(device)
-    # trained_model.eval()
-    # trained_model_dummy_input = trained_model_dummy_input.to(device)
+    # Transfer the model and inputs to GPU device, if appropriate
+    if device_type != "cpu":
+        device = torch.device(device_type)
+        trained_model = trained_model.to(device)
+        trained_model.eval()
+        trained_model_dummy_input = trained_model_dummy_input.to(device)

     # FPTLIB-TODO
     # Run model for dummy inputs
@@ -123,7 +145,7 @@
     # FPTLIB-TODO
     # Set the name of the file you want to save the torchscript model to:
-    saved_ts_filename = "saved_resnet18_model_cpu.pt"
+    saved_ts_filename = f"saved_resnet18_model_{device_type}.pt"
     # A filepath may also be provided. To do this, pass the filepath as an argument to
     # this script when it is run from the command line, i.e. `./pt2ts.py path/to/model`.
@@ -147,9 +169,7 @@
     # Check model saved OK
     # =====================================================

-    # Load torchscript and run model as a test
-    # FPTLIB-TODO
-    # Scale inputs as above and, if required, move inputs and mode to GPU
+    # Load torchscript and run model as a test, scaling inputs as above
     trained_model_dummy_input = 2.0 * trained_model_dummy_input
     trained_model_testing_outputs = trained_model(
         trained_model_dummy_input,
@@ -175,7 +195,6 @@
         raise RuntimeError(model_error)

     # Check that the model file is created
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
     if not os.path.exists(os.path.join(filepath, saved_ts_filename)):
         torchscript_file_error = (
             f"Saved TorchScript file {os.path.join(filepath, saved_ts_filename)} "
15 changes: 13 additions & 2 deletions examples/2_ResNet18/resnet_infer_python.py
@@ -78,11 +78,22 @@ def check_results(output: torch.Tensor) -> None:


 if __name__ == "__main__":
-    filepath = os.path.dirname(__file__) if len(sys.argv) == 1 else sys.argv[1]
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--filepath",
+        help="Path to the file containing the PyTorch model",
+        type=str,
+        default=os.path.dirname(__file__),
+    )
+    parsed_args = parser.parse_args()
+    filepath = parsed_args.filepath
     saved_model_file = os.path.join(filepath, "saved_resnet18_model_cpu.pt")

     device_to_run = "cpu"
-    # device_to_run = "cuda"

     batch_size_to_run = 1
Loading