Skip to content

Commit edd1ebb

Browse files
authored
MPI example (#270)
Add MPI example and related docs
1 parent 864b0eb commit edd1ebb

14 files changed

+710
-8
lines changed

.github/workflows/test_suite_ubuntu.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
- name: Install an MPI distribution
6767
run: |
6868
sudo apt update
69-
sudo apt install mpich
69+
sudo apt install openmpi-bin openmpi-common libopenmpi-dev
7070
7171
- name: Install pFUnit
7272
run: |

examples/3_MultiGPU/multigpu_infer_fortran.f90

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
program inference
22

33
! Import precision info from iso
4-
use, intrinsic :: iso_fortran_env, only : sp => real32
4+
use, intrinsic :: iso_fortran_env, only : sp => real32, stdout => output_unit
55

66
! Import our library for interfacing with PyTorch
77
use ftorch, only : torch_model, torch_tensor, &
@@ -61,7 +61,7 @@ program inference
6161

6262
! Initialise data and print the values used
6363
in_data = [(device_index + i, i = 0, 4)]
64-
write (6, 100) device_index, in_data(:)
64+
write(unit=stdout, fmt=100) device_index, in_data(:)
6565
100 format("input on device ", i1,": [", 4(f5.1,","), f5.1,"]")
6666

6767
! Create Torch input tensor from the above array and assign it to the first (and only)
@@ -83,7 +83,7 @@ program inference
8383
call torch_model_forward(model, in_tensors, out_tensors)
8484

8585
! Print the values computed on the current device.
86-
write (6, 200) device_index, out_data(:)
86+
write(unit=stdout, fmt=200) device_index, out_data(:)
8787
200 format("output on device ", i1,": [", 4(f5.1,","), f5.1,"]")
8888

8989
! Check output tensor matches expected value

examples/7_MPI/CMakeLists.txt

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Build configuration for the FTorch MPI example.
#
# Builds the `mpi_infer_fortran` executable and, when CMAKE_BUILD_TESTS is
# enabled, registers integration tests that exercise the Python and Fortran
# versions of the example under `mpiexec` with two ranks.
#
# Policy CMP0076 does not need to be set explicitly: it was introduced in CMake
# 3.13, so the version range below already selects the NEW behaviour.
cmake_minimum_required(VERSION 3.15...3.31)

project(MPIExample LANGUAGES Fortran)

# Build in Debug mode if not specified. CMAKE_BUILD_TYPE is only meaningful for
# single-config generators, so leave multi-config generators alone.
get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE
      Debug
      CACHE STRING "Build type (Debug, Release, RelWithDebInfo, MinSizeRel)"
            FORCE)
endif()

# Both packages provide imported targets that are linked below, so fail at
# configure time with a clear message if either is missing.
find_package(FTorch REQUIRED)
find_package(MPI REQUIRED COMPONENTS Fortran)
message(STATUS "Building with Fortran PyTorch coupling")

# Fortran example
add_executable(mpi_infer_fortran mpi_infer_fortran.f90)
target_link_libraries(mpi_infer_fortran PRIVATE FTorch::ftorch
                                                MPI::MPI_Fortran)

# Integration testing
if(CMAKE_BUILD_TESTS)
  include(CTest)

  # The tests below launch Python scripts. If an enclosing project has not
  # already located an interpreter, find one here so that the test commands are
  # not silently built from an empty variable.
  if(NOT Python_EXECUTABLE)
    find_package(Python REQUIRED COMPONENTS Interpreter)
  endif()

  # 1. Check the PyTorch model runs and its outputs meet expectations
  add_test(NAME simplenet COMMAND ${Python_EXECUTABLE}
                                  ${PROJECT_SOURCE_DIR}/simplenet.py)

  # 2. Check the model is saved to file in the expected location with the
  #    pt2ts.py script
  add_test(
    NAME pt2ts
    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py --filepath
            ${PROJECT_BINARY_DIR}
    WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

  # 3. Check the model can be loaded from file and run with MPI in Python and
  #    that its outputs meet expectations.
  #    MPIEXEC_PREFLAGS/MPIEXEC_POSTFLAGS are forwarded in the order recommended
  #    by FindMPI so that launcher options can be supplied at configure time.
  add_test(
    NAME mpi_infer_python
    COMMAND
      ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS}
      ${Python_EXECUTABLE} ${MPIEXEC_POSTFLAGS}
      ${PROJECT_SOURCE_DIR}/mpi_infer_python.py --filepath ${PROJECT_BINARY_DIR}
    WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
  # DEPENDS orders this test after pt2ts, which writes the model file read
  # here; without it a parallel `ctest -j` run may start this test too early.
  set_tests_properties(
    mpi_infer_python
    PROPERTIES PASS_REGULAR_EXPRESSION "MPI Python example ran successfully"
               DEPENDS pt2ts
               PROCESSORS 2)

  # 4. Check the model can be loaded from file and run with MPI in Fortran and
  #    that its outputs meet expectations.
  #    $<TARGET_FILE:...> resolves to the built executable wherever the
  #    generator places it, rather than relying on the working directory.
  add_test(
    NAME mpi_infer_fortran
    COMMAND
      ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS}
      $<TARGET_FILE:mpi_infer_fortran> ${MPIEXEC_POSTFLAGS}
      # Command line argument: model file
      ${PROJECT_BINARY_DIR}/saved_simplenet_model_cpu.pt
    WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
  set_tests_properties(
    mpi_infer_fortran
    PROPERTIES PASS_REGULAR_EXPRESSION "MPI Fortran example ran successfully"
               DEPENDS pt2ts
               PROCESSORS 2)
endif()

examples/7_MPI/README.md

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Example 7 - MPI
2+
3+
This example revisits the SimpleNet example and demonstrates how to run it using
4+
MPI parallelism.
5+
6+
7+
## Description
8+
9+
The Python file `simplenet.py` is copied from the earlier example. Recall that
10+
it defines a very simple PyTorch network that takes an input of length 5 and
11+
applies a single `Linear` layer to multiply it by 2.
12+
13+
The same `pt2ts.py` tool is used to save the simple network to TorchScript.
14+
15+
A series of files `mpi_infer_<LANG>` then bind from other languages to run the
16+
TorchScript model in inference mode.
17+
18+
## Dependencies
19+
20+
Running this example requires:
21+
22+
- CMake
23+
- An MPI installation.
24+
- FTorch (installed as described in main package)
25+
- Python 3
26+
27+
## Running
28+
29+
To run this example install FTorch as described in the main documentation. Then
30+
from this directory create a virtual environment and install the necessary
31+
Python modules:
32+
```
33+
python3 -m venv venv
34+
source venv/bin/activate
35+
pip install -r requirements.txt
36+
```
37+
38+
You can check the network is set up correctly by running `simplenet.py`:
39+
```
40+
python3 simplenet.py
41+
```
42+
As before, this defines the network and runs it with an input tensor
43+
[0.0, 1.0, 2.0, 3.0, 4.0] to produce the result:
44+
```
45+
tensor([[0, 2, 4, 6, 8]])
46+
```
47+
48+
To save the `SimpleNet` model to TorchScript run the modified version of the
49+
`pt2ts.py` tool:
50+
```
51+
python3 pt2ts.py
52+
```
53+
which will generate `saved_simplenet_model_cpu.pt` - the TorchScript instance
54+
of the network.
55+
56+
You can check that everything is working by running the `mpi_infer_python.py`
57+
script. It's set up with MPI such that a different GPU device is associated
58+
with each MPI rank. You should substitute `<NP>` with the number of GPUs you
59+
wish to run with:
60+
```
61+
mpiexec -np <NP> python3 mpi_infer_python.py
62+
```
63+
This reads the model in from the TorchScript file and runs it with a different
64+
input tensor on each GPU device: [0.0, 1.0, 2.0, 3.0, 4.0], plus the device
65+
index in each entry. Running with `NP=2`, the result should be (some
66+
permutation of):
67+
```
68+
rank 0: result:
69+
tensor([[0., 2., 4., 6., 8.]])
70+
rank 1: result:
71+
tensor([[ 2., 4., 6., 8., 10.]])
72+
```
73+
74+
At this point we no longer require Python, so can deactivate the virtual
75+
environment:
76+
```
77+
deactivate
78+
```
79+
80+
To call the saved `SimpleNet` model from Fortran we need to compile the
81+
`mpi_infer_fortran.f90` file. This can be done using the included
82+
`CMakeLists.txt` as follows, noting that we need to use an MPI-enabled Fortran
83+
compiler:
84+
```
85+
mkdir build
86+
cd build
87+
cmake .. -DCMAKE_PREFIX_PATH=<path/to/your/installation/of/library/> -DCMAKE_BUILD_TYPE=Release
88+
cmake --build .
89+
```
90+
91+
(Note that the Fortran compiler can be chosen explicitly with the
92+
`-DCMAKE_Fortran_COMPILER` flag, and should match the compiler that was used to
93+
locally build FTorch.)
94+
95+
To run the compiled code calling the saved `SimpleNet` TorchScript from Fortran,
96+
run the executable with an argument of the saved model file. Again, specify the
97+
number of MPI processes according to the desired number of GPUs:
98+
```
99+
mpiexec -np <NP> ./mpi_infer_fortran ../saved_simplenet_model_cpu.pt
100+
```
101+
102+
This runs the model with the same inputs as described above and should produce (some
103+
permutation of) the output:
104+
```
105+
input on rank 0: [ 0.0, 1.0, 2.0, 3.0, 4.0]
106+
input on rank 1: [ 1.0, 2.0, 3.0, 4.0, 5.0]
107+
output on rank 0: [ 0.0, 2.0, 4.0, 6.0, 8.0]
108+
output on rank 1: [ 2.0, 4.0, 6.0, 8.0, 10.0]
109+
```
110+
111+
Alternatively, we can use `make`, instead of CMake, copying the Makefile over from the
112+
first example:
113+
```
114+
cp ../1_SimpleNet/Makefile .
115+
```
116+
See the instructions in that example directory for further details.
117+
118+
## Exercise
119+
120+
You might wish to explore using different MPI ranks to call different GPU
121+
devices via the GPU `device_index` argument passed to constructors for FTorch
122+
tensors and models. See the
123+
[Multi-GPU](https://github.com/Cambridge-ICCS/FTorch/tree/main/examples/3_MultiGPU)
124+
example for more details on how to do this.

examples/7_MPI/mpi_infer_fortran.f90

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
program inference
  ! MPI inference example.
  !
  ! Each MPI rank builds an input vector [rank, rank+1, ..., rank+4], runs the
  ! TorchScript model named by the first command line argument on the CPU, and
  ! prints its input and output. The outputs are then gathered onto rank 0,
  ! which checks every rank's result against 2 * input and reports success.

  ! Import precision info from iso
  use, intrinsic :: iso_fortran_env, only : sp => real32, stdout => output_unit

  ! Import our library for interfacing with PyTorch
  use ftorch, only : torch_model, torch_tensor, torch_kCPU, torch_delete, &
                     torch_tensor_from_array, torch_model_load, torch_model_forward

  ! Import our tools module for testing utils
  use ftorch_test_utils, only : assert_allclose

  ! Import MPI
  use mpi, only : mpi_comm_rank, mpi_comm_size, mpi_comm_world, mpi_finalize, mpi_float, &
                  mpi_gather, mpi_init

  implicit none

  ! Set working precision for reals
  integer, parameter :: wp = sp

  ! Command line arguments (args(1) is the TorchScript model file)
  integer :: num_args, ix
  character(len=128), dimension(:), allocatable :: args

  ! Set up Fortran data structures
  real(wp), dimension(5), target :: in_data
  real(wp), dimension(5), target :: out_data
  real(wp), dimension(5), target :: expected
  integer, parameter :: tensor_layout(1) = [1]

  ! Set up Torch data structures
  ! The net, a vector of input tensors (in this case we only have one), and the output tensor
  type(torch_model) :: model
  type(torch_tensor), dimension(1) :: in_tensors
  type(torch_tensor), dimension(1) :: out_tensors

  ! Flag for testing
  logical :: test_pass

  ! MPI configuration
  integer :: rank, size, ierr, i

  ! Variables for testing
  real(wp), allocatable, dimension(:,:) :: recvbuf  ! gathered outputs, one column per rank
  real(wp), dimension(5) :: result_chk
  integer :: rank_chk

  call mpi_init(ierr)
  call mpi_comm_rank(mpi_comm_world, rank, ierr)
  call mpi_comm_size(mpi_comm_world, size, ierr)

  ! Check MPI was configured correctly
  if (size == 1) then
    write(*,*) "MPI communicator size is 1, indicating that it is not configured correctly"
    write(*,*) "(assuming you specified more than one rank)"
    call clean_up()
    stop 999
  end if

  ! Get TorchScript model file as a command line argument.
  ! Fail early with a usage message rather than reading args(1) from an empty array.
  num_args = command_argument_count()
  if (num_args < 1) then
    write(*,*) "Usage: mpi_infer_fortran <path to TorchScript model file>"
    call clean_up()
    stop 999
  end if
  allocate(args(num_args))
  do ix = 1, num_args
    call get_command_argument(ix, args(ix))
  end do

  ! Initialise data and print the values used on each MPI rank.
  ! i0 prints the rank at its natural width, so ranks >= 10 are not shown as '*'.
  in_data = [(rank + i, i = 0, 4)]
  write(unit=stdout, fmt="('input on rank ',i0,': ')", advance="no") rank
  write(unit=stdout, fmt=100) in_data(:)
  100 format('[',4(f5.1,','),f5.1,']')

  ! Create Torch input/output tensors from the above arrays
  call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, torch_kCPU)
  call torch_tensor_from_array(out_tensors(1), out_data, tensor_layout, torch_kCPU)

  ! Load ML model
  call torch_model_load(model, args(1), torch_kCPU)

  ! Run inference on each MPI rank
  call torch_model_forward(model, in_tensors, out_tensors)

  ! Print the values computed on each MPI rank
  write(unit=stdout, fmt="('output on rank ',i0,': ')", advance="no") rank
  write(unit=stdout, fmt=100) out_data(:)

  ! Gather the outputs onto rank 0: column r+1 of recvbuf receives rank r's output.
  ! NOTE(review): mpi_float is the handle used for the real32 buffers here; mpi_real
  ! may be the more conventional Fortran handle - confirm against the MPI library in use.
  allocate(recvbuf(5,size))
  call mpi_gather(out_data, 5, mpi_float, recvbuf, 5, mpi_float, 0, mpi_comm_world, ierr)

  ! Check that the correct values were attained
  if (rank == 0) then

    ! Check output tensor matches expected value
    do rank_chk = 0, size-1
      expected = [(2 * (rank_chk + i), i = 0, 4)]
      result_chk(:) = recvbuf(:,rank_chk+1)
      test_pass = assert_allclose(result_chk, expected, test_name="MPI")
      if (.not. test_pass) then
        write(unit=stdout, fmt="('rank ',i0,' result: ')") rank_chk
        write(unit=stdout, fmt=100) result_chk(:)
        write(unit=stdout, fmt="('does not match expected value')")
        write(unit=stdout, fmt=100) expected(:)
        call clean_up()
        stop 999
      end if
    end do

    write (*,*) "MPI Fortran example ran successfully"
  end if

  call clean_up()

contains

  ! Release the Torch objects, finalise MPI and free the gather buffer.
  ! This is also called from the early-exit paths, which run before recvbuf has
  ! been allocated, so the deallocation is guarded.
  subroutine clean_up()
    call torch_delete(model)
    call torch_delete(in_tensors)
    call torch_delete(out_tensors)
    call mpi_finalize(ierr)
    if (allocated(recvbuf)) deallocate(recvbuf)
  end subroutine clean_up

end program inference

0 commit comments

Comments
 (0)