Skip to content

Commit 581530a

Browse files
authored
Remove device name and ip address from required parameters of configure_nw_interface.sh (#2999)
* Remove device name and ip address from required parameters of configure_nw_interface.sh
* Skip nw configuration if efa-only
* Update changelog
* Make nw config more robust
* Update all os specific nw interface scripts
1 parent aecff90 commit 581530a

File tree

6 files changed

+66
-14
lines changed

6 files changed

+66
-14
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ This file is used to list changes made in each version of the AWS ParallelCluste
2828
- Addressed cluster id mismatch known issue by deleting the file `/var/spool/slurm.state/clustername` before configuring Slurm accounting.
2929
- Upgrade DCV to version 2024.0-19030.
3030
- Remove `berkshelf`. All cookbooks are local and do not need `berkshelf` dependency management.
31+
- Add support for GB200 instance types.
3132
- Install nvidia-imex for all OSs except AL2.
3233

3334
**BUG FIXES**

cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,27 @@
33
set -ex
44

55
if
6-
[ -z "${DEVICE_NAME}" ] || # name of the device
76
[ -z "${DEVICE_NUMBER}" ] || # index of the device
87
[ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card
9-
[ -z "${DEVICE_IP_ADDRESS}" ] || # ip of the device
108
[ -z "${MAC}" ] || # mac address of the device
119
[ -z "${CIDR_BLOCK}" ] # CIDR block of the subnet
1210
then
1311
echo 'One or more environment variables missing'
1412
exit 1
1513
fi
14+
15+
# Check if this is an EFA-only interface (no device name or IP)
16+
if [ -z "${DEVICE_NAME}" ] && [ -z "${DEVICE_IP_ADDRESS}" ]; then
17+
echo "EFA-only interface detected - skipping IP configuration"
18+
exit 0
19+
fi
20+
21+
# If one of these is missing but not both, it is an invalid configuration
22+
if [ -z "${DEVICE_NAME}" ] || [ -z "${DEVICE_IP_ADDRESS}" ]; then
23+
echo "Device name or IP address is missing"
24+
exit 1
25+
fi
26+
1627
echo "Configuring NIC, Device name: ${DEVICE_NAME}, Device number: ${DEVICE_NUMBER}, Network card index:${NETWORK_CARD_INDEX}"
1728

1829
configuration_directory="/etc/systemd/network"

cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,31 @@
66
# interface
77
# - A routing rule to make the OS use the specific routing table for this network interface
88

9-
set -e
9+
set -ex
1010

1111
if
12-
[ -z "${DEVICE_NAME}" ] || # name of the device
1312
[ -z "${DEVICE_NUMBER}" ] || # index of the device
1413
[ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card
1514
[ -z "${GW_IP_ADDRESS}" ] || # gateway ip address
16-
[ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface
1715
[ -z "${CIDR_PREFIX_LENGTH}" ] || # the prefix length of the device IP cidr block
1816
[ -z "${NETMASK}" ] # netmask to apply to device ip address
1917
then
2018
echo 'One or more environment variables missing'
2119
exit 1
2220
fi
2321

22+
# Check if this is an EFA-only interface (no device name or IP)
23+
if [ -z "${DEVICE_NAME}" ] && [ -z "${DEVICE_IP_ADDRESS}" ]; then
24+
echo "EFA-only interface detected - skipping IP configuration"
25+
exit 0
26+
fi
27+
28+
# If one of these is missing but not both, it is an invalid configuration
29+
if [ -z "${DEVICE_NAME}" ] || [ -z "${DEVICE_IP_ADDRESS}" ]; then
30+
echo "Device name or IP address is missing"
31+
exit 1
32+
fi
33+
2434
SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER)
2535

2636
ROUTE_TABLE="$(( $SUFFIX + 1000 ))"

cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,30 @@
99
# RedHat 8 official documentation:
1010
# https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_and_managing_networking/configuring-policy-based-routing-to-define-alternative-routes_configuring-and-managing-networking
1111

12-
set -e
12+
set -ex
1313

1414
if
15-
[ -z "${DEVICE_NAME}" ] || # name of the device
1615
[ -z "${DEVICE_NUMBER}" ] || # index of the device
1716
[ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card
1817
[ -z "${GW_IP_ADDRESS}" ] || # gateway ip address
19-
[ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface
2018
[ -z "${CIDR_PREFIX_LENGTH}" ] # the prefix length of the device IP cidr block
2119
then
2220
echo 'One or more environment variables missing'
2321
exit 1
2422
fi
2523

24+
# Check if this is an EFA-only interface (no device name or IP)
25+
if [ -z "${DEVICE_NAME}" ] && [ -z "${DEVICE_IP_ADDRESS}" ]; then
26+
echo "EFA-only interface detected - skipping IP configuration"
27+
exit 0
28+
fi
29+
30+
# If one of these is missing but not both, it is an invalid configuration
31+
if [ -z "${DEVICE_NAME}" ] || [ -z "${DEVICE_IP_ADDRESS}" ]; then
32+
echo "Device name or IP address is missing"
33+
exit 1
34+
fi
35+
2636
con_name="System ${DEVICE_NAME}"
2737
SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER)
2838
route_table="$(( $SUFFIX + 1000 ))"

cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,30 @@
99
# RedHat 8 official documentation:
1010
# https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_and_managing_networking/configuring-policy-based-routing-to-define-alternative-routes_configuring-and-managing-networking
1111

12-
set -e
12+
set -ex
1313

1414
if
15-
[ -z "${DEVICE_NAME}" ] || # name of the device
1615
[ -z "${DEVICE_NUMBER}" ] || # index of the device
1716
[ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card
1817
[ -z "${GW_IP_ADDRESS}" ] || # gateway ip address
19-
[ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface
2018
[ -z "${CIDR_PREFIX_LENGTH}" ] # the prefix length of the device IP cidr block
2119
then
2220
echo 'One or more environment variables missing'
2321
exit 1
2422
fi
2523

24+
# Check if this is an EFA-only interface (no device name or IP)
25+
if [ -z "${DEVICE_NAME}" ] && [ -z "${DEVICE_IP_ADDRESS}" ]; then
26+
echo "EFA-only interface detected - skipping IP configuration"
27+
exit 0
28+
fi
29+
30+
# If one of these is missing but not both, it is an invalid configuration
31+
if [ -z "${DEVICE_NAME}" ] || [ -z "${DEVICE_IP_ADDRESS}" ]; then
32+
echo "Device name or IP address is missing"
33+
exit 1
34+
fi
35+
2636
con_name="System ${DEVICE_NAME}"
2737
SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER)
2838
route_table="$(( $SUFFIX + 1000 ))"

cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,12 @@
66
# interface
77
# - A routing rule to make the OS use the specific routing table for this network interface
88

9-
set -e
9+
set -ex
1010

1111
if
12-
[ -z "${DEVICE_NAME}" ] || # name of the device
1312
[ -z "${DEVICE_NUMBER}" ] || # index of the device
1413
[ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card
1514
[ -z "${GW_IP_ADDRESS}" ] || # gateway ip address
16-
[ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface
1715
[ -z "${CIDR_PREFIX_LENGTH}" ] || # the prefix length of the device IP cidr block
1816
[ -z "${NETMASK}" ] || # netmask to apply to device ip address
1917
[ -z "${CIDR_BLOCK}" ] # (full) subnet cidr block
@@ -22,6 +20,18 @@ then
2220
exit 1
2321
fi
2422

23+
# Check if this is an EFA-only interface (no device name or IP)
24+
if [ -z "${DEVICE_NAME}" ] && [ -z "${DEVICE_IP_ADDRESS}" ]; then
25+
echo "EFA-only interface detected - skipping IP configuration"
26+
exit 0
27+
fi
28+
29+
# If one of these is missing but not both, it is an invalid configuration
30+
if [ -z "${DEVICE_NAME}" ] || [ -z "${DEVICE_IP_ADDRESS}" ]; then
31+
echo "Device name or IP address is missing"
32+
exit 1
33+
fi
34+
2535
STATIC_IP_CONFIG=$(cat<<END
2636
addresses:
2737
- ${DEVICE_IP_ADDRESS}/${CIDR_PREFIX_LENGTH}

0 commit comments

Comments
 (0)