Skip to content
Merged
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@ CHANGELOG
**BUG FIXES**
- Fix an issue where Security Group validation failed when a rule contained both IPv4 ranges (IpRanges) and security group references (UserIdGroupPairs).

3.13.1
3.13.2
------

**BUG FIXES**
- Fix build image failures occurring on non-latest versions of Rocky Linux 9.
- Fix a bug that may cause `update-cluster` and `update-compute-fleet` to fail when compute resources reference an expired Capacity Reservation
that is no longer accessible via EC2 APIs.
- Fix `build-image` failure on Rocky 9, occurring when the parent image does not ship the latest kernel version.
See https://github.com/aws/aws-parallelcluster/issues/6874.

3.13.1
------
Expand Down
11 changes: 9 additions & 2 deletions cli/src/pcluster/config/cluster_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2385,9 +2385,8 @@ class SlurmComputeResource(_BaseSlurmComputeResource):

def __init__(self, instance_type=None, **kwargs):
super().__init__(**kwargs)
_instance_type = instance_type if instance_type else self._instance_type_from_capacity_reservation()
self.instance_type = Resource.init_param(_instance_type)
self.__instance_type_info = None
self._instance_type = Resource.init_param(instance_type)

def is_flexible(self):
"""Return False because the ComputeResource can not contain multiple instance types."""
Expand All @@ -2398,6 +2397,14 @@ def instance_types(self) -> List[str]:
"""List of instance types under this compute resource."""
return [self.instance_type]

@property
def instance_type(self):
    """Return the instance type of this compute resource, resolving it lazily.

    If no explicit instance type was set, it is derived on first access via
    ``self._instance_type_from_capacity_reservation()`` (presumably an
    EC2-backed lookup of the targeted Capacity Reservation — confirm) and the
    resolved value is cached in ``self._instance_type`` for later accesses.

    NOTE(review): do not invoke in the update path — deferring resolution
    looks intentional, likely so that an expired/inaccessible Capacity
    Reservation does not break update flows; verify against callers.
    """
    if not self._instance_type:
        # Lazy fallback: derive the type from the capacity reservation and
        # cache it so the lookup happens at most once per instance.
        self._instance_type = Resource.init_param(self._instance_type_from_capacity_reservation())
    return self._instance_type

def _register_validators(self, context: ValidatorContext = None):
super()._register_validators(context)
self._register_validator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
import os
import subprocess

import boto3
import pytest
from assertpy import assert_that
from utils import describe_cluster_instances, retrieve_cfn_resources
from utils import describe_cluster_instances, retrieve_cfn_resources, wait_for_computefleet_changed


@pytest.mark.usefixtures("os", "region")
Expand All @@ -40,7 +42,50 @@ def test_on_demand_capacity_reservation(
pg_capacity_reservation_id=odcr_resources["integTestsPgOdcr"],
pg_capacity_reservation_arn=resource_group_arn,
)
cluster = clusters_factory(cluster_config)

# Apply patch to the repo
logging.info("Applying patch to the repository")
repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../.."))
s3_bucket_file = os.path.join(repo_root, "cli/src/pcluster/models/s3_bucket.py")

# Backup the original file
with open(s3_bucket_file, "r") as f:
original_content = f.read()

try:
# Apply the patch - inject the bug that replaces capacity reservation IDs
with open(s3_bucket_file, "r") as f:
content = f.read()

# Add the bug injection line after the upload_config method definition
modified_content = content.replace(
" def upload_config(self, config, config_name, format=S3FileFormat.YAML):\n"
' """Upload config file to S3 bucket."""',
" def upload_config(self, config, config_name, format=S3FileFormat.YAML):\n"
' """Upload config file to S3 bucket."""\n'
' if config_name == "cluster-config.yaml":\n'
" config = re.sub(r'cr-[0-9a-f]{17}', 'cr-11111111111111111', config)",
)

# Write the modified content back
with open(s3_bucket_file, "w") as f:
f.write(modified_content)

# Install the CLI
logging.info("Installing CLI from local repository")
subprocess.run(["pip", "install", "./cli"], cwd=repo_root, check=True)

# Create the cluster
cluster = clusters_factory(cluster_config)
finally:
# Revert the patch by restoring the original file
logging.info("Reverting patch from the repository")
with open(s3_bucket_file, "w") as f:
f.write(original_content)

# Reinstall the CLI
logging.info("Reinstalling CLI from local repository")
subprocess.run(["pip", "install", "./cli"], cwd=repo_root, check=True)

_assert_instance_in_capacity_reservation(cluster, region, "open-odcr-id-cr", odcr_resources["integTestsOpenOdcr"])
_assert_instance_in_capacity_reservation(cluster, region, "open-odcr-arn-cr", odcr_resources["integTestsOpenOdcr"])
Expand All @@ -64,6 +109,19 @@ def test_on_demand_capacity_reservation(
)
_assert_instance_in_capacity_reservation(cluster, region, "pg-odcr-id-cr", odcr_resources["integTestsPgOdcr"])
_assert_instance_in_capacity_reservation(cluster, region, "pg-odcr-arn-cr", odcr_resources["integTestsPgOdcr"])
cluster.stop()
wait_for_computefleet_changed(cluster, "STOPPED")
updated_config_file = pcluster_config_reader(
config_file="pcluster.config.update.yaml",
placement_group=placement_group_stack.cfn_resources["PlacementGroup"],
open_capacity_reservation_id=odcr_resources["integTestsOpenOdcr"],
open_capacity_reservation_arn=resource_group_arn,
target_capacity_reservation_id=odcr_resources["integTestsTargetOdcr"],
target_capacity_reservation_arn=resource_group_arn,
pg_capacity_reservation_id=odcr_resources["integTestsPgOdcr"],
pg_capacity_reservation_arn=resource_group_arn,
)
cluster.update(str(updated_config_file))


def _assert_instance_in_capacity_reservation(cluster, region, compute_resource_name, expected_reservation):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
Image:
Os: {{ os }}
HeadNode:
InstanceType: r5.xlarge
Networking:
SubnetId: {{ public_subnet_id }}
Ssh:
KeyName: {{ key_name }}
Scheduling:
Scheduler: slurm
SlurmQueues:
- Name: open-odcr-q
ComputeResources:
- Name: open-odcr-id-cr
InstanceType: m5.2xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationId: {{ open_capacity_reservation_id }}
- Name: open-odcr-arn-cr
InstanceType: m5.2xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }}
- Name: open-odcr-arn-fl-cr
Instances:
- InstanceType: m5.2xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }}
- Name: open-odcr-id-pg-cr
InstanceType: m5.2xlarge
MinCount: 0
MaxCount: 1
Networking:
PlacementGroup:
Enabled: true
CapacityReservationTarget:
CapacityReservationId: {{ open_capacity_reservation_id }}
- Name: open-odcr-arn-pg-cr
InstanceType: m5.2xlarge
MinCount: 0
MaxCount: 1
Networking:
PlacementGroup:
Enabled: true
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }}
- Name: open-odcr-arn-pg-fl-cr
Instances:
- InstanceType: m5.2xlarge
MinCount: 0
MaxCount: 1
Networking:
PlacementGroup:
Enabled: true
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }}
Networking:
SubnetIds:
- {{ public_subnet_id }}
- Name: target-odcr-q
ComputeResources:
- Name: target-odcr-id-cr
InstanceType: r5.xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationId: {{ target_capacity_reservation_id }}
- Name: target-odcr-arn-cr
InstanceType: r5.xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }}
- Name: target-odcr-arn-fl-cr
Instances:
- InstanceType: r5.xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }}
- Name: target-odcr-id-pg-cr
InstanceType: r5.xlarge
MinCount: 0
MaxCount: 1
Networking:
PlacementGroup:
Enabled: true
CapacityReservationTarget:
CapacityReservationId: {{ target_capacity_reservation_id }}
- Name: target-odcr-arn-pg-cr
InstanceType: r5.xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }}
- Name: target-odcr-arn-pg-fl-cr
Instances:
- InstanceType: r5.xlarge
MinCount: 0
MaxCount: 1
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }}
Networking:
SubnetIds:
- {{ public_subnet_id }}
- Name: pg-odcr-q
ComputeResources:
- Name: pg-odcr-id-cr
InstanceType: m5.xlarge
MinCount: 0
MaxCount: 1
Networking:
PlacementGroup:
Name: {{ placement_group }}
CapacityReservationTarget:
CapacityReservationId: {{ pg_capacity_reservation_id }}
- Name: pg-odcr-arn-cr
InstanceType: m5.xlarge
MinCount: 0
MaxCount: 1
Networking:
PlacementGroup:
Name: {{ placement_group }}
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ pg_capacity_reservation_arn }}
- Name: pg-odcr-arn-fleet-cr
Instances:
- InstanceType: m5.xlarge
MinCount: 0
MaxCount: 1
Networking:
PlacementGroup:
Name: {{ placement_group }}
CapacityReservationTarget:
CapacityReservationResourceGroupArn: {{ pg_capacity_reservation_arn }}
Networking:
SubnetIds:
- {{ public_subnet_id }}

Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ Scheduling:
- Name: open-odcr-q
ComputeResources:
- Name: open-odcr-id-cr
InstanceType: m5.2xlarge
MinCount: 1
MaxCount: 1
CapacityReservationTarget:
Expand Down
Loading