Skip to content

Commit 84276a2

Browse files
author
Himani Anil Deshpande
committed
[SlurmTopo] Updated unit tests to verify that no topology file is generated when block_sizes is empty
1 parent 5828309 commit 84276a2

File tree

4 files changed

+68
-67
lines changed

4 files changed

+68
-67
lines changed

cookbooks/aws-parallelcluster-slurm/files/default/head_node_slurm/slurm/pcluster_topology_generator.py

Lines changed: 61 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -54,69 +54,69 @@ def generate_topology_config_file(output_file: str, input_file: str, block_sizes
5454
BlockName=block2 Nodes=queue-1-st-compute-resource-0-[1-18] #### 18 nodes
5555
BlockSizes=9,18
5656
"""
57-
58-
min_block_size_list = min(list(map(int, block_sizes.split(","))))
59-
max_block_size_list = max(list(map(int, block_sizes.split(","))))
60-
61-
cluster_config = _load_cluster_config(input_file)
62-
queue_name, compute_resource_name = None, None
63-
try:
64-
topology_config = CONFIG_HEADER + "\n"
65-
block_count = 0
66-
for queue_config in cluster_config["Scheduling"]["SlurmQueues"]:
67-
queue_name = queue_config["Name"]
68-
69-
# Retrieve capacity info from the queue_name, if there
70-
# queue_capacity_type = CAPACITY_TYPE_MAP.get(queue_config.get("CapacityType", "ONDEMAND"))
71-
# if queue_capacity_type != CAPACITY_TYPE_MAP.get("CAPACITY_BLOCK"):
72-
# log.info("ParallelCluster does not create topology for %s", queue_capacity_type)
73-
# continue
74-
75-
queue_capacity_reservation_target = queue_config.get("CapacityReservationTarget", {})
76-
queue_capacity_reservation = (
77-
queue_capacity_reservation_target.get("CapacityReservationId")
78-
if queue_capacity_reservation_target
79-
else None
80-
)
81-
82-
for compute_resource_config in queue_config["ComputeResources"]:
83-
compute_resource_name = compute_resource_config["Name"]
84-
compute_min_count = compute_resource_config["MinCount"]
85-
compute_max_count = compute_resource_config["MaxCount"]
86-
if compute_min_count == compute_max_count:
87-
node_type = "st"
88-
else:
89-
continue
90-
91-
capacity_reservation_target = compute_resource_config.get("CapacityReservationTarget", {})
92-
capacity_reservation = (
93-
capacity_reservation_target.get("CapacityReservationId", queue_capacity_reservation)
94-
if capacity_reservation_target
95-
else queue_capacity_reservation
57+
if block_sizes:
58+
min_block_size_list = min(list(map(int, block_sizes.split(","))))
59+
max_block_size_list = max(list(map(int, block_sizes.split(","))))
60+
61+
cluster_config = _load_cluster_config(input_file)
62+
queue_name, compute_resource_name = None, None
63+
try:
64+
topology_config = CONFIG_HEADER + "\n"
65+
block_count = 0
66+
for queue_config in cluster_config["Scheduling"]["SlurmQueues"]:
67+
queue_name = queue_config["Name"]
68+
69+
# Retrieve capacity reservation info from the queue config, if present
70+
# queue_capacity_type = CAPACITY_TYPE_MAP.get(queue_config.get("CapacityType", "ONDEMAND"))
71+
# if queue_capacity_type != CAPACITY_TYPE_MAP.get("CAPACITY_BLOCK"):
72+
# log.info("ParallelCluster does not create topology for %s", queue_capacity_type)
73+
# continue
74+
75+
queue_capacity_reservation_target = queue_config.get("CapacityReservationTarget", {})
76+
queue_capacity_reservation = (
77+
queue_capacity_reservation_target.get("CapacityReservationId")
78+
if queue_capacity_reservation_target
79+
else None
9680
)
97-
### Check for if reservation is for NVLink and size matches min_block_size_list
98-
# if compute_resource_config.get('InstanceType') == 'p6e-gb200.36xlarge':
99-
if min_block_size_list == compute_min_count or max_block_size_list == compute_max_count:
100-
block_count += 1
101-
### Each Capacity Reservation ID is a Capacity Block and we associate each slurm block with a single capacity Block
102-
topology_config += "BlockName=Block" + str(block_count)+ " Nodes=" + str(queue_name) + "-" + str(node_type) + "-" + str(compute_resource_name) + "-[1-" + str(compute_max_count) + "]\n"
103-
104-
topology_config += "BlockSizes="+ str(block_sizes)+"\n"
105-
except(KeyError, AttributeError) as e:
106-
if isinstance(e, KeyError):
107-
message = f"Unable to find key {e} in the configuration file."
108-
else:
109-
message = f"Error parsing configuration file. {e}. {traceback.format_exc()}."
110-
message += f" Queue: {queue_name}" if queue_name else ""
111-
log.error(message)
112-
raise CriticalError(message)
113-
114-
log.info("Writing Info %s", topology_config)
115-
log.info("Generating %s", output_file)
116-
with open(output_file, "w", encoding="utf-8") as output:
117-
output.write(topology_config)
11881

119-
log.info("Finished.")
82+
for compute_resource_config in queue_config["ComputeResources"]:
83+
compute_resource_name = compute_resource_config["Name"]
84+
compute_min_count = compute_resource_config["MinCount"]
85+
compute_max_count = compute_resource_config["MaxCount"]
86+
if compute_min_count == compute_max_count:
87+
node_type = "st"
88+
else:
89+
continue
90+
91+
capacity_reservation_target = compute_resource_config.get("CapacityReservationTarget", {})
92+
capacity_reservation = (
93+
capacity_reservation_target.get("CapacityReservationId", queue_capacity_reservation)
94+
if capacity_reservation_target
95+
else queue_capacity_reservation
96+
)
97+
### Check whether the reservation is for NVLink and its size matches min_block_size_list
98+
# if compute_resource_config.get('InstanceType') == 'p6e-gb200.36xlarge':
99+
if min_block_size_list == compute_min_count or max_block_size_list == compute_max_count:
100+
block_count += 1
101+
### Each Capacity Reservation ID is a Capacity Block, and we associate each Slurm block with a single Capacity Block
102+
topology_config += "BlockName=Block" + str(block_count)+ " Nodes=" + str(queue_name) + "-" + str(node_type) + "-" + str(compute_resource_name) + "-[1-" + str(compute_max_count) + "]\n"
103+
104+
topology_config += "BlockSizes="+ str(block_sizes)+"\n"
105+
except(KeyError, AttributeError) as e:
106+
if isinstance(e, KeyError):
107+
message = f"Unable to find key {e} in the configuration file."
108+
else:
109+
message = f"Error parsing configuration file. {e}. {traceback.format_exc()}."
110+
message += f" Queue: {queue_name}" if queue_name else ""
111+
log.error(message)
112+
raise CriticalError(message)
113+
114+
log.info("Writing Info %s", topology_config)
115+
log.info("Generating %s", output_file)
116+
with open(output_file, "w", encoding="utf-8") as output:
117+
output.write(topology_config)
118+
119+
log.info("Finished.")
120120

121121

122122
def cleanup_topology_config_file(file_path):

test/unit/slurm/test_topology_generator.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# limitations under the License.
1111

1212
import pytest
13+
import os
1314
from assertpy import assert_that
1415
from pcluster_topology_generator import (
1516
cleanup_topology_config_file,
@@ -27,13 +28,16 @@ def _assert_files_are_equal(file, expected_file):
2728
"no_capacity_block"
2829
])
2930
def test_generate_topology_config(test_datadir, tmpdir, file_name_suffix):
30-
block_sizes = "9,18" #if 'no' not in file_name_suffix else None
31+
block_sizes = "9,18" if 'no' not in file_name_suffix else None
3132
file_name = "sample_" + file_name_suffix + ".yaml"
3233
input_file_path = str(test_datadir / file_name)
3334
output_file_name = "topology_" + file_name_suffix + ".conf"
3435
output_file_path = f"{tmpdir}/{output_file_name}"
3536
generate_topology_config_file(output_file_path, input_file_path, block_sizes)
36-
_assert_files_are_equal(output_file_path, test_datadir / "expected_outputs" / output_file_name)
37+
if 'no' in file_name_suffix:
38+
assert_that(os.path.isfile(output_file_path)).is_equal_to(False)
39+
else:
40+
_assert_files_are_equal(output_file_path, test_datadir / "expected_outputs" / output_file_name)
3741

3842

3943
@pytest.mark.parametrize("file_exists", [

test/unit/slurm/test_topology_generator/test_generate_topology_config/expected_outputs/topology_no_capacity_block.conf

Lines changed: 0 additions & 3 deletions
This file was deleted.

test/unit/slurm/test_topology_generator/test_generate_topology_config/expected_outputs/topology_with_capacity_block.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
BlockName=Block1 Nodes=capacity-block-queue1-st-cb-gb200-1-[1-9]
44
BlockName=Block2 Nodes=capacity-block-queue2-st-cb-gb200-2-[1-18]
55
BlockName=Block3 Nodes=capacity-block-queue2-st-cb-gb200-3-[1-9]
6-
BlockSizes=9,18
6+
BlockSizes=9,18

0 commit comments

Comments
 (0)