diff --git a/clusterscope/cli.py b/clusterscope/cli.py
index 96d1cc4..4bbcb43 100644
--- a/clusterscope/cli.py
+++ b/clusterscope/cli.py
@@ -195,7 +195,33 @@ def task():
     default="json",
     help="Format to output the job requirements in",
 )
-def slurm(num_gpus: int, num_tasks_per_node: int, output_format: str, partition: str):
+@click.option(
+    "--account",
+    type=str,
+    default=None,
+    help="SLURM account to charge resources to (optional)",
+)
+@click.option(
+    "--qos",
+    type=str,
+    default=None,
+    help="Quality of Service (QoS) specification for the job (optional)",
+)
+@click.option(
+    "--time",
+    type=str,
+    default=None,
+    help="Time limit for the job (format: HH:MM:SS or days-HH:MM:SS, optional)",
+)
+def slurm(
+    num_gpus: int,
+    num_tasks_per_node: int,
+    output_format: str,
+    partition: str,
+    account: str,
+    qos: str,
+    time: str,
+):
     """Generate job requirements for a task of a Slurm job."""
     partitions = get_partition_info()
     partition_names = [p.name for p in partitions]
@@ -210,6 +236,9 @@ def slurm(num_gpus: int, num_tasks_per_node: int, output_format: str, partition:
         partition=partition,
         num_gpus=num_gpus,
         num_tasks_per_node=num_tasks_per_node,
+        account=account,
+        qos=qos,
+        time=time,
     )
 
     # Route to the correct format method based on CLI option
diff --git a/clusterscope/cluster_info.py b/clusterscope/cluster_info.py
index 1b03e3e..12aabe0 100644
--- a/clusterscope/cluster_info.py
+++ b/clusterscope/cluster_info.py
@@ -27,6 +27,9 @@ class ResourceShape(NamedTuple):
     tasks_per_node: int
     gpus_per_node: int
     slurm_partition: str
+    account: Optional[str] = None
+    qos: Optional[str] = None
+    time: Optional[str] = None
 
     def to_json(self) -> str:
         """Convert ResourceShape to JSON format.
@@ -34,16 +37,8 @@ def to_json(self) -> str:
         Returns:
             str: JSON representation of the resource requirements
         """
-        mem_gb = parse_memory_to_gb(self.memory)
-
-        data = {
-            "cpu_cores": self.cpu_cores,
-            "memory": self.memory,
-            "tasks_per_node": self.tasks_per_node,
-            "mem_gb": mem_gb,
-            "gpus_per_node": self.gpus_per_node,
-            "slurm_partition": self.slurm_partition,
-        }
+        data = {k: v for k, v in self._asdict().items() if v is not None}
+        data["mem_gb"] = parse_memory_to_gb(data["memory"])
         return json.dumps(data, indent=2)
 
     def to_sbatch(self) -> str:
@@ -60,6 +55,10 @@ def to_sbatch(self) -> str:
             f"#SBATCH --gres=gpu:{self.gpus_per_node}",
             f"#SBATCH --partition={self.slurm_partition}",
         ]
+        for attr_name in ["account", "qos", "time"]:
+            value = getattr(self, attr_name)
+            if value is not None:
+                lines.append(f"#SBATCH --{attr_name}={value}")
         return "\n".join(lines)
 
     def to_srun(self) -> str:
@@ -76,6 +75,10 @@ def to_srun(self) -> str:
             f"--gres=gpu:{self.gpus_per_node}",
             f"--partition={self.slurm_partition}",
         ]
+        for attr_name in ["account", "qos", "time"]:
+            value = getattr(self, attr_name)
+            if value is not None:
+                cmd_parts.append(f"--{attr_name}={value}")
         return " ".join(cmd_parts)
 
     def to_submitit(self) -> str:
@@ -90,9 +93,18 @@ def to_submitit(self) -> str:
             "slurm_partition": self.slurm_partition,
             "cpus_per_task": self.cpu_cores,
             "mem_gb": mem_gb,
-            "tasks_per_node": self.tasks_per_node,
-            "gpus_per_node": self.gpus_per_node,
         }
+        for attr_name in [
+            "slurm_partition",
+            "tasks_per_node",
+            "gpus_per_node",
+            "account",
+            "qos",
+            "time",
+        ]:
+            value = getattr(self, attr_name)
+            if value is not None:
+                params[attr_name] = value
         return json.dumps(params, indent=2)
 
 
@@ -232,7 +244,11 @@ def get_total_gpus_per_node(self) -> int:
         return max(total_gpus, 1)  # Ensure at least 1 to avoid division by zero
 
     def get_task_resource_requirements(
-        self, partition: str, num_gpus: int, num_tasks_per_node: int = 1
+        self,
+        partition: str,
+        num_gpus: int,
+        num_tasks_per_node: int = 1,
+        **kwargs,
     ) -> ResourceShape:
         """Calculate resource requirements for better GPU packing based on node's GPU configuration.
 
@@ -300,6 +316,7 @@ def get_task_resource_requirements(
             memory=sbatch_memory,
             tasks_per_node=num_tasks_per_node,
             gpus_per_node=num_gpus,
+            **kwargs,
         )
 
     def get_array_job_requirements(
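
Illustration (not part of the patch): a minimal sketch of how the new optional fields would flow through ResourceShape.to_sbatch(); the field values below are made up for the example.

    from clusterscope.cluster_info import ResourceShape

    # Construct a shape with the new optional fields set; values are illustrative.
    shape = ResourceShape(
        cpu_cores=16,
        memory="64G",
        tasks_per_node=1,
        gpus_per_node=2,
        slurm_partition="gpu",
        account="my_account",  # new optional field
        qos="normal",          # new optional field
        time="04:00:00",       # new optional field
    )
    print(shape.to_sbatch())
    # Expected: the usual #SBATCH lines plus "#SBATCH --account=...",
    # "#SBATCH --qos=..." and "#SBATCH --time=..."; fields left as None are omitted.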