Skip to content

Commit

Permalink
Merge pull request #461 from nebius/release-1-18-2/0
Browse files (browse the repository at this point in the history)
Release 1.18.2
  • Loading branch information
rdjjke authored Feb 18, 2025
2 parents 8d2265e + d859c67 commit ff2fef7
Show file tree
Hide file tree
Showing 17 changed files with 56 additions and 24 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.18.1
1.18.2
6 changes: 6 additions & 0 deletions api/v1/slurmcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,12 @@ type SlurmConfig struct {
// +kubebuilder:validation:Optional
// +kubebuilder:default=86400
MinJobAge *int32 `json:"minJobAge,omitempty"`
// MessageTimeout specifies the time, in seconds, permitted for a round-trip communication to complete.
// See https://slurm.schedmd.com/slurm.conf.html#OPT_MessageTimeout.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default=30
MessageTimeout *int32 `json:"messageTimeout,omitempty"`
}

type MPIConfig struct {
Expand Down
5 changes: 5 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1541,6 +1541,13 @@ spec:
description: Keep N last jobs in controller memory
format: int32
type: integer
messageTimeout:
default: 30
description: |-
MessageTimeout specifies the time, in seconds, permitted for a round-trip communication to complete.
See https://slurm.schedmd.com/slurm.conf.html#OPT_MessageTimeout.
format: int32
type: integer
minJobAge:
default: 86400
description: Don't remove jobs from controller memory after some
Expand Down
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ resources:
images:
- name: controller
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
newTag: 1.18.1
newTag: 1.18.2
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ spec:
value: "false"
- name: SLURM_OPERATOR_WATCH_NAMESPACES
value: "*"
image: controller:1.18.1
image: controller:1.18.2
imagePullPolicy: Always
name: manager
securityContext:
Expand Down
4 changes: 2 additions & 2 deletions helm/slurm-cluster-storage/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: helm-slurm-cluster-storage
description: A Helm chart for Kubernetes
type: application
version: "1.18.1"
appVersion: "1.18.1"
version: "1.18.2"
appVersion: "1.18.2"
4 changes: 2 additions & 2 deletions helm/slurm-cluster/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-slurm-cluster
description: A Helm chart for Kubernetes
type: application
version: "1.18.1"
appVersion: "1.18.1"
version: "1.18.2"
appVersion: "1.18.2"
kubeVersion: ">=1.29.0-0"
18 changes: 9 additions & 9 deletions helm/slurm-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -408,14 +408,14 @@ telemetry: {}
# otelCollectorPort: 8429

images:
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.18.1-jammy-slurm24.05.5"
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.18.1-jammy-slurm24.05.5"
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.18.1-jammy-slurm24.05.5"
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.18.1-jammy-slurm24.05.5"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.18.1-jammy-slurm24.05.5"
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.18.1-jammy-slurm24.05.5"
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.18.1-jammy-slurm24.05.5"
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.18.1-jammy-slurm24.05.5"
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.18.1-jammy-slurm24.05.5"
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.18.2-jammy-slurm24.05.5"
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.18.2-jammy-slurm24.05.5"
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.18.2-jammy-slurm24.05.5"
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.18.2-jammy-slurm24.05.5"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.18.2-jammy-slurm24.05.5"
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.18.2-jammy-slurm24.05.5"
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.18.2-jammy-slurm24.05.5"
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.18.2-jammy-slurm24.05.5"
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.18.2-jammy-slurm24.05.5"
mariaDB: "docker-registry1.mariadb.com/library/mariadb:11.4.3"
rebooter: "cr.eu-north1.nebius.cloud/soperator/rebooter:1.17.0"
4 changes: 2 additions & 2 deletions helm/soperator-crds/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-soperator-crds
description: A Helm chart for Kubernetes
type: application
version: 1.18.1
appVersion: "1.18.1"
version: 1.18.2
appVersion: "1.18.2"
kubeVersion: ">=1.29.0-0"
7 changes: 7 additions & 0 deletions helm/soperator-crds/templates/slurmcluster-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1540,6 +1540,13 @@ spec:
description: Keep N last jobs in controller memory
format: int32
type: integer
messageTimeout:
default: 30
description: |-
MessageTimeout specifies the time, in seconds, permitted for a round-trip communication to complete.
See https://slurm.schedmd.com/slurm.conf.html#OPT_MessageTimeout.
format: int32
type: integer
minJobAge:
default: 86400
description: Don't remove jobs from controller memory after some
Expand Down
4 changes: 2 additions & 2 deletions helm/soperator/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-soperator
description: A Helm chart for Kubernetes
type: application
version: 1.18.1
appVersion: "1.18.1"
version: 1.18.2
appVersion: "1.18.2"
kubeVersion: ">=1.29.0-0"
7 changes: 7 additions & 0 deletions helm/soperator/crds/slurmcluster-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1540,6 +1540,13 @@ spec:
description: Keep N last jobs in controller memory
format: int32
type: integer
messageTimeout:
default: 30
description: |-
MessageTimeout specifies the time, in seconds, permitted for a round-trip communication to complete.
See https://slurm.schedmd.com/slurm.conf.html#OPT_MessageTimeout.
format: int32
type: integer
minJobAge:
default: 86400
description: Don't remove jobs from controller memory after some
Expand Down
2 changes: 1 addition & 1 deletion helm/soperator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ controllerManager:
slurmOperatorWatchNamespaces: '*'
image:
repository: cr.eu-north1.nebius.cloud/soperator/slurm-operator
tag: 1.18.1
tag: 1.18.2
imagePullPolicy: Always
resources:
limits:
Expand Down
2 changes: 1 addition & 1 deletion images/common/scripts/complement_jail.sh
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,6 @@ pushd "${jaildir}"
# For $worker node only
if [ -n "$worker" ]; then
echo "Update linker cache inside the jail"
flock etc/complement_jail_ldconfig.lock -c "chroot \"${jaildir}\" /usr/sbin/ldconfig"
flock --nonblock etc/complement_jail_ldconfig.lock -c "chroot \"${jaildir}\" /usr/sbin/ldconfig" || true
fi
popd
2 changes: 1 addition & 1 deletion internal/check/maintanence.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package check
import "nebius.ai/slurm-operator/internal/consts"

func IsMaintenanceActive(maintenance *consts.MaintenanceMode) bool {
return maintenance != nil && *maintenance != consts.ModeNone
return maintenance != nil && *maintenance != consts.ModeNone && *maintenance != consts.ModeSkipPopulate
}

func IsModeDownscaleAndDeletePopulate(maintenance *consts.MaintenanceMode) bool {
Expand Down
2 changes: 1 addition & 1 deletion internal/consts/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
package consts

const (
VersionCR = "1.18.1"
VersionCR = "1.18.2"
)

0 comments on commit ff2fef7

Please sign in to comment.