Skip to content

Commit

Permalink
Merge pull request #440 from nebius/dev
Browse files Browse the repository at this point in the history
Soperator release 1.18.0
  • Loading branch information
rdjjke authored Feb 13, 2025
2 parents 9b33f35 + 1b65493 commit b580718
Show file tree
Hide file tree
Showing 151 changed files with 4,604 additions and 1,434 deletions.
2 changes: 1 addition & 1 deletion .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ updates:
target-branch: "dev"

- package-ecosystem: gomod
directory: /images/jail/gpubench
directory: /images/worker/gpubench
schedule:
interval: daily
target-branch: "dev"
17 changes: 7 additions & 10 deletions .github/workflows/gpubench_only.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Build gpubench only
on:
push:
paths:
- 'images/jail/gpubench/**'
- 'images/worker/gpubench/**'

permissions:
contents: read
Expand All @@ -20,7 +20,7 @@ jobs:

steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
with:
egress-policy: audit

Expand All @@ -43,15 +43,15 @@ jobs:

steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
with:
egress-policy: audit

- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install GO
uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
with:
go-version-file: 'go.mod'

Expand All @@ -64,10 +64,10 @@ jobs:
run: make test-version-sync

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@6524bf65af31da8d45b59e8c27de4bd072b392f5 # v3.8.0
uses: docker/setup-buildx-action@f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca # v3.9.0

- name: Log in to the Github Container registry
uses: docker/login-action@7ca345011ac4304463197fac0e56eab1bc7e6af0
uses: docker/login-action@327cd5a69de6c009b9ce71bce8395f28e651bf99
with:
registry: ghcr.io
username: ${{ github.actor }}
Expand All @@ -81,12 +81,9 @@ jobs:
OPERATOR_IMAGE_TAG=$(make get-operator-tag-version UNSTABLE=${UNSTABLE})
echo "Running gpubench tests"
cd ./images/jail/gpubench/
cd ./images/worker/gpubench/
go test
cd -
echo "Removing previous jail rootfs tar archive"
rm -rf images/jail_rootfs.tar
echo "Building tarball for jail"
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=jail DOCKERFILE=jail/jail.dockerfile DOCKER_OUTPUT="--output type=tar,dest=jail_rootfs.tar"
19 changes: 10 additions & 9 deletions .github/workflows/one_job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ on:
- 'PROJECT'
- 'README.md'
- 'SECURITY.md'
- 'images/jail/gpubench/**'
pull_request:
branches:
- main
- 'images/worker/gpubench/**'


permissions:
contents: read
Expand All @@ -30,7 +28,7 @@ jobs:

steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
with:
egress-policy: audit

Expand All @@ -54,15 +52,15 @@ jobs:

steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
with:
egress-policy: audit

- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install GO
uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
with:
go-version-file: 'go.mod'

Expand All @@ -75,10 +73,10 @@ jobs:
run: make test-version-sync

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@6524bf65af31da8d45b59e8c27de4bd072b392f5 # v3.8.0
uses: docker/setup-buildx-action@f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca # v3.9.0

- name: Log in to the Github Container registry
uses: docker/login-action@7ca345011ac4304463197fac0e56eab1bc7e6af0
uses: docker/login-action@327cd5a69de6c009b9ce71bce8395f28e651bf99
with:
registry: ghcr.io
username: ${{ github.actor }}
Expand Down Expand Up @@ -120,6 +118,9 @@ jobs:
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd DOCKERFILE=restd/slurmrestd.dockerfile
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=rebooter DOCKERFILE=rebooter.dockerfile IMAGE_VERSION="$OPERATOR_IMAGE_TAG"
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=rebooter IMAGE_VERSION="$OPERATOR_IMAGE_TAG"
echo "Common images were built"
echo "Removing previous jail rootfs tar archive"
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.23@sha256:7ea4c9dcb2b97ff8ee80a67db3d44f98c8ffa0d191399197007d8459c1453041 AS operator_builder
FROM golang:1.23@sha256:927112936d6b496ed95f55f362cc09da6e3e624ef868814c56d55bd7323e0959 AS operator_builder

ARG GO_LDFLAGS=""
ARG BUILD_TIME
Expand All @@ -16,7 +16,7 @@ RUN GOOS=$GOOS GOARCH=$GOARCH CGO_ENABLED=$CGO_ENABLED GO_LDFLAGS=$GO_LDFLAGS \
go build -o slurm_operator ./cmd/

#######################################################################################################################
FROM alpine:latest@sha256:b97e2a89d0b9e4011bb88c02ddf01c544b8c781acf1f4d559e7c8f12f1047ac3 AS slurm-operator
FROM alpine:latest@sha256:56fa17d2a7e7f168a043a2712e63aed1f8543aeafdcee47c58dcffe38ed51099 AS slurm-operator

COPY --from=operator_builder /operator/slurm_operator /usr/bin/

Expand Down
11 changes: 7 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ SHELL = /usr/bin/env bash -o pipefail
.SHELLFLAGS = -ec

# Limit the scope of generation otherwise it will try to generate configs for non-controller code
GENPATH = "./api/v1;./internal/controller/..."
GENPATH = "./api/v1;"

CHART_PATH = helm
CHART_OPERATOR_PATH = $(CHART_PATH)/soperator
CHART_OPERATOR_CRDS_PATH = $(CHART_PATH)/soperator-crds
CHART_CLUSTER_PATH = $(CHART_PATH)/slurm-cluster
CHART_STORAGE_PATH = $(CHART_PATH)/slurm-cluster-storage

SLURM_VERSION = 24.05.2
SLURM_VERSION = 24.05.5
UBUNTU_VERSION = jammy
VERSION = $(shell cat VERSION)

Expand Down Expand Up @@ -78,8 +78,9 @@ help: ## Display this help.

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths=$(GENPATH) output:crd:artifacts:config=config/crd/bases

$(CONTROLLER_GEN) crd webhook paths=$(GENPATH) output:crd:artifacts:config=config/crd/bases
$(CONTROLLER_GEN) rbac:roleName=manager-role paths="./internal/controller/clustercontroller/..." output:artifacts:config=config/rbac/clustercontroller/
$(CONTROLLER_GEN) rbac:roleName=node-configurator-role paths="./internal/rebooter/..." output:artifacts:config=config/rbac/node-configurator/
.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
$(CONTROLLER_GEN) object paths=$(GENPATH)
Expand Down Expand Up @@ -229,6 +230,8 @@ ifndef DOCKERFILE
endif
ifeq (${IMAGE_NAME},slurm-operator)
docker build $(DOCKER_BUILD_ARGS) --tag $(IMAGE_REPO)/${IMAGE_NAME}:${IMAGE_VERSION} --target ${IMAGE_NAME} ${DOCKER_IGNORE_CACHE} ${DOCKER_LOAD} ${DOCKER_BUILD_PLATFORM} -f ${DOCKERFILE} ${DOCKER_OUTPUT} .
else ifeq ($(IMAGE_NAME),rebooter)
docker build $(DOCKER_BUILD_ARGS) --tag $(IMAGE_REPO)/${IMAGE_NAME}:${IMAGE_VERSION} --target ${IMAGE_NAME} ${DOCKER_IGNORE_CACHE} ${DOCKER_LOAD} ${DOCKER_BUILD_PLATFORM} -f ${DOCKERFILE} ${DOCKER_OUTPUT} .
else
cd images && docker build $(DOCKER_BUILD_ARGS) --tag $(IMAGE_REPO)/${IMAGE_NAME}:${IMAGE_VERSION} --target ${IMAGE_NAME} ${DOCKER_IGNORE_CACHE} ${DOCKER_LOAD} ${DOCKER_BUILD_PLATFORM} -f ${DOCKERFILE} ${DOCKER_OUTPUT} .
endif
Expand Down
9 changes: 9 additions & 0 deletions PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,13 @@ resources:
webhooks:
validation: true
webhookVersion: v1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: nebius.ai
group: slurm
kind: NodeConfigurator
path: nebius.ai/slurm-operator/api/v1alpha1
version: v1alpha1
version: "3"
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,14 @@ This helps cluster administrators and users monitor resource utilization, enforc


## ❌ Limitations
- **GPUs are required**. Although support for CPU-only clusters or partitions seems pretty straightforward, we haven't
implemented it yet.
- **GPU-only or CPU-only**. A Slurm cluster can currently be either GPU-only or CPU-only.
Support for mixed configurations based on nodesets (e.g., separate GPU and CPU nodesets) has not been implemented yet.
- **Single-partition clusters**. Slurm's ability to split clusters into several partitions isn't supported now.
- **Software versions**. The list of software versions we currently support is quite short.
- Linux: Ubuntu [20.04](https://releases.ubuntu.com/focal/) and
[22.04](https://releases.ubuntu.com/jammy/).
- Slurm: versions `23.11.6` and `24.05.3`.
- CUDA: version [12.2.2](https://developer.nvidia.com/cuda-12-2-2-download-archive).
- Kubernetes: >= [1.29](https://kubernetes.io/blog/2023/08/15/kubernetes-v1-28-release/).
- Linux: Ubuntu [22.04](https://releases.ubuntu.com/jammy/).
- Slurm: version `24.05.5`.
- CUDA: version [12.4.1](https://developer.nvidia.com/cuda-12-4-1-download-archive).
- Kubernetes: >= [1.29](https://kubernetes.io/blog/2023/12/13/kubernetes-v1-29-release/).
- Versions of some preinstalled software packages can't be changed.


Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.17.0
1.18.0
101 changes: 98 additions & 3 deletions api/v1/slurmcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ type SlurmClusterSpec struct {
// - none: No maintenance is performed. The cluster operates normally.
// - downscale: Scales down all components to 0.
// - downscaleAndDeletePopulateJail: Scales down all components to 0 and deletes the kubernetes Kind Jobs populateJail.
// - downscaleAndOverwritePopulateJail: Scales down all components to 0 and overwrite populateJail (same as overwrite=true).
// - skipPopulateJail: Skips the execution of the populateJail job during maintenance.
//
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum=none;downscale;downscaleAndDeletePopulateJail;skipPopulateJail
// +kubebuilder:validation:Enum=none;downscale;downscaleAndDeletePopulateJail;downscaleAndOverwritePopulateJail;skipPopulateJail
// +kubebuilder:default="none"
Maintenance *consts.MaintenanceMode `json:"maintenance,omitempty"`

Expand Down Expand Up @@ -83,8 +84,15 @@ type SlurmClusterSpec struct {
// SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "", maxJobCount: 10000, minJobAge: 86400}
// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", epilog: "", prolog: "", taskPluginParam: "", maxJobCount: 10000, minJobAge: 86400}
SlurmConfig SlurmConfig `json:"slurmConfig,omitempty"`

// MPIConfig represents the PMIx configuration in mpi.conf. Not all options are supported.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default={pmixEnv: "OMPI_MCA_btl_tcp_if_include=eth0"}
MPIConfig MPIConfig `json:"mpiConfig,omitempty"`

// Generate and set default AppArmor profile for the Slurm worker and login nodes. The Security Profiles Operator must be installed.
//
// +kubebuilder:default=false
Expand Down Expand Up @@ -114,6 +122,16 @@ type SlurmConfig struct {
// +kubebuilder:default="Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
// +kubebuilder:validation:Pattern="^((Accrue|Agent|AuditRPCs|Backfill|BackfillMap|BurstBuffer|Cgroup|ConMgr|CPU_Bind|CpuFrequency|Data|DBD_Agent|Dependency|Elasticsearch|Energy|Federation|FrontEnd|Gres|Hetjob|Gang|GLOB_SILENCE|JobAccountGather|JobComp|JobContainer|License|Network|NetworkRaw|NodeFeatures|NO_CONF_HASH|Power|Priority|Profile|Protocol|Reservation|Route|Script|SelectType|Steps|Switch|TLS|TraceJobs|Triggers)(,)?)+$"
DebugFlags *string `json:"debugFlags,omitempty"`
// Defines specific file to run the epilog when job ends. Default value is no epilog
//
// +kubebuilder:validation:Optional
// +kubebuilder:default=""
Epilog *string `json:"epilog,omitempty"`
// Defines specific file to run the prolog when job starts. Default value is no prolog
//
// +kubebuilder:validation:Optional
// +kubebuilder:default=""
Prolog *string `json:"prolog,omitempty"`
// Additional parameters for the task plugin
//
// +kubebuilder:validation:Optional
Expand All @@ -132,6 +150,16 @@ type SlurmConfig struct {
MinJobAge *int32 `json:"minJobAge,omitempty"`
}

// MPIConfig represents the PMIx configuration in mpi.conf. Not all options are supported.
type MPIConfig struct {
	// PMIxEnv is a semicolon-separated list of environment variables to be set in job environments for use by PMIx.
	// Defaults to "OMPI_MCA_btl_tcp_if_include=eth0" to prevent OpenMPI from selecting the "lo" and "docker" interfaces.
	//
	// +kubebuilder:validation:Optional
	// +kubebuilder:default="OMPI_MCA_btl_tcp_if_include=eth0"
	PMIxEnv string `json:"pmixEnv,omitempty"`
}

type PartitionConfiguration struct {
// ConfigType
// +kubebuilder:validation:Enum=default;custom
Expand Down Expand Up @@ -308,7 +336,8 @@ type NCCLArguments struct {
// +kubebuilder:default="0"
ThresholdMoreThan string `json:"thresholdMoreThan,omitempty"`

// UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test
// UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test.
// According to NVIDIA these env vars should be used only for debugging.
// https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
//
// +kubebuilder:validation:Optional
Expand Down Expand Up @@ -570,6 +599,8 @@ type SlurmdbdConfig struct {

type AccountingSlurmConf struct {
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Pattern="^((Billing|CPU|Mem|VMem|Node|Energy|Pages|FS/Disk|FS/Lustre|Gres/gpu|Gres/gpu:tesla|Gres/gpu:volta)(,)?)+$"
// +kubebuilder:default="Billing,CPU,Mem,Node,VMem"
AccountingStorageTRES *string `json:"accountingStorageTRES,omitempty"`
// +kubebuilder:validation:Optional
AccountingStoreFlags *string `json:"accountingStoreFlags,omitempty"`
Expand All @@ -581,6 +612,7 @@ type AccountingSlurmConf struct {
AcctGatherProfileType *string `json:"acctGatherProfileType,omitempty"`
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum="jobacct_gather/linux";"jobacct_gather/cgroup";"jobacct_gather/none"
// +kubebuilder:default="jobacct_gather/cgroup"
JobAcctGatherType *string `json:"jobAcctGatherType,omitempty"`
// +kubebuilder:validation:Optional
// +kubebuilder:default=30
Expand Down Expand Up @@ -681,6 +713,68 @@ type SlurmNodeWorker struct {
//
// +kubebuilder:validation:Optional
SlurmNodeExtra string `json:"slurmNodeExtra,omitempty"`

// PriorityClass defines the priority class for the Slurm worker node
//
// +kubebuilder:validation:Optional
PriorityClass string `json:"priorityClass,omitempty"`
// Rebooter defines the configuration for the Slurm worker node rebooter.
// This is an alpha feature and will be moved to a separate CRD in the future.
//
// +kubebuilder:validation:Optional
Rebooter Rebooter `json:"rebooter"`
}

// Rebooter defines the configuration for the Slurm worker node rebooter.
type Rebooter struct {
// enabled defines whether the rebooter is enabled.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default=false
Enabled bool `json:"enabled"`

// Image defines the rebooter container image.
//
// +kubebuilder:validation:Optional
Image string `json:"image"`

// imagePullPolicy defines the image pull policy for the rebooter container image.
//
// +kubebuilder:validation:Enum=Always;Never;IfNotPresent
// +kubebuilder:validation:Optional
// +kubebuilder:default="IfNotPresent"
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`

// Resources defines the [corev1.ResourceList] for the rebooter container.
//
// +kubebuilder:validation:Optional
Resources corev1.ResourceList `json:"resources,omitempty"`

// evictionMethod defines the method of eviction for the Slurm worker node.
// Only "evict" is currently accepted; "drain" is not supported yet.
//
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum="evict"
// +kubebuilder:default="evict"
EvictionMethod string `json:"evictionMethod,omitempty"`

// logLevel defines the log level for the rebooter.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default="info"
// +kubebuilder:validation:Enum="debug";"info";"warn";"error"
LogLevel string `json:"logLevel,omitempty"`

// Namespace defines the namespace where the rebooter will be deployed.
// By default, this is the same namespace as the soperator.
//
// +kubebuilder:validation:Optional
Namespace string `json:"namespace,omitempty"`

// serviceAccountName defines the service account name for the rebooter.
//
// +kubebuilder:validation:Optional
ServiceAccountName string `json:"serviceAccountName,omitempty"`
}

// SlurmNodeWorkerVolumes defines the volumes for the Slurm worker node
Expand Down Expand Up @@ -1019,6 +1113,7 @@ const (
ConditionClusterWorkersAvailable = "WorkersAvailable"
ConditionClusterLoginAvailable = "LoginAvailable"
ConditionClusterAccountingAvailable = "AccountingAvailable"
ConditionClusterPopulateJailMode = "PopulateJailMode"

PhaseClusterReconciling = "Reconciling"
PhaseClusterNotAvailable = "Not available"
Expand Down
Loading

0 comments on commit b580718

Please sign in to comment.