Skip to content

Commit

Permalink
Merge pull request #168 from nebius/dev
Browse files Browse the repository at this point in the history
Release 1.15.1
  • Loading branch information
asteny authored Nov 7, 2024
2 parents 0574392 + f0b8b84 commit 8766049
Show file tree
Hide file tree
Showing 94 changed files with 1,767 additions and 169 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/github_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:

- name: Generate changelog
id: changelog
uses: mikepenz/release-changelog-builder-action@f3fc77b47b74e78971fffecb2102ae6eac9a44d6 # v5
uses: mikepenz/release-changelog-builder-action@a57c1b7c90e56d9c8b26a6ed5d1eed159369e117 # v5
with:
mode: "PR"
fromTag: ${{ needs.tag.outputs.previous-tag }}
Expand Down Expand Up @@ -117,7 +117,7 @@ jobs:
token: ${{ secrets.GITHUB_TOKEN }}

- name: Create GitHub Release with changelog
uses: softprops/action-gh-release@c062e08bd532815e2082a85e87e3ef29c3e6d191 # v2.0.8
uses: softprops/action-gh-release@e7a8f85e1c67a31e6ed99a94b41bd0b71bbee6b8 # v2.0.9
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
Expand Down
13 changes: 9 additions & 4 deletions .github/workflows/one_job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ jobs:
with:
egress-policy: audit

- name: Debug vars
run: echo "UNSTABLE - is ${{ needs.pre-build.outputs.unstable }}"

- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

Expand All @@ -63,14 +60,19 @@ jobs:
with:
go-version-file: 'go.mod'

- name: Debug vars
run: |
echo "UNSTABLE - is ${{ needs.pre-build.outputs.unstable }}"
make get-version UNSTABLE=${{ needs.pre-build.outputs.unstable }}
- name: Check if version synced
run: make test-version-sync

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3.7.1

- name: Log in to the Github Container registry
uses: docker/login-action@06895751d15a223ec091bea144ad5c7f50d228d0
uses: docker/login-action@7ca345011ac4304463197fac0e56eab1bc7e6af0
with:
registry: ghcr.io
username: ${{ github.actor }}
Expand Down Expand Up @@ -108,6 +110,9 @@ jobs:
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=exporter DOCKERFILE=exporter/exporter.dockerfile
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=exporter
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd DOCKERFILE=restd/slurmrestd.dockerfile
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd
echo "Common images were built"
Expand Down
8 changes: 2 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,12 @@ VALUES_VERSION = $(shell $(YQ) '.images.slurmctld' helm/slurm-cluster/values
OPERATOR_IMAGE_TAG = $(VERSION)

ifeq ($(shell uname), Darwin)
SHA_CMD = shasum -a 256
SED_COMMAND = sed -i '' -e
USER_MAIL = $(shell git config user.email)
else
SHA_CMD = sha256sum
SED_COMMAND = sed -i -e
USER_MAIL = $(shell git log -1 --pretty=format:'%ae')
endif
ifeq ($(UNSTABLE), true)
USER_MAIL = $(shell git log -1 --pretty=format:'%ae')
SHORT_SHA = $(shell echo -n "$(USER_MAIL)-$(VERSION)" | $(SHA_CMD) | cut -c1-8)
SHORT_SHA = $(shell git rev-parse --short=8 HEAD)
OPERATOR_IMAGE_TAG = $(VERSION)-$(SHORT_SHA)
IMAGE_VERSION = $(VERSION)-$(UBUNTU_VERSION)-slurm$(SLURM_VERSION)-$(SHORT_SHA)
IMAGE_REPO = $(NEBIUS_REPO)-unstable
Expand Down Expand Up @@ -177,6 +172,7 @@ sync-version: yq ## Sync versions from file
@echo 'Syncing helm/slurm-cluster/values.yaml'
@$(YQ) -i ".images.ncclBenchmark = \"$(IMAGE_REPO)/nccl_benchmark:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
@$(YQ) -i ".images.slurmctld = \"$(IMAGE_REPO)/controller_slurmctld:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
@$(YQ) -i ".images.slurmrestd = \"$(IMAGE_REPO)/slurmrestd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
@$(YQ) -i ".images.slurmdbd = \"$(IMAGE_REPO)/controller_slurmdbd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
@$(YQ) -i ".images.slurmd = \"$(IMAGE_REPO)/worker_slurmd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
@$(YQ) -i ".images.sshd = \"$(IMAGE_REPO)/login_sshd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ Everything specific to Nebius AI is contained in a separate repository:
### Other clouds and on-premises
> [!IMPORTANT]
> When using the soperator, it is important that the CNI supports preserving the client source IP.
> Therefore, if kube-proxy is configured in IPVS mode, or if you're using CNIplugins like kube-router or Antrea Proxy,
> Therefore, if kube-proxy is configured in IPVS mode, or if you're using CNI plugins like kube-router or Antrea Proxy,
the operator will not work.
> This operator has been tested with
the[Cilium network plugin](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/network-plugins/)
the [Cilium network plugin](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/network-plugins/)
> running in
[kube-proxy replacement mode](https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#kubernetes-without-kube-proxy).

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.14.14
1.15.1
47 changes: 46 additions & 1 deletion api/v1/slurmcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,13 @@ type PopulateJail struct {
// +kubebuilder:validation:Required
Image string `json:"image"`

// ImagePullPolicy defines the image pull policy
//
// +kubebuilder:validation:Enum=Always;Never;IfNotPresent
// +kubebuilder:validation:Optional
// +kubebuilder:default="IfNotPresent"
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`

// K8sNodeFilterName defines the Kubernetes node filter name associated with the Slurm node.
// Must correspond to the name of one of [K8sNodeFilter]
//
Expand Down Expand Up @@ -175,6 +182,13 @@ type NCCLBenchmark struct {
// +kubebuilder:validation:Required
Image string `json:"image"`

// ImagePullPolicy defines the image pull policy
//
// +kubebuilder:validation:Enum=Always;Never;IfNotPresent
// +kubebuilder:validation:Optional
// +kubebuilder:default="IfNotPresent"
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`

// NCCLArguments define nccl settings
//
// +kubebuilder:validation:Optional
Expand Down Expand Up @@ -316,6 +330,24 @@ type SlurmNodes struct {
// TODO: Making exporter optional requires SlurmNode.K8sNodeFilterName to be optional.
// +kubebuilder:validation:Required
Exporter SlurmExporter `json:"exporter"`

Rest SlurmRest `json:"rest"`
}

// SlurmRest represents the Slurm REST API configuration
type SlurmRest struct {
// SlurmNode is embedded; the `json:",inline"` tag marks its fields as belonging
// to the same JSON object level as the fields declared below
SlurmNode `json:",inline"`

// Enabled defines whether the SlurmRest is enabled
//
// +kubebuilder:validation:Optional
// +kubebuilder:default=false
Enabled bool `json:"enabled,omitempty"`

// SlurmRestNode represents the Slurm REST API daemon configuration.
// It is serialized under the "rest" JSON key
//
// +kubebuilder:validation:Optional
SlurmRestNode NodeContainer `json:"rest,omitempty"`
}

// SlurmNodeAccounting represents the Slurm accounting configuration
Expand Down Expand Up @@ -666,7 +698,7 @@ type ExporterContainer struct {
PodTemplateNameRef *string `json:"podTemplateNameRef,omitempty"`
}

// SlurmExporterVolumes define the volumes for the Slurm controller node
// SlurmExporterVolumes define the volumes for the Slurm exporter node
type SlurmExporterVolumes struct {
// Jail represents the jail data volume configuration
//
Expand All @@ -693,6 +725,13 @@ type NodeContainer struct {
// +kubebuilder:validation:Required
Image string `json:"image"`

// ImagePullPolicy defines the image pull policy
//
// +kubebuilder:validation:Enum=Always;Never;IfNotPresent
// +kubebuilder:validation:Optional
// +kubebuilder:default="IfNotPresent"
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`

// Port defines the port the container exposes
//
// +kubebuilder:validation:Optional
Expand All @@ -709,6 +748,12 @@ type NodeContainer struct {
//
// +kubebuilder:validation:Optional
SecurityLimitsConfig string `json:"securityLimitsConfig,omitempty"`

// AppArmorProfile defines the AppArmor profile for the Slurm worker node
//
// +kubebuilder:validation:Optional
// +kubebuilder:default="unconfined"
AppArmorProfile string `json:"appArmorProfile,omitempty"`
}

// NodeVolume defines the configuration for a node volume.
Expand Down
18 changes: 18 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 8766049

Please sign in to comment.