Skip to content
This repository was archived by the owner on Jul 24, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions api/v1alpha1/modelservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,13 @@ type ModelArtifacts struct {
//
// +optional
Size *res.Quantity `json:"size,omitempty"`
// OCI image pull policy.
// One of Always, Never, IfNotPresent.
// Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
// Cannot be updated.
// More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
// +optional
PullPolicy corev1.PullPolicy `json:"pullPolicy,omitempty" protobuf:"bytes,14,opt,name=pullPolicy,casttype=PullPolicy"`
}

// ModelServicePodSpec defines the specification for pod templates that will be created by ModelService.
Expand Down
8 changes: 8 additions & 0 deletions config/crd/bases/llm-d.ai_modelservices.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,14 @@ spec:
authSecretName:
description: Name of the authentication secret. Contains HF_TOKEN
type: string
pullPolicy:
description: |-
OCI image pull policy.
One of Always, Never, IfNotPresent.
Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
Cannot be updated.
More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
type: string
size:
anyOf:
- type: integer
Expand Down
7 changes: 6 additions & 1 deletion docs/api_reference/out.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,13 @@ Three types of URIs are support to enable models packaged as images (oci://<imag
models downloaded from HuggingFace (hf://<model-repo>/<model-name>) +
and pre-existing models loaded from a volume-mounted PVC (pvc://model-path) + | |
| *`authSecretName`* __string__ | Name of the authentication secret. Contains HF_TOKEN + | |
| *`size`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#quantity-resource-api[$$Quantity$$]__ | Size of the model artifacts on disk +
| *`size`* __xref:{anchor_prefix}-k8s-io-apimachinery-pkg-api-resource-quantity[$$Quantity$$]__ | Size of the model artifacts on disk +
ensure Size is large enough when providing hf://... URI + | |
| *`pullPolicy`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#pullpolicy-v1-core[$$PullPolicy$$]__ | OCI image pull policy. +
One of Always, Never, IfNotPresent. +
Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. +
Cannot be updated. +
More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + | |
|===


Expand Down
22 changes: 16 additions & 6 deletions docs/api_reference/out.html
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ <h4 id="k8s-api-github-com-llm-d-llm-d-model-service-api-v1alpha1-modelartifacts
</tr>
<tr>
<td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph">
<p><strong><code>size</code></strong> <em><a href="https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#quantity-resource-api">Quantity</a></em></p>
<p><strong><code>size</code></strong> <em><a href="#k8s-api-k8s-io-apimachinery-pkg-api-resource-quantity">Quantity</a></em></p>
</div></div></td>
<td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph">
<p>Size of the model artifacts on disk<br>
Expand All @@ -741,6 +741,20 @@ <h4 id="k8s-api-github-com-llm-d-llm-d-model-service-api-v1alpha1-modelartifacts
<td class="tableblock halign-left valign-top"><div class="content"></div></td>
<td class="tableblock halign-left valign-top"><div class="content"></div></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph">
<p><strong><code>pullPolicy</code></strong> <em><a href="https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#pullpolicy-v1-core">PullPolicy</a></em></p>
</div></div></td>
<td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph">
<p>OCI image pull policy.<br>
One of Always, Never, IfNotPresent.<br>
Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.<br>
Cannot be updated.<br>
More info: <a href="https://kubernetes.io/docs/concepts/containers/images#updating-images" class="bare">https://kubernetes.io/docs/concepts/containers/images#updating-images</a><br></p>
</div></div></td>
<td class="tableblock halign-left valign-top"><div class="content"></div></td>
<td class="tableblock halign-left valign-top"><div class="content"></div></td>
</tr>
</tbody>
</table>
</div>
Expand Down Expand Up @@ -1629,11 +1643,7 @@ <h4 id="k8s-api-github-com-llm-d-llm-d-model-service-api-v1alpha1-routing">Routi
</div>
<div id="footer">
<div id="footer-text">
<<<<<<< HEAD
Last updated 2025-05-15 11:58:12 -0400
=======
Last updated 2025-05-14 17:04:13 -0400
>>>>>>> upstream/main
Last updated 2025-05-21 13:57:32 -0400
</div>
</div>
</body>
Expand Down
1 change: 1 addition & 0 deletions internal/controller/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package controller
const modelStorageVolumeName = "model-storage"
const modelStorageRoot = "/model-cache"
const pathSep = "/"
const ociPathToModelSep = "::"
const DECODE_ROLE = "decode"
const PREFILL_ROLE = "prefill"
const MODEL_ARTIFACT_URI_PVC = "pvc"
Expand Down
6 changes: 3 additions & 3 deletions internal/controller/mergeContainerSlice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ import (
)

// assertEqualSlices checks if two slices are equal in length, order, and content.
func assertEqualSlices[T comparable](t *testing.T, got, want []T) {
if !reflect.DeepEqual(got, want) {
sliceError := fmt.Errorf("slices do not match:\ngot: %v\nwant: %v", got, want)
// assertEqualSlices fails the test when expected and actual differ in length,
// order, or content (compared with reflect.DeepEqual). On mismatch it reports
// both slices, with the expected one printed under "want" and the actual one
// under "got".
func assertEqualSlices[T comparable](t *testing.T, expected, actual []T) {
	// Mark this function as a test helper so the failure is attributed to the
	// calling test's line instead of to this function.
	t.Helper()
	if !reflect.DeepEqual(expected, actual) {
		sliceError := fmt.Errorf("slices do not match:\nwant: %v\ngot: %v", expected, actual)
		// sliceError is always non-nil here, so this assertion always records a failure.
		assert.NoError(t, sliceError, "error with comparing slices")
	}
}
Expand Down
41 changes: 27 additions & 14 deletions internal/controller/modelservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,24 +103,37 @@ func (t *TemplateVars) from(ctx context.Context, msvc *msv1alpha1.ModelService)
t.AuthSecretName = *msvc.Spec.ModelArtifacts.AuthSecretName
}

// Compute ModelPath variable and HFModelName for HF prefix
var err error
uri := msvc.Spec.ModelArtifacts.URI
if strings.HasPrefix(uri, HF_PREFIX) {
t.HFModelName = strings.TrimPrefix(uri, HF_PREFIX)
t.ModelPath = t.HFModelName
} else if strings.HasPrefix(uri, PVC_PREFIX) {
tail := strings.TrimPrefix(uri, PVC_PREFIX)
segments := strings.Split(tail, pathSep)
t.ModelPath = strings.Join(segments[1:], pathSep)
} else {
err := fmt.Errorf("unsupported prefix")
log.FromContext(ctx).V(1).Error(err, "cannot get template vars", "uri", uri)
return err
uriType := UriType(uri)
switch uriType {
case HF:
if repoID, modelID, err := parseHFURI(&msvc.Spec.ModelArtifacts); err == nil {
t.HFModelName = repoID + pathSep + modelID
t.ModelPath = t.HFModelName
}
case PVC:
if _, modelPath, err := parsePVCURI(&msvc.Spec.ModelArtifacts); err == nil {
t.ModelPath = strings.Join(modelPath, pathSep)
}
case OCI:
if _, modelPath, err := parseOCIURI(&msvc.Spec.ModelArtifacts); err == nil {
t.ModelPath = strings.Join(modelPath, pathSep)
}
case UnknownURI:
err = fmt.Errorf("unsupported prefix")
log.FromContext(ctx).V(1).Error(err, "uri", uri)
}

if err != nil {
log.FromContext(ctx).V(1).Error(err, "cannot parse uri and thus cannot populate some ModelPath template vars", "uri", uri)
}

// Compute the mountedModelPath variable, given the URI type
// PVC: /path/to/model
// Compute the MountedModelPath variable, given the URI type
// PVC: /model-cache/path/to/model
// HF: /model-cache
// OCI: /model-cache
// OCI: /model-cache/path/to/model
mountedModelPath, err := mountedModelPath(msvc)
if err != nil {
return err
Expand Down
107 changes: 106 additions & 1 deletion internal/controller/modelservice_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ var _ = Describe("ModelService Controller", func() {
Expect(decode.Spec.Template.Spec.Volumes[0].Name).To(Equal(modelStorageVolumeName))
Expect(decode.Spec.Template.Spec.Volumes[0].EmptyDir).To(Not(BeNil()))

By("checking hte decode container where mountModelVolume is true has a volume mount")
By("checking the decode container where mountModelVolume is true has a volume mount")
Expect(len(decode.Spec.Template.Spec.Containers)).To(Equal(1))
Expect(len(decode.Spec.Template.Spec.Containers[0].VolumeMounts)).To(Equal(1))
Expect(decode.Spec.Template.Spec.Containers[0].VolumeMounts[0].Name).To(Equal(modelStorageVolumeName))
Expand All @@ -892,6 +892,111 @@ var _ = Describe("ModelService Controller", func() {
})
})

// Reconciles a ModelService whose model artifacts URI uses the OCI prefix and
// verifies the decode Deployment: it must carry an image volume for the model,
// mount that volume into the container that requested it, and have its args
// interpolated with the mounted model path.
Context("When reconciling a OCI ModelService", func() {
	It("should create child resources correctly", func() {
		// ociMSVC names
		ociMSVCName := "oci-msvc"
		ociImageName := "oci-image-with-tag:0.1.1"
		// URI layout: <oci prefix><image reference><separator><path to model inside the image>
		ociURI := MODEL_ARTIFACT_URI_OCI_PREFIX + ociImageName + ociPathToModelSep + modelPath
		ociMountedModelPath := modelStorageRoot + pathSep + modelPath

		ociNamespacedName := types.NamespacedName{
			Name:      ociMSVCName,
			Namespace: namespace,
		}

		// variable definitions
		ociMSVC := &msv1alpha1.ModelService{
			TypeMeta: metav1.TypeMeta{
				APIVersion: msv1alpha1.GroupVersion.String(),
				Kind:       "ModelService",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name:      ociMSVCName,
				Namespace: namespace,
			},
			Spec: msv1alpha1.ModelServiceSpec{
				ModelArtifacts: msv1alpha1.ModelArtifacts{
					URI: ociURI,
				},
				Routing: msv1alpha1.Routing{
					ModelName: modelServiceName,
				},
				Decode: &msv1alpha1.PDSpec{
					ModelServicePodSpec: msv1alpha1.ModelServicePodSpec{
						Containers: []msv1alpha1.ContainerSpec{
							{
								Name:  "llm",
								Image: &imageName,
								Args: []string{
									"{{ .MountedModelPath }}",
								},
								MountModelVolume: true,
							},
						},
					},
				},
			},
		}

		// Create ociMSVC in the cluster
		By("creating the ociMSVC in the cluster")
		err := k8sClient.Create(ctx, ociMSVC)
		Expect(err).NotTo(HaveOccurred())

		// Fetch from cluster
		By("fetching the ociMSVC in the cluster")
		var ociMSVCInCluster msv1alpha1.ModelService
		err = k8sClient.Get(ctx, ociNamespacedName, &ociMSVCInCluster)
		Expect(err).NotTo(HaveOccurred())

		// Reconcile resource
		By("Reconciling the ModelService")
		reconciler := &ModelServiceReconciler{
			Client:      k8sClient,
			Scheme:      k8sClient.Scheme(),
			RBACOptions: *rbacOptions,
		}
		_, err = reconciler.Reconcile(ctx, reconcile.Request{
			NamespacedName: ociNamespacedName,
		})
		Expect(err).NotTo(HaveOccurred())

		// Verify child resources
		By("fetching the decode deployment child resource")
		// fetch decode resource name
		decodeNamespacedName := types.NamespacedName{
			Name:      deploymentName(ociMSVC, DECODE_ROLE),
			Namespace: namespace,
		}

		// Poll until the Deployment can be read. The assertion is made on the
		// polled result itself, so a first read that fails cannot fail the test
		// once a later attempt succeeds.
		var decode appsv1.Deployment
		Eventually(func() error {
			return k8sClient.Get(ctx, decodeNamespacedName, &decode)
		}, time.Second*5, time.Millisecond*500).Should(Succeed())

		By("checking decode child has an oci image volume with the default pull policy")
		Expect(len(decode.Spec.Template.Spec.Volumes)).To(Equal(1))
		Expect(decode.Spec.Template.Spec.Volumes[0].Name).To(Equal(modelStorageVolumeName))
		Expect(decode.Spec.Template.Spec.Volumes[0].Image).To(Not(BeNil()))
		Expect(decode.Spec.Template.Spec.Volumes[0].Image.Reference).To(Equal(ociImageName))
		Expect(decode.Spec.Template.Spec.Volumes[0].Image.PullPolicy).To(Equal(corev1.PullIfNotPresent))

		By("checking the decode container where mountModelVolume is true has a volume mount")
		Expect(len(decode.Spec.Template.Spec.Containers)).To(Equal(1))
		Expect(len(decode.Spec.Template.Spec.Containers[0].VolumeMounts)).To(Equal(1))
		Expect(decode.Spec.Template.Spec.Containers[0].VolumeMounts[0].Name).To(Equal(modelStorageVolumeName))
		Expect(decode.Spec.Template.Spec.Containers[0].VolumeMounts[0].MountPath).To(Equal(modelStorageRoot))

		By("checking decode container args are interpolated")
		Expect(len(decode.Spec.Template.Spec.Containers[0].Args)).To(Equal(1))
		Expect(decode.Spec.Template.Spec.Containers[0].Args[0]).To(Equal(ociMountedModelPath))
	})
})

Context("When reconciling a MSVC with errorneous BaseConfig", func() {
When("BaseConfig's ConfigMap field is malformatted", func() {
It("should raise an error when reconciling", func() {
Expand Down
8 changes: 8 additions & 0 deletions internal/controller/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ var _ = BeforeSuite(func() {
// if someone runs tests from a subfolder, these may not run
CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases"), filepath.Join("..", "..", "test", "inferenceCRDs")},
ErrorIfCRDPathMissing: true,
ControlPlane: envtest.ControlPlane{
APIServer: &envtest.APIServer{
Args: []string{
// ImageVolume feature gate is required for OCI URIs
"--feature-gates=ImageVolume=true",
},
},
},
}

// Retrieve the first found binary directory to allow running tests from IDEs
Expand Down
Loading
Loading