diff --git a/api/v1alpha1/modelservice_types.go b/api/v1alpha1/modelservice_types.go index 3cd7c73..717e4a1 100644 --- a/api/v1alpha1/modelservice_types.go +++ b/api/v1alpha1/modelservice_types.go @@ -133,7 +133,7 @@ type ContainerSpec struct { // For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache // For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, // and the value is mounted to an environment variable called HF_TOKEN - // For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + // For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) // is created and mounted with the mountPath oci-dir // default:false // +optional @@ -258,7 +258,7 @@ type Routing struct { // ModelArtifacts describes the source of the model type ModelArtifacts struct { // URI is the model URI - // Three types of URIs are support to enable models packaged as images (oci:///<:image-tag>), + // Three types of URIs are support to enable models packaged as images (oci+native:///<:image-tag>), // models downloaded from HuggingFace (hf:///) // and pre-existing models loaded from a volume-mounted PVC (pvc://model-path) // @@ -273,6 +273,13 @@ type ModelArtifacts struct { // // +optional Size *res.Quantity `json:"size,omitempty"` + // OCI image pull policy. + // One of Always, Never, IfNotPresent. + // Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + // Cannot be updated. + // More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + // +optional + PullPolicy corev1.PullPolicy `json:"pullPolicy,omitempty" protobuf:"bytes,14,opt,name=pullPolicy,casttype=PullPolicy"` } // ModelServicePodSpec defines the specification for pod templates that will be created by ModelService. 
diff --git a/config/crd/bases/llm-d.ai_modelservices.yaml b/config/crd/bases/llm-d.ai_modelservices.yaml index b97ed3b..bedc058 100644 --- a/config/crd/bases/llm-d.ai_modelservices.yaml +++ b/config/crd/bases/llm-d.ai_modelservices.yaml @@ -355,7 +355,7 @@ spec: For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, and the value is mounted to an environment variable called HF_TOKEN - For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) is created and mounted with the mountPath oci-dir default:false type: boolean @@ -656,7 +656,7 @@ spec: For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, and the value is mounted to an environment variable called HF_TOKEN - For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) is created and mounted with the mountPath oci-dir default:false type: boolean @@ -992,7 +992,7 @@ spec: For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, and the value is mounted to an environment variable called HF_TOKEN - For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + For 
URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) is created and mounted with the mountPath oci-dir default:false type: boolean @@ -1293,7 +1293,7 @@ spec: For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, and the value is mounted to an environment variable called HF_TOKEN - For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) is created and mounted with the mountPath oci-dir default:false type: boolean @@ -1387,6 +1387,14 @@ spec: authSecretName: description: Name of the authentication secret. Contains HF_TOKEN type: string + pullPolicy: + description: |- + OCI image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. 
+ More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string size: anyOf: - type: integer @@ -1399,7 +1407,7 @@ spec: uri: description: |- URI is the model URI - Three types of URIs are support to enable models packaged as images (oci:///<:image-tag>), + Three types of URIs are support to enable models packaged as images (oci+native:///<:image-tag>), models downloaded from HuggingFace (hf:///) and pre-existing models loaded from a volume-mounted PVC (pvc://model-path) type: string @@ -1657,7 +1665,7 @@ spec: For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, and the value is mounted to an environment variable called HF_TOKEN - For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) is created and mounted with the mountPath oci-dir default:false type: boolean @@ -1958,7 +1966,7 @@ spec: For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, and the value is mounted to an environment variable called HF_TOKEN - For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) is created and mounted with the mountPath oci-dir default:false type: boolean diff --git a/docs/api_reference/out.asciidoc b/docs/api_reference/out.asciidoc index 5fc8d40..81f348e 100644 --- a/docs/api_reference/out.asciidoc +++ 
b/docs/api_reference/out.asciidoc @@ -97,7 +97,7 @@ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-co For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache + For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, + and the value is mounted to an environment variable called HF_TOKEN + -For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + +For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + is created and mounted with the mountPath oci-dir + default:false + | | |=== @@ -121,12 +121,17 @@ ModelArtifacts describes the source of the model |=== | Field | Description | Default | Validation | *`uri`* __string__ | URI is the model URI + -Three types of URIs are support to enable models packaged as images (oci:///<:image-tag>), + +Three types of URIs are support to enable models packaged as images (oci+native:///<:image-tag>), + models downloaded from HuggingFace (hf:///) + and pre-existing models loaded from a volume-mounted PVC (pvc://model-path) + | | | *`authSecretName`* __string__ | Name of the authentication secret. Contains HF_TOKEN + | | -| *`size`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#quantity-resource-api[$$Quantity$$]__ | Size of the model artifacts on disk + +| *`size`* __xref:{anchor_prefix}-k8s-io-apimachinery-pkg-api-resource-quantity[$$Quantity$$]__ | Size of the model artifacts on disk + ensure Size is large enough when providing hf://... URI + | | +| *`pullPolicy`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#pullpolicy-v1-core[$$PullPolicy$$]__ | OCI image pull policy. + +One of Always, Never, IfNotPresent. + +Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. 
+ +Cannot be updated. + +More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + | | |=== @@ -278,6 +283,9 @@ this reference will be nil + | | | *`eppDeploymentRef`* __string__ | EppDeploymentRef identifies the epp deployment + if epp deployment is yet to be created, + this reference will be nil + | | +| *`httpRouteRef`* __string__ | HTTPRoute identifies the HTTPRoute resource + +if HTTPRoute is yet to be created, + +this reference will be nil + | | | *`inferenceModelRef`* __string__ | InferenceModelRef identifies the inference model resource + if inference model is yet to be created, + this reference will be nil + | | @@ -435,6 +443,88 @@ Required: {} + | *`ports`* __xref:{anchor_prefix}-github-com-llm-d-llm-d-model-service-api-v1alpha1-port[$$Port$$] array__ | Ports is a list of named ports + These can be referenced by name in configuration of base configuration or model services + | | +| *`gatewayRefs`* __ParentReference array__ | GatewayRef is merged to baseconfig based on the Name field. + +Directly from Gateway API: https://gateway-api.sigs.k8s.io/reference/spec/#commonroutespec + +ParentRefs references the resources (usually Gateways) that a Route wants + +to be attached to. Note that the referenced parent resource needs to + +allow this for the attachment to be complete. For Gateways, that means + +the Gateway needs to allow attachment from Routes of this kind and + +namespace. For Services, that means the Service must either be in the same + +namespace for a "producer" route, or the mesh implementation must support + +and allow "consumer" routes for the referenced Service. ReferenceGrant is + +not applicable for governing ParentRefs to Services - it is not possible to + +create a "producer" route for a Service in a different namespace from the + +Route. 
+ + + +There are two kinds of parent resources with "Core" support: + + + +* Gateway (Gateway conformance profile) + +* Service (Mesh conformance profile, ClusterIP Services only) + + + +This API may be extended in the future to support additional kinds of parent + +resources. + + + +ParentRefs must be _distinct_. This means either that: + + + +* They select different objects. If this is the case, then parentRef + +entries are distinct. In terms of fields, this means that the + +multi-part key defined by `group`, `kind`, `namespace`, and `name` must + +be unique across all parentRef entries in the Route. + +* They do not select different objects, but for each optional field used, + +each ParentRef that selects the same object must set the same set of + +optional fields to different values. If one ParentRef sets a + +combination of optional fields, all must set the same combination. + + + +Some examples: + + + +* If one ParentRef sets `sectionName`, all ParentRefs referencing the + +same object must also set `sectionName`. + +* If one ParentRef sets `port`, all ParentRefs referencing the same + +object must also set `port`. + +* If one ParentRef sets `sectionName` and `port`, all ParentRefs + +referencing the same object must also set `sectionName` and `port`. + + + +It is possible to separately reference multiple distinct objects that may + +be collapsed by an implementation. For example, some implementations may + +choose to merge compatible Gateway Listeners together. If that is the + +case, the list of routes attached to those resources should also be + +merged. + + + +Note that for ParentRefs that cross namespace boundaries, there are specific + +rules. Cross-namespace references are only valid if they are explicitly + +allowed by something in the namespace they are referring to. For example, + +Gateway has the AllowedRoutes field, and ReferenceGrant provides a + +generic way to enable other kinds of cross-namespace reference. 
+ + + + + +ParentRefs from a Route to a Service in the same namespace are "producer" + +routes, which apply default routing rules to inbound connections from + +any namespace to the Service. + + + +ParentRefs from a Route to a Service in a different namespace are + +"consumer" routes, and these routing rules are only applied to outbound + +connections originating from the same namespace as the Route, for which + +the intended destination of the connections are a Service targeted as a + +ParentRef of the Route. + + + + + + + + + + + + + | | MaxItems: 32 + + |=== diff --git a/docs/api_reference/out.html b/docs/api_reference/out.html index 4266a93..a7ce172 100644 --- a/docs/api_reference/out.html +++ b/docs/api_reference/out.html @@ -664,7 +664,7 @@

For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
-For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false

@@ -713,7 +713,7 @@

URI is the model URI
-Three types of URIs are support to enable models packaged as images (oci://<image-repo>/<image-name><:image-tag>),
+Three types of URIs are supported to enable models packaged as images (oci+native://<image-repo>/<image-name><:image-tag>),
models downloaded from HuggingFace (hf://<model-repo>/<model-name>)
and pre-existing models loaded from a volume-mounted PVC (pvc://model-path)

@@ -732,7 +732,7 @@

-

size Quantity

+

size Quantity

Size of the model artifacts on disk
@@ -741,6 +741,20 @@

+ +
+

pullPolicy PullPolicy

+
+
+

OCI image pull policy.
+One of Always, Never, IfNotPresent.
+Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
+Cannot be updated.
+More info: https://kubernetes.io/docs/concepts/containers/images#updating-images

+
+
+
+

@@ -1196,6 +1210,18 @@

+

httpRouteRef string

+
+
+

HTTPRoute identifies the HTTPRoute resource
+if HTTPRoute is yet to be created,
+this reference will be nil

+
+
+
+ + +

inferenceModelRef string

@@ -1620,6 +1646,118 @@

Routi
+ +
+

gatewayRefs ParentReference array

+
+
+

GatewayRef is merged to baseconfig based on the Name field.
+Directly from Gateway API: https://gateway-api.sigs.k8s.io/reference/spec/#commonroutespec
+ParentRefs references the resources (usually Gateways) that a Route wants
+to be attached to. Note that the referenced parent resource needs to
+allow this for the attachment to be complete. For Gateways, that means
+the Gateway needs to allow attachment from Routes of this kind and
+namespace. For Services, that means the Service must either be in the same
+namespace for a "producer" route, or the mesh implementation must support
+and allow "consumer" routes for the referenced Service. ReferenceGrant is
+not applicable for governing ParentRefs to Services - it is not possible to
+create a "producer" route for a Service in a different namespace from the
+Route.

+
+
+

There are two kinds of parent resources with "Core" support:

+
+
+
    +
  • +

    Gateway (Gateway conformance profile)

    +
  • +
  • +

    Service (Mesh conformance profile, ClusterIP Services only)

    +
  • +
+
+
+

This API may be extended in the future to support additional kinds of parent
+resources.

+
+
+

ParentRefs must be distinct. This means either that:

+
+
+
    +
  • +

    They select different objects. If this is the case, then parentRef
    +entries are distinct. In terms of fields, this means that the
    +multi-part key defined by group, kind, namespace, and name must
    +be unique across all parentRef entries in the Route.

    +
  • +
  • +

    They do not select different objects, but for each optional field used,
    +each ParentRef that selects the same object must set the same set of
    +optional fields to different values. If one ParentRef sets a
    +combination of optional fields, all must set the same combination.

    +
  • +
+
+
+

Some examples:

+
+
+
    +
  • +

    If one ParentRef sets sectionName, all ParentRefs referencing the
    +same object must also set sectionName.

    +
  • +
  • +

    If one ParentRef sets port, all ParentRefs referencing the same
    +object must also set port.

    +
  • +
  • +

    If one ParentRef sets sectionName and port, all ParentRefs
    +referencing the same object must also set sectionName and port.

    +
  • +
+
+
+

It is possible to separately reference multiple distinct objects that may
+be collapsed by an implementation. For example, some implementations may
+choose to merge compatible Gateway Listeners together. If that is the
+case, the list of routes attached to those resources should also be
+merged.

+
+
+

Note that for ParentRefs that cross namespace boundaries, there are specific
+rules. Cross-namespace references are only valid if they are explicitly
+allowed by something in the namespace they are referring to. For example,
+Gateway has the AllowedRoutes field, and ReferenceGrant provides a
+generic way to enable other kinds of cross-namespace reference.

+
+
+

<gateway:experimental:description>
+ParentRefs from a Route to a Service in the same namespace are "producer"
+routes, which apply default routing rules to inbound connections from
+any namespace to the Service.

+
+
+

ParentRefs from a Route to a Service in a different namespace are
+"consumer" routes, and these routing rules are only applied to outbound
+connections originating from the same namespace as the Route, for which
+the intended destination of the connections are a Service targeted as a
+ParentRef of the Route.
+</gateway:experimental:description>

+
+
+

<gateway:standard:validation:XValidation:message="sectionName must be specified when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.all(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (!has(p1.namespace) || p1.namespace == '') && (!has(p2.namespace) || p2.namespace == '' || (has(p1.namespace) && has(p2.namespace) && p1.namespace == p2.namespace )) ? !has(p1.sectionName) || p1.sectionName == '') == (!has(p2.sectionName) || p2.sectionName == '' : true))">
+<gateway:standard:validation:XValidation:message="sectionName must be unique when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.exists_one(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (!has(p1.namespace) || p1.namespace == '') && (!has(p2.namespace) || p2.namespace == '' || (has(p1.namespace) && has(p2.namespace) && p1.namespace == p2.namespace )) && (!has(p1.sectionName) || p1.sectionName == '') && (!has(p2.sectionName) || p2.sectionName == '' || (has(p1.sectionName) && has(p2.sectionName) && p1.sectionName == p2.sectionName))))">
+<gateway:experimental:validation:XValidation:message="sectionName or port must be specified when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.all(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (!has(p1.namespace) || p1.namespace == '') && (!has(p2.namespace) || p2.namespace == '' || (has(p1.namespace) && has(p2.namespace) && p1.namespace == p2.namespace)) ? !has(p1.sectionName) || p1.sectionName == '') == (!has(p2.sectionName) || p2.sectionName == '') && (!has(p1.port) || p1.port == 0) == (!has(p2.port) || p2.port == 0: true))">
+<gateway:experimental:validation:XValidation:message="sectionName or port must be unique when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.exists_one(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (!has(p1.namespace) || p1.namespace == '') && (!has(p2.namespace) || p2.namespace == '' || (has(p1.namespace) && has(p2.namespace) && p1.namespace == p2.namespace )) && (!has(p1.sectionName) || p1.sectionName == '') && (!has(p2.sectionName) || p2.sectionName == '' || ( has(p1.sectionName) && has(p2.sectionName) && p1.sectionName == p2.sectionName)) && (!has(p1.port) || p1.port == 0) && (!has(p2.port) || p2.port == 0 || (has(p1.port) && has(p2.port) && p1.port == p2.port))))">

+
+
+
+

MaxItems: 32

+
+

@@ -1629,11 +1767,7 @@

Routi

diff --git a/docs/userguide/model-artifacts.md b/docs/userguide/model-artifacts.md index 946bd75..e9d832d 100644 --- a/docs/userguide/model-artifacts.md +++ b/docs/userguide/model-artifacts.md @@ -4,7 +4,7 @@ The `modelArtifacts` section under the `spec` of a `ModelService` defines how mo ## Purpose -Without `ModelService`, users must manually configure vLLM arguments, environment variables, and pod/container specifications. This requires a deep understanding of both vLLM and the composition of model artifacts. The `ModelService` controller automates these configurations, enabling users to focus solely on specifying the model source. +The `ModelService` controller automates configurations, enabling users to focus solely on specifying the model source. Without `ModelService`, users must manually configure vLLM arguments, environment variables, and pod/container specifications. This requires a deep understanding of both vLLM and the composition of model artifacts. ## Model Artifact Sources and Behaviors @@ -99,6 +99,45 @@ Various template variable are exposed as a result of using the `"pvc://"` prefix - `{{ .MountedModelPath }}`: this is equal to `/model-cache/` where `` comes from the URI. In the above example, `{{ .MountedModelPath }}` interpolates to `/model-cache/path/to/granite` -### 3. Loading the model from an image volume +### 3. Loading the model as OCI artifacts using an image volume -NotImplemented. \ No newline at end of file +Model artifacts can be built into images and consumed by Kubernetes volumes. This is called an [image volume](https://kubernetes.io/docs/tasks/configure-pod-container/image-volumes/), and is in beta state as of Kubernetes v1.33. If the cluster has `ImageVolume` enabled as a feature gate, then model owners can mount models from OCI images. 
+ +#### URI Format + +`"oci+native://<image-with-tag>::<path/to/model>"` + +Example: `"oci+native://redhat/granite-7b-lab-gguf:1.0::/"` + +(This OCI image comes from https://hub.docker.com/r/redhat/granite-7b-lab-gguf) + +#### Additional Fields + +- `pullPolicy` (one of: `IfNotPresent`, `Always`, and `Never`): the [pull policy](https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy) of the OCI image. If specified, this is the pull policy supplied to the `volume.image.pullPolicy` field. + +#### Behavior + +- An image volume with the name `model-storage` is created for the deployment. The reference to the image is `<image-with-tag>` +- A read-only `volumeMount` with the `mountPath: /model-cache` is created for each container where `mountModelVolume: true` + + +#### Example Deployment Snippet + +```yaml +volumes: + - name: model-storage + image: + reference: redhat/granite-7b-lab-gguf:1.0 +containers: + - name: vllm + volumeMounts: + - mountPath: /model-cache + name: model-storage + readOnly: true +``` + +#### Template variables + +Various template variables are exposed as a result of using the `"oci+native://"` prefix, with `.MountedModelPath` being particularly useful if vLLM arguments require it. + +- `{{ .MountedModelPath }}`: this is equal to `/model-cache/<path/to/model>` where `<path/to/model>` comes from the URI. 
In the above example, `{{ .MountedModelPath }}` interpolates to `/model-cache` because ` = "/"` \ No newline at end of file diff --git a/internal/controller/constants.go b/internal/controller/constants.go index 469538e..0ec259f 100644 --- a/internal/controller/constants.go +++ b/internal/controller/constants.go @@ -7,11 +7,12 @@ package controller const modelStorageVolumeName = "model-storage" const modelStorageRoot = "/model-cache" const pathSep = "/" +const ociPathToModelSep = "::" const DECODE_ROLE = "decode" const PREFILL_ROLE = "prefill" const MODEL_ARTIFACT_URI_PVC = "pvc" const MODEL_ARTIFACT_URI_HF = "hf" -const MODEL_ARTIFACT_URI_OCI = "oci" +const MODEL_ARTIFACT_URI_OCI = "oci+native" const MODEL_ARTIFACT_URI_PVC_PREFIX = MODEL_ARTIFACT_URI_PVC + "://" const MODEL_ARTIFACT_URI_HF_PREFIX = MODEL_ARTIFACT_URI_HF + "://" const MODEL_ARTIFACT_URI_OCI_PREFIX = MODEL_ARTIFACT_URI_OCI + "://" diff --git a/internal/controller/modelservice_controller.go b/internal/controller/modelservice_controller.go index 4662b4a..4128346 100644 --- a/internal/controller/modelservice_controller.go +++ b/internal/controller/modelservice_controller.go @@ -104,24 +104,37 @@ func (t *TemplateVars) from(ctx context.Context, msvc *msv1alpha1.ModelService) t.AuthSecretName = *msvc.Spec.ModelArtifacts.AuthSecretName } + // Compute ModelPath variable and HFModelName for HF prefix + var err error uri := msvc.Spec.ModelArtifacts.URI - if strings.HasPrefix(uri, HF_PREFIX) { - t.HFModelName = strings.TrimPrefix(uri, HF_PREFIX) - t.ModelPath = t.HFModelName - } else if strings.HasPrefix(uri, PVC_PREFIX) { - tail := strings.TrimPrefix(uri, PVC_PREFIX) - segments := strings.Split(tail, pathSep) - t.ModelPath = strings.Join(segments[1:], pathSep) - } else { - err := fmt.Errorf("unsupported prefix") - log.FromContext(ctx).V(1).Error(err, "cannot get template vars", "uri", uri) - return err + uriType := UriType(uri) + switch uriType { + case HF: + if repoID, modelID, err := 
parseHFURI(&msvc.Spec.ModelArtifacts); err == nil { + t.HFModelName = repoID + pathSep + modelID + t.ModelPath = t.HFModelName + } + case PVC: + if _, modelPath, err := parsePVCURI(&msvc.Spec.ModelArtifacts); err == nil { + t.ModelPath = strings.Join(modelPath, pathSep) + } + case OCI: + if _, modelPath, err := parseOCIURI(&msvc.Spec.ModelArtifacts); err == nil { + t.ModelPath = strings.Join(modelPath, pathSep) + } + case UnknownURI: + err = fmt.Errorf("unsupported prefix") + log.FromContext(ctx).V(1).Error(err, "uri", uri) + } + + if err != nil { + log.FromContext(ctx).V(1).Error(err, "cannot parse uri and thus cannot populate some ModelPath template vars", "uri", uri) } - // Compute the mountedModelPath variable, given the URI type - // PVC: /path/to/model + // Compute the MountedModelPath variable, given the URI type + // PVC: /model-cache/path/to/model // HF: /model-cache - // OCI: /model-cache + // OCI: /model-cache/path/to/model mountedModelPath, err := mountedModelPath(msvc) if err != nil { return err diff --git a/internal/controller/modelservice_controller_test.go b/internal/controller/modelservice_controller_test.go index 3f4f551..d3c1540 100644 --- a/internal/controller/modelservice_controller_test.go +++ b/internal/controller/modelservice_controller_test.go @@ -910,7 +910,7 @@ var _ = Describe("ModelService Controller", func() { Expect(decode.Spec.Template.Spec.Volumes[0].Name).To(Equal(modelStorageVolumeName)) Expect(decode.Spec.Template.Spec.Volumes[0].EmptyDir).To(Not(BeNil())) - By("checking hte decode container where mountModelVolume is true has a volume mount") + By("checking the decode container where mountModelVolume is true has a volume mount") Expect(len(decode.Spec.Template.Spec.Containers)).To(Equal(1)) Expect(len(decode.Spec.Template.Spec.Containers[0].VolumeMounts)).To(Equal(1)) Expect(decode.Spec.Template.Spec.Containers[0].VolumeMounts[0].Name).To(Equal(modelStorageVolumeName)) @@ -927,6 +927,111 @@ var _ = Describe("ModelService 
Controller", func() { }) }) + Context("When reconciling a OCI ModelService", func() { + It("should create child resources correctly", func() { + // hfMSVC names + ociMSVCName := "oci-msvc" + ociImageName := "oci-image-with-tag:0.1.1" + ociURI := MODEL_ARTIFACT_URI_OCI_PREFIX + ociImageName + ociPathToModelSep + modelPath + ociMountedModelPath := modelStorageRoot + pathSep + modelPath + + ociNamespacedName := types.NamespacedName{ + Name: ociMSVCName, + Namespace: namespace, + } + + // variable definitions + ociMSVC := &msv1alpha1.ModelService{ + TypeMeta: metav1.TypeMeta{ + APIVersion: msv1alpha1.GroupVersion.String(), + Kind: "ModelService", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: ociMSVCName, + Namespace: namespace, + }, + Spec: msv1alpha1.ModelServiceSpec{ + ModelArtifacts: msv1alpha1.ModelArtifacts{ + URI: ociURI, + }, + Routing: msv1alpha1.Routing{ + ModelName: modelServiceName, + }, + Decode: &msv1alpha1.PDSpec{ + ModelServicePodSpec: msv1alpha1.ModelServicePodSpec{ + Containers: []msv1alpha1.ContainerSpec{ + { + Name: "llm", + Image: &imageName, + Args: []string{ + "{{ .MountedModelPath }}", + }, + MountModelVolume: true, + }, + }, + }, + }, + }, + } + + // Create ociMSVC in the cluster + By("creating the ociMSVC in the cluster") + err := k8sClient.Create(ctx, ociMSVC) + Expect(err).NotTo(HaveOccurred()) + + // Fetch from cluster + By("fetching the ociMSVC in the cluster") + var ociMSVCInCluster msv1alpha1.ModelService + err = k8sClient.Get(ctx, ociNamespacedName, &ociMSVCInCluster) + Expect(err).NotTo(HaveOccurred()) + + // Reconcile resource + By("Reconciling the ModelService") + reconciler := &ModelServiceReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + RBACOptions: *rbacOptions, + } + _, err = reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: ociNamespacedName, + }) + Expect(err).NotTo(HaveOccurred()) + + // Verify child resources + By("fetching the decode deployment child resource") + // fetch decode resource name + 
decodeNamespacedName := types.NamespacedName{ + Name: deploymentName(ociMSVC, DECODE_ROLE), + Namespace: namespace, + } + + var decode appsv1.Deployment + err = k8sClient.Get(ctx, decodeNamespacedName, &decode) + Eventually(func() bool { + err := k8sClient.Get(ctx, decodeNamespacedName, &decode) + return err == nil + }, time.Second*5, time.Millisecond*500).Should(BeTrue()) + Expect(err).NotTo(HaveOccurred()) + + By("checking decode child has an oci image volume with the default pull policy") + Expect(len(decode.Spec.Template.Spec.Volumes)).To(Equal(1)) + Expect(decode.Spec.Template.Spec.Volumes[0].Name).To(Equal(modelStorageVolumeName)) + Expect(decode.Spec.Template.Spec.Volumes[0].Image).To(Not(BeNil())) + Expect(decode.Spec.Template.Spec.Volumes[0].Image.Reference).To(Equal(ociImageName)) + Expect(decode.Spec.Template.Spec.Volumes[0].Image.PullPolicy).To(Equal(corev1.PullIfNotPresent)) + + By("checking the decode container where mountModelVolume is true has a volume mount") + Expect(len(decode.Spec.Template.Spec.Containers)).To(Equal(1)) + Expect(len(decode.Spec.Template.Spec.Containers[0].VolumeMounts)).To(Equal(1)) + Expect(decode.Spec.Template.Spec.Containers[0].VolumeMounts[0].Name).To(Equal(modelStorageVolumeName)) + Expect(decode.Spec.Template.Spec.Containers[0].VolumeMounts[0].MountPath).To(Equal(modelStorageRoot)) + + By("checking decode container args are interpolated") + Expect(len(decode.Spec.Template.Spec.Containers[0].Args)).To(Equal(1)) + Expect(decode.Spec.Template.Spec.Containers[0].Args[0]).To(Equal(ociMountedModelPath)) + }) + }) + Context("When reconciling a MSVC with errorneous BaseConfig", func() { When("BaseConfig's ConfigMap field is malformatted", func() { It("should raise an error when reconciling", func() { diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index e802cb3..fe83f2c 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -66,6 +66,14 @@ var _ = 
BeforeSuite(func() { // if someone runs tests from a subfolder, these may not run CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases"), filepath.Join("..", "..", "test", "inferenceCRDs")}, ErrorIfCRDPathMissing: true, + ControlPlane: envtest.ControlPlane{ + APIServer: &envtest.APIServer{ + Args: []string{ + // ImageVolume feature gate is required for OCI URIs + "--feature-gates=ImageVolume=true", + }, + }, + }, } // Retrieve the first found binary directory to allow running tests from IDEs diff --git a/internal/controller/utils.go b/internal/controller/utils.go index f2cdbfe..192a18a 100644 --- a/internal/controller/utils.go +++ b/internal/controller/utils.go @@ -101,10 +101,10 @@ func mountedModelPath(modelService *msv1alpha1.ModelService) (string, error) { uri := modelService.Spec.ModelArtifacts.URI switch UriType(uri) { case PVC: - if parts, err := parsePVCURI(&modelService.Spec.ModelArtifacts); err == nil { - modelPath := strings.Join(parts[1:], pathSep) + if _, modelPathSlice, err := parsePVCURI(&modelService.Spec.ModelArtifacts); err == nil { + modelPath := strings.Join(modelPathSlice, pathSep) // if uri is pvc://pvc-name/path/to/model - // output is /cache/path/to/model + // output is /model-cache/path/to/model mountedModelPath = modelStorageRoot + pathSep + modelPath } @@ -112,8 +112,14 @@ func mountedModelPath(modelService *msv1alpha1.ModelService) (string, error) { // The mountModelPath for HF is just the storage root, ie. 
model-cache mountedModelPath = modelStorageRoot - // TODO - // case OCI: + // The mountModelPath for OCI is what comes after :: in the URI + case OCI: + if _, modelPathSlice, err := parseOCIURI(&modelService.Spec.ModelArtifacts); err == nil { + // if uri is oci+native://image-with-tag:0.0.1::path/to/model + // output is /model-cache/path/to/model + joinedModelPath := strings.Join(modelPathSlice, pathSep) + mountedModelPath = modelStorageRoot + pathSep + joinedModelPath + } case UnknownURI: err = fmt.Errorf("unknown uri type, cannot compute the mountedModelPath") @@ -132,7 +138,7 @@ func isPVCURI(uri string) bool { return strings.HasPrefix(uri, MODEL_ARTIFACT_URI_PVC_PREFIX) } -// isOCIURI returns True if the URI begins with oci:// +// isOCIURI returns True if the URI begins with oci+native:// func isOCIURI(uri string) bool { return strings.HasPrefix(uri, MODEL_ARTIFACT_URI_OCI_PREFIX) } @@ -154,24 +160,32 @@ func UriType(uri string) URIType { return UnknownURI } -// parsePVCURI returns parts from a valid pvc URI, or -// returns an error if the PVC URI is invalid -func parsePVCURI(modelArtifact *msv1alpha1.ModelArtifacts) ([]string, error) { +// parsePVCURI returns parts from a valid pvc URI +// Returns: +// first string is the pvc name +// second string slice is the path to the model +// error if uri format is invalid +func parsePVCURI(modelArtifact *msv1alpha1.ModelArtifacts) (string, []string, error) { + var pvcName string + modelPath := []string{} if modelArtifact == nil { - return nil, fmt.Errorf("modelArtifact is nil") + return pvcName, modelPath, fmt.Errorf("modelArtifact is nil") } uri := modelArtifact.URI if !isPVCURI(uri) { - return nil, fmt.Errorf("URI does not have pvc prefix: %s", uri) + return pvcName, modelPath, fmt.Errorf("URI does not have pvc prefix: %s", uri) } parts := strings.Split(strings.TrimPrefix(uri, MODEL_ARTIFACT_URI_PVC_PREFIX), pathSep) if len(parts) < 2 { - return nil, fmt.Errorf("invalid pvc URI format: %s; need pvc:///model/path", 
uri) + return pvcName, modelPath, fmt.Errorf("invalid pvc URI format: %s; need pvc:///model/path", uri) } - return parts, nil + pvcName = parts[0] + modelPath = parts[1:] + + return pvcName, modelPath, nil } // parseHFURI returns parts from a valid hf URI, or @@ -199,6 +213,36 @@ func parseHFURI(modelArtifact *msv1alpha1.ModelArtifacts) (string, string, error return parts[0], parts[1], nil } +// parseOCIURI returns parts from a valid oci URI, or +// returns an error if the OCI URI is invalid +// returns two strings +// First string is the complete image identifier including tag +// Second string is the path to the model +func parseOCIURI(modelArtifact *msv1alpha1.ModelArtifacts) (string, []string, error) { + var imageIdentifier string + pathToModel := []string{} + if modelArtifact == nil { + return imageIdentifier, pathToModel, fmt.Errorf("modelArtifact is nil") + } + + uri := modelArtifact.URI + if !isOCIURI(uri) { + return imageIdentifier, pathToModel, fmt.Errorf("URI does not have oci prefix: %s", uri) + } + + // Split by :: + parts := strings.Split(strings.TrimPrefix(uri, MODEL_ARTIFACT_URI_OCI_PREFIX), ociPathToModelSep) + if len(parts) != 2 { + return imageIdentifier, pathToModel, fmt.Errorf("invalid oci URI format: %s; need oci+native://::/path/to/model. 
Please double check if you are missing %s which did not result in exactly two segments", uri, ociPathToModelSep) + } + + imageIdentifier = parts[0] + // Split by / + pathToModel = strings.Split(strings.TrimPrefix(parts[1], pathSep), pathSep) + + return imageIdentifier, pathToModel, nil +} + // getVolumeMountForContainer returns a VolumeMount for a container where MountModelVolume: true func getVolumeMountsForContainer(ctx context.Context, msvc *msv1alpha1.ModelService) []corev1.VolumeMount { @@ -208,24 +252,24 @@ func getVolumeMountsForContainer(ctx context.Context, msvc *msv1alpha1.ModelServ switch uriType { - // The volume mount for HF and PVC is the same - // except that HF volume is not readOnly + // The volume mount for PVC and OCI is the same // volumeMounts: - // - mountPath: /model-cache - // name: model-storage - case PVC: + // - name: model-storage + // mountPath: /model-cache + // readOnly: true + case PVC, OCI: desiredVolumeMount = &corev1.VolumeMount{ Name: modelStorageVolumeName, MountPath: modelStorageRoot, ReadOnly: true, } + // Volume mount is the same, except that HF volume is not readOnly + // so that models can be downloaded into the mountPath case HF: desiredVolumeMount = &corev1.VolumeMount{ Name: modelStorageVolumeName, MountPath: modelStorageRoot, } - // TODO - // case OCI: case UnknownURI: // do nothing log.FromContext(ctx).V(1).Error(fmt.Errorf("uri type is unknown, cannot populate volume mounts"), "uri type: "+msvc.Spec.ModelArtifacts.URI) @@ -246,9 +290,11 @@ func getVolumeForPDDeployment(ctx context.Context, msvc *msv1alpha1.ModelService uriType := UriType(msvc.Spec.ModelArtifacts.URI) switch uriType { + // Return a volume with persistentVolumeClaim case PVC: - if parts, err := parsePVCURI(&msvc.Spec.ModelArtifacts); err == nil { - pvcName := parts[0] + if pvcName, _, err := parsePVCURI(&msvc.Spec.ModelArtifacts); err != nil { + log.FromContext(ctx).V(1).Error(err, "uri: "+msvc.Spec.ModelArtifacts.URI) + } else { desiredVolume = 
&corev1.Volume{ Name: modelStorageVolumeName, VolumeSource: corev1.VolumeSource{ @@ -258,12 +304,12 @@ func getVolumeForPDDeployment(ctx context.Context, msvc *msv1alpha1.ModelService }, }, } - } else { - log.FromContext(ctx).V(1).Error(err, "uri: "+msvc.Spec.ModelArtifacts.URI) } // Return an emptyDir volume with ModelArtifacts.Size case HF: - if _, _, err := parseHFURI(&msvc.Spec.ModelArtifacts); err == nil { + if _, _, err := parseHFURI(&msvc.Spec.ModelArtifacts); err != nil { + log.FromContext(ctx).V(1).Error(err, "uri: "+msvc.Spec.ModelArtifacts.URI) + } else { desiredVolume = &corev1.Volume{ Name: modelStorageVolumeName, VolumeSource: corev1.VolumeSource{ @@ -272,12 +318,23 @@ func getVolumeForPDDeployment(ctx context.Context, msvc *msv1alpha1.ModelService }, }, } - } else { - log.FromContext(ctx).V(1).Error(err, "uri: "+msvc.Spec.ModelArtifacts.URI) } - // TODO - // case OCI: + // Return a volume with image reference + case OCI: + if image, _, err := parseOCIURI(&msvc.Spec.ModelArtifacts); err != nil { + log.FromContext(ctx).V(1).Error(err, "uri: "+msvc.Spec.ModelArtifacts.URI) + } else { + desiredVolume = &corev1.Volume{ + Name: modelStorageVolumeName, + VolumeSource: corev1.VolumeSource{ + Image: &corev1.ImageVolumeSource{ + Reference: image, + PullPolicy: msvc.Spec.ModelArtifacts.PullPolicy, + }, + }, + } + } case UnknownURI: // do nothing log.FromContext(ctx).V(1).Error(fmt.Errorf("uri type is unknown, cannot populate volumes"), "uri type: "+msvc.Spec.ModelArtifacts.URI) diff --git a/internal/controller/utils_test.go b/internal/controller/utils_test.go index 9e74a03..1c83017 100644 --- a/internal/controller/utils_test.go +++ b/internal/controller/utils_test.go @@ -7,6 +7,7 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" msv1alpha1 "github.com/llm-d/llm-d-model-service/api/v1alpha1" @@ -16,6 +17,7 @@ const PVC_NAME = "my-pvc" const MODEL_PATH = "path/to/model" const HF_REPO_ID = "ibm-granite" const HF_MODEL_ID = "granite-3.3-2b-instruct" +const OCI_IMAGE = "image-with-tag:0.0.1" var _ = Describe("Model Artifacts", func() { Context("Given a model artifact with an invalid URI prefix", func() { @@ -29,7 +31,7 @@ var _ = Describe("Model Artifacts", func() { Expect(isHFURI(modelArtifact.URI)).To(BeFalse()) By("Parsing PVC uri should fail") - _, err := parsePVCURI(&modelArtifact) + _, _, err := parsePVCURI(&modelArtifact) Expect(err).NotTo(BeNil()) By("Parsing HF uri should fail") @@ -45,11 +47,11 @@ var _ = Describe("Model Artifacts", func() { }{ "pvc://pvc-name/path/to/model": { expectedURIType: PVC, - expectedModelMountPath: modelStorageRoot + pathSep + "path/to/model", + expectedModelMountPath: modelStorageRoot + pathSep + MODEL_PATH, }, - "oci://repo-with-tag::path/to/model": { + "oci+native://repo-with-tag::path/to/model": { expectedURIType: OCI, - expectedModelMountPath: "", // TODO + expectedModelMountPath: modelStorageRoot + pathSep + MODEL_PATH, }, "hf://repo-id/model-id": { expectedURIType: HF, @@ -59,9 +61,15 @@ var _ = Describe("Model Artifacts", func() { expectedURIType: PVC, expectedModelMountPath: "", }, - "oci://": { + // invalid OCI + "oci+native://": { expectedURIType: OCI, - expectedModelMountPath: "", // TODO + expectedModelMountPath: "", + }, + // valid OCI + "oci+native://::": { + expectedURIType: OCI, + expectedModelMountPath: modelStorageRoot + pathSep, }, "hf://wrong": { expectedURIType: HF, @@ -83,7 +91,7 @@ var _ = Describe("Model Artifacts", func() { expectedURIType: UnknownURI, expectedModelMountPath: "", }, - "OCI://": { + "oci+other://": { expectedURIType: UnknownURI, expectedModelMountPath: "", }, @@ -140,11 +148,10 @@ var _ = Describe("Model Artifacts", func() { 
Expect(isOCIURI(modelArtifact.URI)).To(BeFalse()) By("Parsing uri parts should be successful") - parts, err := parsePVCURI(&modelArtifact) + pvcName, modelPath, err := parsePVCURI(&modelArtifact) Expect(err).To(BeNil()) - Expect(len(parts) > 1).To(BeTrue()) - Expect(parts[0]).To(Equal(PVC_NAME)) - Expect(strings.Join(parts[1:], "/")).To(Equal(MODEL_PATH)) + Expect(pvcName).To(Equal(PVC_NAME)) + Expect(strings.Join(modelPath, pathSep)).To(Equal(MODEL_PATH)) }) It("should produce a valid volumeMounts list", func() { volumeMounts := getVolumeMountsForContainer(ctx, &modelService) @@ -234,4 +241,70 @@ var _ = Describe("Model Artifacts", func() { Expect(hfHomeEnvVar.Value).To(Equal(modelStorageRoot)) }) }) + + Context("Given a model artifact with a valid OCI URI", func() { + + ctx := context.Background() + + pullPolicy := corev1.PullAlways + modelArtifact := msv1alpha1.ModelArtifacts{ + URI: fmt.Sprintf("oci+native://%s::%s", OCI_IMAGE, modelPath), + PullPolicy: pullPolicy, + } + + modelService := msv1alpha1.ModelService{ + Spec: msv1alpha1.ModelServiceSpec{ + ModelArtifacts: modelArtifact, + }, + } + + It("should parse correctly", func() { + By("checking type of uri") + Expect(isPVCURI(modelArtifact.URI)).To(BeFalse()) + Expect(isHFURI(modelArtifact.URI)).To(BeFalse()) + Expect(isOCIURI(modelArtifact.URI)).To(BeTrue()) + + By("Parsing uri parts should be successful") + image, ociModelPath, err := parseOCIURI(&modelArtifact) + Expect(err).To(BeNil()) + Expect(image).To(Equal(OCI_IMAGE)) + Expect(strings.Join(ociModelPath, pathSep)).To(Equal(modelPath)) + }) + + It("should produce a valid volumeMounts list", func() { + volumeMounts := getVolumeMountsForContainer(ctx, &modelService) + Expect(len(volumeMounts)).To(Equal(1)) + firstVolumeMount := volumeMounts[0] + + Expect(firstVolumeMount.Name).To(Equal(modelStorageVolumeName)) + Expect(firstVolumeMount.MountPath).To(Equal(modelStorageRoot)) + Expect(firstVolumeMount.ReadOnly).To(BeTrue()) + }) + + It("should produce a 
valid volumes list if pull policy is present", func() { + volumes := getVolumeForPDDeployment(ctx, &modelService) + Expect(len(volumes)).To(Equal(1)) + firstVolume := volumes[0] + Expect(firstVolume.Name).To(Equal(modelStorageVolumeName)) + Expect(firstVolume.Image.Reference).To(Equal(OCI_IMAGE)) + Expect(firstVolume.Image.PullPolicy).To(Equal(pullPolicy)) + }) + + It("should produce a valid volumes list if pull policy is not declared", func() { + modelService.Spec.ModelArtifacts = msv1alpha1.ModelArtifacts{ + URI: fmt.Sprintf("oci+native://%s::%s", OCI_IMAGE, modelPath), + } + + volumes := getVolumeForPDDeployment(ctx, &modelService) + Expect(len(volumes)).To(Equal(1)) + firstVolume := volumes[0] + Expect(firstVolume.Name).To(Equal(modelStorageVolumeName)) + Expect(firstVolume.Image.Reference).To(Equal(OCI_IMAGE)) + }) + + It("should produce a valid env list", func() { + envs := getEnvsForContainer(ctx, &modelService) + Expect(len(envs)).To(Equal(0)) + }) + }) }) diff --git a/samples/test/README.md b/samples/test/README.md index c5a6d55..ee46d15 100644 --- a/samples/test/README.md +++ b/samples/test/README.md @@ -1 +1,19 @@ -The files here are used for local development only. \ No newline at end of file +The files here are used for local development only. + +To test `msvc-oci` on Kind, the cluster must be configured with the `ImageVolume` feature gate enabled, otherwise the OCI volume will not get created in child deployments. Create the cluster with the following command. + +``` +kind create cluster --image="kindest/node:v1.32.0@sha256:c48c62eac5da28cdadcf560d1d8616cfa6783b58f0d94cf63ad1bf49600cb027" --config samples/test/kind-config.yaml +``` + +Then, run `make install` to install the required CRDs. Refer to the developer [docs](../../docs/developer.md). + +Finally, apply the OCI example MSVC and baseconfig. 
+ +``` +kubectl apply -f samples/test/baseconfig.yaml +kubectl apply -f samples/test/msvc-oci.yaml +kubectl get deploy busybox-decode -o yaml +``` + +and you should see the OCI image volume getting created. \ No newline at end of file diff --git a/samples/test/kind-config.yaml b/samples/test/kind-config.yaml new file mode 100644 index 0000000..aff1075 --- /dev/null +++ b/samples/test/kind-config.yaml @@ -0,0 +1,6 @@ +# Note: this won't be necessary when ImageVolume is GA +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: kind +featureGates: + "ImageVolume": true \ No newline at end of file diff --git a/samples/test/msvc-oci.yaml b/samples/test/msvc-oci.yaml new file mode 100644 index 0000000..6b4eb58 --- /dev/null +++ b/samples/test/msvc-oci.yaml @@ -0,0 +1,34 @@ +apiVersion: llm-d.ai/v1alpha1 +kind: ModelService +metadata: + name: busybox +spec: + decoupleScaling: false + + baseConfigMapRef: + name: basic-basic-conf + + routing: + modelName: ibm-granite/granite-3.3-2b-instruct + ports: + - name: inport + port: 80 + - name: outport + port: 9376 + + modelArtifacts: + uri: oci+native://image-with-tag:0.0.1::path/to/model + pullPolicy: IfNotPresent + + # describe decode pods + decode: + replicas: 1 + containers: + - name: "sidecar" + image: "nginx" + - name: "llm" + image: busybox + args: + - "{{ .ModelPath }}" + - "{{ .MountedModelPath }}" + mountModelVolume: true \ No newline at end of file diff --git a/samples/test/msvc.yaml b/samples/test/msvc.yaml index 2b00c22..7d87aaf 100644 --- a/samples/test/msvc.yaml +++ b/samples/test/msvc.yaml @@ -20,7 +20,8 @@ spec: port: 1112 modelArtifacts: - uri: pvc://llama-of-the-future/path/to/llama-2075 + # uri: pvc://llama-of-the-future/path/to/llama-2075 + uri: oci+native://redhat/granite-7b-lab-gguf:1.0::/ # describe decode pods decode: