diff --git a/api/v1alpha1/modelservice_types.go b/api/v1alpha1/modelservice_types.go
index 3cd7c73..717e4a1 100644
--- a/api/v1alpha1/modelservice_types.go
+++ b/api/v1alpha1/modelservice_types.go
@@ -133,7 +133,7 @@ type ContainerSpec struct {
// For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
// For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
// and the value is mounted to an environment variable called HF_TOKEN
- // For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ // For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
// is created and mounted with the mountPath oci-dir
// default:false
// +optional
@@ -258,7 +258,7 @@ type Routing struct {
// ModelArtifacts describes the source of the model
type ModelArtifacts struct {
// URI is the model URI
- // Three types of URIs are support to enable models packaged as images (oci:///<:image-tag>),
+ // Three types of URIs are supported to enable models packaged as images (oci+native:///<:image-tag>),
// models downloaded from HuggingFace (hf:///)
// and pre-existing models loaded from a volume-mounted PVC (pvc://model-path)
//
@@ -273,6 +273,13 @@ type ModelArtifacts struct {
//
// +optional
Size *res.Quantity `json:"size,omitempty"`
+ // OCI image pull policy.
+ // One of Always, Never, IfNotPresent.
+ // Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
+ // Cannot be updated.
+ // More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
+ // +optional
+ PullPolicy corev1.PullPolicy `json:"pullPolicy,omitempty" protobuf:"bytes,14,opt,name=pullPolicy,casttype=PullPolicy"`
}
// ModelServicePodSpec defines the specification for pod templates that will be created by ModelService.
diff --git a/config/crd/bases/llm-d.ai_modelservices.yaml b/config/crd/bases/llm-d.ai_modelservices.yaml
index b97ed3b..bedc058 100644
--- a/config/crd/bases/llm-d.ai_modelservices.yaml
+++ b/config/crd/bases/llm-d.ai_modelservices.yaml
@@ -355,7 +355,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
- For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
@@ -656,7 +656,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
- For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
@@ -992,7 +992,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
- For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
@@ -1293,7 +1293,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
- For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
@@ -1387,6 +1387,14 @@ spec:
authSecretName:
description: Name of the authentication secret. Contains HF_TOKEN
type: string
+ pullPolicy:
+ description: |-
+ OCI image pull policy.
+ One of Always, Never, IfNotPresent.
+ Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
+ Cannot be updated.
+ More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
+ type: string
size:
anyOf:
- type: integer
@@ -1399,7 +1407,7 @@ spec:
uri:
description: |-
URI is the model URI
- Three types of URIs are support to enable models packaged as images (oci:///<:image-tag>),
+ Three types of URIs are supported to enable models packaged as images (oci+native:///<:image-tag>),
models downloaded from HuggingFace (hf:///)
and pre-existing models loaded from a volume-mounted PVC (pvc://model-path)
type: string
@@ -1657,7 +1665,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
- For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
@@ -1958,7 +1966,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
- For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
diff --git a/docs/api_reference/out.asciidoc b/docs/api_reference/out.asciidoc
index 5fc8d40..81f348e 100644
--- a/docs/api_reference/out.asciidoc
+++ b/docs/api_reference/out.asciidoc
@@ -97,7 +97,7 @@ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-co
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache +
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, +
and the value is mounted to an environment variable called HF_TOKEN +
-For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) +
+For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) +
is created and mounted with the mountPath oci-dir +
default:false + | |
|===
@@ -121,12 +121,17 @@ ModelArtifacts describes the source of the model
|===
| Field | Description | Default | Validation
| *`uri`* __string__ | URI is the model URI +
-Three types of URIs are support to enable models packaged as images (oci:///<:image-tag>), +
+Three types of URIs are supported to enable models packaged as images (oci+native:///<:image-tag>), +
models downloaded from HuggingFace (hf:///) +
and pre-existing models loaded from a volume-mounted PVC (pvc://model-path) + | |
| *`authSecretName`* __string__ | Name of the authentication secret. Contains HF_TOKEN + | |
-| *`size`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#quantity-resource-api[$$Quantity$$]__ | Size of the model artifacts on disk +
+| *`size`* __xref:{anchor_prefix}-k8s-io-apimachinery-pkg-api-resource-quantity[$$Quantity$$]__ | Size of the model artifacts on disk +
ensure Size is large enough when providing hf://... URI + | |
+| *`pullPolicy`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#pullpolicy-v1-core[$$PullPolicy$$]__ | OCI image pull policy. +
+One of Always, Never, IfNotPresent. +
+Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. +
+Cannot be updated. +
+More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + | |
|===
@@ -278,6 +283,9 @@ this reference will be nil + | |
| *`eppDeploymentRef`* __string__ | EppDeploymentRef identifies the epp deployment +
if epp deployment is yet to be created, +
this reference will be nil + | |
+| *`httpRouteRef`* __string__ | HTTPRoute identifies the HTTPRoute resource +
+if HTTPRoute is yet to be created, +
+this reference will be nil + | |
| *`inferenceModelRef`* __string__ | InferenceModelRef identifies the inference model resource +
if inference model is yet to be created, +
this reference will be nil + | |
@@ -435,6 +443,88 @@ Required: {} +
| *`ports`* __xref:{anchor_prefix}-github-com-llm-d-llm-d-model-service-api-v1alpha1-port[$$Port$$] array__ | Ports is a list of named ports +
These can be referenced by name in configuration of base configuration or model services + | |
+| *`gatewayRefs`* __ParentReference array__ | GatewayRef is merged to baseconfig based on the Name field. +
+Directly from Gateway API: https://gateway-api.sigs.k8s.io/reference/spec/#commonroutespec +
+ParentRefs references the resources (usually Gateways) that a Route wants +
+to be attached to. Note that the referenced parent resource needs to +
+allow this for the attachment to be complete. For Gateways, that means +
+the Gateway needs to allow attachment from Routes of this kind and +
+namespace. For Services, that means the Service must either be in the same +
+namespace for a "producer" route, or the mesh implementation must support +
+and allow "consumer" routes for the referenced Service. ReferenceGrant is +
+not applicable for governing ParentRefs to Services - it is not possible to +
+create a "producer" route for a Service in a different namespace from the +
+Route. +
+
+
+There are two kinds of parent resources with "Core" support: +
+
+
+* Gateway (Gateway conformance profile) +
+* Service (Mesh conformance profile, ClusterIP Services only) +
+
+
+This API may be extended in the future to support additional kinds of parent +
+resources. +
+
+
+ParentRefs must be _distinct_. This means either that: +
+
+
+* They select different objects. If this is the case, then parentRef +
+entries are distinct. In terms of fields, this means that the +
+multi-part key defined by `group`, `kind`, `namespace`, and `name` must +
+be unique across all parentRef entries in the Route. +
+* They do not select different objects, but for each optional field used, +
+each ParentRef that selects the same object must set the same set of +
+optional fields to different values. If one ParentRef sets a +
+combination of optional fields, all must set the same combination. +
+
+
+Some examples: +
+
+
+* If one ParentRef sets `sectionName`, all ParentRefs referencing the +
+same object must also set `sectionName`. +
+* If one ParentRef sets `port`, all ParentRefs referencing the same +
+object must also set `port`. +
+* If one ParentRef sets `sectionName` and `port`, all ParentRefs +
+referencing the same object must also set `sectionName` and `port`. +
+
+
+It is possible to separately reference multiple distinct objects that may +
+be collapsed by an implementation. For example, some implementations may +
+choose to merge compatible Gateway Listeners together. If that is the +
+case, the list of routes attached to those resources should also be +
+merged. +
+
+
+Note that for ParentRefs that cross namespace boundaries, there are specific +
+rules. Cross-namespace references are only valid if they are explicitly +
+allowed by something in the namespace they are referring to. For example, +
+Gateway has the AllowedRoutes field, and ReferenceGrant provides a +
+generic way to enable other kinds of cross-namespace reference. +
+
+
+ +
+ParentRefs from a Route to a Service in the same namespace are "producer" +
+routes, which apply default routing rules to inbound connections from +
+any namespace to the Service. +
+
+
+ParentRefs from a Route to a Service in a different namespace are +
+"consumer" routes, and these routing rules are only applied to outbound +
+connections originating from the same namespace as the Route, for which +
+the intended destination of the connections are a Service targeted as a +
+ParentRef of the Route. +
+ +
+
+
+ +
+ +
+ +
+ + | | MaxItems: 32 +
+
|===
diff --git a/docs/api_reference/out.html b/docs/api_reference/out.html
index 4266a93..a7ce172 100644
--- a/docs/api_reference/out.html
+++ b/docs/api_reference/out.html
@@ -664,7 +664,7 @@
@@ -713,7 +713,7 @@