Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,35 @@ kubectl apply -f feature-flags.yaml
Within the next reconciliation loop the operator will begin creating a `<name>-network-policy` resource for each distribution.
Set `enabled: false` (or remove the block) to turn the feature back off; the operator will delete the previously managed policies.

## Image Mapping Overrides

The operator supports ConfigMap-driven image updates for LLS Distribution images. This allows independent patching for security fixes or bug fixes without requiring a new operator version.

### Configuration

Create or update the operator ConfigMap with an `image-overrides` key:

```yaml

image-overrides: |
  starter-gpu: quay.io/custom/llama-stack:starter-gpu
  starter: quay.io/custom/llama-stack:starter
```

### Configuration Format

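Use the distribution name directly as the key (e.g., `starter-gpu`, `starter`) and the full image reference as the value. The operator watches its ConfigMap and applies these overrides automatically; entries whose value is not a valid image reference are skipped.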

### Example Usage

To update the LLS Distribution image for all `starter` distributions:

```bash
kubectl patch configmap llama-stack-operator-config -n llama-stack-k8s-operator-system --type merge -p '{"data":{"image-overrides":"starter: quay.io/opendatahub/llama-stack:latest"}}'
```

This will cause all LlamaStackDistribution resources using the `starter` distribution to restart with the new image.

## Developer Guide

### Prerequisites
Expand Down
172 changes: 142 additions & 30 deletions controllers/llamastackdistribution_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (

"github.com/go-logr/logr"
"github.com/google/go-cmp/cmp"
"github.com/google/go-containerregistry/pkg/name"
llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
"github.com/llamastack/llama-stack-k8s-operator/pkg/cluster"
"github.com/llamastack/llama-stack-k8s-operator/pkg/deploy"
Expand Down Expand Up @@ -85,11 +86,17 @@ const (
// When a ConfigMap's data changes, it automatically triggers reconciliation of the referencing
// LlamaStackDistribution, which recalculates a content-based hash and updates the deployment's
// pod template annotations. This causes Kubernetes to restart the pods with the updated configuration.
//
// Operator ConfigMap Watching Feature:
// This reconciler also watches for changes to the operator configuration ConfigMap. When the operator
// config changes, it triggers reconciliation of all LlamaStackDistribution resources.
type LlamaStackDistributionReconciler struct {
client.Client
Scheme *runtime.Scheme
// Feature flags
EnableNetworkPolicy bool
// Image mapping overrides
ImageMappingOverrides map[string]string
// Cluster info
ClusterInfo *cluster.ClusterInfo
httpClient *http.Client
Expand Down Expand Up @@ -678,6 +685,40 @@ func (r *LlamaStackDistributionReconciler) configMapUpdatePredicate(e event.Upda
return false
}

// Check if this is the operator config ConfigMap
if r.handleOperatorConfigUpdate(newConfigMap) {
return true
}

// Handle referenced ConfigMap updates
return r.handleReferencedConfigMapUpdate(oldConfigMap, newConfigMap)
}

// handleOperatorConfigUpdate reports whether configMap is the operator config
// ConfigMap and, when it is, refreshes the reconciler's cached settings from it.
//
// It returns false when the operator namespace cannot be determined (the error
// is logged) or when configMap's name/namespace do not match the operator
// config ConfigMap. On a match it re-parses the feature flags and the image
// mapping overrides from configMap.Data and returns true.
func (r *LlamaStackDistributionReconciler) handleOperatorConfigUpdate(configMap *corev1.ConfigMap) bool {
	// This method receives no context from its caller, so a background
	// context is used for both logging and override parsing.
	ctx := context.Background()
	logger := log.FromContext(ctx)

	operatorNamespace, err := deploy.GetOperatorNamespace()
	if err != nil {
		// Surface the failure instead of dropping it silently; without the
		// namespace the operator config ConfigMap cannot be identified.
		logger.Error(err, "Failed to get operator namespace for operator config update")
		return false
	}

	if configMap.Name != operatorConfigData || configMap.Namespace != operatorNamespace {
		return false
	}

	// Update feature flags. On a parse failure the previously stored value is
	// kept and the error is only logged.
	enableNetworkPolicy, err := parseFeatureFlags(configMap.Data)
	if err != nil {
		logger.Error(err, "Failed to parse feature flags")
	} else {
		r.EnableNetworkPolicy = enableNetworkPolicy
	}

	// Image overrides are always replaced with whatever parses from the new data.
	r.ImageMappingOverrides = ParseImageMappingOverrides(ctx, configMap.Data)
	return true
}

// handleReferencedConfigMapUpdate processes updates to referenced ConfigMaps.
func (r *LlamaStackDistributionReconciler) handleReferencedConfigMapUpdate(oldConfigMap, newConfigMap *corev1.ConfigMap) bool {
// Only proceed if this ConfigMap is referenced by any LlamaStackDistribution
if !r.isConfigMapReferenced(newConfigMap) {
return false
Expand Down Expand Up @@ -843,6 +884,27 @@ func (r *LlamaStackDistributionReconciler) manuallyCheckConfigMapReference(confi

// findLlamaStackDistributionsForConfigMap maps ConfigMap changes to LlamaStackDistribution reconcile requests.
func (r *LlamaStackDistributionReconciler) findLlamaStackDistributionsForConfigMap(ctx context.Context, configMap client.Object) []reconcile.Request {
logger := log.FromContext(ctx).WithValues(
"configMapName", configMap.GetName(),
"configMapNamespace", configMap.GetNamespace())

operatorNamespace, err := deploy.GetOperatorNamespace()
if err != nil {
logger.Error(err, "Failed to get operator namespace for config map event processing")
return nil
}
// If the operator config was changed, we reconcile all LlamaStackDistributions
if configMap.GetName() == operatorConfigData && configMap.GetNamespace() == operatorNamespace {
// List all LlamaStackDistribution resources across all namespaces
allLlamaStacks := llamav1alpha1.LlamaStackDistributionList{}
err = r.List(ctx, &allLlamaStacks)
if err != nil {
logger.Error(err, "Failed to list all LlamaStackDistributions for operator config change")
return nil
}
return r.convertToReconcileRequests(allLlamaStacks)
}

// Try field indexer lookup first
attachedLlamaStacks, found := r.tryFieldIndexerLookup(ctx, configMap)
if !found {
Expand Down Expand Up @@ -1713,53 +1775,103 @@ func NewLlamaStackDistributionReconciler(ctx context.Context, client client.Clie
return nil, fmt.Errorf("failed to get operator namespace: %w", err)
}

// Get the ConfigMap
// If the ConfigMap doesn't exist, create it with default feature flags
// If the ConfigMap exists, parse the feature flags from the Configmap
// Initialize operator config ConfigMap
configMap, err := initializeOperatorConfigMap(ctx, client, operatorNamespace)
if err != nil {
return nil, err
}

// Parse feature flags from ConfigMap
enableNetworkPolicy, err := parseFeatureFlags(configMap.Data)
if err != nil {
return nil, fmt.Errorf("failed to parse feature flags: %w", err)
}

// Parse image mapping overrides from ConfigMap
imageMappingOverrides := ParseImageMappingOverrides(ctx, configMap.Data)

return &LlamaStackDistributionReconciler{
Client: client,
Scheme: scheme,
EnableNetworkPolicy: enableNetworkPolicy,
ImageMappingOverrides: imageMappingOverrides,
ClusterInfo: clusterInfo,
httpClient: &http.Client{Timeout: 5 * time.Second},
}, nil
}

// initializeOperatorConfigMap gets or creates the operator config ConfigMap.
func initializeOperatorConfigMap(ctx context.Context, c client.Client, operatorNamespace string) (*corev1.ConfigMap, error) {
configMap := &corev1.ConfigMap{}
configMapName := types.NamespacedName{
Name: operatorConfigData,
Namespace: operatorNamespace,
}

if err = client.Get(ctx, configMapName, configMap); err != nil {
if !k8serrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to get ConfigMap: %w", err)
}
err := c.Get(ctx, configMapName, configMap)
if err == nil {
return configMap, nil
}

// ConfigMap doesn't exist, create it with defaults
configMap, err = createDefaultConfigMap(configMapName)
if err != nil {
return nil, fmt.Errorf("failed to generate default configMap: %w", err)
if !k8serrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to get ConfigMap: %w", err)
}

// ConfigMap doesn't exist, create it with defaults
configMap, err = createDefaultConfigMap(configMapName)
if err != nil {
return nil, fmt.Errorf("failed to generate default configMap: %w", err)
}

if err = c.Create(ctx, configMap); err != nil {
return nil, fmt.Errorf("failed to create ConfigMap: %w", err)
}

return configMap, nil
}

func ParseImageMappingOverrides(ctx context.Context, configMapData map[string]string) map[string]string {
imageMappingOverrides := make(map[string]string)
logger := log.FromContext(ctx)

// Look for the image-overrides key in the ConfigMap data
if overridesYAML, exists := configMapData["image-overrides"]; exists {
// Parse the YAML content
var overrides map[string]string
if err := yaml.Unmarshal([]byte(overridesYAML), &overrides); err != nil {
// Log error but continue with empty overrides
logger.V(1).Info("failed to parse image-overrides YAML", "error", err)
return imageMappingOverrides
}

if err = client.Create(ctx, configMap); err != nil {
return nil, fmt.Errorf("failed to create ConfigMap: %w", err)
// Validate and copy the parsed overrides to our result map
for version, image := range overrides {
// Validate the image reference format
if _, err := name.ParseReference(image); err != nil {
logger.V(1).Info(
"skipping invalid image override",
"version", version,
"image", image,
"error", err,
)
continue
}
imageMappingOverrides[version] = image
}
}

// Parse feature flags from ConfigMap
enableNetworkPolicy, err := parseFeatureFlags(configMap.Data)
if err != nil {
return nil, fmt.Errorf("failed to parse feature flags: %w", err)
}
return &LlamaStackDistributionReconciler{
Client: client,
Scheme: scheme,
EnableNetworkPolicy: enableNetworkPolicy,
ClusterInfo: clusterInfo,
httpClient: &http.Client{Timeout: 5 * time.Second},
}, nil
return imageMappingOverrides
}

// NewTestReconciler creates a reconciler for testing, allowing injection of a custom http client and feature flags.
func NewTestReconciler(client client.Client, scheme *runtime.Scheme, clusterInfo *cluster.ClusterInfo,
httpClient *http.Client, enableNetworkPolicy bool) *LlamaStackDistributionReconciler {
return &LlamaStackDistributionReconciler{
Client: client,
Scheme: scheme,
ClusterInfo: clusterInfo,
httpClient: httpClient,
EnableNetworkPolicy: enableNetworkPolicy,
Client: client,
Scheme: scheme,
ClusterInfo: clusterInfo,
httpClient: httpClient,
EnableNetworkPolicy: enableNetworkPolicy,
ImageMappingOverrides: make(map[string]string),
}
}
Loading