diff --git a/.gitignore b/.gitignore
index ff02e9d9..950c9b99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .idea/*
 charts/mlrun-ce/charts/*
 .DS_Store
+**/.DS_Store
+*.DS_Store
diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml
index e04c6977..603ba5ef 100644
--- a/charts/mlrun-ce/Chart.yaml
+++ b/charts/mlrun-ce/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v1
 name: mlrun-ce
-version: 0.10.0-rc5
+version: 0.10.0-rc6
 description: MLRun Open Source Stack
 home: https://iguazio.com
 icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png
diff --git a/charts/mlrun-ce/requirements.lock b/charts/mlrun-ce/requirements.lock
index a65ccfd0..8f9f7d7e 100644
--- a/charts/mlrun-ce/requirements.lock
+++ b/charts/mlrun-ce/requirements.lock
@@ -17,8 +17,8 @@ dependencies:
 - name: kube-prometheus-stack
   repository: https://prometheus-community.github.io/helm-charts
   version: 72.1.1
-- name: kafka
-  repository: https://charts.bitnami.com/bitnami
-  version: 31.3.1
-digest: sha256:d92e2702f26b3fbbe527fd4439cec8ce50bc79ad54fc69e10c28301e04e0114a
-generated: "2025-11-04T09:39:37.92185Z"
+- name: strimzi-kafka-operator
+  repository: https://strimzi.io/charts/
+  version: 0.48.0
+digest: sha256:f45be2a1208958d753b2e8a95f33eee17718ad1e691317ec0b50e3c088a7cae8
+generated: "2025-11-04T15:56:02.250773+02:00"
diff --git a/charts/mlrun-ce/requirements.yaml b/charts/mlrun-ce/requirements.yaml
index 1e7e0941..2d362851 100644
--- a/charts/mlrun-ce/requirements.yaml
+++ b/charts/mlrun-ce/requirements.yaml
@@ -21,7 +21,7 @@ dependencies:
     repository: "https://prometheus-community.github.io/helm-charts"
     version: "72.1.1"
     condition: kube-prometheus-stack.enabled
-  - name: kafka
-    repository: "https://charts.bitnami.com/bitnami"
-    version: "31.3.1"
-    condition: kafka.enabled
+  - name: strimzi-kafka-operator
+    repository: "https://strimzi.io/charts/"
+    version: "0.48.0"
+    condition: strimzi-kafka-operator.enabled
diff --git a/charts/mlrun-ce/templates/kafka/kafka-bootstrap-alias.yaml b/charts/mlrun-ce/templates/kafka/kafka-bootstrap-alias.yaml
new file mode 100644
index 00000000..9791297b
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-bootstrap-alias.yaml
@@ -0,0 +1,26 @@
+{{- if and .Values.kafka.enabled .Values.kafka.bootstrapAlias.enabled }}
+---
+# Service alias that exposes the Kafka pods on port 9092 under a short name
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Values.kafka.bootstrapAlias.name | default "kafka-stream" }}
+  # A Service only selects pods in its own namespace, so it follows the Kafka resource
+  namespace: {{ .Values.kafka.namespace | default .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: kafka
+    app.kubernetes.io/component: bootstrap-alias
+    {{- include "mlrun-ce.common.labels" . | nindent 4 }}
+spec:
+  type: ClusterIP
+  ports:
+    - name: client
+      port: 9092
+      targetPort: 9092
+      protocol: TCP
+  selector:
+    strimzi.io/cluster: {{ .Values.kafka.name | default "kafka-stream" }}
+    strimzi.io/kind: Kafka
+    strimzi.io/name: {{ .Values.kafka.name | default "kafka-stream" }}-kafka
+{{- end }}
+
diff --git a/charts/mlrun-ce/templates/kafka/kafka-cluster.yaml b/charts/mlrun-ce/templates/kafka/kafka-cluster.yaml
new file mode 100644
index 00000000..8e65b552
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-cluster.yaml
@@ -0,0 +1,33 @@
+{{- if .Values.kafka.enabled }}
+# Strimzi Kafka resource: listeners and broker config come from .Values.kafka
+apiVersion: kafka.strimzi.io/v1beta2
+kind: Kafka
+metadata:
+  name: {{ .Values.kafka.name | default "kafka-stream" }}
+  namespace: {{ .Values.kafka.namespace | default .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: kafka
+    app.kubernetes.io/component: cluster
+    {{- include "mlrun-ce.common.labels" . | nindent 4 }}
+spec:
+  kafka:
+    listeners:
+      {{- range .Values.kafka.listeners }}
+      - name: {{ .name }}
+        port: {{ .port }}
+        type: {{ .type | default "internal" }}
+        tls: {{ .tls | default false }}
+      {{- end }}
+    config:
+      {{- range $key, $value := .Values.kafka.config }}
+      {{ $key }}: {{ $value | quote }}
+      {{- end }}
+  {{- if gt (.Values.kafka.zookeeper.replicas | int) 0 }}
+  # NOTE(review): storage below is hard-coded, unlike the node pool storage values
+  zookeeper:
+    replicas: {{ .Values.kafka.zookeeper.replicas }}
+    storage:
+      type: persistent-claim
+      size: 8Gi
+  {{- end }}
+{{- end }}
diff --git a/charts/mlrun-ce/templates/kafka/kafka-network-policy.yaml b/charts/mlrun-ce/templates/kafka/kafka-network-policy.yaml
new file mode 100644
index 00000000..dccb782e
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-network-policy.yaml
@@ -0,0 +1,68 @@
+{{- if .Values.kafka.rbac.enabled -}}
+{{- $operatorNamespace := .Values.kafka.rbac.operatorNamespace | default "controller" -}}
+{{- $kafkaName := .Values.kafka.name | default "kafka-stream" -}}
+{{- $currentNamespace := .Release.Namespace -}}
+---
+# NetworkPolicy: Allow egress from this namespace to Kafka namespace
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-kafka-access
+  namespace: {{ $currentNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+spec:
+  # Apply to all pods in this namespace
+  podSelector: {}
+
+  policyTypes:
+    - Egress
+
+  egress:
+    # Allow egress to Kafka namespace
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: {{ $operatorNamespace }}
+          podSelector:
+            matchLabels:
+              strimzi.io/cluster: {{ $kafkaName }}
+      ports:
+        - protocol: TCP
+          port: 9092  # client listener
+        - protocol: TCP
+          port: 9093  # controller listener
+        - protocol: TCP
+          port: 9094  # internal listener
+
+    # Allow DNS resolution (required for service discovery)
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: kube-system
+          podSelector:
+            matchLabels:
+              k8s-app: kube-dns
+      ports:
+        - protocol: UDP
+          port: 53
+        - protocol: TCP
+          port: 53
+
+    # Allow egress to the internet/other services (optional)
+    # Comment out the next section if you want to restrict to Kafka only
+    - to:
+        - namespaceSelector: {}
+        - podSelector: {}
+        # Pod/namespace selectors only match in-cluster pods; an ipBlock
+        # is required to reach addresses outside the cluster
+        - ipBlock:
+            cidr: 0.0.0.0/0
+
+    # Allow egress within same namespace
+    - to:
+        - podSelector: {}
+{{- end }}
+
diff --git a/charts/mlrun-ce/templates/kafka/kafka-nodepool.yaml b/charts/mlrun-ce/templates/kafka/kafka-nodepool.yaml
new file mode 100644
index 00000000..d1c98c49
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-nodepool.yaml
@@ -0,0 +1,31 @@
+{{- if .Values.kafka.enabled }}
+# Node pool for the Kafka resource; every node is both controller and broker
+apiVersion: kafka.strimzi.io/v1beta2
+kind: KafkaNodePool
+metadata:
+  name: {{ .Values.kafka.name | default "kafka-stream" }}-pool
+  namespace: {{ .Values.kafka.namespace | default .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: kafka
+    app.kubernetes.io/component: nodepool
+    strimzi.io/cluster: {{ .Values.kafka.name | default "kafka-stream" }}
+    {{- include "mlrun-ce.common.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.kafka.replicas }}
+  roles:
+    - controller
+    - broker
+  storage:
+    type: {{ .Values.kafka.storage.type }}
+    size: {{ .Values.kafka.storage.size | quote }}
+    {{- if .Values.kafka.storage.class }}
+    class: {{ .Values.kafka.storage.class }}
+    {{- end }}
+  resources:
+    requests:
+      memory: {{ .Values.kafka.resources.requests.memory | quote }}
+      cpu: {{ .Values.kafka.resources.requests.cpu | quote }}
+    limits:
+      memory: {{ .Values.kafka.resources.limits.memory | quote }}
+      cpu: {{ .Values.kafka.resources.limits.cpu | quote }}
+{{- end }}
diff --git a/charts/mlrun-ce/templates/kafka/kafka-rbac.yaml b/charts/mlrun-ce/templates/kafka/kafka-rbac.yaml
new file mode 100644
index 00000000..6dc9eb8a
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-rbac.yaml
@@ -0,0 +1,89 @@
+{{- if .Values.kafka.rbac.enabled -}}
+{{- $operatorNamespace := .Values.kafka.rbac.operatorNamespace | default "controller" -}}
+{{- $currentNamespace := .Release.Namespace -}}
+---
+# ServiceAccount for Kafka client applications
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: kafka-client
+  namespace: {{ $currentNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+---
+# Role: Allow managing Kafka resources via CRDs in the operator namespace
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ $currentNamespace }}-kafka-resource-manager
+  namespace: {{ $operatorNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+    user-namespace: {{ $currentNamespace }}
+rules:
+  # Allow creating and managing KafkaTopic CRDs
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkatopics
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  # Allow checking KafkaTopic status
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkatopics/status
+    verbs:
+      - get
+      - list
+      - watch
+  # Allow reading KafkaUser CRDs (if using SCRAM auth)
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkausers
+    verbs:
+      - get
+      - list
+      - watch
+  # Allow reading the Kafka cluster info
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkas
+    verbs:
+      - get
+      - list
+      - watch
+---
+# RoleBinding: Grant Kafka resource management permissions to ServiceAccount
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ $currentNamespace }}-kafka-resource-manager
+  namespace: {{ $operatorNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+    user-namespace: {{ $currentNamespace }}
+subjects:
+  - kind: ServiceAccount
+    name: kafka-client
+    namespace: {{ $currentNamespace }}
+roleRef:
+  kind: Role
+  name: {{ $currentNamespace }}-kafka-resource-manager
+  apiGroup: rbac.authorization.k8s.io
+{{- end }}
+
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index 6edd8fdf..4e57a199 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -378,10 +378,6 @@ pipelines:
     cacheImage:
       repository: gcr.io/google-containers/busybox
       tag: latest
-    minio:
-      repository: minio/minio
-      tag: "RELEASE.2025-10-15T17-29-55Z"
-
 
 kube-prometheus-stack:
   fullnameOverride: monitoring
@@ -431,6 +427,7 @@ kube-prometheus-stack:
       nodePort: 30020
   kube-state-metrics:
     fullnameOverride: state-metrics
+
   prometheus-node-exporter:
     fullnameOverride: node-exporter
     hostNetwork: false
@@ -468,37 +465,77 @@ tdengine:
     CLUSTER: "0"
     TAOS_REPLICA: "1"
 
+strimzi-kafka-operator:
+  enabled: false
+  watchAnyNamespace: true
+
 kafka:
-  global:
-    security:
-      allowInsecureImages: true
   enabled: true
-  fullnameOverride: kafka-stream
-  image:
-    repository: 'bitnamilegacy/kafka'
-  extraConfigYaml:
-    default.replication.factor: "1"
-    offsets.topic.replication.factor: "1"
-    transaction.state.log.replication.factor: "1"
-    transaction.state.log.min.isr: "1"
+  # Name of the Kafka resource; the node pool is named "{name}-pool"
+  name: kafka-stream
+
+  # Bootstrap service alias configuration
+  bootstrapAlias:
+    # Create a service alias for simpler Kafka bootstrap server name
+    # When enabled, creates: {aliasName}.{namespace}.svc.cluster.local:9092
+    # instead of the default: {name}-kafka-bootstrap.{namespace}.svc.cluster.local:9092
+    # Only rendered when kafka.enabled is also true
+    enabled: true
+    # Name for the bootstrap service alias (only used if enabled is true)
+    name: kafka-stream
+
+  # Number of nodes in the node pool (each is both controller and broker)
+  replicas: 1
 
-  controller:
-    replicaCount: 1
-    resourcesPreset: "medium"
+  # Listeners rendered into the Kafka resource (name, port, type, tls)
   listeners:
-    client:
-      name: CLIENT
-      containerPort: 9092
-      protocol: PLAINTEXT
-    controller:
-      name: CONTROLLER
-      containerPort: 9093
-      protocol: PLAINTEXT
-    interbroker:
-      name: INTERNAL
-      containerPort: 9094
-      protocol: PLAINTEXT
-    advertisedListeners: >-
-      CLIENT://kafka-stream:9092
-      CONTROLLER://kafka-stream-controller-headless:9093,
-      INTERNAL://kafka-stream-controller-headless:9094,
+    - name: client
+      port: 9092
+      type: internal
+      tls: false
+    - name: controller
+      port: 9093
+      type: internal
+      tls: false
+    - name: internal
+      port: 9094
+      type: internal
+      tls: false
+
+  # Storage for each node; leave class empty to omit it from the node pool
+  storage:
+    type: persistent-claim
+    size: 8Gi
+    class: ""
+
+  # Resource requests and limits for each node
+  resources:
+    requests:
+      memory: "1Gi"
+      cpu: "500m"
+    limits:
+      memory: "2Gi"
+      cpu: "1000m"
+
+  # Broker configuration entries (values are rendered as quoted strings)
+  config:
+    # Replication settings for single-node setup
+    default.replication.factor: 1
+    offsets.topic.replication.factor: 1
+    transaction.state.log.replication.factor: 1
+    transaction.state.log.min.isr: 1
+
+  # The zookeeper section of the Kafka resource is rendered only when replicas > 0
+  zookeeper:
+    replicas: 0
+
+  # Kafka RBAC for user namespaces
+  # Enable this when installing in user namespaces (mlrun, mlrun1, etc.)
+  # When enabled, creates: ServiceAccount "kafka-client" + Role/RoleBinding + NetworkPolicy
+  rbac:
+    # Enable RBAC for this namespace to access Kafka
+    enabled: false
+
+    # Operator namespace (where Kafka operator/cluster is running)
+    # Example: "mlrun-ce-cont" if that's where you installed the operator
+    operatorNamespace: controller