diff --git a/.github/workflows/lint-yaml.yml b/.github/workflows/lint-yaml.yml index 702e673..30fe0a9 100644 --- a/.github/workflows/lint-yaml.yml +++ b/.github/workflows/lint-yaml.yml @@ -27,11 +27,14 @@ jobs: run: | # The yamlfix command will automatically find and apply fixes to any YAML files. # It uses the `.yamllint.yml` configuration file for its rules. - yamlfix --exclude "**/templates/**" . && yamlfix common/security/kustomization.yaml common/security/netpol.yaml + yamlfix --exclude "**/templates/**" --exclude ".github/workflows/**" . && yamlfix common/security/kustomization.yaml common/security/netpol.yaml - # Check if there are any changes to commit. - # `git status --porcelain` will be empty if there are no changes. - if [[ -n $(git status --porcelain) ]]; then + # Stage only non-workflow changes so the bot never attempts to modify + # workflow files, which requires elevated permissions. + git add --all -- . ':(exclude).github/workflows/**' + + # Check if there are any staged changes to commit. + if ! git diff --cached --quiet; then echo "Changes detected, will commit and push." echo "changes_detected=true" >> $GITHUB_ENV else @@ -43,9 +46,8 @@ jobs: run: | git config --global user.name 'github-actions[bot]' git config --global user.email 'github-actions[bot]@users.noreply.github.com' - git add . git commit -m "style: auto-fix YAML linting issues ✨" - git push origin ${{ github.head_ref }} + git push origin "HEAD:${{ github.head_ref }}" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Final Check diff --git a/README.md b/README.md index 223abb5..024ea2b 100644 --- a/README.md +++ b/README.md @@ -228,15 +228,15 @@ This tricks your local machine into thinking `localhost` is the remote server, w 1. **Update `/etc/hosts`**: ```bash # Add this line - 127.0.0.1 argocd.mip-tds.chuv.cscs.ch + 127.0.0.1 argocd.example.com ``` 2. 
**Open Tunnel (Sudo required for port 443)**: ```bash - sudo ssh -L 443:argocd.mip-tds.chuv.cscs.ch:443 @ + sudo ssh -L 443:argocd.example.com:443 <user>@<remote-server> ``` 3. **Login**: ```bash - argocd login argocd.mip-tds.chuv.cscs.ch:443 --insecure --grpc-web + argocd login argocd.example.com:443 --insecure --grpc-web ``` ### Initial secrets: diff --git a/argo-setup/patches/patch-argocd-application-controller-clusterrole.yaml b/argo-setup/patches/patch-argocd-application-controller-clusterrole.yaml index 094e99b..077d5ee 100644 --- a/argo-setup/patches/patch-argocd-application-controller-clusterrole.yaml +++ b/argo-setup/patches/patch-argocd-application-controller-clusterrole.yaml @@ -101,3 +101,24 @@ rules: # - apiGroups: [''] # resources: [endpoints] # verbs: [get, list, watch, create, update, delete, patch] + # Rule 5: Elastic Stack resources + - apiGroups: + - elasticsearch.k8s.elastic.co + - kibana.k8s.elastic.co + - beat.k8s.elastic.co + - apm.k8s.elastic.co + - enterprisesearch.k8s.elastic.co + - maps.k8s.elastic.co + - agent.k8s.elastic.co + - autoscaling.k8s.elastic.co + resources: + - elasticsearches + - kibanas + - beats + - apmservers + - enterprisesearches + - agents + - agentpolicies + - elasticmapsservers + - elasticsearchautoscalings + verbs: [create, delete, patch, update, get, list, watch] diff --git a/base/argo-projects.yaml b/base/argo-projects.yaml index 5947cf0..95806d8 100644 --- a/base/argo-projects.yaml +++ b/base/argo-projects.yaml @@ -10,7 +10,7 @@ metadata: argocd.instance: mip-team annotations: argocd.argoproj.io/note: 'Manages static AppProjects: mip-federations, mip-shared-apps, - mip-common, mip-security' + mip-common, mip-monitoring, mip-security, submariner' spec: generators: - list: @@ -21,6 +21,8 @@ spec: fileName: mip-shared-apps - projectName: mip-argo-project-common fileName: mip-common + - projectName: mip-argo-project-monitoring + fileName: mip-monitoring - projectName: mip-argo-project-security fileName: mip-security - projectName:
mip-argo-project-submariner diff --git a/base/mip-infrastructure/rbac/eck-beats-rbac.yaml b/base/mip-infrastructure/rbac/eck-beats-rbac.yaml new file mode 100644 index 0000000..4c9b349 --- /dev/null +++ b/base/mip-infrastructure/rbac/eck-beats-rbac.yaml @@ -0,0 +1,58 @@ +--- +# Manual RBAC for ECK Beats (filebeat/metricbeat). +# ECK chart templates intentionally do not include Beat RBAC resources. +# ServiceAccounts are created by the ECK Helm chart in namespace elastic-system. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: eck-filebeat +rules: + - apiGroups: [''] + resources: [pods, namespaces, nodes, endpoints, services] + verbs: [get, list, watch] + - apiGroups: [coordination.k8s.io] + resources: [leases] + verbs: [get, list, watch, create, update, delete] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: eck-metricbeat +rules: + - apiGroups: [''] + resources: [nodes, pods, namespaces, services, endpoints] + verbs: [get, list, watch] + - apiGroups: [''] + resources: [nodes/stats] + verbs: [get] + - nonResourceURLs: [/metrics] + verbs: [get] + - apiGroups: [coordination.k8s.io] + resources: [leases] + verbs: [get, list, watch, create, update, delete] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: eck-filebeat +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: eck-filebeat +subjects: + - kind: ServiceAccount + name: eck-filebeat + namespace: elastic-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: eck-metricbeat +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: eck-metricbeat +subjects: + - kind: ServiceAccount + name: eck-metricbeat + namespace: elastic-system diff --git a/common/elastic-operator/Chart.yaml b/common/elastic-operator/Chart.yaml new file mode 100644 index 0000000..89bc123 --- /dev/null +++ b/common/elastic-operator/Chart.yaml @@ -0,0 
+1,11 @@ +--- +apiVersion: v2 +name: elastic-operator +description: Elastic Cloud on Kubernetes (ECK) Operator +type: application +version: 1.0.0 +appVersion: 2.13.0 +dependencies: + - name: eck-operator + version: 2.13.0 + repository: https://helm.elastic.co diff --git a/common/elastic-operator/values.yaml b/common/elastic-operator/values.yaml new file mode 100644 index 0000000..3336ec1 --- /dev/null +++ b/common/elastic-operator/values.yaml @@ -0,0 +1,5 @@ +--- +eck-operator: + createNamespace: false + webhook: + enabled: true diff --git a/common/monitoring/eck/Chart.yaml b/common/monitoring/eck/Chart.yaml new file mode 100644 index 0000000..00e5422 --- /dev/null +++ b/common/monitoring/eck/Chart.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: v2 +name: eck-stack-rke2 +description: Helm chart to deploy Elastic Stack resources (Elasticsearch, Kibana, + Beats, etc.) on an RKE2 cluster managed by an existing Elastic Cloud on Kubernetes + (ECK) operator +kubeVersion: '>=1.23.0-0' +type: application +version: 0.2.0 +appVersion: 2.13.0 +icon: https://www.elastic.co/static/images/elastic-logo-200.png +keywords: [elasticsearch, eck, elastic-stack] +dependencies: [] diff --git a/common/monitoring/eck/README.md b/common/monitoring/eck/README.md new file mode 100644 index 0000000..e2fab70 --- /dev/null +++ b/common/monitoring/eck/README.md @@ -0,0 +1,183 @@ +# ECK Helm Chart for RKE2 + +This directory contains the ECK Helm chart. It targets managed RKE2 clusters where the Elastic Cloud on Kubernetes (ECK) operator already runs (Rancher installs it under `kube-system`). By default the chart provisions: + +- A single-node Elasticsearch cluster plus Kibana. + +Optional components (disabled by default): + +- Filebeat and Metricbeat DaemonSets that forward cluster logs and metrics. +- The eck-notifier CronJob that pushes Kibana alert summaries to Microsoft Teams and Cisco Webex. + +## Prerequisites + +- Helm 3 and `kubectl` available locally. 
+- RKE2 cluster v1.23+ with access to the `elastic-system` namespace. +- ECK operator 2.13+ running cluster-wide. + > **Note regarding the ECK Operator**: This chart does **not** install the operator because doing so requires cluster-admin privileges that shouldn't be granted to this standard monitoring deployment. If your hosting provider (like Rancher) already provides it, you are good to go. Otherwise, you must install the `common/elastic-operator` chart and its privileged namespace manually or include it in your infrastructure overlays before deploying this monitoring stack. +- Default StorageClass compatible with the sample workloads (defaults assume `ceph-corbo-cephfs`). +- Namespace prepared for Beats hostPath mounts (needed only if Beats are enabled and Pod Security Admission is enforced): + + ```bash + kubectl create namespace elastic-system + kubectl label namespace elastic-system \ + pod-security.kubernetes.io/enforce=privileged \ + pod-security.kubernetes.io/audit=privileged \ + pod-security.kubernetes.io/warn=privileged --overwrite + ``` + +- Secret `eck-eck-notifier-secrets` populated with Elasticsearch credentials plus Teams/Webex settings (required only if `alertNotifier.enabled=true`, see [Alert notifier configuration](#alert-notifier-configuration)). +- When Beats are enabled, apply the manual RBAC manifest (the chart does not template Beat RBAC resources): + + ```bash + kubectl apply -f base/mip-infrastructure/rbac/eck-beats-rbac.yaml + ``` + +## Install / upgrade + +```bash +helm upgrade --install eck . \ + --namespace elastic-system \ + --create-namespace \ + --skip-crds \ + --wait \ + --timeout 15m +``` + +> Helm 4 uses server-side apply by default. Because the ECK operator also mutates the CRs, add `--server-side=false` (or configure the same in Argo CD) for conflict-free upgrades. + +Supply overrides through `--set`/`-f my-values.yaml` as usual. + +## Customising values + +All knobs live in `values.yaml`. 
Common overrides: + +- `elasticsearch.*` – adjust resources, replica count, or the StorageClass. Note: The default `storageClassName` is currently hardcoded to `ceph-corbo-cephfs` as it aligns with our current infrastructure, but you can override this for deployments in other environments. +- `kibana.ingress.*` – enable ingress, set hosts/TLS, or keep using port-forward. +- `observability.filebeat.*` / `observability.metricbeat.*` – enable and tune the DaemonSets. Filebeat defaults to 100m CPU, 400Mi request / 600Mi limit. Both use Generic Ephemeral Volumes for their `data` mounts by default (set to `ceph-corbo-cephfs` at 2Gi). +- `alertNotifier.*` – enable notifier mode, then change the Cron schedule, PVC behaviour, secret names/keys, or Teams/Webex delivery. Note: Like Elasticsearch, the notifier PVC's default `storageClassName` is hardcoded to `ceph-corbo-cephfs`. + +## Alert notifier configuration + +The chart bundles the `alertNotifier` CronJob so Kibana alerts arrive in Microsoft Teams or Cisco Webex. Adjust the schedule, outputs, and credentials through values. 
A minimal override file could look like: + +```yaml +# alert-notifier-values.yaml +alertNotifier: + image: + repository: registry.example.com/eck-notifier + tag: latest + schedule: "*/5 * * * *" + es: + index: ".internal.alerts-observability.logs.alerts-default-*" + skipVerify: true + teams: + enabled: true + webex: + enabled: true + roomId: "" # leave empty to pull from the secret + personEmail: "" + tokenKey: webexBotToken + roomIdKey: webexRoomId + secret: + create: false + name: eck-eck-notifier-secrets + +kibana: + ingress: + enabled: true + hosts: + - host: localhost + path: / + pathType: Prefix + http: + tls: + selfSignedCertificate: + disabled: true + config: + xpack.security.secureCookies: false +``` + +Deploy (or upgrade) the chart from the repository root: + +```bash +helm upgrade --install eck common/monitoring/eck -f alert-notifier-values.yaml \ + --namespace elastic-system --create-namespace +``` + +### Secret + +Populate the notifier secret so the CronJob can talk to Elasticsearch and your chat tools: + +```bash +kubectl create secret generic eck-eck-notifier-secrets \ + -n elastic-system \ + --from-literal=es-url=https://elasticsearch-sample-es-http.elastic-system.svc:9200 \ + --from-literal=es-user=elastic \ + --from-literal=es-pass="" \ + --from-literal=teams-webhook="https://outlook.office.com/webhook/..." \ + --from-literal=webexBotToken="" \ + --from-literal=webexRoomId="Y2lzY29zcGFyazovL3VzL1JPT00v..." +``` + +If you prefer direct Webex messages, leave `webexRoomId` empty and set `alertNotifier.webex.personEmail` instead. 
Whenever Elasticsearch rotates the `elastic` password, regenerate the secret: + +```bash +ES_PASS=$(kubectl get secret elasticsearch-sample-es-elastic-user \ + -n elastic-system \ + -o go-template='{{printf "%s" (index .data "elastic")}}' | base64 -d) + +kubectl create secret generic eck-eck-notifier-secrets \ + -n elastic-system \ + --from-literal=es-url=https://elasticsearch-sample-es-http.elastic-system.svc:9200 \ + --from-literal=es-user=elastic \ + --from-literal=es-pass="$ES_PASS" \ + --from-literal=teams-webhook="https://outlook.office.com/webhook/..." \ + --from-literal=webexBotToken="" \ + --from-literal=webexRoomId="Y2lzY29zcGFyazovL3VzL1JPT00v..." \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +### Persistent state + +The CronJob persists alert hashes under `/var/lib/eck-notifier/state.json` (PVC) so it only posts deltas. Override `alertNotifier.state.persistence.*` if you already have a claim or disable persistence for ephemeral deployments. + +## Verifying the deployment + +```bash +kubectl get elasticsearch -n elastic-system +kubectl get kibana -n elastic-system +# Optional (when enabled) +kubectl get beats.beat.k8s.elastic.co -n elastic-system +kubectl get cronjob eck-eck-notifier -n elastic-system +``` + +Fetch the autogenerated `elastic` password: + +```bash +kubectl get secret elasticsearch-sample-es-elastic-user \ + -n elastic-system \ + -o go-template='{{printf "%s" (index .data "elastic")}}' | base64 -d; echo +``` + +## Accessing Kibana + +Port-forward the service when you only need temporary access: + +```bash +kubectl port-forward -n elastic-system svc/kibana-sample-kb-http 5601:5601 +``` + +Then browse to `https://localhost:5601` (accept the self-signed cert warning) and log in with `elastic` plus the password above. To expose Kibana permanently, enable `kibana.ingress.enabled` and provide hosts/TLS values. + +## Observability notes + +Filebeat autodiscovers pods via hints and forwards container logs. 
Metricbeat scrapes nodes, pods, containers, volumes, the apiserver, and host metrics. They are disabled by default and can be enabled through `observability.*` in `values.yaml`. + +## Uninstalling + +```bash +helm uninstall eck -n elastic-system +``` + +This removes Elasticsearch/Kibana/Beats/notifier workloads but leaves the upstream ECK CRDs installed (so existing CRs keep working). Delete `crds/eck-crds.yaml` manually if you also want the CRDs gone after uninstalling. diff --git a/common/monitoring/eck/templates/_helpers.tpl b/common/monitoring/eck/templates/_helpers.tpl new file mode 100644 index 0000000..8bdf5f0 --- /dev/null +++ b/common/monitoring/eck/templates/_helpers.tpl @@ -0,0 +1,102 @@ +{{- define "eck-stack.fullname" -}} +{{- if .Chart.Name -}} +{{- printf "%s" .Chart.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "eck-stack" -}} +{{- end -}} +{{- end -}} + +{{- define "eck-stack.alertNotifier.fullname" -}} +{{- printf "%s-eck-notifier" .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "eck-stack.alertNotifier.labels" -}} +app.kubernetes.io/name: {{ include "eck-stack.alertNotifier.fullname" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | trunc 63 | trimSuffix "-" }} +{{- end -}} + +{{- define "eck-stack.alertNotifier.secretName" -}} +{{- $cfg := .Values.alertNotifier.secret | default (dict) -}} +{{- if $cfg.existingSecret -}} +{{- $cfg.existingSecret -}} +{{- else -}} +{{- $name := $cfg.name | default (printf "%s-eck-notifier-secrets" .Release.Name) -}} +{{- $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "eck-stack.alertNotifier.pvcName" -}} +{{- $cfg := .Values.alertNotifier.state.persistence | default (dict) -}} +{{- if $cfg.existingClaim -}} +{{- $cfg.existingClaim -}} +{{- else -}} +{{- printf "%s-eck-notifier-state" .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "eck-stack.alertNotifier.image" -}} +{{- $img := .Values.alertNotifier.image | default (dict) -}} +{{- $repo := $img.repository | default "eck-notifier" -}} +{{- $tag := $img.tag | default "latest" -}} +{{- printf "%s:%s" $repo $tag -}} +{{- end -}} + +{{- define "eck-stack.alertNotifier.stateFile" -}} +{{- $path := .Values.alertNotifier.state.path | default "/var/lib/eck-notifier/state.json" -}} +{{- if $path -}} +{{- $path -}} +{{- else -}} +/var/lib/eck-notifier/state.json +{{- end -}} +{{- end -}} + +{{- define "eck-stack.alertNotifier.stateDir" -}} +{{- $file := include "eck-stack.alertNotifier.stateFile" . -}} +{{- $dir := regexReplaceAll "[^/]+$" $file "" -}} +{{- $trimmed := trimSuffix "/" $dir -}} +{{- if $trimmed -}} +{{- $trimmed -}} +{{- else -}} +/var/lib/eck-notifier +{{- end -}} +{{- end -}} + +{{- define "eck-stack.operatorNamespace" -}} +{{- .Values.operator.namespace | default "elastic-system" -}} +{{- end -}} + +{{- define "eck-stack.operatorLabels" -}} +app.kubernetes.io/name: {{ include "eck-stack.fullname" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/version: "{{ .Values.operator.version }}" +control-plane: elastic-operator +{{- end -}} + +{{- define "eck-stack.operatorSelectorLabels" -}} +control-plane: elastic-operator +{{- end -}} + +{{- define "eck-stack.operatorImage" -}} +{{- printf "%s:%s" (.Values.operator.image.repository | default "docker.elastic.co/eck/eck-operator") (.Values.operator.image.tag | default .Values.operator.version) -}} +{{- end -}} + +{{- define "eck-stack.operatorConfig" -}} +{{- $config := deepCopy (.Values.operator.config | default (dict)) -}} +{{- $ns := include "eck-stack.operatorNamespace" . -}} +{{- if not $config }} + {{- $config = dict -}} +{{- end -}} +{{- $currentNs := (index $config "operator-namespace") | default "" -}} +{{- if not $currentNs }} + {{- $_ := set $config "operator-namespace" $ns -}} +{{- end -}} +{{- if .Values.operator.webhook.enabled }} + {{- $_ := set $config "enable-webhook" true -}} + {{- $_ := set $config "webhook-port" (.Values.operator.webhook.port | default 9443) -}} +{{- else }} + {{- $_ := set $config "enable-webhook" false -}} +{{- end -}} +{{- toYaml $config -}} +{{- end -}} diff --git a/common/monitoring/eck/templates/alert-notifier-cronjob.yaml b/common/monitoring/eck/templates/alert-notifier-cronjob.yaml new file mode 100644 index 0000000..ff4cf44 --- /dev/null +++ b/common/monitoring/eck/templates/alert-notifier-cronjob.yaml @@ -0,0 +1,160 @@ +{{- if .Values.alertNotifier.enabled }} +{{- $secretName := include "eck-stack.alertNotifier.secretName" . -}} +{{- $persistence := .Values.alertNotifier.state.persistence | default (dict) -}} +{{- $stateDir := include "eck-stack.alertNotifier.stateDir" . -}} +{{- $stateFile := include "eck-stack.alertNotifier.stateFile" . 
-}} +{{- $es := .Values.alertNotifier.es | default (dict) -}} +{{- $teams := .Values.alertNotifier.teams | default (dict) -}} +{{- $webex := .Values.alertNotifier.webex | default (dict) -}} +{{- $webexTokenKey := $webex.tokenKey | default "webexBotToken" -}} +{{- if not $secretName -}} +{{- fail "alertNotifier.secret.name or alertNotifier.secret.existingSecret must be set" -}} +{{- end -}} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ include "eck-stack.alertNotifier.fullname" . }} + labels: + {{- include "eck-stack.alertNotifier.labels" . | nindent 4 }} + {{- with .Values.alertNotifier.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.alertNotifier.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + schedule: {{ quote (.Values.alertNotifier.schedule | default "*/5 * * * *") }} + concurrencyPolicy: {{ .Values.alertNotifier.concurrencyPolicy | default "Forbid" }} + successfulJobsHistoryLimit: {{ .Values.alertNotifier.successfulJobsHistoryLimit | default 1 }} + failedJobsHistoryLimit: {{ .Values.alertNotifier.failedJobsHistoryLimit | default 3 }} + {{- if .Values.alertNotifier.startingDeadlineSeconds }} + startingDeadlineSeconds: {{ .Values.alertNotifier.startingDeadlineSeconds }} + {{- end }} + jobTemplate: + spec: + backoffLimit: {{ .Values.alertNotifier.backoffLimit | default 3 }} + template: + metadata: + labels: + {{- include "eck-stack.alertNotifier.labels" . | nindent 12 }} + {{- with .Values.alertNotifier.podLabels }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.alertNotifier.podAnnotations }} + annotations: + {{- toYaml . | nindent 12 }} + {{- end }} + spec: + restartPolicy: OnFailure + {{- if .Values.alertNotifier.serviceAccountName }} + serviceAccountName: {{ .Values.alertNotifier.serviceAccountName }} + {{- end }} + {{- with .Values.alertNotifier.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- with .Values.alertNotifier.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.alertNotifier.affinity }} + affinity: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.alertNotifier.tolerations }} + tolerations: + {{- toYaml . | nindent 12 }} + {{- end }} + containers: + - name: eck-notifier + image: {{ include "eck-stack.alertNotifier.image" . }} + imagePullPolicy: {{ .Values.alertNotifier.image.pullPolicy | default "IfNotPresent" }} + args: + - --state + - {{ $stateFile | quote }} + - --es-index + - {{ ($es.index | default ".internal.alerts-observability.logs.alerts-default-*") | quote }} + - --es-query-size + - {{ default 200 $es.querySize | int | quote }} + {{- range $arg := .Values.alertNotifier.extraArgs }} + - {{ $arg | quote }} + {{- end }} + env: + - name: ALERT_STATE_FILE + value: {{ $stateFile | quote }} + - name: ENABLE_TEAMS + value: {{ ternary "true" "false" ($teams.enabled | default true) | quote }} + - name: ENABLE_WEBEX + value: {{ ternary "true" "false" ($webex.enabled | default false) | quote }} + {{- if $secretName }} + - name: ES_URL + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: es-url + - name: ES_USER + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: es-user + - name: ES_PASS + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: es-pass + - name: TEAMS_WEBHOOK_URL + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: teams-webhook + {{- if $webexTokenKey }} + - name: WEBEX_BOT_TOKEN + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: {{ $webexTokenKey }} + {{- end }} + {{- end }} + - name: ES_INDEX + value: {{ ($es.index | default ".internal.alerts-observability.logs.alerts-default-*") | quote }} + - name: ES_QUERY_SIZE + value: {{ default 200 $es.querySize | int | quote }} + - name: ES_SKIP_VERIFY + value: {{ ternary "true" "false" ($es.skipVerify | default false) | quote }} + {{- if $webex.roomId }} 
+ - name: WEBEX_ROOM_ID + value: {{ $webex.roomId | quote }} + {{- else if $webex.roomIdKey }} + - name: WEBEX_ROOM_ID + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: {{ $webex.roomIdKey }} + {{- end }} + {{- if $webex.personEmail }} + - name: WEBEX_PERSON_EMAIL + value: {{ $webex.personEmail | quote }} + {{- end }} + {{- with .Values.alertNotifier.extraEnv }} + {{- toYaml . | nindent 16 }} + {{- end }} + volumeMounts: + - name: state + mountPath: {{ $stateDir | quote }} + {{- with .Values.alertNotifier.resources }} + resources: + {{- toYaml . | nindent 16 }} + {{- end }} + volumes: + - name: state + {{- if and ($persistence.enabled | default false) (not $persistence.existingClaim) }} + persistentVolumeClaim: + claimName: {{ include "eck-stack.alertNotifier.pvcName" . }} + {{- else if and ($persistence.enabled | default false) $persistence.existingClaim }} + persistentVolumeClaim: + claimName: {{ $persistence.existingClaim }} + {{- else }} + emptyDir: {} + {{- end }} +{{- end }} diff --git a/common/monitoring/eck/templates/alert-notifier-pvc.yaml b/common/monitoring/eck/templates/alert-notifier-pvc.yaml new file mode 100644 index 0000000..81e830c --- /dev/null +++ b/common/monitoring/eck/templates/alert-notifier-pvc.yaml @@ -0,0 +1,24 @@ +{{- $persistence := .Values.alertNotifier.state.persistence -}} +{{- if and .Values.alertNotifier.enabled ($persistence.enabled | default false) (not $persistence.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "eck-stack.alertNotifier.pvcName" . }} + labels: + {{- include "eck-stack.alertNotifier.labels" . | nindent 4 }} + {{- with $persistence.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + accessModes: + {{- range $persistence.accessModes }} + - {{ . 
}} + {{- end }} + resources: + requests: + storage: {{ $persistence.size | default "1Gi" }} + {{- if $persistence.storageClassName }} + storageClassName: {{ $persistence.storageClassName }} + {{- end }} +{{- end }} diff --git a/common/monitoring/eck/templates/alert-notifier-secret.yaml b/common/monitoring/eck/templates/alert-notifier-secret.yaml new file mode 100644 index 0000000..1a9aeee --- /dev/null +++ b/common/monitoring/eck/templates/alert-notifier-secret.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.alertNotifier.enabled (.Values.alertNotifier.secret.create | default false) (not .Values.alertNotifier.secret.existingSecret) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "eck-stack.alertNotifier.secretName" . }} + labels: + {{- include "eck-stack.alertNotifier.labels" . | nindent 4 }} +stringData: + {{- with .Values.alertNotifier.secret.data.esUrl }} + es-url: {{ . | quote }} + {{- end }} + {{- with .Values.alertNotifier.secret.data.esUser }} + es-user: {{ . | quote }} + {{- end }} + {{- with .Values.alertNotifier.secret.data.esPass }} + es-pass: {{ . | quote }} + {{- end }} + {{- with .Values.alertNotifier.secret.data.teamsWebhook }} + teams-webhook: {{ . | quote }} + {{- end }} +{{- end }} diff --git a/common/monitoring/eck/templates/elasticsearch.yaml b/common/monitoring/eck/templates/elasticsearch.yaml new file mode 100644 index 0000000..71dfd7b --- /dev/null +++ b/common/monitoring/eck/templates/elasticsearch.yaml @@ -0,0 +1,27 @@ +{{- if .Values.elasticsearch.enabled }} +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + name: {{ .Values.elasticsearch.name }} + namespace: {{ .Values.elasticsearch.namespace | default (include "eck-stack.operatorNamespace" .) }} +{{- with .Values.elasticsearch.labels }} + labels: +{{ toYaml . | indent 4 }} +{{- end }} +{{- with .Values.elasticsearch.annotations }} + annotations: +{{ toYaml . 
| indent 4 }} +{{- end }} +spec: + version: {{ .Values.elasticsearch.version | quote }} +{{- with .Values.elasticsearch.http }} + http: +{{ toYaml . | indent 4 }} +{{- end }} +{{- with .Values.elasticsearch.service }} + service: +{{ toYaml . | indent 4 }} +{{- end }} + nodeSets: +{{ toYaml .Values.elasticsearch.nodeSets | indent 4 }} +{{- end }} diff --git a/common/monitoring/eck/templates/filebeat.yaml b/common/monitoring/eck/templates/filebeat.yaml new file mode 100644 index 0000000..3948dde --- /dev/null +++ b/common/monitoring/eck/templates/filebeat.yaml @@ -0,0 +1,85 @@ +{{- if .Values.observability.filebeat.enabled }} +apiVersion: beat.k8s.elastic.co/v1beta1 +kind: Beat +metadata: + name: {{ .Release.Name }}-filebeat + namespace: {{ include "eck-stack.operatorNamespace" . }} + labels: + app.kubernetes.io/name: filebeat + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/version: {{ .Values.observability.filebeat.version | quote }} +spec: + type: filebeat + version: {{ .Values.observability.filebeat.version | quote }} + elasticsearchRef: + name: {{ .Values.elasticsearch.name }} + namespace: {{ .Values.elasticsearch.namespace | default (include "eck-stack.operatorNamespace" .) }} + daemonSet: + podTemplate: + metadata: + labels: + app.kubernetes.io/name: filebeat + spec: + automountServiceAccountToken: true + serviceAccountName: {{ .Release.Name }}-filebeat + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + tolerations: + - effect: NoSchedule + operator: Exists + containers: + - name: filebeat + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: ELASTICSEARCH_HOSTS + value: https://{{ .Values.elasticsearch.name }}-es-http.{{ .Values.elasticsearch.namespace | default (include "eck-stack.operatorNamespace" .) }}.svc:9200 + - name: ELASTICSEARCH_USERNAME + valueFrom: + secretKeyRef: + name: {{ printf "%s-%s-beat-user" (include "eck-stack.operatorNamespace" .) 
(printf "%s-filebeat" .Release.Name) }} + key: name + - name: ELASTICSEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-filebeat-beat-user + key: {{ printf "%s-%s-beat-user" (include "eck-stack.operatorNamespace" .) (printf "%s-filebeat" .Release.Name) }} + volumeMounts: + - name: varlog + mountPath: /var/log + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: data + mountPath: /usr/share/filebeat/data + {{- with .Values.observability.filebeat.resources }} + resources: +{{ toYaml . | indent 14 }} + {{- end }} + volumes: + - name: data + ephemeral: + volumeClaimTemplate: + spec: + accessModes: [ "ReadWriteOnce" ] + storageClassName: {{ .Values.observability.filebeat.state.persistence.storageClassName | quote }} + resources: + requests: + storage: {{ .Values.observability.filebeat.state.persistence.size | quote }} + - name: varlog + hostPath: + path: /var/log + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + config: +{{ toYaml .Values.observability.filebeat.config | indent 4 }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Release.Name }}-filebeat + namespace: {{ include "eck-stack.operatorNamespace" . }} +{{- end }} diff --git a/common/monitoring/eck/templates/kibana-ingress.yaml b/common/monitoring/eck/templates/kibana-ingress.yaml new file mode 100644 index 0000000..33fb352 --- /dev/null +++ b/common/monitoring/eck/templates/kibana-ingress.yaml @@ -0,0 +1,44 @@ +{{- if and .Values.kibana.enabled .Values.kibana.ingress.enabled }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ .Values.kibana.name }} + namespace: {{ .Values.kibana.namespace | default (include "eck-stack.operatorNamespace" .) }} +{{- with .Values.kibana.labels }} + labels: +{{ toYaml . 
| indent 4 }} +{{- end }} +{{- $annotations := deepCopy (.Values.kibana.ingress.annotations | default (dict)) }} +{{- if not (hasKey $annotations "nginx.ingress.kubernetes.io/backend-protocol") }} + {{- if .Values.kibana.http.tls.selfSignedCertificate.disabled }} + {{- $_ := set $annotations "nginx.ingress.kubernetes.io/backend-protocol" "HTTP" }} + {{- else }} + {{- $_ := set $annotations "nginx.ingress.kubernetes.io/backend-protocol" "HTTPS" }} + {{- end }} +{{- end }} +{{- if $annotations }} + annotations: +{{ toYaml $annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.kibana.ingress.className }} + ingressClassName: {{ .Values.kibana.ingress.className }} +{{- end }} + rules: +{{- range .Values.kibana.ingress.hosts }} + - host: {{ .host }} + http: + paths: + - path: {{ .path | default "/" }} + pathType: {{ .pathType | default "Prefix" }} + backend: + service: + name: {{ $.Values.kibana.name }}-kb-http + port: + number: 5601 +{{- end }} +{{- with .Values.kibana.ingress.tls }} + tls: +{{ toYaml . | indent 4 }} +{{- end }} +{{- end }} diff --git a/common/monitoring/eck/templates/kibana.yaml b/common/monitoring/eck/templates/kibana.yaml new file mode 100644 index 0000000..6a1c01d --- /dev/null +++ b/common/monitoring/eck/templates/kibana.yaml @@ -0,0 +1,32 @@ +{{- if .Values.kibana.enabled }} +apiVersion: kibana.k8s.elastic.co/v1 +kind: Kibana +metadata: + name: {{ .Values.kibana.name }} + namespace: {{ .Values.kibana.namespace | default (include "eck-stack.operatorNamespace" .) }} +{{- with .Values.kibana.labels }} + labels: +{{ toYaml . | indent 4 }} +{{- end }} +{{- with .Values.kibana.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: + version: {{ .Values.kibana.version | quote }} + count: {{ .Values.kibana.count }} + elasticsearchRef: +{{ toYaml .Values.kibana.elasticsearchRef | indent 4 }} +{{- with .Values.kibana.config }} + config: +{{ toYaml . | indent 4 }} +{{- end }} +{{- with .Values.kibana.http }} + http: +{{ toYaml . 
| indent 4 }} +{{- end }} +{{- with .Values.kibana.podTemplate }} + podTemplate: +{{ toYaml . | indent 4 }} +{{- end }} +{{- end }} diff --git a/common/monitoring/eck/templates/metricbeat.yaml b/common/monitoring/eck/templates/metricbeat.yaml new file mode 100644 index 0000000..0d411fd --- /dev/null +++ b/common/monitoring/eck/templates/metricbeat.yaml @@ -0,0 +1,96 @@ +{{- if .Values.observability.metricbeat.enabled }} +apiVersion: beat.k8s.elastic.co/v1beta1 +kind: Beat +metadata: + name: {{ .Release.Name }}-metricbeat + namespace: {{ include "eck-stack.operatorNamespace" . }} + labels: + app.kubernetes.io/name: metricbeat + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/version: {{ .Values.observability.metricbeat.version | quote }} +spec: + type: metricbeat + version: {{ .Values.observability.metricbeat.version | quote }} + elasticsearchRef: + name: {{ .Values.elasticsearch.name }} + namespace: {{ .Values.elasticsearch.namespace | default (include "eck-stack.operatorNamespace" .) }} + daemonSet: + podTemplate: + metadata: + labels: + app.kubernetes.io/name: metricbeat + spec: + automountServiceAccountToken: true + serviceAccountName: {{ .Release.Name }}-metricbeat + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + tolerations: + - effect: NoSchedule + operator: Exists + containers: + - name: metricbeat + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: ELASTICSEARCH_HOSTS + value: https://{{ .Values.elasticsearch.name }}-es-http.{{ .Values.elasticsearch.namespace | default (include "eck-stack.operatorNamespace" .) }}.svc:9200 + - name: ELASTICSEARCH_USERNAME + valueFrom: + secretKeyRef: + name: {{ printf "%s-%s-beat-user" (include "eck-stack.operatorNamespace" .) 
(printf "%s-metricbeat" .Release.Name) }} + key: name + - name: ELASTICSEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-metricbeat-beat-user + key: {{ printf "%s-%s-beat-user" (include "eck-stack.operatorNamespace" .) (printf "%s-metricbeat" .Release.Name) }} + volumeMounts: + - name: proc + mountPath: /hostfs/proc + readOnly: true + - name: cgroup + mountPath: /hostfs/sys/fs/cgroup + readOnly: true + - name: rootfs + mountPath: /hostfs + readOnly: true + - name: data + mountPath: /usr/share/metricbeat/data + {{- with .Values.observability.metricbeat.resources }} + resources: +{{ toYaml . | indent 14 }} + {{- end }} + volumes: + - name: data + ephemeral: + volumeClaimTemplate: + spec: + accessModes: [ "ReadWriteOnce" ] + storageClassName: {{ .Values.observability.metricbeat.state.persistence.storageClassName | quote }} + resources: + requests: + storage: {{ .Values.observability.metricbeat.state.persistence.size | quote }} + - name: proc + hostPath: + path: /proc + - name: cgroup + hostPath: + path: /sys/fs/cgroup + - name: rootfs + hostPath: + path: / + config: +{{ toYaml .Values.observability.metricbeat.config | indent 4 }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Release.Name }}-metricbeat + namespace: {{ include "eck-stack.operatorNamespace" . 
}} +{{- end }} diff --git a/common/monitoring/eck/values.yaml b/common/monitoring/eck/values.yaml new file mode 100644 index 0000000..a70338e --- /dev/null +++ b/common/monitoring/eck/values.yaml @@ -0,0 +1,272 @@ +--- +operator: + enabled: false + createNamespace: false + namespace: elastic-system + version: 2.13.0 + image: + repository: docker.elastic.co/eck/eck-operator + tag: 2.13.0 + pullPolicy: IfNotPresent + replicas: 1 + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 100m + memory: 150Mi + priorityClassName: '' + nodeSelector: {} + tolerations: [] + affinity: {} + podAnnotations: {} + webhook: + enabled: true + serviceName: elastic-webhook-server + servicePort: 443 + port: 9443 + extraArgs: [] + env: [] + config: + log-verbosity: 0 + metrics-port: 0 + container-registry: docker.elastic.co + max-concurrent-reconciles: 3 + ca-cert-validity: 8760h + ca-cert-rotate-before: 24h + cert-validity: 8760h + cert-rotate-before: 24h + disable-config-watch: false + exposed-node-labels: + - topology.kubernetes.io/.* + - failure-domain.beta.kubernetes.io/.* + set-default-security-context: auto-detect + kube-client-timeout: 60s + elasticsearch-client-timeout: 180s + disable-telemetry: false + distribution-channel: all-in-one + validate-storage-class: true + enable-webhook: true + webhook-name: elastic-webhook.k8s.elastic.co + webhook-port: 9443 + operator-namespace: '' + enable-leader-election: true + elasticsearch-observation-interval: 10s + ubi-only: false + managedNamespaces: [] + webhookSecretName: elastic-webhook-server-cert +elasticsearch: + enabled: true + namespace: elastic-system + name: elasticsearch-sample + version: 8.13.4 + labels: {} + annotations: {} + http: + tls: + selfSignedCertificate: + disabled: false + service: + metadata: + annotations: {} + nodeSets: + - name: default + count: 1 + config: + node.store.allow_mmap: false + podTemplate: + metadata: + labels: {} + spec: + containers: + - name: elasticsearch + resources: + requests: + 
cpu: 500m + memory: 2Gi + limits: + cpu: 2 + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 10Gi + storageClassName: ceph-corbo-cephfs +kibana: + enabled: true + namespace: elastic-system + name: kibana-sample + version: 8.13.4 + count: 1 + labels: {} + annotations: {} + http: + service: + metadata: + annotations: {} + tls: + selfSignedCertificate: + disabled: false + elasticsearchRef: + name: elasticsearch-sample + config: {} + podTemplate: + metadata: + labels: {} + spec: + containers: + - name: kibana + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 1 + memory: 1Gi + ingress: + enabled: false + className: public + annotations: {} + hosts: + - host: localhost + path: / + pathType: Prefix + tls: [] +observability: + filebeat: + enabled: false + version: 8.13.4 + state: + persistence: + storageClassName: ceph-corbo-cephfs + size: 2Gi + config: + filebeat: + autodiscover: + providers: + - type: kubernetes + node: ${NODE_NAME} + hints: + enabled: true + default_config: + type: container + paths: ['/var/log/containers/*${data.kubernetes.container.id}.log'] + processors: + - add_kubernetes_metadata: {} + processors: + - add_cloud_metadata: {} + - add_host_metadata: {} + - add_kubernetes_metadata: {} + resources: + requests: + cpu: 100m + memory: 400Mi + limits: + memory: 600Mi + output: + elasticsearch: + hosts: ['${ELASTICSEARCH_HOSTS}'] + username: ${ELASTICSEARCH_USERNAME} + password: ${ELASTICSEARCH_PASSWORD} + metricbeat: + enabled: false + version: 8.13.4 + state: + persistence: + storageClassName: ceph-corbo-cephfs + size: 2Gi + config: + metricbeat: + data_stream: + enabled: true + modules: + - module: kubernetes + metricsets: [node, system, pod, container, volume] + period: 20s + host: ${NODE_NAME} + hosts: ['https://${NODE_IP}:10250'] + ssl.verification_mode: none + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 
+ processors: + - add_kubernetes_metadata: {} + - module: kubernetes + metricsets: [apiserver] + hosts: ['https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}'] + ssl.certificate_authorities: + - /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + period: 30s + - module: system + period: 20s + hostfs: /hostfs + metricsets: [cpu, memory, network, diskio, filesystem] + processors: + - add_cloud_metadata: {} + - add_host_metadata: {} + - add_kubernetes_metadata: {} + output: + elasticsearch: + hosts: ['${ELASTICSEARCH_HOSTS}'] + username: ${ELASTICSEARCH_USERNAME} + password: ${ELASTICSEARCH_PASSWORD} +alertNotifier: + enabled: false + schedule: '*/5 * * * *' + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + startingDeadlineSeconds: + backoffLimit: 3 + image: + repository: madgik/eck-notifier + tag: 0.0.1 + pullPolicy: IfNotPresent + imagePullSecrets: [] + serviceAccountName: '' + labels: {} + annotations: {} + podLabels: {} + podAnnotations: {} + nodeSelector: {} + tolerations: [] + affinity: {} + resources: {} + extraArgs: [] + extraEnv: [] + teams: + enabled: true + webex: + enabled: true + roomId: '' + roomIdKey: webexRoomId + personEmail: '' + tokenKey: webexBotToken + state: + path: /var/lib/eck-notifier/state.json + persistence: + enabled: true + existingClaim: '' + storageClassName: ceph-corbo-cephfs + accessModes: [ReadWriteOnce] + size: 1Gi + annotations: {} + secret: + create: false + name: eck-eck-notifier-secrets + existingSecret: '' + data: + esUrl: '' + esUser: '' + esPass: '' + teamsWebhook: '' + webexBotToken: '' + webexRoomId: '' + es: + index: .internal.alerts-observability.logs.alerts-default-* + querySize: 200 + skipVerify: true diff --git a/common/submariner/operator/values.yaml b/common/submariner/operator/values.yaml index 1ae4252..3eed22e 100644 --- a/common/submariner/operator/values.yaml +++ 
b/common/submariner/operator/values.yaml @@ -28,7 +28,7 @@ submariner: # set via a strategic merge patch in kustomization.yaml to reference # the submariner-broker-secret created by the PostSync hook broker: - server: mip-tds.chuv.cscs.ch:6443 + server: mip.chuv.cscs.ch:6443 token: '' ca: '' namespace: submariner-k8s-broker diff --git a/deployments/hybrid/federations/federation-Z/mip-infrastructure/customizations/mip-stack-values.yaml b/deployments/hybrid/federations/federation-Z/mip-infrastructure/customizations/mip-stack-values.yaml index ab4acee..c4e4f1c 100644 --- a/deployments/hybrid/federations/federation-Z/mip-infrastructure/customizations/mip-stack-values.yaml +++ b/deployments/hybrid/federations/federation-Z/mip-infrastructure/customizations/mip-stack-values.yaml @@ -2,4 +2,4 @@ log_level: DEBUG namespace: federation-z mip: - PUBLIC_HOST: federation-z.mip-tds.chuv.cscs.ch + PUBLIC_HOST: federation-z.example.com diff --git a/deployments/hybrid/federations/federation-Z/remote-node/README.md b/deployments/hybrid/federations/federation-Z/remote-node/README.md index c63a586..de0953a 100644 --- a/deployments/hybrid/federations/federation-Z/remote-node/README.md +++ b/deployments/hybrid/federations/federation-Z/remote-node/README.md @@ -167,8 +167,8 @@ If you see certificate errors in the `submariner-operator` logs or `ServiceExpor base64 -d broker-ca-base64.txt > broker-ca.crt # 2. Verify connection to the broker API server -# Replace mip-tds.chuv.cscs.ch:6443 with your broker address if different -openssl s_client -connect mip-tds.chuv.cscs.ch:6443 -CAfile broker-ca.crt -showcerts < /dev/null +# Replace mip.chuv.cscs.ch:6443 with your broker address if different +openssl s_client -connect mip.chuv.cscs.ch:6443 -CAfile broker-ca.crt -showcerts < /dev/null # You should see "Verify return code: 0 (ok)" at the end. # If you see "Verify return code: 19 (self-signed certificate...)", the CA is incorrect or missing. 
diff --git a/deployments/hybrid/federations/federation-Z/remote-node/submariner-values.yaml b/deployments/hybrid/federations/federation-Z/remote-node/submariner-values.yaml index ca70365..6be32e9 100644 --- a/deployments/hybrid/federations/federation-Z/remote-node/submariner-values.yaml +++ b/deployments/hybrid/federations/federation-Z/remote-node/submariner-values.yaml @@ -8,7 +8,7 @@ # --values submariner-values.yaml # Broker configuration - populated via --set during helm install broker: - server: mip-tds.chuv.cscs.ch:6443 + server: mip.chuv.cscs.ch:6443 token: '' # Populated via --set-string broker.token="$(cat broker-token.txt)" (plain text) ca: '' # Populated via --set-string broker.ca="$(cat broker-ca-base64.txt)" (base64-encoded) namespace: submariner-k8s-broker diff --git a/deployments/local/federations/federation-A/customizations/mip-stack-values.yaml b/deployments/local/federations/federation-A/customizations/mip-stack-values.yaml index 52bfc45..4117e7b 100644 --- a/deployments/local/federations/federation-A/customizations/mip-stack-values.yaml +++ b/deployments/local/federations/federation-A/customizations/mip-stack-values.yaml @@ -4,7 +4,7 @@ cluster: namespace: federation-a managed: true network: - publicHost: federation-a.mip-tds.chuv.cscs.ch + publicHost: federation-a.example.com platform-ui: ingress: className: nginx diff --git a/projects/static/mip-monitoring/eck.yaml b/projects/static/mip-monitoring/eck.yaml new file mode 100644 index 0000000..2ecb22f --- /dev/null +++ b/projects/static/mip-monitoring/eck.yaml @@ -0,0 +1,29 @@ +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: eck + namespace: argocd-mip-team + finalizers: [resources-finalizer.argocd.argoproj.io] +spec: + project: mip-argo-project-monitoring + source: + repoURL: https://github.com/Medical-Informatics-Platform/mip-infra.git + targetRevision: main + path: common/monitoring/eck + helm: + releaseName: eck + skipCrds: true + values: | + observability: + 
metricbeat: + enabled: true + filebeat: + enabled: true + destination: + server: https://kubernetes.default.svc + namespace: elastic-system + syncPolicy: + automated: + selfHeal: true + prune: true diff --git a/projects/static/mip-monitoring/kustomization.yaml b/projects/static/mip-monitoring/kustomization.yaml new file mode 100644 index 0000000..7d9239f --- /dev/null +++ b/projects/static/mip-monitoring/kustomization.yaml @@ -0,0 +1,4 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [mip-monitoring.yaml, eck.yaml] diff --git a/projects/static/mip-monitoring/mip-monitoring.yaml b/projects/static/mip-monitoring/mip-monitoring.yaml new file mode 100644 index 0000000..cbb4d15 --- /dev/null +++ b/projects/static/mip-monitoring/mip-monitoring.yaml @@ -0,0 +1,103 @@ +--- +apiVersion: argoproj.io/v1alpha1 +kind: AppProject +metadata: + name: mip-argo-project-monitoring + namespace: argocd-mip-team +spec: + description: Monitoring resources like ECK + sourceRepos: + - https://github.com/Medical-Informatics-Platform/mip-infra.git + destinations: + - namespace: elastic-system + server: https://kubernetes.default.svc + namespaceResourceWhitelist: + - group: argoproj.io + kind: Application + - group: apps + kind: Deployment + - group: apps + kind: StatefulSet + - group: apps + kind: DaemonSet + - group: '' + kind: Service + - group: '' + kind: ConfigMap + - group: '' + kind: Secret + - group: '' + kind: PersistentVolumeClaim + - group: '' + kind: ServiceAccount + - group: '' + kind: Pod + - group: networking.k8s.io + kind: Ingress + - group: networking.k8s.io + kind: NetworkPolicy + - group: batch + kind: Job + - group: batch + kind: CronJob + - group: elasticsearch.k8s.elastic.co + kind: '*' + - group: kibana.k8s.elastic.co + kind: '*' + - group: beat.k8s.elastic.co + kind: '*' + - group: apm.k8s.elastic.co + kind: '*' + - group: enterprisesearch.k8s.elastic.co + kind: '*' + - group: maps.k8s.elastic.co + kind: '*' + - group: 
agent.k8s.elastic.co + kind: '*' + - group: autoscaling.k8s.elastic.co + kind: '*' + # Explicitly blacklist sensitive resources to be future proof + clusterResourceBlacklist: + - group: rbac.authorization.k8s.io + kind: ClusterRole + - group: rbac.authorization.k8s.io + kind: ClusterRoleBinding + - group: admissionregistration.k8s.io + kind: ValidatingWebhookConfiguration + - group: admissionregistration.k8s.io + kind: MutatingWebhookConfiguration + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + namespaceResourceBlacklist: + - group: rbac.authorization.k8s.io + kind: Role + - group: rbac.authorization.k8s.io + kind: RoleBinding + syncWindows: + - kind: allow + schedule: '* * * * *' + duration: 24h + applications: ['*'] + roles: + - name: monitoring-admin + description: Full access to monitoring resources + policies: + - p, proj:mip-argo-project-monitoring:monitoring-admin, applications, get, + mip-argo-project-monitoring/*, allow + - p, proj:mip-argo-project-monitoring:monitoring-admin, applications, create, + mip-argo-project-monitoring/*, allow + - p, proj:mip-argo-project-monitoring:monitoring-admin, applications, update, + mip-argo-project-monitoring/*, allow + - p, proj:mip-argo-project-monitoring:monitoring-admin, applications, delete, + mip-argo-project-monitoring/*, allow + - p, proj:mip-argo-project-monitoring:monitoring-admin, applications, sync, + mip-argo-project-monitoring/*, allow + groups: [argocd-admins] + - name: monitoring-operator + description: Limited access to monitoring resources + policies: + - p, proj:mip-argo-project-monitoring:monitoring-operator, applications, sync, + mip-argo-project-monitoring/*, allow + - p, proj:mip-argo-project-monitoring:monitoring-operator, applications, get, + mip-argo-project-monitoring/*, allow + groups: [argocd-operators]