Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/controlplane-operations/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: v2
name: controlplane-operations
version: 1.0.27
version: 1.0.28
description: A set of Plutono dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Controlplane clusters.
maintainers:
- name: Vladimir Videlov (d051408)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,26 @@ groups:
for: {{ dig "ShootUnavailability" "for" "10m" .Values.prometheusRules }}
labels:
{{ include "controlplane-operations.additionalRuleLabels" . }}
severity: {{ dig "ShootUnavailability" "severity" "info" .Values.prometheusRules }}
severity: {{ dig "ShootUnavailability" "severity" "warning" .Values.prometheusRules }}
playbook: https://github.com/cobaltcore-dev/controlplane-operations/playbooks/ShootUnavailability.md
service: {{ dig "ShootUnavailability" "service" .Values.prometheusRules.defaultService .Values.prometheusRules }}
support_group: {{ dig "ShootUnavailability" "support_group" .Values.prometheusRules.defaultSupportGroup .Values.prometheusRules }}
annotations:
description: Shoot cluster is unavailable for more than 10 minutes.
summary: Shoot cluster is unavailable for more than 10 minutes.
{{- end }}

{{- if not (.Values.prometheusRules.disabled.CalicoBirdDown | default false) }}
- alert: CalicoBirdDown
expr: bird_protocol_up{import_filter="ACCEPT",ip_version="4",proto="BGP",state!="Passive"} == 0
for: {{ dig "CalicoBirdDown" "for" "5m" .Values.prometheusRules }}
labels:
{{ include "controlplane-operations.additionalRuleLabels" . }}
severity: {{ dig "CalicoBirdDown" "severity" "warning" .Values.prometheusRules }}
playbook: https://github.com/cobaltcore-dev/controlplane-operations/playbooks/CalicoBirdDown.md
service: {{ dig "CalicoBirdDown" "service" .Values.prometheusRules.defaultService .Values.prometheusRules }}
support_group: {{ dig "CalicoBirdDown" "support_group" .Values.prometheusRules.defaultSupportGroup .Values.prometheusRules }}
annotations:
description: Calico Bird is down for more than 5 minutes.
summary: Calico Bird is down for more than 5 minutes.
Comment thread
videlov marked this conversation as resolved.
Outdated
{{- end }}
4 changes: 2 additions & 2 deletions charts/controlplane-operations/plugindefinition.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ kind: PluginDefinition
metadata:
name: controlplane-operations
spec:
version: 1.0.27
version: 1.0.28
displayName: Controlplane operations bundle
description: Operations bundle for Controlplane clusters
docMarkDownUrl: https://raw.githubusercontent.com/cloudoperators/controlplane-operations/main/README.md
icon: https://raw.githubusercontent.com/cloudoperators/controlplane-operations/main/charts/controlplane-operations/kubernetes-logo.png
helmChart:
name: controlplane-operations
repository: oci://ghcr.io/cloudoperators/controlplane-operations/charts
version: 1.0.27
version: 1.0.28
options:
- name: prometheusRules.create
description: Create Prometheus rules
Expand Down
Loading