Skip to content

Commit 94481d7

Browse files
Merge pull request #2322 from marioferh/metrics_server_config_monitoring_api
Monitoring API: Add Metric server config
2 parents 61248d9 + 881674e commit 94481d7

13 files changed

+3319
-34
lines changed

config/v1alpha1/tests/clustermonitoring.config.openshift.io/ClusterMonitoringConfig.yaml

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,146 @@ tests:
208208
request: "500m"
209209
limit: "200m"
210210
expectedError: 'spec.alertmanagerConfig.customConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
211+
- name: Should be able to create a minimal MetricsServerConfig
212+
initial: |
213+
apiVersion: config.openshift.io/v1alpha1
214+
kind: ClusterMonitoring
215+
spec:
216+
metricsServerConfig:
217+
verbosity: Info
218+
expected: |
219+
apiVersion: config.openshift.io/v1alpha1
220+
kind: ClusterMonitoring
221+
spec:
222+
metricsServerConfig:
223+
verbosity: Info
224+
- name: Should accept MetricsServerConfig with comprehensive ContainerResource array
225+
initial: |
226+
apiVersion: config.openshift.io/v1alpha1
227+
kind: ClusterMonitoring
228+
spec:
229+
userDefined:
230+
mode: "Disabled"
231+
metricsServerConfig:
232+
resources:
233+
- name: "cpu"
234+
request: "100m"
235+
limit: "500m"
236+
- name: "memory"
237+
request: "128Mi"
238+
limit: "512Mi"
239+
- name: "ephemeral-storage"
240+
request: "1Gi"
241+
limit: "2Gi"
242+
expected: |
243+
apiVersion: config.openshift.io/v1alpha1
244+
kind: ClusterMonitoring
245+
spec:
246+
userDefined:
247+
mode: "Disabled"
248+
metricsServerConfig:
249+
resources:
250+
- name: "cpu"
251+
request: "100m"
252+
limit: "500m"
253+
- name: "memory"
254+
request: "128Mi"
255+
limit: "512Mi"
256+
- name: "ephemeral-storage"
257+
request: "1Gi"
258+
limit: "2Gi"
259+
- name: Should accept MetricsServerConfig with only requests
260+
initial: |
261+
apiVersion: config.openshift.io/v1alpha1
262+
kind: ClusterMonitoring
263+
spec:
264+
userDefined:
265+
mode: "Disabled"
266+
metricsServerConfig:
267+
resources:
268+
- name: "cpu"
269+
request: "200m"
270+
- name: "memory"
271+
request: "256Mi"
272+
expected: |
273+
apiVersion: config.openshift.io/v1alpha1
274+
kind: ClusterMonitoring
275+
spec:
276+
userDefined:
277+
mode: "Disabled"
278+
metricsServerConfig:
279+
resources:
280+
- name: "cpu"
281+
request: "200m"
282+
- name: "memory"
283+
request: "256Mi"
284+
- name: Should accept MetricsServerConfig with only limits
285+
initial: |
286+
apiVersion: config.openshift.io/v1alpha1
287+
kind: ClusterMonitoring
288+
spec:
289+
userDefined:
290+
mode: "Disabled"
291+
metricsServerConfig:
292+
resources:
293+
- name: "cpu"
294+
limit: "1"
295+
- name: "memory"
296+
limit: "1Gi"
297+
expected: |
298+
apiVersion: config.openshift.io/v1alpha1
299+
kind: ClusterMonitoring
300+
spec:
301+
userDefined:
302+
mode: "Disabled"
303+
metricsServerConfig:
304+
resources:
305+
- name: "cpu"
306+
limit: "1"
307+
- name: "memory"
308+
limit: "1Gi"
309+
- name: Should reject MetricsServerConfig with limit less than request
310+
initial: |
311+
apiVersion: config.openshift.io/v1alpha1
312+
kind: ClusterMonitoring
313+
spec:
314+
userDefined:
315+
mode: "Disabled"
316+
metricsServerConfig:
317+
resources:
318+
- name: "cpu"
319+
request: "500m"
320+
limit: "200m"
321+
expectedError: 'spec.metricsServerConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
322+
- name: Should reject MetricsServerConfig with more than 10 resource items
323+
initial: |
324+
apiVersion: config.openshift.io/v1alpha1
325+
kind: ClusterMonitoring
326+
spec:
327+
userDefined:
328+
mode: "Disabled"
329+
metricsServerConfig:
330+
resources:
331+
- name: "cpu"
332+
request: "100m"
333+
- name: "memory"
334+
request: "64Mi"
335+
- name: "hugepages-2Mi"
336+
request: "32Mi"
337+
- name: "hugepages-1Gi"
338+
request: "1Gi"
339+
- name: "ephemeral-storage"
340+
request: "1Gi"
341+
- name: "nvidia.com/gpu"
342+
request: "1"
343+
- name: "example.com/foo"
344+
request: "1"
345+
- name: "example.com/bar"
346+
request: "1"
347+
- name: "example.com/baz"
348+
request: "1"
349+
- name: "example.com/qux"
350+
request: "1"
351+
- name: "example.com/quux"
352+
request: "1"
353+
expectedError: 'spec.metricsServerConfig.resources: Too many: 11: must have at most 10 items'

config/v1alpha1/types_cluster_monitoring.go

Lines changed: 144 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,19 @@ type ClusterMonitoringSpec struct {
8181
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
8282
// The current default value is `Disabled`.
8383
// +optional
84-
UserDefined *UserDefinedMonitoring `json:"userDefined,omitempty"`
84+
UserDefined UserDefinedMonitoring `json:"userDefined,omitempty,omitzero"`
8585
// alertmanagerConfig allows users to configure how the default Alertmanager instance
8686
// should be deployed in the `openshift-monitoring` namespace.
8787
// alertmanagerConfig is optional.
8888
// When omitted, this means no opinion and the platform is left to choose a reasonable default, that is subject to change over time.
8989
// The current default value is `DefaultConfig`.
9090
// +optional
91-
AlertmanagerConfig *AlertmanagerConfig `json:"alertmanagerConfig,omitempty"`
91+
AlertmanagerConfig AlertmanagerConfig `json:"alertmanagerConfig,omitempty,omitzero"`
92+
// metricsServerConfig is an optional field that can be used to configure the Kubernetes Metrics Server that runs in the openshift-monitoring namespace.
93+
// Specifically, it can configure how the Metrics Server instance is deployed, how its pods are scheduled, its audit policy, and its log verbosity.
94+
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
95+
// +optional
96+
MetricsServerConfig MetricsServerConfig `json:"metricsServerConfig,omitempty,omitzero"`
9297
}
9398

9499
// UserDefinedMonitoring config for user-defined projects.
@@ -128,12 +133,12 @@ type AlertmanagerConfig struct {
128133
//
129134
// +unionDiscriminator
130135
// +required
131-
DeploymentMode AlertManagerDeployMode `json:"deploymentMode"`
136+
DeploymentMode AlertManagerDeployMode `json:"deploymentMode,omitempty"`
132137

133138
// customConfig must be set when deploymentMode is CustomConfig, and must be unset otherwise.
134139
// When set to CustomConfig, the Alertmanager will be deployed with custom configuration.
135140
// +optional
136-
CustomConfig *AlertmanagerCustomConfig `json:"customConfig,omitempty"`
141+
CustomConfig AlertmanagerCustomConfig `json:"customConfig,omitempty,omitzero"`
137142
}
138143

139144
// AlertmanagerCustomConfig represents the configuration for a custom Alertmanager deployment.
@@ -153,7 +158,7 @@ type AlertmanagerCustomConfig struct {
153158
// When omitted, this means no opinion and the platform is left to choose a reasonable default, that is subject to change over time.
154159
// The current default value is `Info`.
155160
// +optional
156-
LogLevel LogLevel `json:"logLevel"`
161+
LogLevel LogLevel `json:"logLevel,omitempty"`
157162
// nodeSelector defines the nodes on which the Pods are scheduled
158163
// nodeSelector is optional.
159164
//
@@ -291,9 +296,10 @@ type ContainerResource struct {
291296
// This field is required.
292297
// name must consist only of alphanumeric characters, `-`, `_` and `.` and must start and end with an alphanumeric character.
293298
// +required
299+
// +kubebuilder:validation:MinLength=1
294300
// +kubebuilder:validation:MaxLength=253
295301
// +kubebuilder:validation:XValidation:rule="!format.qualifiedName().validate(self).hasValue()",message="name must consist only of alphanumeric characters, `-`, `_` and `.` and must start and end with an alphanumeric character"
296-
Name string `json:"name"`
302+
Name string `json:"name,omitempty"`
297303

298304
// request is the minimum amount of the resource required (e.g. "2Mi", "1Gi").
299305
// This field is optional.
@@ -322,3 +328,135 @@ type ContainerResource struct {
322328
// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="a lowercase RFC 1123 subdomain must consist of lower case alphanumeric characters, '-' or '.', and must start and end with an alphanumeric character."
323329
// +kubebuilder:validation:MaxLength=63
324330
type SecretName string
331+
332+
// MetricsServerConfig provides configuration options for the Metrics Server instance
333+
// that runs in the `openshift-monitoring` namespace. Use this configuration to control
334+
// how the Metrics Server instance is deployed, how it logs, and how its pods are scheduled.
335+
// +kubebuilder:validation:MinProperties=1
336+
type MetricsServerConfig struct {
337+
// audit defines the audit configuration used by the Metrics Server instance.
338+
// audit is optional.
339+
// When omitted, this means no opinion and the platform is left to choose a reasonable default, that is subject to change over time.
340+
// The current default sets audit.profile to Metadata.
341+
// +optional
342+
Audit Audit `json:"audit,omitempty,omitzero"`
343+
// nodeSelector defines the nodes on which the Pods are scheduled
344+
// nodeSelector is optional.
345+
//
346+
// When omitted, this means the user has no opinion and the platform is left
347+
// to choose reasonable defaults. These defaults are subject to change over time.
348+
// The current default value is `kubernetes.io/os: linux`.
349+
// +optional
350+
// +kubebuilder:validation:MinProperties=1
351+
// +kubebuilder:validation:MaxProperties=10
352+
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
353+
// tolerations defines tolerations for the pods.
354+
// tolerations is optional.
355+
//
356+
// When omitted, this means the user has no opinion and the platform is left
357+
// to choose reasonable defaults. These defaults are subject to change over time.
358+
// Defaults are empty/unset.
359+
// Maximum length for this list is 10.
360+
// Minimum length for this list is 1.
361+
// +kubebuilder:validation:MaxItems=10
362+
// +kubebuilder:validation:MinItems=1
363+
// +listType=atomic
364+
// +optional
365+
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
366+
// verbosity defines the verbosity of log messages for Metrics Server.
367+
// Valid values are Errors, Info, Trace, TraceAll and omitted.
368+
// When set to Errors, only critical messages and errors are logged.
369+
// When set to Info, only basic information messages are logged.
370+
// When set to Trace, information useful for general debugging is logged.
371+
// When set to TraceAll, detailed information about metric scraping is logged.
372+
// When omitted, this means no opinion and the platform is left to choose a reasonable default, that is subject to change over time.
373+
// The current default value is `Errors`.
374+
// +optional
375+
Verbosity VerbosityLevel `json:"verbosity,omitempty,omitzero"`
376+
// resources defines the compute resource requests and limits for the Metrics Server container.
377+
// This includes CPU, memory and HugePages constraints to help control scheduling and resource usage.
378+
// When not specified, defaults are used by the platform. Requests cannot exceed limits.
379+
// This field is optional.
380+
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
381+
// This is a simplified API that maps to Kubernetes ResourceRequirements.
382+
// The current default values are:
383+
// resources:
384+
// - name: cpu
385+
// request: 4m
386+
// limit: null
387+
// - name: memory
388+
// request: 40Mi
389+
// limit: null
390+
// Maximum length for this list is 10.
391+
// Minimum length for this list is 1.
392+
// +optional
393+
// +listType=map
394+
// +listMapKey=name
395+
// +kubebuilder:validation:MaxItems=10
396+
// +kubebuilder:validation:MinItems=1
397+
Resources []ContainerResource `json:"resources,omitempty"`
398+
// topologySpreadConstraints defines rules for how Metrics Server Pods should be distributed
399+
// across topology domains such as zones, nodes, or other user-defined labels.
400+
// topologySpreadConstraints is optional.
401+
// This helps improve high availability and resource efficiency by avoiding placing
402+
// too many replicas in the same failure domain.
403+
//
404+
// When omitted, this means no opinion and the platform is left to choose a default, which is subject to change over time.
405+
// This field maps directly to the `topologySpreadConstraints` field in the Pod spec.
406+
// Default is empty list.
407+
// Maximum length for this list is 10.
408+
// Minimum length for this list is 1.
409+
// Entries must have unique topologyKey and whenUnsatisfiable pairs.
410+
// +kubebuilder:validation:MaxItems=10
411+
// +kubebuilder:validation:MinItems=1
412+
// +listType=map
413+
// +listMapKey=topologyKey
414+
// +listMapKey=whenUnsatisfiable
415+
// +optional
416+
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
417+
}
418+
419+
// AuditProfile defines the audit log level for the Metrics Server.
420+
// +kubebuilder:validation:Enum=None;Metadata;Request;RequestResponse
421+
type AuditProfile string
422+
423+
const (
424+
// AuditProfileNone disables audit logging
425+
AuditProfileNone AuditProfile = "None"
426+
// AuditProfileMetadata logs request metadata (requesting user, timestamp, resource, verb, etc.) but not request or response body
427+
AuditProfileMetadata AuditProfile = "Metadata"
428+
// AuditProfileRequest logs event metadata and request body but not response body
429+
AuditProfileRequest AuditProfile = "Request"
430+
// AuditProfileRequestResponse logs event metadata, request and response bodies
431+
AuditProfileRequestResponse AuditProfile = "RequestResponse"
432+
)
433+
434+
// VerbosityLevel defines the verbosity of log messages for Metrics Server.
435+
// +kubebuilder:validation:Enum=Errors;Info;Trace;TraceAll
436+
type VerbosityLevel string
437+
438+
const (
439+
// VerbosityLevelErrors means only critical messages and errors are logged.
440+
VerbosityLevelErrors VerbosityLevel = "Errors"
441+
// VerbosityLevelInfo means basic informational messages are logged.
442+
VerbosityLevelInfo VerbosityLevel = "Info"
443+
// VerbosityLevelTrace means extended information useful for general debugging is logged.
444+
VerbosityLevelTrace VerbosityLevel = "Trace"
445+
// VerbosityLevelTraceAll means detailed information about metric scraping operations is logged.
446+
VerbosityLevelTraceAll VerbosityLevel = "TraceAll"
447+
)
448+
449+
// Audit defines the audit configuration used by the Metrics Server instance.
450+
type Audit struct {
451+
// profile is a required field for configuring the audit log level of the Kubernetes Metrics Server.
452+
// Allowed values are None, Metadata, Request, or RequestResponse.
453+
// When set to None, audit logging is disabled and no audit events are recorded.
454+
// When set to Metadata, only request metadata (such as requesting user, timestamp, resource, verb, etc.) is logged, but not the request or response body.
455+
// When set to Request, event metadata and the request body are logged, but not the response body.
456+
// When set to RequestResponse, event metadata, request body, and response body are all logged, providing the most detailed audit information.
457+
//
458+
// See: https://kubernetes.io/docs/tasks/debug-application-cluster/audit/#audit-policy
459+
// for more information about auditing and log levels.
460+
// +required
461+
Profile AuditProfile `json:"profile,omitempty"`
462+
}

0 commit comments

Comments
 (0)