Skip to content

Commit d0be0e8

Browse files
committed
KAR-617: setup kubearchive logging kflux-prd-rh03 config
Signed-off-by: obetsun <[email protected]>
rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
1 parent 07e5e23 commit d0be0e8

File tree

8 files changed

+514
-4
lines changed

8 files changed

+514
-4
lines changed

argo-cd-apps/base/member/infra-deployments/vector-kubearchive-log-collector/vector-kubearchive-log-collector.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,10 @@ spec:
3232
# Public
3333
# - nameNormalized: stone-prd-rh01
3434
# values.clusterDir: stone-prd-rh01
35-
# - nameNormalized: kflux-prd-rh02
36-
# values.clusterDir: kflux-prd-rh02
37-
# - nameNormalized: kflux-prd-rh03
38-
# values.clusterDir: kflux-prd-rh03
3935
- nameNormalized: kflux-rhel-p01
4036
values.clusterDir: kflux-rhel-p01
37+
- nameNormalized: kflux-prd-rh03
38+
values.clusterDir: kflux-prd-rh03
4139
template:
4240
metadata:
4341
name: vector-kubearchive-log-collector-{{nameNormalized}}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: kustomize.config.k8s.io/v1beta1
2+
kind: Kustomization
3+
4+
commonAnnotations:
5+
ignore-check.kube-linter.io/drop-net-raw-capability: |
6+
"Vector runs requires access to socket."
7+
ignore-check.kube-linter.io/run-as-non-root: |
8+
"Vector runs as Root and attach host Path."
9+
ignore-check.kube-linter.io/sensitive-host-mounts: |
10+
"Vector runs requires certain host mounts to watch files being created by pods."
11+
ignore-check.kube-linter.io/pdb-unhealthy-pod-eviction-policy: |
12+
"Managed by upstream Loki chart (no value exposed for unhealthyPodEvictionPolicy)."
13+
14+
resources:
15+
- ../base
16+
17+
generators:
18+
- vector-helm-generator.yaml
19+
- loki-helm-generator.yaml
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: builtin
2+
kind: HelmChartInflationGenerator
3+
metadata:
4+
name: loki
5+
name: loki
6+
repo: https://grafana.github.io/helm-charts
7+
version: 6.30.1
8+
releaseName: loki
9+
namespace: product-kubearchive-logging
10+
valuesFile: loki-helm-values.yaml
11+
additionalValuesFiles:
12+
- loki-helm-prod-values.yaml
13+
valuesInline:
14+
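# Cluster-specific overrides. NOTE(review): the role ARN and bucket names below are named for stone-prod-p02, while this commit configures kflux-prd-rh03 - confirm they are intended for this cluster.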
15+
serviceAccount:
16+
create: true
17+
name: loki-sa
18+
annotations:
19+
eks.amazonaws.com/role-arn: "arn:aws:iam::310587744735:role/stone-prod-p02-loki-storage-role"
20+
loki:
21+
storage:
22+
bucketNames:
23+
chunks: stone-prod-p02-loki-storage
24+
admin: stone-prod-p02-loki-storage
25+
storage_config:
26+
aws:
27+
bucketnames: stone-prod-p02-loki-storage
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
---
2+
gateway:
3+
service:
4+
type: LoadBalancer
5+
resources:
6+
requests:
7+
cpu: 100m
8+
memory: 128Mi
9+
limits:
10+
memory: 256Mi
11+
12+
# Basic Loki configuration with S3 storage
13+
loki:
14+
commonConfig:
15+
replication_factor: 3
16+
# Required storage configuration for Helm chart
17+
storage:
18+
type: s3
19+
# bucketNames: set per cluster in the generator (valuesInline)
20+
s3:
21+
region: us-east-1
22+
storage_config:
23+
aws:
24+
# bucketnames: set per cluster in the generator (valuesInline)
25+
region: us-east-1
26+
s3forcepathstyle: false
27+
# Configure ingestion limits to handle Vector's data volume
28+
limits_config:
29+
retention_period: 744h # 31 days retention
30+
ingestion_rate_mb: 50
31+
ingestion_burst_size_mb: 100
32+
ingestion_rate_strategy: "local"
33+
max_streams_per_user: 0
34+
max_line_size: 2097152
35+
per_stream_rate_limit: 50M
36+
per_stream_rate_limit_burst: 200M
37+
reject_old_samples: false
38+
reject_old_samples_max_age: 168h
39+
discover_service_name: []
40+
discover_log_levels: false
41+
volume_enabled: true
42+
max_global_streams_per_user: 75000
43+
max_entries_limit_per_query: 100000
44+
increment_duplicate_timestamp: true
45+
allow_structured_metadata: true
46+
ingester:
47+
chunk_target_size: 8388608 # 8MB
48+
chunk_idle_period: 5m
49+
max_chunk_age: 2h
50+
chunk_encoding: snappy # Compress data (reduces S3 transfer size)
51+
chunk_retain_period: 1h # Keep chunks in memory after flush
52+
flush_op_timeout: 10m # Add timeout for S3 operations
53+
54+
# Tuning for high-load queries
55+
querier:
56+
max_concurrent: 8
57+
query_range:
58+
# split_queries_by_interval is deprecated in Loki 3.x, so it is intentionally not set here
59+
parallelise_shardable_queries: true
60+
61+
# Distributed components configuration
62+
ingester:
63+
replicas: 3
64+
autoscaling:
65+
enabled: true
66+
zoneAwareReplication:
67+
enabled: true
68+
maxUnavailable: 1
69+
resources:
70+
requests:
71+
cpu: 500m
72+
memory: 1Gi
73+
limits:
74+
cpu: 2000m
75+
memory: 2Gi
76+
persistence:
77+
enabled: true
78+
size: 10Gi
79+
affinity: {}
80+
podAntiAffinity:
81+
soft: {}
82+
hard: {}
83+
84+
querier:
85+
replicas: 3
86+
autoscaling:
87+
enabled: true
88+
maxUnavailable: 1
89+
resources:
90+
requests:
91+
cpu: 300m
92+
memory: 512Mi
93+
limits:
94+
memory: 1Gi
95+
affinity: {}
96+
97+
queryFrontend:
98+
replicas: 2
99+
maxUnavailable: 1
100+
resources:
101+
requests:
102+
cpu: 200m
103+
memory: 256Mi
104+
limits:
105+
memory: 512Mi
106+
107+
queryScheduler:
108+
replicas: 2
109+
maxUnavailable: 1
110+
resources:
111+
requests:
112+
cpu: 200m
113+
memory: 256Mi
114+
limits:
115+
memory: 512Mi
116+
117+
distributor:
118+
replicas: 3
119+
autoscaling:
120+
enabled: true
121+
maxUnavailable: 1
122+
resources:
123+
requests:
124+
cpu: 300m
125+
memory: 512Mi
126+
limits:
127+
memory: 1Gi
128+
affinity: {}
129+
130+
compactor:
131+
replicas: 1
132+
retention_enabled: true
133+
retention_delete_delay: 2h
134+
retention_delete_worker_count: 150
135+
resources:
136+
requests:
137+
cpu: 200m
138+
memory: 512Mi
139+
limits:
140+
memory: 1Gi
141+
142+
indexGateway:
143+
replicas: 2
144+
maxUnavailable: 0
145+
resources:
146+
requests:
147+
cpu: 300m
148+
memory: 512Mi
149+
limits:
150+
memory: 1Gi
151+
affinity: {}
152+
153+
# Enable Memcached caches for performance
154+
chunksCache:
155+
enabled: true
156+
replicas: 1
157+
158+
resultsCache:
159+
enabled: true
160+
replicas: 1
161+
162+
memcached:
163+
enabled: true
164+
165+
memcachedResults:
166+
enabled: true
167+
168+
memcachedChunks:
169+
enabled: true
170+
171+
memcachedFrontend:
172+
enabled: true
173+
174+
memcachedIndexQueries:
175+
enabled: true
176+
177+
memcachedIndexWrites:
178+
enabled: true
179+
180+
# Disable MinIO - this cluster stores data in S3 via an IAM role, not in-cluster MinIO
181+
minio:
182+
enabled: false
183+
184+
# Resources for memcached exporter to satisfy linter
185+
memcachedExporter:
186+
resources:
187+
requests:
188+
cpu: 50m
189+
memory: 64Mi
190+
limits:
191+
memory: 128Mi
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
---
2+
# Simplified Loki configuration for this cluster (not staging-specific: this commit configures kflux-prd-rh03)
3+
deploymentMode: Distributed
4+
5+
# This exposes the Loki gateway so it can be written to and queried externally
6+
gateway:
7+
image:
8+
registry: quay.io # Use the Quay.io registry to avoid Docker Hub rate limits
9+
repository: nginx/nginx-unprivileged
10+
tag: 1.24-alpine
11+
nginxConfig:
12+
resolver: "dns-default.openshift-dns.svc.cluster.local."
13+
14+
# Basic Loki configuration
15+
loki:
16+
# Enable multi-tenancy to handle X-Scope-OrgID headers
17+
auth_enabled: true
18+
commonConfig:
19+
path_prefix: /var/loki # This directory will be writable via volume mount
20+
storage:
21+
type: s3
22+
schemaConfig:
23+
configs:
24+
- from: "2024-04-01"
25+
store: tsdb
26+
object_store: s3
27+
schema: v13
28+
index:
29+
prefix: loki_index_
30+
period: 24h
31+
# Configure compactor to use writable volumes
32+
compactor:
33+
working_directory: /var/loki/compactor
34+
35+
# Security contexts for OpenShift
36+
podSecurityContext:
37+
runAsNonRoot: false
38+
allowPrivilegeEscalation: false
39+
40+
containerSecurityContext:
41+
runAsNonRoot: false
42+
allowPrivilegeEscalation: false
43+
capabilities:
44+
drop:
45+
- ALL
46+
readOnlyRootFilesystem: true # Keep read-only root filesystem for security
47+
48+
# Disable test pods
49+
test:
50+
enabled: false
51+
52+
# Disable sidecar completely to avoid loki-sc-rules container
53+
sidecar:
54+
rules:
55+
enabled: false
56+
datasources:
57+
enabled: false
58+
59+
# Zero out replica counts of other deployment modes
60+
61+
singleBinary:
62+
replicas: 0
63+
backend:
64+
replicas: 0
65+
read:
66+
replicas: 0
67+
write:
68+
replicas: 0
69+
70+
bloomPlanner:
71+
replicas: 0
72+
bloomBuilder:
73+
replicas: 0
74+
bloomGateway:
75+
replicas: 0
76+
77+
# Disable lokiCanary - not essential for core functionality
78+
lokiCanary:
79+
enabled: false
80+
81+
# Disable the ruler - not needed as we aren't using metrics
82+
ruler:
83+
enabled: false
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: builtin
2+
kind: HelmChartInflationGenerator
3+
metadata:
4+
name: vector
5+
name: vector
6+
repo: https://helm.vector.dev
7+
version: 0.43.0
8+
releaseName: vector
9+
namespace: product-kubearchive-logging
10+
valuesFile: vector-helm-values.yaml
11+
additionalValuesFiles:
12+
- vector-helm-prod-values.yaml
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
resources:
3+
requests:
4+
cpu: 512m
5+
memory: 4096Mi
6+
limits:
7+
cpu: 2000m
8+
memory: 4096Mi
9+
10+
customConfig:
11+
sources:
12+
k8s_logs:
13+
extra_label_selector: "app.kubernetes.io/managed-by in (tekton-pipelines,pipelinesascode.tekton.dev)"
14+
extra_field_selector: "metadata.namespace!=product-kubearchive-logging"
15+
16+
podLabels:
17+
vector.dev/exclude: "false"

0 commit comments

Comments
 (0)