Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
.vs/
.vscode/
.idea/
data/
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# dockprom

A monitoring solution for Docker hosts and containers with [Prometheus](https://prometheus.io/), [Grafana](http://grafana.org/), [cAdvisor](https://github.com/google/cadvisor),
[NodeExporter](https://github.com/prometheus/node_exporter) and alerting with [AlertManager](https://github.com/prometheus/alertmanager).
A monitoring solution for Docker hosts and containers with [Prometheus](https://prometheus.io/), [Grafana](http://grafana.org/), [Loki](https://github.com/grafana/loki), [Tempo](https://github.com/grafana/tempo), [cAdvisor](https://github.com/google/cadvisor), [Alloy](https://github.com/grafana/alloy), [NodeExporter](https://github.com/prometheus/node_exporter) and alerting with [AlertManager](https://github.com/prometheus/alertmanager).

## Install

Expand Down Expand Up @@ -304,10 +303,10 @@ Please replace the `user:password` part with your user and password set in the i

[In Grafana versions >= 5.1 the id of the grafana user has been changed](http://docs.grafana.org/installation/docker/#migration-from-a-previous-version-of-the-docker-container-to-5-1-or-later). Unfortunately this means that files created prior to 5.1 won’t have the correct permissions for later versions.

| Version | User | User ID |
|:-------:|:-------:|:-------:|
| Version | User | User ID |
| :-----: | :-----: | :-----: |
| < 5.1 | grafana | 104 |
| \>= 5.1 | grafana | 472 |
| \>= 5.1 | grafana | 472 |

There are two possible solutions to this problem.

Expand Down
48 changes: 48 additions & 0 deletions alloy/config.alloy
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
///////////////////////////////////////////////////////////////////////////////
// Grafana Alloy configuration: discovers local Docker containers and forwards their logs to Loki.
// Reference: https://github.com/grafana/loki/blob/main/examples/getting-started/alloy-local-config.yaml
// Reference: https://github.com/grafana/intro-to-mltp/blob/main/alloy/config.alloy

// Discover containers by querying the Docker daemon over its local Unix
// socket. The container list is re-read every 5 seconds.
discovery.docker "flog_scrape" {
  refresh_interval = "5s"
  host             = "unix:///var/run/docker.sock"
}

// Relabeling rules only: the target list is left empty on purpose because
// loki.source.docker consumes the exported `rules`, not relabeled targets.
discovery.relabel "flog_scrape" {
  targets = []

  // Container name -> "container"; the regex captures everything after the
  // leading "/" of the Docker container name.
  rule {
    target_label  = "container"
    source_labels = ["__meta_docker_container_name"]
    regex         = "/(.*)"
  }

  // Docker Compose service label -> "service".
  rule {
    target_label  = "service"
    source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
  }

  // Container state -> "status".
  rule {
    target_label  = "status"
    source_labels = ["__meta_docker_container_state_status"]
  }
}

// Read logs of the discovered containers from the Docker daemon, apply the
// relabeling rules declared in discovery.relabel.flog_scrape, and hand the
// entries to the loki.write.to_loki component.
loki.source.docker "flog_scrape" {
  host             = "unix:///var/run/docker.sock"
  refresh_interval = "5s"

  targets       = discovery.docker.flog_scrape.targets
  relabel_rules = discovery.relabel.flog_scrape.rules
  forward_to    = [loki.write.to_loki.receiver]
}


// Push received log entries to Loki. The host:port part of the push URL is
// taken from the LOKI_HOST environment variable, with "localhost:3100" as
// the fallback passed to coalesce().
loki.write "to_loki" {
  external_labels = {}

  endpoint {
    url = string.format("http://%s/loki/api/v1/push", coalesce(sys.env("LOKI_HOST"), "localhost:3100"))
  }
}
20 changes: 16 additions & 4 deletions docker-compose.exporters.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
services:

nodeexporter:
image: prom/node-exporter:v1.8.2
container_name: nodeexporter
Expand All @@ -15,7 +14,7 @@ services:
restart: unless-stopped
network_mode: host
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'

cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.51.0
Expand All @@ -32,6 +31,19 @@ services:
restart: unless-stopped
network_mode: host
labels:
org.label-schema.group: "monitoring"

org.label-schema.group: 'monitoring'

# Grafana Alloy log shipper for hosts that only run the exporters.
# It reads container logs through the Docker socket and pushes them to Loki.
alloy:
  image: grafana/alloy:v1.7.5
  container_name: alloy
  volumes:
    - './alloy/config.alloy:/etc/alloy/config.alloy'
    - /var/lib/docker/containers:/var/lib/docker/containers:ro
    - /var/run/docker.sock:/var/run/docker.sock:ro
  command: ['run', '--server.http.listen-addr=0.0.0.0:12345', '--stability.level=public-preview', '/etc/alloy/config.alloy']
  restart: unless-stopped
  environment:
    # This service uses `network_mode: host`, so it is not attached to any
    # Compose network and the service name `loki` cannot be resolved from it.
    # Set LOKI_HOST in the shell or in `.env` to the host:port where Loki is
    # reachable from this machine; the default targets a Loki instance
    # published on the same host.
    - LOKI_HOST=${LOKI_HOST:-localhost:3100}
  network_mode: host
  labels:
    org.label-schema.group: 'monitoring'
96 changes: 96 additions & 0 deletions docker-compose.loki.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Bridge network that the services declared in this file attach to.
networks:
  monitor-net:
    driver: bridge

services:
# OpenTelemetry Collector, configured from ./otel/otel-collector.yaml.
# Started after tempo and loki.
otel-collector:
  container_name: otel-collector
  image: otel/opentelemetry-collector:0.123.0
  command:
    - '--config=/etc/otel-collector.yaml'
  volumes:
    - ./otel/otel-collector.yaml:/etc/otel-collector.yaml
  # Host ports published one-to-one with the container ports.
  ports:
    - '1888:1888' # pprof extension
    - '8888:8888' # Prometheus metrics exposed by the collector
    - '8889:8889' # Prometheus exporter metrics
    - '13133:13133' # health_check extension
    - '4317:4317' # OTLP gRPC receiver
    - '4318:4318' # OTLP HTTP receiver
    - '55679:55679' # zpages extension
  depends_on:
    - tempo
    - loki
  networks:
    - monitor-net
  restart: unless-stopped
  labels:
    org.label-schema.group: 'monitoring'

# Grafana Alloy log shipper on the monitoring network. LOKI_HOST points at
# the `loki` service, which is reachable by name on monitor-net.
alloy:
  container_name: alloy
  image: grafana/alloy:v1.7.5
  command:
    - 'run'
    - '--server.http.listen-addr=0.0.0.0:12345'
    - '--stability.level=public-preview'
    - '/etc/alloy/config.alloy'
  environment:
    LOKI_HOST: 'loki:3100'
  volumes:
    - './alloy/config.alloy:/etc/alloy/config.alloy'
    - /var/lib/docker/containers:/var/lib/docker/containers:ro
    - /var/run/docker.sock:/var/run/docker.sock:ro
  # Alloy HTTP server port, made available to other containers only.
  expose:
    - '12345'
  depends_on:
    - otel-collector
    - loki
  networks:
    - monitor-net
  restart: unless-stopped
  labels:
    org.label-schema.group: 'monitoring'

# Grafana Tempo trace store, configured from ./tempo/tempo.yaml with its
# data kept under ./data/tempo on the host.
tempo:
  container_name: tempo
  image: grafana/tempo:2.7.2
  command:
    - '-config.file=/etc/tempo.yaml'
  volumes:
    - ./tempo/tempo.yaml:/etc/tempo.yaml
    - ./data/tempo:/var/tempo
  expose:
    - '14268'
    - '3200'
    - '4317'
    - '4318'
    - '9411'
  # Container-port-only entries: Compose publishes each of these on a
  # host port chosen by Docker rather than on a fixed one.
  ports:
    - '14268' # jaeger ingest
    - '3200' # tempo
    - '4317' # otlp grpc
    - '4318' # otlp http
    - '9411' # zipkin
  networks:
    - monitor-net
  restart: unless-stopped
  labels:
    org.label-schema.group: 'monitoring'

# Grafana Loki log store. Runs as root, reads its configuration from
# ./loki/loki.yaml (with environment expansion enabled) and keeps its data
# under ./data/loki on the host.
loki:
  container_name: loki
  image: grafana/loki:3.4
  user: root
  command:
    - '-config.file=/etc/loki/loki.yaml'
    - '-config.expand-env=true'
  environment:
    JAEGER_AGENT_HOST: tempo
    # Send Loki's own traces to Tempo.
    JAEGER_ENDPOINT: http://tempo:14268/api/traces
    JAEGER_SAMPLER_TYPE: const
    JAEGER_SAMPLER_PARAM: '1'
  volumes:
    - ./loki/loki.yaml:/etc/loki/loki.yaml
    - ./data/loki:/tmp/loki
  expose:
    - '3100'
  # Published on the host so that Loki can receive logs.
  ports:
    - '3100:3100'
  networks:
    - monitor-net
  restart: unless-stopped
  labels:
    org.label-schema.group: 'monitoring'
29 changes: 14 additions & 15 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ networks:
driver: bridge

volumes:
prometheus_data: {}
grafana_data: {}
prometheus_data: {}
grafana_data: {}

services:

prometheus:
image: prom/prometheus:v3.1.0
container_name: prometheus
Expand All @@ -27,7 +26,7 @@ services:
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'

alertmanager:
image: prom/alertmanager:v0.28.0
Expand All @@ -43,7 +42,7 @@ services:
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'

nodeexporter:
image: prom/node-exporter:v1.8.2
Expand All @@ -63,7 +62,7 @@ services:
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'

cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.51.0
Expand All @@ -83,7 +82,7 @@ services:
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'

grafana:
image: grafana/grafana:11.5.1
Expand All @@ -102,7 +101,7 @@ services:
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'

pushgateway:
image: prom/pushgateway:v1.11.0
Expand All @@ -113,17 +112,17 @@ services:
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'

caddy:
image: caddy:2.9.1
container_name: caddy
ports:
- "3000:3000"
- "8080:8080"
- "9090:9090"
- "9093:9093"
- "9091:9091"
- '3000:3000'
- '8080:8080'
- '9090:9090'
- '9093:9093'
- '9091:9091'
volumes:
- ./caddy:/etc/caddy
environment:
Expand All @@ -134,4 +133,4 @@ services:
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
org.label-schema.group: 'monitoring'
66 changes: 65 additions & 1 deletion grafana/provisioning/datasources/datasource.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,68 @@ datasources:
url: http://prometheus:9090
basicAuth: false
isDefault: true
editable: true
editable: true

# Tempo tracing data source, with links from traces to logs (Loki) and to
# metrics (Prometheus).
- name: Tempo
  type: tempo
  access: proxy
  orgId: 1
  url: http://tempo:3200
  basicAuth: false
  isDefault: false
  version: 1
  editable: true
  apiVersion: 1
  uid: tempo
  jsonData:
    tracesToLogsV2:
      # Field with an internal link pointing to a logs data source in Grafana.
      # datasourceUid value must match the uid value of the logs data source.
      datasourceUid: 'loki'
      # A negative start shift widens the window into the past and a positive
      # end shift widens it into the future, so logs emitted around the span
      # are included.
      spanStartTimeShift: '-1h'
      spanEndTimeShift: '1h'
      tags: ['job', 'instance', 'pod', 'namespace', 'app']
      filterByTraceID: false
      filterBySpanID: false
      customQuery: true
      # `$$` escapes `$` so provisioning does not treat it as an environment
      # variable reference (same escaping as `$$__tags` below).
      query: 'method="$${__span.tags.method}"'
    tracesToMetrics:
      # NOTE(review): must match the uid of the Prometheus data source; that
      # data source has to declare `uid: prometheus` explicitly - confirm.
      datasourceUid: 'prometheus'
      spanStartTimeShift: '-1h'
      spanEndTimeShift: '1h'
      tags: [{ key: 'service.name', value: 'service' }, { key: 'job' }]
      queries:
        - name: 'Sample query'
          query: 'sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[5m]))'
    serviceMap:
      datasourceUid: 'prometheus'
    nodeGraph:
      enabled: true
    search:
      hide: false
    lokiSearch:
      datasourceUid: 'loki'
    traceQuery:
      timeShiftEnabled: true
      # NOTE(review): left as in the original; verify the intended sign
      # convention for trace-ID query time shifts.
      spanStartTimeShift: '1h'
      spanEndTimeShift: '-1h'
    spanBar:
      type: 'Tag'
      tag: 'http.path'

# Loki logs data source. The explicit uid is required because the Tempo data
# source links to it via `datasourceUid: 'loki'`; without it Grafana assigns
# a generated uid and those links resolve to nothing.
- name: Loki
  type: loki
  uid: loki
  access: proxy
  orgId: 1
  url: http://loki:3100
  basicAuth: false
  isDefault: false
  version: 1
  editable: false
  apiVersion: 1
  jsonData:
    # Turn a "traceId" value found in a log line into an internal link that
    # opens the trace in the data source with uid `tempo`.
    derivedFields:
      - datasourceUid: tempo
        matcherRegex: '"traceId":"([A-Za-z0-9]+)"'
        name: TraceID
        # `$$` escapes `$` for the provisioning loader.
        url: $${__value.raw}
Loading