Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions roles/kepler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Kepler

Kepler (Kubernetes-based Efficient Power Level Exporter) is a Prometheus exporter that measures energy consumption at the container, pod, VM, and process level by reading hardware sensors and attributing power based on resource utilization.

Kepler uses Intel RAPL (Running Average Power Limit) sensors to collect energy data from CPU packages, cores, and memory subsystems, then distributes this energy proportionally to workloads based on their CPU time consumption.

* GIT: https://github.com/sustainable-computing-io/kepler
* IMAGES: https://quay.io/repository/sustainable_computing_io/kepler
* WWW: https://sustainable-computing.io/
33 changes: 33 additions & 0 deletions roles/kepler/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
# Default variables of the kepler role.

##########################
# operator

# Account that owns the directories and files written by this role.
operator_user: "dragon"
operator_group: "{{ operator_user }}"

##########################
# docker

# MTU applied to the bridge network declared in the compose template.
docker_network_mtu: 1500
# Registry host that kepler_repository is built from.
docker_registry_kepler: "quay.io"

##########################
# kepler

kepler_configuration_directory: "/opt/kepler/configuration"
kepler_container_name: "kepler"
kepler_docker_compose_directory: "/opt/kepler"
kepler_exporter: "prometheus"
# Command-line flags handed to the kepler binary. The compose template joins
# this value with spaces, so it has to be a list of strings: the defaults
# below followed by any site-specific additions in kepler_flags_extra.
kepler_flags: "{{ kepler_flags_defaults + kepler_flags_extra }}"
kepler_flags_defaults:
  - "--config.file=/etc/kepler/config.yaml"
# Must be a list (empty by default) so it can be concatenated above.
kepler_flags_extra: []
kepler_host: "0.0.0.0"
kepler_image: "{{ kepler_repository }}:{{ kepler_tag }}"
kepler_kubeconfig_directory: "/opt/kepler/kubeconfig"
kepler_network: "172.31.101.80/28"
# Port published on the host and, by default, the port used in the container.
kepler_port: 28282
kepler_port_container: "{{ kepler_port }}"
kepler_repository: "{{ docker_registry_kepler }}/sustainable_computing_io/kepler"
kepler_service_name: "docker-compose@kepler"
# When true, the compose template sets "pid: host" on the container.
kepler_share_pids_with_host: true
kepler_tag: "v0.11.2"
10 changes: 10 additions & 0 deletions roles/kepler/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Handler notified by the template tasks of this role. It restarts the unit
# named by kepler_service_name and retries until the registered result
# reports an ActiveState of "active".
- name: Restart kepler service
  become: true
  ansible.builtin.service:
    state: restarted
    name: "{{ kepler_service_name }}"
  register: result
  # Up to 10 retries with 20 seconds between attempts.
  retries: 10
  delay: 20
  until: result["status"]["ActiveState"] == "active"
22 changes: 22 additions & 0 deletions roles/kepler/meta/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# Ansible Galaxy metadata of the kepler role.
galaxy_info:
  author: Vinícius Zavam
  description: Role osism.services.kepler
  company: OSBA ECO:DIGIT
  license: Apache License 2.0
  # Quoted so the version is always read as a string.
  min_ansible_version: "2.16.0"
  # Distributions and releases listed for this role.
  platforms:
    - name: Ubuntu
      versions:
        - jammy
        - noble
    - name: Debian
      versions:
        - bookworm
    - name: EL
      versions:
        - "9"
  galaxy_tags:
    - osism
    - system
# The role declares no dependencies on other roles.
dependencies: []
22 changes: 22 additions & 0 deletions roles/kepler/tasks/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# Create the configuration, compose and kubeconfig directories, owned by the
# operator account and not accessible to other users (mode 0750).
- name: Create required directories
  become: true
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0750"
    owner: "{{ operator_user }}"
    group: "{{ operator_group }}"
  loop:
    - "{{ kepler_configuration_directory }}"
    - "{{ kepler_docker_compose_directory }}"
    - "{{ kepler_kubeconfig_directory }}"

# Render config.yaml.j2 into the configuration directory. A change to the
# rendered file notifies the restart handler.
- name: Copy configuration file
  ansible.builtin.template:
    dest: "{{ kepler_configuration_directory }}/config.yaml"
    src: config.yaml.j2
    mode: "0640"
    owner: "{{ operator_user }}"
    group: "{{ operator_group }}"
  notify: Restart kepler service
8 changes: 8 additions & 0 deletions roles/kepler/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# Dynamically include the configuration tasks.
#
# "tags" on include_tasks only tags the include statement itself; the tasks
# inside config.yml do not inherit it. "apply" passes the tag on to them so
# that a run limited with "--tags config" actually executes the included
# tasks instead of only the (empty) include.
- name: Include config tasks
  ansible.builtin.include_tasks:
    file: config.yml
    apply:
      tags: config
  tags: config

# Dynamically include the service tasks.
#
# "tags" on include_tasks only tags the include statement itself; the tasks
# inside service.yml do not inherit it. "apply" passes the tag on to them so
# that a run limited with "--tags service" actually executes the included
# tasks instead of only the (empty) include.
- name: Include service tasks
  ansible.builtin.include_tasks:
    file: service.yml
    apply:
      tags: service
  tags: service
20 changes: 20 additions & 0 deletions roles/kepler/tasks/service.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
# Render docker-compose.yml.j2 into the compose directory. A change to the
# rendered file notifies the restart handler.
- name: Copy docker-compose.yml file
  ansible.builtin.template:
    dest: "{{ kepler_docker_compose_directory }}/docker-compose.yml"
    src: docker-compose.yml.j2
    mode: "0640"
    owner: "{{ operator_user }}"
    group: "{{ operator_group }}"
  notify: Restart kepler service

# Enable the unit named by kepler_service_name and make sure it is started.
# The task is retried until the registered result reports an ActiveState of
# "active".
- name: Manage kepler service
  become: true
  ansible.builtin.service:
    enabled: true
    state: started
    name: "{{ kepler_service_name }}"
  register: result
  # Up to 10 retries with 20 seconds between attempts.
  retries: 10
  delay: 20
  until: result["status"]["ActiveState"] == "active"
83 changes: 83 additions & 0 deletions roles/kepler/templates/config.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
---
# Kepler configuration file, rendered by the kepler role and mounted into the
# container under /etc/kepler.
log:
  level: info  # debug, info, warn, error (default: info)
  format: text  # text or json (default: text)

monitor:
  # Interval is the monitor's refresh interval. Processes whose whole
  # lifetime falls within a single monitor interval (spawned and killed
  # between two refreshes) are ignored.
  #
  # NOTE: setting the interval to 0s will disable the monitor refreshes
  interval: 5s

  # Staleness is the duration after which the data computed by the monitor
  # in a refresh is considered stale and is recomputed when requested again.
  #
  # This is especially useful if you have multiple Prometheus instances
  # (in an HA setup) scraping kepler: the data received by both instances
  # will be the same as long as the scrapes happen within the staleness
  # duration.
  #
  # NOTE: Keep staleness shorter than the monitor interval.
  staleness: 1000ms

  # maximum number of terminated workloads (process, container, VM, pods)
  # to be kept in memory until the data is exported; 0 disables the limit
  maxTerminated: 500

  # minimum energy threshold (in joules) for terminated workloads;
  # terminated workloads with energy consumption below this threshold
  # will be filtered out
  minTerminatedEnergyThreshold: 10

host:
  # The compose template of this role bind-mounts the host's /sys and /proc
  # read-only at /host/sys and /host/proc; point kepler at those mounts so
  # it reads the host's view rather than the container's own filesystems.
  sysfs: /host/sys  # Path to sysfs filesystem (default: /sys)
  procfs: /host/proc  # Path to procfs filesystem (default: /proc)

rapl:
  zones: []  # zones to be enabled, empty enables all default zones (core, dram, package)

exporter:
  stdout:  # stdout exporter related config
    enabled: false  # disabled by default

  prometheus:  # prometheus exporter related config
    enabled: true
    # debugCollectors:
    #   - go
    #   - process
    metricsLevel:
      - node
      - process
      - container
      - vm
      - pod

debug:  # debug related config
  pprof:  # pprof related config
    enabled: false

web:
  configFile: ""  # Path to TLS server config file
  listenAddresses:  # Web server listen addresses
    # ipwrap puts IPv6 literals in brackets (as the compose template does for
    # the published port); the quotes keep the rendered value a YAML string.
    - "{{ kepler_host | ansible.utils.ipwrap }}:{{ kepler_port_container }}"

kube:  # kubernetes related config
  enabled: false  # enable kubernetes monitoring (default: false)
  config: ""  # path to kubeconfig file (optional if running in-cluster)
  nodeName: ""  # name of the kubernetes node (required when enabled)

# WARN DO NOT ENABLE THIS IN PRODUCTION - for development / testing only
dev:
  fake-cpu-meter:
    enabled: false
    zones: []  # zones to be enabled, empty enables all default zones

# EXPERIMENTAL FEATURES - These features are experimental and may be unstable
# and are disabled by default
experimental:
  platform:
    redfish:
      enabled: false  # Enable experimental Redfish BMC power monitoring
      configFile: hack/redfish.yaml  # Path to Redfish BMC configuration file
      nodeName: ""  # Node name to use (overrides Kubernetes node name and hostname fallback)
      httpTimeout: 5s  # HTTP client timeout for BMC requests (default: 5s)
42 changes: 42 additions & 0 deletions roles/kepler/templates/docker-compose.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Compose project for the kepler exporter, rendered by the kepler role.
services:
  kepler:
    container_name: "{{ kepler_container_name }}"
    # All command-line flags come from kepler_flags (a list, joined with
    # spaces). No separate "command:" is set: it would be appended to the
    # entrypoint and pass --config.file a second time, and it would not
    # follow overrides of kepler_flags.
    entrypoint: kepler {{ kepler_flags | join(" ") }}
    image: "{{ kepler_image }}"
    privileged: true
    restart: unless-stopped
{% if kepler_share_pids_with_host %}
    pid: host
{% endif %}
    ports:
      # ipwrap puts an IPv6 kepler_host in brackets for the port mapping.
      - "{{ kepler_host | ansible.utils.ipwrap }}:{{ kepler_port }}:{{ kepler_port_container }}/tcp"
    volumes:
      # Host procfs and sysfs, read-only, under /host inside the container.
      - type: bind
        source: /proc
        target: /host/proc
        read_only: true
      - type: bind
        source: /sys
        target: /host/sys
        read_only: true
      # Directory holding the rendered config.yaml.
      - type: bind
        source: "{{ kepler_configuration_directory }}"
        target: /etc/kepler
        read_only: true
      - type: bind
        source: "{{ kepler_kubeconfig_directory }}"
        target: /host/kube
        read_only: true

# Default network of the compose project: a bridge with the MTU from
# docker_network_mtu and the subnet from kepler_network.
networks:
  default:
    driver: bridge
    driver_opts:
      com.docker.network.driver.mtu: {{ docker_network_mtu }}
    ipam:
      driver: default
      config:
        - subnet: "{{ kepler_network }}"