From bc5bcf28a4152c0b9051f205834256779efc0f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?= Date: Wed, 8 Oct 2025 15:09:36 -0300 Subject: [PATCH 1/8] feat: create new role to deploy kepler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Vinícius Zavam --- roles/kepler/README.md | 9 +++ roles/kepler/defaults/main.yml | 33 ++++++++ roles/kepler/handlers/main.yml | 10 +++ roles/kepler/meta/main.yml | 22 ++++++ roles/kepler/tasks/config.yml | 22 ++++++ roles/kepler/tasks/main.yml | 8 ++ roles/kepler/tasks/service.yml | 20 +++++ roles/kepler/templates/config.yaml.j2 | 83 ++++++++++++++++++++ roles/kepler/templates/docker-compose.yml.j2 | 42 ++++++++++ 9 files changed, 249 insertions(+) create mode 100644 roles/kepler/README.md create mode 100644 roles/kepler/defaults/main.yml create mode 100644 roles/kepler/handlers/main.yml create mode 100644 roles/kepler/meta/main.yml create mode 100644 roles/kepler/tasks/config.yml create mode 100644 roles/kepler/tasks/main.yml create mode 100644 roles/kepler/tasks/service.yml create mode 100644 roles/kepler/templates/config.yaml.j2 create mode 100644 roles/kepler/templates/docker-compose.yml.j2 diff --git a/roles/kepler/README.md b/roles/kepler/README.md new file mode 100644 index 000000000..3b9634cd5 --- /dev/null +++ b/roles/kepler/README.md @@ -0,0 +1,9 @@ +Kepler + +Kepler (Kubernetes-based Efficient Power Level Exporter) is a Prometheus exporter that measures energy consumption at the container, pod, VM, and process level by reading hardware sensors and attributing power based on resource utilization. + +Kepler uses Intel RAPL (Running Average Power Limit) sensors to collect energy data from CPU packages, cores, and memory subsystems, then distributes this energy proportionally to workloads based on their CPU time consumption. 
+ + * GIT: https://github.com/sustainable-computing-io/kepler + * IMAGES: https://quay.io/repository/sustainable_computing_io/kepler + * WWW: https://sustainable-computing.io/ diff --git a/roles/kepler/defaults/main.yml b/roles/kepler/defaults/main.yml new file mode 100644 index 000000000..52b0ce934 --- /dev/null +++ b/roles/kepler/defaults/main.yml @@ -0,0 +1,33 @@ +--- +########################## +# operator + +operator_user: "dragon" +operator_group: "{{ operator_user }}" + +########################## +# docker + +docker_network_mtu: 1500 +docker_registry_kepler: "quay.io" + +########################## +# kepler + +kepler_configuration_directory: "/opt/kepler/configuration" +kepler_container_name: "kepler" +kepler_docker_compose_directory: "/opt/kepler" +kepler_exporter: "prometheus" +kepler_flags: "{{ kepler_flags_defaults + kepler_flags_extra }}" +kepler_flags_defaults: "--config.file=/etc/kepler/config.yaml" +kepler_flags_extras: +kepler_host: "0.0.0.0" +kepler_image: "{{ kepler_repository }}:{{ kepler_tag }}" +kepler_kubeconfig_directory: "/opt/kepler/kubeconfig" +kepler_network: "172.31.101.80/28" +kepler_port: 28282 +kepler_port_container: "{{ kepler_port }}" +kepler_repository: "{{ docker_registry_kepler }}/sustainable_computing_io/kepler" +kepler_service_name: "docker-compose@kepler" +kepler_share_pids_with_host: true +kepler_tag: "v0.11.2" diff --git a/roles/kepler/handlers/main.yml b/roles/kepler/handlers/main.yml new file mode 100644 index 000000000..dc192063a --- /dev/null +++ b/roles/kepler/handlers/main.yml @@ -0,0 +1,10 @@ +--- +- name: Restart kepler service + become: true + ansible.builtin.service: + name: "{{ kepler_service_name }}" + state: restarted + register: result + until: result["status"]["ActiveState"] == "active" + retries: 10 + delay: 20 diff --git a/roles/kepler/meta/main.yml b/roles/kepler/meta/main.yml new file mode 100644 index 000000000..1d3cde852 --- /dev/null +++ b/roles/kepler/meta/main.yml @@ -0,0 +1,22 @@ +--- +galaxy_info: 
+ author: Vinícius Zavam + description: Role osism.services.kepler + company: OSBA ECO:DIGIT + license: Apache License 2.0 + min_ansible_version: 2.16.0 + platforms: + - name: Ubuntu + versions: + - jammy + - noble + - name: Debian + versions: + - bookworm + - name: EL + versions: + - "9" + galaxy_tags: + - osism + - system +dependencies: [] diff --git a/roles/kepler/tasks/config.yml b/roles/kepler/tasks/config.yml new file mode 100644 index 000000000..6a9814433 --- /dev/null +++ b/roles/kepler/tasks/config.yml @@ -0,0 +1,22 @@ +--- +- name: Create required directories + become: true + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: "{{ operator_user }}" + group: "{{ operator_group }}" + mode: 0750 + loop: + - "{{ kepler_configuration_directory }}" + - "{{ kepler_docker_compose_directory }}" + - "{{ kepler_kubeconfig_directory }}" + +- name: Copy configuration file + ansible.builtin.template: + src: config.yaml.j2 + dest: "{{ kepler_configuration_directory }}/config.yaml" + owner: "{{ operator_user }}" + group: "{{ operator_group }}" + mode: 0640 + notify: Restart kepler service diff --git a/roles/kepler/tasks/main.yml b/roles/kepler/tasks/main.yml new file mode 100644 index 000000000..c906e0e1f --- /dev/null +++ b/roles/kepler/tasks/main.yml @@ -0,0 +1,8 @@ +--- +- name: Include config tasks + ansible.builtin.include_tasks: config.yml + tags: config + +- name: Include service tasks + ansible.builtin.include_tasks: service.yml + tags: service diff --git a/roles/kepler/tasks/service.yml b/roles/kepler/tasks/service.yml new file mode 100644 index 000000000..d8757ad1a --- /dev/null +++ b/roles/kepler/tasks/service.yml @@ -0,0 +1,20 @@ +--- +- name: Copy docker-compose.yml file + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ kepler_docker_compose_directory }}/docker-compose.yml" + owner: "{{ operator_user }}" + group: "{{ operator_group }}" + mode: 0640 + notify: Restart kepler service + +- name: Manage kepler service + 
become: true + ansible.builtin.service: + name: "{{ kepler_service_name }}" + state: started + enabled: true + register: result + until: result["status"]["ActiveState"] == "active" + retries: 10 + delay: 20 diff --git a/roles/kepler/templates/config.yaml.j2 b/roles/kepler/templates/config.yaml.j2 new file mode 100644 index 000000000..037e5df9f --- /dev/null +++ b/roles/kepler/templates/config.yaml.j2 @@ -0,0 +1,83 @@ +--- +log: + level: info # debug, info, warn, error (default: info) + format: text # text or json (default: text) + +monitor: + # Interval is the monitor's refresh interval. All process that + # have a life time (gets spawned and killed within one monitor interval) + # less than the interval will be ignored. + # + # NOTE: setting the interval to 0s will disable the monitor refreshes + interval: 5s + + # Staleness is the duration after the data that is computed by the monitor + # in each refresh is considered stale and recomputed when requested again. + # + # This is especially useful if you have multiple prometheus instances + # (in a HA) scrapping kepler and the data received by both instances will be + # the same as long as the scrapes happens within the staleness duration. + # + # NOTE: Keep staleness shorter than the monitor interval. 
+ staleness: 1000ms + + # maximum number of terminated workloads (process, container, VM, pods) + # to be kept in memory until the data is exported; 0 disables the limit + maxTerminated: 500 + + # minimum energy threshold (in joules) for terminated workloads + # terminated workloads with energy consumption below this threshold will be filtered out + minTerminatedEnergyThreshold: 10 + +host: + sysfs: /sys # Path to sysfs filesystem (default: /sys) + procfs: /proc # Path to procfs filesystem (default: /proc) + +rapl: + zones: [] # zones to be enabled, empty enables all default zones (core, dram, package) + +exporter: + stdout: # stdout exporter related config + enabled: false # disabled by default + + prometheus: # prometheus exporter related config + enabled: true + # debugCollectors: + # - go + # - process + metricsLevel: + - node + - process + - container + - vm + - pod + +debug: # debug related config + pprof: # pprof related config + enabled: false + +web: + configFile: "" # Path to TLS server config file + listenAddresses: # Web server listen addresses + - {{ kepler_host }}:{{ kepler_port_container }} + +kube: # kubernetes related config + enabled: false # enable kubernetes monitoring (default: false) + config: "" # path to kubeconfig file (optional if running in-cluster) + nodeName: "" # name of the kubernetes node (required when enabled) + +# WARN DO NOT ENABLE THIS IN PRODUCTION - for development / testing only +dev: + fake-cpu-meter: + enabled: false + zones: [] # zones to be enabled, empty enables all default zones + +# EXPERIMENTAL FEATURES - These features are experimental and may be unstable +# and are disabled by default +experimental: + platform: + redfish: + enabled: false # Enable experimental Redfish BMC power monitoring + configFile: hack/redfish.yaml # Path to Redfish BMC configuration file + nodeName: "" # Node name to use (overrides Kubernetes node name and hostname fallback) + httpTimeout: 5s # HTTP client timeout for BMC requests (default: 
5s) diff --git a/roles/kepler/templates/docker-compose.yml.j2 b/roles/kepler/templates/docker-compose.yml.j2 new file mode 100644 index 000000000..f78cada24 --- /dev/null +++ b/roles/kepler/templates/docker-compose.yml.j2 @@ -0,0 +1,42 @@ +--- +services: + kepler: + container_name: "{{ kepler_container_name }}" + entrypoint: kepler {{ kepler_flags|join(" ") }} + image: "{{ kepler_image }}" + privileged: true + restart: unless-stopped +{% if kepler_share_pids_with_host %} + pid: host +{% endif %} + ports: + - "{{ kepler_host | ansible.utils.ipwrap }}:{{ kepler_port }}:{{ kepler_port_container }}/tcp" + volumes: + - type: bind + source: /proc + target: /host/proc + read_only: true + - type: bind + source: /sys + target: /host/sys + read_only: true + - type: bind + source: "{{ kepler_configuration_directory }}" + target: /etc/kepler + read_only: true + - type: bind + source: "{{ kepler_kubeconfig_directory }}" + target: /host/kube + read_only: true + command: + - --config.file=/etc/kepler/config.yaml + +networks: + default: + driver: bridge + driver_opts: + com.docker.network.driver.mtu: {{ docker_network_mtu }} + ipam: + driver: default + config: + - subnet: {{ kepler_network }} From ab43504918d40c0bdb0ffed2fd37650ecb780d30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?= Date: Wed, 8 Oct 2025 15:21:42 -0300 Subject: [PATCH 2/8] Revert "feat: create new role to deploy kepler" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 51a9d5e79e659d1774797b47d2261f83e1bea8d8. 
Signed-off-by: Vinícius Zavam --- roles/kepler/README.md | 9 --- roles/kepler/defaults/main.yml | 33 -------- roles/kepler/handlers/main.yml | 10 --- roles/kepler/meta/main.yml | 22 ------ roles/kepler/tasks/config.yml | 22 ------ roles/kepler/tasks/main.yml | 8 -- roles/kepler/tasks/service.yml | 20 ----- roles/kepler/templates/config.yaml.j2 | 83 -------------------- roles/kepler/templates/docker-compose.yml.j2 | 42 ---------- 9 files changed, 249 deletions(-) delete mode 100644 roles/kepler/README.md delete mode 100644 roles/kepler/defaults/main.yml delete mode 100644 roles/kepler/handlers/main.yml delete mode 100644 roles/kepler/meta/main.yml delete mode 100644 roles/kepler/tasks/config.yml delete mode 100644 roles/kepler/tasks/main.yml delete mode 100644 roles/kepler/tasks/service.yml delete mode 100644 roles/kepler/templates/config.yaml.j2 delete mode 100644 roles/kepler/templates/docker-compose.yml.j2 diff --git a/roles/kepler/README.md b/roles/kepler/README.md deleted file mode 100644 index 3b9634cd5..000000000 --- a/roles/kepler/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Kepler - -Kepler (Kubernetes-based Efficient Power Level Exporter) is a Prometheus exporter that measures energy consumption at the container, pod, VM, and process level by reading hardware sensors and attributing power based on resource utilization. - -Kepler uses Intel RAPL (Running Average Power Limit) sensors to collect energy data from CPU packages, cores, and memory subsystems, then distributes this energy proportionally to workloads based on their CPU time consumption. 
- - * GIT: https://github.com/sustainable-computing-io/kepler - * IMAGES: https://quay.io/repository/sustainable_computing_io/kepler - * WWW: https://sustainable-computing.io/ diff --git a/roles/kepler/defaults/main.yml b/roles/kepler/defaults/main.yml deleted file mode 100644 index 52b0ce934..000000000 --- a/roles/kepler/defaults/main.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -########################## -# operator - -operator_user: "dragon" -operator_group: "{{ operator_user }}" - -########################## -# docker - -docker_network_mtu: 1500 -docker_registry_kepler: "quay.io" - -########################## -# kepler - -kepler_configuration_directory: "/opt/kepler/configuration" -kepler_container_name: "kepler" -kepler_docker_compose_directory: "/opt/kepler" -kepler_exporter: "prometheus" -kepler_flags: "{{ kepler_flags_defaults + kepler_flags_extra }}" -kepler_flags_defaults: "--config.file=/etc/kepler/config.yaml" -kepler_flags_extras: -kepler_host: "0.0.0.0" -kepler_image: "{{ kepler_repository }}:{{ kepler_tag }}" -kepler_kubeconfig_directory: "/opt/kepler/kubeconfig" -kepler_network: "172.31.101.80/28" -kepler_port: 28282 -kepler_port_container: "{{ kepler_port }}" -kepler_repository: "{{ docker_registry_kepler }}/sustainable_computing_io/kepler" -kepler_service_name: "docker-compose@kepler" -kepler_share_pids_with_host: true -kepler_tag: "v0.11.2" diff --git a/roles/kepler/handlers/main.yml b/roles/kepler/handlers/main.yml deleted file mode 100644 index dc192063a..000000000 --- a/roles/kepler/handlers/main.yml +++ /dev/null @@ -1,10 +0,0 @@ ---- -- name: Restart kepler service - become: true - ansible.builtin.service: - name: "{{ kepler_service_name }}" - state: restarted - register: result - until: result["status"]["ActiveState"] == "active" - retries: 10 - delay: 20 diff --git a/roles/kepler/meta/main.yml b/roles/kepler/meta/main.yml deleted file mode 100644 index 1d3cde852..000000000 --- a/roles/kepler/meta/main.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- 
-galaxy_info: - author: Vinícius Zavam - description: Role osism.services.kepler - company: OSBA ECO:DIGIT - license: Apache License 2.0 - min_ansible_version: 2.16.0 - platforms: - - name: Ubuntu - versions: - - jammy - - noble - - name: Debian - versions: - - bookworm - - name: EL - versions: - - "9" - galaxy_tags: - - osism - - system -dependencies: [] diff --git a/roles/kepler/tasks/config.yml b/roles/kepler/tasks/config.yml deleted file mode 100644 index 6a9814433..000000000 --- a/roles/kepler/tasks/config.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -- name: Create required directories - become: true - ansible.builtin.file: - path: "{{ item }}" - state: directory - owner: "{{ operator_user }}" - group: "{{ operator_group }}" - mode: 0750 - loop: - - "{{ kepler_configuration_directory }}" - - "{{ kepler_docker_compose_directory }}" - - "{{ kepler_kubeconfig_directory }}" - -- name: Copy configuration file - ansible.builtin.template: - src: config.yaml.j2 - dest: "{{ kepler_configuration_directory }}/config.yaml" - owner: "{{ operator_user }}" - group: "{{ operator_group }}" - mode: 0640 - notify: Restart kepler service diff --git a/roles/kepler/tasks/main.yml b/roles/kepler/tasks/main.yml deleted file mode 100644 index c906e0e1f..000000000 --- a/roles/kepler/tasks/main.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -- name: Include config tasks - ansible.builtin.include_tasks: config.yml - tags: config - -- name: Include service tasks - ansible.builtin.include_tasks: service.yml - tags: service diff --git a/roles/kepler/tasks/service.yml b/roles/kepler/tasks/service.yml deleted file mode 100644 index d8757ad1a..000000000 --- a/roles/kepler/tasks/service.yml +++ /dev/null @@ -1,20 +0,0 @@ ---- -- name: Copy docker-compose.yml file - ansible.builtin.template: - src: docker-compose.yml.j2 - dest: "{{ kepler_docker_compose_directory }}/docker-compose.yml" - owner: "{{ operator_user }}" - group: "{{ operator_group }}" - mode: 0640 - notify: Restart kepler service - -- name: 
Manage kepler service - become: true - ansible.builtin.service: - name: "{{ kepler_service_name }}" - state: started - enabled: true - register: result - until: result["status"]["ActiveState"] == "active" - retries: 10 - delay: 20 diff --git a/roles/kepler/templates/config.yaml.j2 b/roles/kepler/templates/config.yaml.j2 deleted file mode 100644 index 037e5df9f..000000000 --- a/roles/kepler/templates/config.yaml.j2 +++ /dev/null @@ -1,83 +0,0 @@ ---- -log: - level: info # debug, info, warn, error (default: info) - format: text # text or json (default: text) - -monitor: - # Interval is the monitor's refresh interval. All process that - # have a life time (gets spawned and killed within one monitor interval) - # less than the interval will be ignored. - # - # NOTE: setting the interval to 0s will disable the monitor refreshes - interval: 5s - - # Staleness is the duration after the data that is computed by the monitor - # in each refresh is considered stale and recomputed when requested again. - # - # This is especially useful if you have multiple prometheus instances - # (in a HA) scrapping kepler and the data received by both instances will be - # the same as long as the scrapes happens within the staleness duration. - # - # NOTE: Keep staleness shorter than the monitor interval. 
- staleness: 1000ms - - # maximum number of terminated workloads (process, container, VM, pods) - # to be kept in memory until the data is exported; 0 disables the limit - maxTerminated: 500 - - # minimum energy threshold (in joules) for terminated workloads - # terminated workloads with energy consumption below this threshold will be filtered out - minTerminatedEnergyThreshold: 10 - -host: - sysfs: /sys # Path to sysfs filesystem (default: /sys) - procfs: /proc # Path to procfs filesystem (default: /proc) - -rapl: - zones: [] # zones to be enabled, empty enables all default zones (core, dram, package) - -exporter: - stdout: # stdout exporter related config - enabled: false # disabled by default - - prometheus: # prometheus exporter related config - enabled: true - # debugCollectors: - # - go - # - process - metricsLevel: - - node - - process - - container - - vm - - pod - -debug: # debug related config - pprof: # pprof related config - enabled: false - -web: - configFile: "" # Path to TLS server config file - listenAddresses: # Web server listen addresses - - {{ kepler_host }}:{{ kepler_port_container }} - -kube: # kubernetes related config - enabled: false # enable kubernetes monitoring (default: false) - config: "" # path to kubeconfig file (optional if running in-cluster) - nodeName: "" # name of the kubernetes node (required when enabled) - -# WARN DO NOT ENABLE THIS IN PRODUCTION - for development / testing only -dev: - fake-cpu-meter: - enabled: false - zones: [] # zones to be enabled, empty enables all default zones - -# EXPERIMENTAL FEATURES - These features are experimental and may be unstable -# and are disabled by default -experimental: - platform: - redfish: - enabled: false # Enable experimental Redfish BMC power monitoring - configFile: hack/redfish.yaml # Path to Redfish BMC configuration file - nodeName: "" # Node name to use (overrides Kubernetes node name and hostname fallback) - httpTimeout: 5s # HTTP client timeout for BMC requests (default: 
5s) diff --git a/roles/kepler/templates/docker-compose.yml.j2 b/roles/kepler/templates/docker-compose.yml.j2 deleted file mode 100644 index f78cada24..000000000 --- a/roles/kepler/templates/docker-compose.yml.j2 +++ /dev/null @@ -1,42 +0,0 @@ ---- -services: - kepler: - container_name: "{{ kepler_container_name }}" - entrypoint: kepler {{ kepler_flags|join(" ") }} - image: "{{ kepler_image }}" - privileged: true - restart: unless-stopped -{% if kepler_share_pids_with_host %} - pid: host -{% endif %} - ports: - - "{{ kepler_host | ansible.utils.ipwrap }}:{{ kepler_port }}:{{ kepler_port_container }}/tcp" - volumes: - - type: bind - source: /proc - target: /host/proc - read_only: true - - type: bind - source: /sys - target: /host/sys - read_only: true - - type: bind - source: "{{ kepler_configuration_directory }}" - target: /etc/kepler - read_only: true - - type: bind - source: "{{ kepler_kubeconfig_directory }}" - target: /host/kube - read_only: true - command: - - --config.file=/etc/kepler/config.yaml - -networks: - default: - driver: bridge - driver_opts: - com.docker.network.driver.mtu: {{ docker_network_mtu }} - ipam: - driver: default - config: - - subnet: {{ kepler_network }} From ac56752c42681dcbea4cf6265189355d342c2d14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?= Date: Wed, 8 Oct 2025 15:22:07 -0300 Subject: [PATCH 3/8] roles: create new dir for the kepler role MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Vinícius Zavam --- roles/kepler/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 roles/kepler/README.md diff --git a/roles/kepler/README.md b/roles/kepler/README.md new file mode 100644 index 000000000..3b9634cd5 --- /dev/null +++ b/roles/kepler/README.md @@ -0,0 +1,9 @@ +Kepler + +Kepler (Kubernetes-based Efficient Power Level Exporter) is a Prometheus exporter that measures energy consumption at the container, pod, VM, and process level by reading 
hardware sensors and attributing power based on resource utilization.
+
+Kepler uses Intel RAPL (Running Average Power Limit) sensors to collect energy data from CPU packages, cores, and memory subsystems, then distributes this energy proportionally to workloads based on their CPU time consumption.
+
+ * GIT: https://github.com/sustainable-computing-io/kepler
+ * IMAGES: https://quay.io/repository/sustainable_computing_io/kepler
+ * WWW: https://sustainable-computing.io/

From a987fb025ca1bc77ba4762c2443fe20fcfed12e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?=
Date: Wed, 8 Oct 2025 15:22:55 -0300
Subject: [PATCH 4/8] roles(kepler): init default vars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Vinícius Zavam
---
 roles/kepler/defaults/main.yml | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 roles/kepler/defaults/main.yml

diff --git a/roles/kepler/defaults/main.yml b/roles/kepler/defaults/main.yml
new file mode 100644
index 000000000..52b0ce934
--- /dev/null
+++ b/roles/kepler/defaults/main.yml
@@ -0,0 +1,33 @@
+---
+##########################
+# operator
+
+operator_user: "dragon"
+operator_group: "{{ operator_user }}"
+
+##########################
+# docker
+
+docker_network_mtu: 1500
+docker_registry_kepler: "quay.io"
+
+##########################
+# kepler
+
+kepler_configuration_directory: "/opt/kepler/configuration"
+kepler_container_name: "kepler"
+kepler_docker_compose_directory: "/opt/kepler"
+kepler_exporter: "prometheus"
+kepler_flags: "{{ kepler_flags_defaults + kepler_flags_extras }}"  # list; joined with spaces in docker-compose.yml.j2
+kepler_flags_defaults: ["--config.file=/etc/kepler/config.yaml"]
+kepler_flags_extras: []  # additional command line flags, one list item per flag
+kepler_host: "0.0.0.0"
+kepler_image: "{{ kepler_repository }}:{{ kepler_tag }}"
+kepler_kubeconfig_directory: "/opt/kepler/kubeconfig"
+kepler_network: "172.31.101.80/28"
+kepler_port: 28282
+kepler_port_container: "{{ kepler_port }}"
+kepler_repository: "{{ docker_registry_kepler }}/sustainable_computing_io/kepler"
+kepler_service_name: "docker-compose@kepler"
+kepler_share_pids_with_host: true
+kepler_tag: "v0.11.2"

From 7e837fa2dbc33ac36732b6ef64889d5ca6548838 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?=
Date: Wed, 8 Oct 2025 15:23:45 -0300
Subject: [PATCH 5/8] roles(kepler): create templates for kepler and docker-compose
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Vinícius Zavam
---
 roles/kepler/templates/config.yaml.j2        | 83 ++++++++++++++++++++
 roles/kepler/templates/docker-compose.yml.j2 | 42 ++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 roles/kepler/templates/config.yaml.j2
 create mode 100644 roles/kepler/templates/docker-compose.yml.j2

diff --git a/roles/kepler/templates/config.yaml.j2 b/roles/kepler/templates/config.yaml.j2
new file mode 100644
index 000000000..037e5df9f
--- /dev/null
+++ b/roles/kepler/templates/config.yaml.j2
@@ -0,0 +1,83 @@
+---
+log:
+  level: info  # debug, info, warn, error (default: info)
+  format: text  # text or json (default: text)
+
+monitor:
+  # Interval is the monitor's refresh interval. All processes that
+  # have a life time (gets spawned and killed within one monitor interval)
+  # less than the interval will be ignored.
+  #
+  # NOTE: setting the interval to 0s will disable the monitor refreshes
+  interval: 5s
+
+  # Staleness is the duration after the data that is computed by the monitor
+  # in each refresh is considered stale and recomputed when requested again.
+  #
+  # This is especially useful if you have multiple prometheus instances
+  # (in a HA) scraping kepler and the data received by both instances will be
+  # the same as long as the scrapes happen within the staleness duration.
+  #
+  # NOTE: Keep staleness shorter than the monitor interval.
+  staleness: 1000ms
+
+  # maximum number of terminated workloads (process, container, VM, pods)
+  # to be kept in memory until the data is exported; 0 disables the limit
+  maxTerminated: 500
+
+  # minimum energy threshold (in joules) for terminated workloads
+  # terminated workloads with energy consumption below this threshold will be filtered out
+  minTerminatedEnergyThreshold: 10
+
+host:
+  sysfs: /host/sys  # host sysfs as bind-mounted by docker-compose.yml (default: /sys)
+  procfs: /host/proc  # host procfs as bind-mounted by docker-compose.yml (default: /proc)
+
+rapl:
+  zones: []  # zones to be enabled, empty enables all default zones (core, dram, package)
+
+exporter:
+  stdout:  # stdout exporter related config
+    enabled: false  # disabled by default
+
+  prometheus:  # prometheus exporter related config
+    enabled: true
+    # debugCollectors:
+    #   - go
+    #   - process
+    metricsLevel:
+      - node
+      - process
+      - container
+      - vm
+      - pod
+
+debug:  # debug related config
+  pprof:  # pprof related config
+    enabled: false
+
+web:
+  configFile: ""  # Path to TLS server config file
+  listenAddresses:  # in-container bind; the host-side address is set by the compose port mapping
+    - ":{{ kepler_port_container }}"
+
+kube:  # kubernetes related config
+  enabled: false  # enable kubernetes monitoring (default: false)
+  config: ""  # path to kubeconfig file (optional if running in-cluster)
+  nodeName: ""  # name of the kubernetes node (required when enabled)
+
+# WARN DO NOT ENABLE THIS IN PRODUCTION - for development / testing only
+dev:
+  fake-cpu-meter:
+    enabled: false
+    zones: []  # zones to be enabled, empty enables all default zones
+
+# EXPERIMENTAL FEATURES - These features are experimental and may be unstable
+# and are disabled by default
+experimental:
+  platform:
+    redfish:
+      enabled: false  # Enable experimental Redfish BMC power monitoring
+      configFile: hack/redfish.yaml  # Path to Redfish BMC configuration file
+      nodeName: ""  # Node name to use (overrides Kubernetes node name and hostname fallback)
+      httpTimeout: 5s  # HTTP client timeout for BMC requests (default: 5s)
diff --git a/roles/kepler/templates/docker-compose.yml.j2 b/roles/kepler/templates/docker-compose.yml.j2
new file mode 100644
index 000000000..f78cada24
--- /dev/null
+++ b/roles/kepler/templates/docker-compose.yml.j2
@@ -0,0 +1,42 @@
+---
+services:
+  kepler:
+    container_name: "{{ kepler_container_name }}"
+    entrypoint: kepler {{ kepler_flags | join(" ") }}
+    image: "{{ kepler_image }}"
+    privileged: true
+    restart: unless-stopped
+{% if kepler_share_pids_with_host %}
+    pid: host
+{% endif %}
+    ports:
+      - "{{ kepler_host | ansible.utils.ipwrap }}:{{ kepler_port }}:{{ kepler_port_container }}/tcp"
+    volumes:
+      - type: bind
+        source: /proc
+        target: /host/proc
+        read_only: true
+      - type: bind
+        source: /sys
+        target: /host/sys
+        read_only: true
+      - type: bind
+        source: "{{ kepler_configuration_directory }}"
+        target: /etc/kepler
+        read_only: true
+      - type: bind
+        source: "{{ kepler_kubeconfig_directory }}"
+        target: /host/kube
+        read_only: true
+    # All flags (including --config.file) come from kepler_flags via the
+    # entrypoint above; a separate command would append them a second time.
+
+networks:
+  default:
+    driver: bridge
+    driver_opts:
+      com.docker.network.driver.mtu: {{ docker_network_mtu }}
+    ipam:
+      driver: default
+      config:
+        - subnet: {{ kepler_network }}

From 1f55ac6ba28ab6a0cb8bc4e7395256973c9d6f95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?=
Date: Wed, 8 Oct 2025 15:24:22 -0300
Subject: [PATCH 6/8] roles(kepler): create task files for config and service for kepler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Vinícius Zavam
---
 roles/kepler/tasks/config.yml  | 22 ++++++++++++++++++++++
 roles/kepler/tasks/main.yml    |  8 ++++++++
 roles/kepler/tasks/service.yml | 20 ++++++++++++++++++++
 3 files changed, 50 insertions(+)
 create mode 100644 roles/kepler/tasks/config.yml
 create mode 100644 roles/kepler/tasks/main.yml
 create mode 100644 roles/kepler/tasks/service.yml

diff --git a/roles/kepler/tasks/config.yml b/roles/kepler/tasks/config.yml
new file mode 100644
index 000000000..6a9814433
--- /dev/null
+++ b/roles/kepler/tasks/config.yml
@@ -0,0 +1,22 @@
+---
+- name: Create required directories
+  become: true
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    owner: "{{ operator_user }}"
+    group: "{{ operator_group }}"
+    mode: "0750"
+  loop:
+    - "{{ kepler_configuration_directory }}"
+    - "{{ kepler_docker_compose_directory }}"
+    - "{{ kepler_kubeconfig_directory }}"
+
+- name: Copy configuration file
+  ansible.builtin.template:
+    src: config.yaml.j2
+    dest: "{{ kepler_configuration_directory }}/config.yaml"
+    owner: "{{ operator_user }}"
+    group: "{{ operator_group }}"
+    mode: "0640"
+  notify: Restart kepler service
diff --git a/roles/kepler/tasks/main.yml b/roles/kepler/tasks/main.yml
new file mode 100644
index 000000000..c906e0e1f
--- /dev/null
+++ b/roles/kepler/tasks/main.yml
@@ -0,0 +1,8 @@
+---
+- name: Include config tasks
+  ansible.builtin.include_tasks: config.yml
+  tags: config
+
+- name: Include service tasks
+  ansible.builtin.include_tasks: service.yml
+  tags: service
diff --git a/roles/kepler/tasks/service.yml b/roles/kepler/tasks/service.yml
new file mode 100644
index 000000000..d8757ad1a
--- /dev/null
+++ b/roles/kepler/tasks/service.yml
@@ -0,0 +1,20 @@
+---
+- name: Copy docker-compose.yml file
+  ansible.builtin.template:
+    src: docker-compose.yml.j2
+    dest: "{{ kepler_docker_compose_directory }}/docker-compose.yml"
+    owner: "{{ operator_user }}"
+    group: "{{ operator_group }}"
+    mode: "0640"
+  notify: Restart kepler service
+
+- name: Manage kepler service
+  become: true
+  ansible.builtin.service:
+    name: "{{ kepler_service_name }}"
+    state: started
+    enabled: true
+  register: result
+  until: result["status"]["ActiveState"] == "active"
+  retries: 10
+  delay: 20

From 8652ce0237f066adb72db005eeb1f91f65bd4ad0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?=
Date: Wed, 8 Oct 2025 15:24:50 -0300
Subject: [PATCH 7/8] roles(kepler): add handlers tasks for kepler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Vinícius Zavam --- roles/kepler/handlers/main.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 roles/kepler/handlers/main.yml diff --git a/roles/kepler/handlers/main.yml b/roles/kepler/handlers/main.yml new file mode 100644 index 000000000..dc192063a --- /dev/null +++ b/roles/kepler/handlers/main.yml @@ -0,0 +1,10 @@ +--- +- name: Restart kepler service + become: true + ansible.builtin.service: + name: "{{ kepler_service_name }}" + state: restarted + register: result + until: result["status"]["ActiveState"] == "active" + retries: 10 + delay: 20 From 08169de011898832aaf63ecab1617da911bb65b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Zavam?= Date: Wed, 8 Oct 2025 15:25:17 -0300 Subject: [PATCH 8/8] roles: create new ansible role to deploy kepler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: vinícius Signed-off-by: Vinícius Zavam --- roles/kepler/meta/main.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 roles/kepler/meta/main.yml diff --git a/roles/kepler/meta/main.yml b/roles/kepler/meta/main.yml new file mode 100644 index 000000000..1d3cde852 --- /dev/null +++ b/roles/kepler/meta/main.yml @@ -0,0 +1,22 @@ +--- +galaxy_info: + author: Vinícius Zavam + description: Role osism.services.kepler + company: OSBA ECO:DIGIT + license: Apache License 2.0 + min_ansible_version: 2.16.0 + platforms: + - name: Ubuntu + versions: + - jammy + - noble + - name: Debian + versions: + - bookworm + - name: EL + versions: + - "9" + galaxy_tags: + - osism + - system +dependencies: []