diff --git a/Makefile b/Makefile
index 38aff1c..fe661c4 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ SRCS = $(wildcard $(SRCDIR)/*.c)
 OBJS = $(patsubst $(SRCDIR)/%.c,$(BUILDDIR)/%.o,$(SRCS))
 TARGET = $(BUILDDIR)/nvfd
 
-.PHONY: all clean check install uninstall
+.PHONY: all clean check install uninstall install-utils uninstall-utils
 
 all: $(TARGET)
 
@@ -50,7 +50,21 @@ install: $(TARGET)
 	install -d $(DESTDIR)$(UNITDIR)
 	install -m 644 systemd/nvfd.service $(DESTDIR)$(UNITDIR)/nvfd.service
 
+# Optional helpers: the temperature-aware mode-switching script and its unit.
+# This target does not depend on `install`, so create the destination
+# directories here; otherwise it fails on a fresh DESTDIR (e.g. packaging).
+install-utils:
+	install -d $(DESTDIR)$(BINDIR)
+	install -d $(DESTDIR)$(UNITDIR)
+	install -m 755 utils/nvfd-fan-control.sh $(DESTDIR)$(BINDIR)/
+	install -m 644 utils/nvfd-fan-control.service $(DESTDIR)$(UNITDIR)/
+
 uninstall:
 	rm -f $(DESTDIR)$(BINDIR)/nvfd
 	rm -f $(DESTDIR)$(UNITDIR)/nvfd.service
 	@echo "Config files preserved in $(CONFDIR). Remove manually if desired."
+
+# Remove only the files placed by install-utils; nvfd itself is left alone.
+uninstall-utils:
+	rm -f $(DESTDIR)$(BINDIR)/nvfd-fan-control.sh
+	rm -f $(DESTDIR)$(UNITDIR)/nvfd-fan-control.service
diff --git a/README.md b/README.md
index 4ad1ba1..0f9eae7 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ NVFD is an open-source NVIDIA GPU fan control daemon for Linux. It uses the NVML
 - Fixed fan speed mode
 - True auto mode (returns control to NVIDIA driver)
 - Multi-GPU support with per-GPU or all-GPU control, adaptive full/tabbed display
+- Per-GPU mode switching via CLI (`nvfd 0 auto`, `nvfd 1 curve`, etc.)
 - Real-time temperature, utilization, memory, and power monitoring
 - Systemd service with automatic fan reset on shutdown
 - Config hot-reload via SIGHUP
@@ -75,6 +76,13 @@ The install script will:
 - Set up the systemd service
 - Migrate any existing config from v1.x
 
+**Optional:** Install with utility scripts:
+```bash
+sudo scripts/install.sh --with-utils
+```
+
+See [Advanced Usage](#advanced-usage) for details on utility scripts.
+ ### Manual build ```bash @@ -83,17 +91,51 @@ sudo make install sudo systemctl enable --now nvfd.service ``` +**Optional: Install utilities** +```bash +sudo make install-utils +sudo systemctl daemon-reload +sudo systemctl enable --now nvfd-fan-control.service +``` + ## Uninstallation +### Using uninstall script + +**Without utilities (default):** ```bash sudo scripts/uninstall.sh ``` -Or manually: +**With utilities:** +```bash +sudo scripts/uninstall.sh --with-utils +``` + +### Manual uninstallation + +**Without utilities:** +```bash +sudo systemctl stop nvfd.service +sudo systemctl disable nvfd.service +sudo make uninstall +``` + +**With utilities:** ```bash sudo systemctl stop nvfd.service sudo systemctl disable nvfd.service +sudo systemctl stop nvfd-fan-control.service +sudo systemctl disable nvfd-fan-control.service sudo make uninstall +sudo make uninstall-utils +``` + +**Utilities only (keep nvfd):** +```bash +sudo systemctl stop nvfd-fan-control.service +sudo systemctl disable nvfd-fan-control.service +sudo make uninstall-utils ``` Config files in `/etc/nvfd/` are preserved. Remove manually if desired. @@ -101,18 +143,21 @@ Config files in `/etc/nvfd/` are preserved. Remove manually if desired. 
## Usage ``` -nvfd Interactive TUI dashboard (on TTY) -nvfd auto Return fan control to NVIDIA driver -nvfd curve Enable custom fan curve for all GPUs -nvfd curve Edit fan curve point (e.g., nvfd curve 60 70) -nvfd curve show Show current fan curve -nvfd curve edit Interactive curve editor (ncurses) -nvfd curve reset Reset fan curve to default -nvfd Set fixed fan speed for all GPUs (30-100) -nvfd Set fixed fan speed for specific GPU -nvfd list List all GPUs and their indices -nvfd status Show current status -nvfd -h Show help +nvfd Interactive TUI dashboard (on TTY) +nvfd auto Return fan control to NVIDIA driver +nvfd curve Enable custom fan curve for all GPUs +nvfd curve Edit fan curve point (e.g., nvfd curve 60 70) +nvfd curve show Show current fan curve +nvfd curve edit Interactive curve editor (ncurses) +nvfd curve reset Reset fan curve to default +nvfd Set fixed fan speed for all GPUs (30-100) +nvfd Set fixed fan speed for specific GPU +nvfd auto Set specific GPU to auto mode +nvfd curve Set specific GPU to curve mode +nvfd manual Set specific GPU to fixed speed +nvfd list List all GPUs and their indices +nvfd status Show current status +nvfd -h Show help ``` When run with no arguments on a TTY, `nvfd` launches the interactive TUI dashboard. @@ -169,6 +214,11 @@ nvfd 0 60 # Return all fans to driver control nvfd auto +# Per-GPU mode control +nvfd 0 auto # Set GPU 0 to auto mode +nvfd 1 curve # Set GPU 1 to curve mode +nvfd 0 manual 70 # Set GPU 0 to manual mode at 70% + # Use custom fan curve nvfd curve nvfd curve show @@ -215,6 +265,72 @@ sudo systemctl status nvfd # Check status The daemon resets all fans to driver-controlled auto mode on shutdown. 
+## Advanced Usage + +### Temperature-Aware Fan Control + +The `nvfd-fan-control.sh` utility provides automatic per-GPU fan mode switching based on temperature thresholds with hysteresis: + +```bash +# Run with default thresholds (up: 45°C, down: 35°C) +sudo nvfd-fan-control.sh + +# Custom thresholds with hysteresis +sudo nvfd-fan-control.sh --threshold-up 50 --threshold-down 40 + +# Verbose logging +sudo nvfd-fan-control.sh -v +``` + +**Hysteresis explained:** +- `--threshold-up 45`: Switch to **curve mode** when temperature **reaches 45°C or higher** +- `--threshold-down 35`: Switch to **auto mode** when temperature **drops to 35°C or lower** +- Between the two thresholds (36-44°C): **Keep current mode** (prevents thrashing) + +The script monitors GPU temperatures and automatically switches each GPU between: +- **Auto mode** (quiet) when temperature falls to or below threshold-down +- **Curve mode** (cooled) when temperature rises to or above threshold-up + +### Monitoring Mode Switches + +You can monitor mode switches in real-time using the **nvfd dashboard**: + +```bash +nvfd +``` + +The dashboard shows the current mode (Auto/Manual/Curve) for each GPU. When `nvfd-fan-control.sh` switches a GPU mode, the dashboard updates within 5 seconds. + +**Important notes:** +- **Polling interval:** The script checks GPU temperatures every **10 seconds** +- **Mode change latency:** A mode switch may take up to 10 seconds after temperature crosses the threshold +- **Dashboard update:** The nvfd daemon reads config every 5 seconds +- **Total latency:** Up to about 15 seconds (10-second polling plus 5-second config read) from temperature crossing threshold to dashboard showing new mode + +### Systemd Service + +`scripts/install.sh --with-utils` installs the service unit and enables and starts it automatically. After a manual `make install-utils` the unit is installed but **not enabled**; enable it with: + +```bash +sudo systemctl enable --now nvfd-fan-control.service +``` + +The service is ordered after `nvfd.service` and pulls it in via `Wants=`; the nvfd daemon picks up the mode changes it makes within 5 seconds. 
+ +**Customizing thresholds:** Edit `/etc/systemd/system/nvfd-fan-control.service` and modify the `ExecStart` line: +```ini +ExecStart=/usr/local/bin/nvfd-fan-control.sh --threshold-up 50 --threshold-down 40 +``` +Then reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart nvfd-fan-control.service` + +### Dependencies + +The utility script requires: +- `nvidia-smi` (included with NVIDIA drivers) +- `nvfd` binary (installed via this package) + +No CUDA toolkit required for runtime. + ## Migration from v1.x NVFD automatically migrates old configuration: diff --git a/README.zh-TW.md b/README.zh-TW.md index d49629d..0a803b0 100644 --- a/README.zh-TW.md +++ b/README.zh-TW.md @@ -15,6 +15,7 @@ NVFD 是一款開源的 Linux NVIDIA GPU 風扇控制守護程式。透過 NVML - 固定轉速模式 - 自動模式(將控制權交還 NVIDIA 驅動程式) - 多 GPU 支援,單卡或全卡控制,自適應全顯/分頁顯示 +- 透過 CLI 實現每張 GPU 的模式切換 (`nvfd 0 auto`, `nvfd 1 curve` 等) - 即時溫度、使用率、記憶體、功耗監控 - Systemd 服務,關機時自動重設風扇 - 透過 SIGHUP 熱載入設定 @@ -75,6 +76,13 @@ sudo scripts/install.sh - 設定 systemd 服務 - 自動遷移 v1.x 舊設定 +**可選:** 安裝實用工具腳本: +```bash +sudo scripts/install.sh --with-utils +``` + +詳見 [進階用法](#進階用法) 了解實用工具腳本的詳細資訊。 + ### 手動編譯 ```bash @@ -83,17 +91,51 @@ sudo make install sudo systemctl enable --now nvfd.service ``` +**可選:安裝實用工具** +```bash +sudo make install-utils +sudo systemctl daemon-reload +sudo systemctl enable --now nvfd-fan-control.service +``` + ## 解除安裝 +### 使用解除安裝腳本 + +**不包含實用工具(預設):** ```bash sudo scripts/uninstall.sh ``` -或手動執行: +**包含實用工具:** +```bash +sudo scripts/uninstall.sh --with-utils +``` + +### 手動解除安裝 + +**不包含實用工具:** +```bash +sudo systemctl stop nvfd.service +sudo systemctl disable nvfd.service +sudo make uninstall +``` + +**包含實用工具:** ```bash sudo systemctl stop nvfd.service sudo systemctl disable nvfd.service +sudo systemctl stop nvfd-fan-control.service +sudo systemctl disable nvfd-fan-control.service sudo make uninstall +sudo make uninstall-utils +``` + +**僅實用工具(保留 nvfd):** +```bash +sudo systemctl stop nvfd-fan-control.service +sudo systemctl 
disable nvfd-fan-control.service +sudo make uninstall-utils ``` 設定檔 `/etc/nvfd/` 會被保留,如需移除請手動刪除。 @@ -101,18 +143,21 @@ sudo make uninstall ## 使用方式 ``` -nvfd 互動式 TUI 儀表板(需在終端機執行) -nvfd auto 將風扇控制權交還 NVIDIA 驅動程式 -nvfd curve 啟用自訂風扇曲線 -nvfd curve <溫度> <轉速> 編輯風扇曲線控制點(例:nvfd curve 60 70) -nvfd curve show 顯示目前風扇曲線 -nvfd curve edit 互動式曲線編輯器(ncurses) -nvfd curve reset 重設風扇曲線為預設值 -nvfd <轉速> 設定所有 GPU 固定轉速(30-100) -nvfd <轉速> 設定指定 GPU 固定轉速 -nvfd list 列出所有 GPU -nvfd status 顯示目前狀態 -nvfd -h 顯示說明 +nvfd 互動式 TUI 儀表板(需在終端機執行) +nvfd auto 將風扇控制權交還 NVIDIA 驅動程式 +nvfd curve 啟用自訂風扇曲線 +nvfd curve <溫度> <轉速> 編輯風扇曲線控制點(例:nvfd curve 60 70) +nvfd curve show 顯示目前風扇曲線 +nvfd curve edit 互動式曲線編輯器(ncurses) +nvfd curve reset 重設風扇曲線為預設值 +nvfd <轉速> 設定所有 GPU 固定轉速(30-100) +nvfd <轉速> 設定指定 GPU 固定轉速 +nvfd auto 設定指定 GPU 為自動模式 +nvfd curve 設定指定 GPU 為曲線模式 +nvfd manual <轉速> 設定指定 GPU 為固定轉速 +nvfd list 列出所有 GPU +nvfd status 顯示目前狀態 +nvfd -h 顯示說明 ``` 在終端機中不帶參數執行 `nvfd` 會啟動互動式 TUI 儀表板。 @@ -169,6 +214,11 @@ nvfd 0 60 # 所有風扇交還驅動程式控制 nvfd auto +# 每張 GPU 的模式控制 +nvfd 0 auto # 設定 GPU 0 為自動模式 +nvfd 1 curve # 設定 GPU 1 為曲線模式 +nvfd 0 manual 70 # 設定 GPU 0 為手動模式 70% + # 使用自訂風扇曲線 nvfd curve nvfd curve show @@ -215,6 +265,72 @@ sudo systemctl status nvfd # 查看狀態 守護程式關閉時會自動將所有風扇重設為驅動程式控制的自動模式。 +## 進階用法 + +### 溫度感知風扇控制 + +`nvfd-fan-control.sh` 實用工具可根據溫度閾值(帶遲滯)自動切換每張 GPU 的風扇模式: + +```bash +# 使用預設閾值(up: 45°C, down: 35°C)運行 +sudo nvfd-fan-control.sh + +# 自訂閾值(帶遲滯) +sudo nvfd-fan-control.sh --threshold-up 50 --threshold-down 40 + +# 啟用詳細日誌 +sudo nvfd-fan-control.sh -v +``` + +**遲滯說明:** +- `--threshold-up 45`:溫度**上升至 45°C 以上**時切換到**曲線模式** +- `--threshold-down 35`:溫度**下降至 35°C 以下**時切換到**自動模式** +- 在 35-45°C 之間:**保持當前模式**(防止頻繁切換) + +該腳本會監控 GPU 溫度,並自動在以下模式之間切換每張 GPU: +- **自動模式**(安靜):溫度低於 threshold-down 時 +- **曲線模式**(散熱):溫度高於 threshold-up 時 + +### 監控模式切換 + +您可以使用 **nvfd 儀表板** 即時監控模式切換: + +```bash +nvfd +``` + +儀表板會顯示每張 GPU 的當前模式(Auto/Manual/Curve)。當 `nvfd-fan-control.sh` 切換 GPU 模式時,儀表板會在 5 秒內更新。 + +**重要說明:** +- **輪詢間隔:** 腳本每 **10 秒** 檢查一次 GPU 溫度 
+- **模式切換延遲:** 溫度超過閾值後,模式切換可能需要最多 10 秒 +- **儀表板更新:** nvfd 守護程式每 5 秒讀取配置 +- **總延遲:** 從溫度超過閾值到儀表板顯示新模式,總共需要 5-15 秒 + +#### Systemd 服務 + +使用 `--with-utils` 安裝後,服務單元已安裝但**預設未啟用**。啟用方法: + +```bash +sudo systemctl enable --now nvfd-fan-control.service +``` + +該服務依賴於 `nvfd.service`,並會在 5 秒內自動偵測到配置變更。 + +**自訂閾值:** 編輯 `/etc/systemd/system/nvfd-fan-control.service` 並修改 `ExecStart` 行: +```ini +ExecStart=/usr/local/bin/nvfd-fan-control.sh --threshold-up 50 --threshold-down 40 +``` +然後重新載入並重啟:`sudo systemctl daemon-reload && sudo systemctl restart nvfd-fan-control.service` + +### 依賴項 + +實用工具腳本需要: +- `nvidia-smi`(包含在 NVIDIA 驅動中) +- `nvfd` 二進位檔案(通過此套件安裝) + +運行時不需要 CUDA toolkit。 + ## 從 v1.x 遷移 NVFD 會自動遷移舊版設定: diff --git a/include/curve.h b/include/curve.h index 67a9a19..981c7ab 100644 --- a/include/curve.h +++ b/include/curve.h @@ -8,6 +8,7 @@ int curve_write(const FanCurve *curve); void curve_edit(int temp, int speed); void curve_reset(void); int curve_interpolate(int temp, const FanCurve *curve); +int curve_apply_to_gpu(unsigned int gpu_index); /* Default built-in curve interpolation (fallback when no curve file exists) */ int curve_default_interpolate(int temp); diff --git a/scripts/install.sh b/scripts/install.sh index dac95a9..10ac05e 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -6,6 +6,31 @@ if [ "$EUID" -ne 0 ]; then exit 1 fi +# Parse arguments +WITH_UTILS=false +while [[ $# -gt 0 ]]; do + case "$1" in + --with-utils) + WITH_UTILS=true + shift + ;; + --help) + echo "Usage: sudo $(basename "$0") [OPTIONS]" + echo + echo "Options:" + echo " --with-utils Install utility scripts (nvfd-fan-control.sh and service)" + echo " --help Show this help message" + echo + exit 0 + ;; + *) + echo "Unknown option: $1" + echo "Run with --help for usage information." 
+ exit 1 + ;; + esac +done + echo " _ ___ _______ ____ " echo " | \ | \ \ / / ___| _ \ " echo " | \| |\ \ / /| |_ | | | |" @@ -91,10 +116,22 @@ rm -f /usr/local/bin/infinirc_gpu_fan_control rm -f /usr/local/bin/igfc echo "Enabling and starting service..." + +# Install optional utilities +if [ "$WITH_UTILS" = true ]; then + echo "Installing utility scripts..." + make install-utils +fi + systemctl daemon-reload systemctl enable nvfd.service systemctl start nvfd.service +if [ "$WITH_UTILS" = true ]; then + systemctl enable nvfd-fan-control.service + systemctl start nvfd-fan-control.service +fi + cat << EOF ====================================================================== @@ -104,18 +141,24 @@ cat << EOF Service Status: Started Usage: - nvfd Interactive TUI dashboard (on TTY) - nvfd auto Return fan control to NVIDIA driver - nvfd curve Enable custom fan curve for all GPUs - nvfd curve Edit fan curve point - nvfd curve show Show current fan curve - nvfd curve edit Interactive curve editor (ncurses) - nvfd curve reset Reset fan curve to default - nvfd Set fixed fan speed for all GPUs (30-100) - nvfd Set fixed fan speed for specific GPU - nvfd list List all GPUs and their indices - nvfd status Show current status - nvfd -h Show help + nvfd Interactive TUI dashboard (on TTY) + nvfd auto Return fan control to NVIDIA driver + nvfd curve Enable custom fan curve for all GPUs + nvfd curve Edit fan curve point + nvfd curve show Show current fan curve + nvfd curve edit Interactive curve editor (ncurses) + nvfd curve reset Reset fan curve to default + nvfd Set fixed fan speed for all GPUs (30-100) + nvfd Set fixed fan speed for specific GPU + nvfd auto Set specific GPU to auto mode + nvfd curve Set specific GPU to curve mode + nvfd manual Set specific GPU to fixed speed + nvfd list List all GPUs and their indices + nvfd status Show current status + nvfd -h Show help + +Advanced: + nvfd-fan-control.sh Temperature-aware automatic fan mode switching Works on X11, Wayland, and 
headless systems. No nvidia-settings required. diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh index 044e89d..50f422b 100755 --- a/scripts/uninstall.sh +++ b/scripts/uninstall.sh @@ -1,6 +1,31 @@ #!/bin/bash set -e +# Parse arguments +WITH_UTILS=false +while [[ $# -gt 0 ]]; do + case "$1" in + --with-utils) + WITH_UTILS=true + shift + ;; + --help) + echo "Usage: sudo $(basename "$0") [OPTIONS]" + echo + echo "Options:" + echo " --with-utils Also remove utility scripts (nvfd-fan-control.sh and service)" + echo " --help Show this help message" + echo + exit 0 + ;; + *) + echo "Unknown option: $1" + echo "Run with --help for usage information." + exit 1 + ;; + esac +done + if [ "$EUID" -ne 0 ]; then echo "Please run this script with root privileges (sudo)" exit 1 @@ -40,6 +65,25 @@ if grep -q 'alias igfc=' /etc/bash.bashrc 2>/dev/null; then sed -i '/alias igfc=/d' /etc/bash.bashrc fi +# Remove optional utilities if requested +if [ "$WITH_UTILS" = true ]; then + echo "Removing utility scripts..." + + # Stop and disable fan-control service if running + if systemctl is-active --quiet nvfd-fan-control.service 2>/dev/null; then + systemctl stop nvfd-fan-control.service + fi + if systemctl is-enabled --quiet nvfd-fan-control.service 2>/dev/null; then + systemctl disable nvfd-fan-control.service + fi + + # Remove utility files + rm -f /usr/local/bin/nvfd-fan-control.sh + rm -f /etc/systemd/system/nvfd-fan-control.service + + systemctl daemon-reload +fi + echo "" echo "NVFD has been uninstalled." 
echo "" diff --git a/src/curve.c b/src/curve.c index 9057619..62ed8e3 100644 --- a/src/curve.c +++ b/src/curve.c @@ -3,6 +3,8 @@ #include #include #include "curve.h" +#include "fan.h" +#include "gpu.h" static int compare_points(const void *a, const void *b) { return ((const FanCurvePoint *)a)->temperature - @@ -158,6 +160,25 @@ int curve_interpolate(int temp, const FanCurve *curve) { return curve->points[curve->point_count - 1].fan_speed; } +int curve_apply_to_gpu(unsigned int gpu_index) { + nvmlDevice_t device; + if (gpu_get_handle(gpu_index, &device) != 0) + return -1; + + int temp = gpu_get_temperature(device); + if (temp < 0) + return -1; + + FanCurve *curve = curve_read(); + int fan_speed = curve ? + curve_interpolate(temp, curve) : + curve_default_interpolate(temp); + + if (curve) free(curve); + + return fan_set_gpu_speed(gpu_index, (unsigned int)fan_speed); +} + int curve_default_interpolate(int temp) { if (temp < 30) return 30; if (temp >= 75) return 100; diff --git a/src/display.c b/src/display.c index bb62892..9544e30 100644 --- a/src/display.c +++ b/src/display.c @@ -9,33 +9,39 @@ void display_help(void) { printf("NVIDIA Fan Daemon (NVFD) v%s\n\n", NVFD_VERSION); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| Command | Description |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd | Interactive TUI dashboard (on TTY) |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd auto | Return fan control to NVIDIA driver |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd curve | Enable custom fan curve for all GPUs |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd curve | Edit fan curve point |\n"); - 
printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd curve show | Show current fan curve |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd curve edit | Interactive curve editor (ncurses) |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd curve reset | Reset fan curve to default |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd | Set fixed fan speed for all GPUs (30-100)|\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd | Set fixed fan speed for specific GPU |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd list | List all GPUs and their indices |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd status | Show current status |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); - printf("| nvfd -h | Show this help message |\n"); - printf("+-----------------------------+-----------------------------------------+\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| Command | Description |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd | Interactive TUI dashboard (on TTY) |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd auto | Return fan control to NVIDIA driver |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd curve | Enable custom fan curve for all GPUs |\n"); + 
printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd curve | Edit fan curve point |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd curve show | Show current fan curve |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd curve edit | Interactive curve editor (ncurses) |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd curve reset | Reset fan curve to default |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd | Set fixed fan speed for all GPUs (30-100) |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd | Set fixed fan speed for specific GPU |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd auto | Set specific GPU to auto mode |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd curve | Set specific GPU to curve mode |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd manual | Set specific GPU to fixed speed |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd list | List all GPUs and their indices |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd status | Show current status |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); + printf("| nvfd -h | Show this help message |\n"); + printf("+----------------------------------+-------------------------------------------+\n"); } void 
display_status(void) {
diff --git a/src/main.c b/src/main.c
index 552a386..365ac39 100644
--- a/src/main.c
+++ b/src/main.c
@@ -214,7 +214,64 @@ int main(int argc, char *argv[]) {
         }
     } else if (argc == 3) {
         gpu_index = atoi(argv[1]);
-        speed = atoi(argv[2]);
+
+        /*
+         * Two-argument form: "nvfd GPU auto", "nvfd GPU curve" or "nvfd GPU SPEED".
+         * The index must name an existing device before the second argument is read.
+         */
+        if (gpu_index >= 0 && gpu_index < (int)device_count) {
+            /* Config key for this GPU ("gpu0", "gpu1", ...), used by both keywords. */
+            char gpu_key[20];
+            snprintf(gpu_key, sizeof(gpu_key), "gpu%d", gpu_index);
+
+            if (strcmp(argv[2], "auto") == 0) {
+                config_write_gpu(gpu_key, "auto", 0);
+                fan_reset_to_auto((unsigned int)gpu_index);
+                printf("GPU %d set to auto mode (driver-controlled).\n", gpu_index);
+            } else if (strcmp(argv[2], "curve") == 0) {
+                config_write_gpu(gpu_key, "curve", 0);
+                /*
+                 * curve_apply_to_gpu() returns -1 when the device handle or the
+                 * temperature cannot be read. Report that and exit non-zero so
+                 * callers checking the exit status do not see a false success.
+                 */
+                if (curve_apply_to_gpu((unsigned int)gpu_index) < 0) {
+                    fprintf(stderr, "Failed to apply fan curve to GPU %d.\n", gpu_index);
+                    gpu_shutdown();
+                    return 1;
+                }
+                printf("GPU %d set to curve mode.\n", gpu_index);
+            } else {
+                /* Not a keyword: treat the argument as a fixed speed. */
+                speed = atoi(argv[2]);
+            }
+        } else {
+            printf("Invalid GPU index. Use 'nvfd list' to see available GPUs.\n");
+            display_help();
+            gpu_shutdown();
+            return 1;
+        }
+    } else if (argc == 4) {
+        /*
+         * Three-argument form: "nvfd GPU manual SPEED".
+         * NOTE(review): "nvfd curve TEMP SPEED" also has three arguments;
+         * confirm it is handled before this branch is reached.
+         */
+        int gpu_idx = atoi(argv[1]);
+        if (strcmp(argv[2], "manual") != 0) {
+            printf("Invalid command: %s\n", argv[2]);
+            display_help();
+            gpu_shutdown();
+            return 1;
+        } else if (gpu_idx < 0 || gpu_idx >= (int)device_count) {
+            printf("Invalid GPU index. Use 'nvfd list' to see available GPUs.\n");
+            display_help();
+            gpu_shutdown();
+            return 1;
+        } else {
+            speed = atoi(argv[3]);
+            gpu_index = gpu_idx;
+        }
     }
 
     if (speed >= 30 && speed <= 100) {
@@ -236,7 +293,8 @@ int main(int argc, char *argv[]) {
         } else {
             printf("Invalid GPU index. Use 'nvfd list' to see available GPUs.\n");
         }
-    } else {
+    } else if (speed != -1) {
+        /* NOTE(review): assumes speed starts at -1 (no speed requested) - confirm. */
         printf("Invalid speed. 
Use a value between 30 and 100.\n");
         display_help();
     }
diff --git a/utils/nvfd-fan-control.service b/utils/nvfd-fan-control.service
new file mode 100644
index 0000000..8573d91
--- /dev/null
+++ b/utils/nvfd-fan-control.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=NVIDIA Fan Control - Temperature-aware per-GPU mode switching
+After=nvfd.service
+Wants=nvfd.service
+
+[Service]
+Type=simple
+ExecStart=/usr/local/bin/nvfd-fan-control.sh --threshold-up 45 --threshold-down 35
+Restart=always
+RestartSec=5
+User=root
+
+[Install]
+WantedBy=multi-user.target
diff --git a/utils/nvfd-fan-control.sh b/utils/nvfd-fan-control.sh
new file mode 100755
index 0000000..7a18b5d
--- /dev/null
+++ b/utils/nvfd-fan-control.sh
@@ -0,0 +1,154 @@
+#!/usr/bin/env bash
+# =============================================================================
+# nvfd-fan-control.sh - Temperature-aware per-GPU fan mode switching
+# =============================================================================
+
+set -euo pipefail
+
+# `command -v` exits non-zero when nvfd is not on PATH; `|| true` keeps
+# `set -e` from aborting here so the explicit check below can report it.
+NVFD="${NVFD:-$(command -v nvfd || true)}"
+THRESHOLD_UP=45
+THRESHOLD_DOWN=35
+VERBOSE=false
+LOCKFILE="/var/run/nvfd-fan-control.lock"
+
+usage() {
+    cat <&2; exit 1; }
+            THRESHOLD_UP="$2"; shift 2
+            ;;
+        -d|--threshold-down)
+            [[ -z "${2:-}" || "$2" == -* ]] && { echo "ERROR: --threshold-down requires a value" >&2; exit 1; }
+            THRESHOLD_DOWN="$2"; shift 2
+            ;;
+        -v|--verbose) VERBOSE=true; shift ;;
+        -h|--help) usage ;;
+        *) echo "ERROR: Unknown option: $1" >&2; usage ;;
+    esac
+done
+
+# Validate thresholds
+[[ ! "$THRESHOLD_UP" =~ ^[0-9]+$ ]] && { echo "ERROR: Invalid threshold-up: $THRESHOLD_UP" >&2; exit 1; }
+[[ ! "$THRESHOLD_DOWN" =~ ^[0-9]+$ ]] && { echo "ERROR: Invalid threshold-down: $THRESHOLD_DOWN" >&2; exit 1; }
+
+# Force base 10 so a leading zero (e.g. "08") is not treated as octal
+THRESHOLD_UP=$((10#$THRESHOLD_UP))
+THRESHOLD_DOWN=$((10#$THRESHOLD_DOWN))
+
+# Hysteresis needs a gap: the lower threshold must be below the upper one
+[[ "$THRESHOLD_DOWN" -ge "$THRESHOLD_UP" ]] && { echo "ERROR: threshold-down (${THRESHOLD_DOWN}) must be lower than threshold-up (${THRESHOLD_UP})" >&2; exit 1; }
+
+# Check nvfd is available
+[[ -z "$NVFD" || ! -x "$NVFD" ]] && { echo "ERROR: nvfd command not found" >&2; exit 1; }
+
+# Check for root privileges
+[[ "$EUID" -ne 0 ]] && { echo "ERROR: This script must be run as root (use sudo)" >&2; exit 1; }
+
+# Acquire file lock to prevent multiple instances
+exec 200>"$LOCKFILE"
+if ! flock -n 200; then
+    echo "ERROR: Another instance of nvfd-fan-control is already running" >&2
+    echo " Remove $LOCKFILE if no instance is running" >&2
+    exit 1
+fi
+
+# Initialize
+declare -a GPU_MODES=()
+declare -a GPU_NAMES=()
+# With `pipefail`, a failing nvidia-smi would abort the script silently; mask
+# the failure so the "no GPUs" check below prints a proper error instead.
+NUM_GPUS=$( { nvidia-smi --list-gpus 2>/dev/null || true; } | wc -l)
+
+[[ "$NUM_GPUS" -eq 0 ]] && { echo "ERROR: No NVIDIA GPUs detected!" >&2; exit 1; }
+
+# Get GPU names (a failed lookup only affects log output, so fall back to a placeholder)
+for i in $(seq 0 $((NUM_GPUS - 1))); do
+    GPU_MODES+=("")
+    GPU_NAMES[$i]=$(nvidia-smi --query-gpu=name --format=csv,noheader,nounits --id="$i" 2>/dev/null || echo "unknown")
+done
+
+echo "[INFO] Detected $NUM_GPUS GPU(s)"
+
+# Graceful shutdown
+cleanup() {
+    echo "[INFO] Shutting down, resetting all GPUs to auto mode..."
+    for i in $(seq 0 $((NUM_GPUS - 1))); do
+        "$NVFD" "$i" auto >/dev/null 2>&1 || true
+    done
+    rm -f "$LOCKFILE"
+    exit 0
+}
+trap cleanup SIGINT SIGTERM
+
+# Main loop
+echo "[INFO] Fan control started (threshold-up: ${THRESHOLD_UP}°C, threshold-down: ${THRESHOLD_DOWN}°C)"
+
+while true; do
+    for i in $(seq 0 $((NUM_GPUS - 1))); do
+        current_mode="${GPU_MODES[$i]:-unknown}"
+        gpu_name="${GPU_NAMES[$i]}"
+
+        # A failed or non-numeric reading (e.g. "[N/A]") must not abort the daemon
+        # under `set -e`; skip this GPU until the next polling cycle instead.
+        temp=$(nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader,nounits --id="$i" 2>/dev/null || true)
+        temp="${temp//[[:space:]]/}"
+        if [[ ! "$temp" =~ ^[0-9]+$ ]]; then
+            echo "[WARN] GPU $i ($gpu_name): could not read temperature, skipping this cycle" >&2
+            continue
+        fi
+
+        [[ "$VERBOSE" == "true" ]] && echo "[INFO] GPU $i ($gpu_name): ${temp}°C | Mode: $current_mode"
+
+        if [[ "$temp" -ge "$THRESHOLD_UP" ]]; then
+            if [[ "$current_mode" != "curve" ]]; then
+                if "$NVFD" "$i" curve >/dev/null 2>&1; then
+                    GPU_MODES[$i]="curve"
+                    [[ "$VERBOSE" == "true" ]] && echo "[INFO] GPU $i ($gpu_name) → curve mode (temp: ${temp}°C, threshold-up: ${THRESHOLD_UP}°C)"
+                else
+                    echo "[ERROR] Failed to set GPU $i ($gpu_name) to curve mode" >&2
+                fi
+            fi
+        elif [[ "$temp" -le "$THRESHOLD_DOWN" ]]; then
+            if [[ "$current_mode" != "auto" ]]; then
+                if "$NVFD" "$i" auto >/dev/null 2>&1; then
+                    GPU_MODES[$i]="auto"
+                    [[ "$VERBOSE" == "true" ]] && echo "[INFO] GPU $i ($gpu_name) → auto mode (temp: ${temp}°C, threshold-down: ${THRESHOLD_DOWN}°C)"
+                else
+                    echo "[ERROR] Failed to set GPU $i ($gpu_name) to auto mode" >&2
+                fi
+            fi
+        fi
+        # If temp is between DOWN and UP, keep current mode (no action)
+    done
+
+    sleep 10
+done