Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -658,18 +658,21 @@ set(SOURCES_CORE
# Per-platform core sources. Windows and macOS get the no-op suspend stub;
# other UNIX targets get the Linux suspend implementation.
if(WIN32)
    list(APPEND SOURCES_CORE
        src/cpp/server/utils/platform/path_windows.cpp
        src/cpp/server/platform/metrics_windows.cpp
        src/cpp/server/platform/suspend_stub.cpp
        src/cpp/server/utils/platform/archive_windows.cpp
        src/cpp/server/utils/platform/process_windows.cpp
    )
elseif(APPLE)
    list(APPEND SOURCES_CORE
        src/cpp/server/macos_system_info.mm
        src/cpp/server/utils/platform/path_macos.cpp
        src/cpp/server/platform/metrics_macos.cpp
        src/cpp/server/platform/suspend_stub.cpp
        src/cpp/server/utils/platform/archive_unix.cpp
        src/cpp/server/utils/platform/process_macos.cpp
    )
    # The .mm file must be compiled as Objective-C++.
    set_source_files_properties(src/cpp/server/macos_system_info.mm PROPERTIES LANGUAGE OBJCXX)
elseif(UNIX)
    list(APPEND SOURCES_CORE
        src/cpp/server/utils/platform/path_linux.cpp
        src/cpp/server/platform/metrics_linux.cpp
        src/cpp/server/platform/suspend_linux.cpp
        src/cpp/server/utils/platform/archive_unix.cpp
        src/cpp/server/utils/platform/process_unix.cpp
    )
endif()
Expand Down Expand Up @@ -2048,6 +2051,13 @@ if(EXISTS "${_TELEMETRY_HELPERS_TEST_SRC}")
add_test(NAME TelemetryHelpersTest COMMAND test_telemetry_helpers)
endif()

# Register the suspend-inhibitor unit test, but only when its source file is
# present in this checkout.
set(_SUSPEND_INHIBITOR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/test/cpp/test_suspend_inhibitor.cpp")
if(EXISTS "${_SUSPEND_INHIBITOR_TEST_SRC}")
    add_executable(test_suspend_inhibitor "${_SUSPEND_INHIBITOR_TEST_SRC}")
    target_link_libraries(test_suspend_inhibitor
        PRIVATE
            lemonade-server-core
    )
    add_test(
        NAME SuspendInhibitorTest
        COMMAND test_suspend_inhibitor
    )
endif()

set(_CONFIG_MIGRATION_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/test/cpp/test_config_migration.cpp")
if(EXISTS "${_CONFIG_MIGRATION_TEST_SRC}")
add_executable(test_config_migration test/cpp/test_config_migration.cpp)
Expand Down
2 changes: 2 additions & 0 deletions docs/guide/configuration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Values set in the user's `config.json` always take precedence over these seeded
},
"global_timeout": 600,
"host": "localhost",
"inhibit_suspend": true,
"kokoro": {
"cpu_bin": "builtin"
},
Expand Down Expand Up @@ -144,6 +145,7 @@ Values set in the user's `config.json` always take precedence over these seeded
| `offline` | bool | false | Skip model downloads |
| `no_fetch_executables` | bool | false | Prevent downloading backend executable artifacts; backends must already be installed or use the system backend |
| `disable_model_filtering` | bool | false | Show all models regardless of hardware capabilities |
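| `inhibit_suspend` | bool | true | Prevent the OS from suspending or idling while an inference request is in progress. Linux only (takes a systemd-logind `sleep:idle` block inhibitor); no-op on Windows, macOS, builds without systemd support, and systems where logind is unreachable. |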
| `enable_dgpu_gtt` | bool | false | Include GTT for hardware-based model filtering |
| `rocm_channel` | string | "stable" | ROCm backend channel: "stable" (default) or "nightly". See [llama.cpp Backend](./llamacpp.md) for details |

Expand Down
2 changes: 2 additions & 0 deletions src/cpp/include/lemon/router.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct ModelTelemetryRecord {

class EvictionEngine;
class GlobalVramMonitor;
class SuspendInhibitor;

class Router {
public:
Expand Down Expand Up @@ -158,6 +159,7 @@ class Router {

std::unique_ptr<GlobalVramMonitor> vram_monitor_;
std::unique_ptr<EvictionEngine> eviction_engine_;
std::unique_ptr<SuspendInhibitor> suspend_inhibitor_;

// Helper methods for multi-model management
WrappedServer* find_server_by_model_name(const std::string& model_name) const;
Expand Down
1 change: 1 addition & 0 deletions src/cpp/include/lemon/runtime_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class RuntimeConfig {
int ctx_size() const;
bool auto_evict() const;
double auto_evict_threshold_pct() const;
bool inhibit_suspend() const;

// Telemetry settings
bool telemetry_enabled() const;
Expand Down
53 changes: 53 additions & 0 deletions src/cpp/include/lemon/suspend_inhibitor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#pragma once

#include <memory>
#include <mutex>

namespace lemon {

// Reference-counted guard that keeps the OS from suspending/idling while
// inference is running. Only the 0->1 acquire() takes the OS-level lock and
// only the matching 1->0 release() gives it back; calls in between merely
// adjust the count.
//
// Linux: backed by systemd-logind Inhibit ("sleep:idle", "block"). When logind
// cannot be reached (containers, WSL, minimal environments) the feature turns
// into a no-op: the first failure is logged once at debug level and later
// acquisitions stay silent.
// macOS / Windows / non-systemd builds: no-op.
class SuspendInhibitor {
public:
    virtual ~SuspendInhibitor() = default;

    // Adds one holder. The platform hook runs (under the internal mutex) only
    // when this call moves the count from zero to one.
    void acquire() {
        std::lock_guard<std::mutex> guard(mutex_);
        const bool was_idle = (refcount_ == 0);
        ++refcount_;
        if (was_idle) {
            on_first_acquire();
        }
    }

    // Removes one holder. A release() with no outstanding acquire() is
    // ignored; the platform hook runs only when the count drops back to zero.
    void release() {
        std::lock_guard<std::mutex> guard(mutex_);
        if (refcount_ <= 0) {
            return;
        }
        --refcount_;
        if (refcount_ == 0) {
            on_last_release();
        }
    }

    // Test hook: snapshot of the current holder count, read under the mutex.
    int refcount() const {
        std::lock_guard<std::mutex> guard(mutex_);
        return refcount_;
    }

protected:
    // Platform hook invoked exactly on the 0->1 transition.
    virtual void on_first_acquire() {}
    // Platform hook invoked exactly on the 1->0 transition.
    virtual void on_last_release() {}

private:
    mutable std::mutex mutex_;
    int refcount_ = 0;
};

// Factory for the SuspendInhibitor used by this build. The definition is
// supplied by the platform-specific source file compiled into the target;
// the caller owns the returned object.
std::unique_ptr<SuspendInhibitor> create_suspend_inhibitor();

} // namespace lemon
1 change: 1 addition & 0 deletions src/cpp/resources/defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
},
"global_timeout": 600,
"host": "localhost",
"inhibit_suspend": true,
"kokoro": {
"cpu_bin": "builtin"
},
Expand Down
120 changes: 120 additions & 0 deletions src/cpp/server/platform/suspend_linux.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#include <lemon/suspend_inhibitor.h>

#include <mutex>

#ifdef HAVE_SYSTEMD
#include <fcntl.h>
#include <systemd/sd-bus.h>
#include <unistd.h>
#endif

#include <lemon/utils/aixlog.hpp>

namespace lemon {

#ifdef HAVE_SYSTEMD

namespace {

// Requests a "block"-mode logind inhibitor for "sleep:idle" over the system
// D-Bus (org.freedesktop.login1.Manager.Inhibit).
//
// Returns a duplicated file descriptor owned by the caller, or -1 on any
// failure (bus unreachable, Inhibit call rejected, fd could not be read or
// duplicated). Every failure path logs at debug level. The bus connection and
// reply message are released before returning.
int take_logind_inhibitor() {
    sd_bus* bus = nullptr;
    sd_bus_message* reply = nullptr;
    sd_bus_error error = SD_BUS_ERROR_NULL;

    int r = sd_bus_open_system(&bus);
    if (r < 0 || !bus) {
        LOG(DEBUG, "Suspend") << "logind: cannot connect to system bus; will not retry" << std::endl;
        sd_bus_error_free(&error);
        return -1;
    }

    r = sd_bus_call_method(
        bus,
        "org.freedesktop.login1",
        "/org/freedesktop/login1",
        "org.freedesktop.login1.Manager",
        "Inhibit",
        &error,
        &reply,
        "ssss",
        "sleep:idle",
        "lemonade",
        "Inference in progress",
        "block"
    );

    if (r < 0 || !reply) {
        LOG(DEBUG, "Suspend") << "logind Inhibit failed: "
                              << (error.message ? error.message : "unknown dbus error")
                              << "; will not retry" << std::endl;
        sd_bus_error_free(&error);
        sd_bus_unref(bus);
        return -1;
    }

    // The fd read from the reply belongs to the message, so duplicate it to
    // outlive sd_bus_message_unref. Duplicate with close-on-exec set: a plain
    // dup() would let any child process spawned while the lock is held inherit
    // the descriptor and keep the inhibitor alive after we close our copy.
    int lock_fd = -1;
    r = sd_bus_message_read(reply, "h", &lock_fd);
    const int dup_fd = (r >= 0 && lock_fd >= 0) ? fcntl(lock_fd, F_DUPFD_CLOEXEC, 3) : -1;

    sd_bus_message_unref(reply);
    sd_bus_error_free(&error);
    sd_bus_unref(bus);

    if (dup_fd < 0) {
        LOG(DEBUG, "Suspend") << "logind: failed to read inhibitor fd; will not retry" << std::endl;
    }
    return dup_fd;
}

// SuspendInhibitor backed by a systemd-logind inhibitor fd. The lock is held
// for as long as lock_fd_ stays open. After the first failed attempt to obtain
// the lock, later acquisitions are skipped for the lifetime of the object.
class LinuxSuspendInhibitor : public SuspendInhibitor {
public:
    // Closes the inhibitor fd if the object is destroyed while the lock is
    // still held, so the descriptor (and the logind lock) is not leaked.
    ~LinuxSuspendInhibitor() override {
        close_lock_fd();
    }

protected:
    // 0->1 transition: try to take the logind lock unless a previous attempt
    // already failed.
    void on_first_acquire() override {
        if (acquire_failed_) {
            return;
        }
        lock_fd_ = take_logind_inhibitor();
        if (lock_fd_ < 0) {
            acquire_failed_ = true;
        }
    }

    // 1->0 transition: closing the fd drops the logind lock.
    void on_last_release() override {
        close_lock_fd();
    }

private:
    // Closes lock_fd_ if it is open and marks it as released.
    void close_lock_fd() {
        if (lock_fd_ >= 0) {
            close(lock_fd_);
            lock_fd_ = -1;
        }
    }

    int lock_fd_ = -1;
    bool acquire_failed_ = false;
};

} // namespace

// systemd build: hand out the logind-backed inhibitor.
std::unique_ptr<SuspendInhibitor> create_suspend_inhibitor() {
    std::unique_ptr<SuspendInhibitor> inhibitor = std::make_unique<LinuxSuspendInhibitor>();
    return inhibitor;
}

#else // HAVE_SYSTEMD

namespace {
// Inhibitor for builds without systemd: it overrides neither hook, so
// acquire()/release() only maintain the base-class refcount.
class NoopSuspendInhibitor final : public SuspendInhibitor {};
}  // namespace

// Non-systemd build: note (at debug level) that the feature is unavailable and
// return the no-op inhibitor.
std::unique_ptr<SuspendInhibitor> create_suspend_inhibitor() {
    LOG(DEBUG, "Suspend") << "Built without systemd; suspend inhibition disabled" << std::endl;
    std::unique_ptr<SuspendInhibitor> inhibitor = std::make_unique<NoopSuspendInhibitor>();
    return inhibitor;
}

#endif // HAVE_SYSTEMD

} // namespace lemon
16 changes: 16 additions & 0 deletions src/cpp/server/platform/suspend_stub.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include <lemon/suspend_inhibitor.h>

namespace lemon {

namespace {
// Stub inhibitor: it overrides neither hook, so acquire()/release() only
// maintain the base-class refcount and never touch the OS.
class NoopSuspendInhibitor final : public SuspendInhibitor {};
}  // namespace

// Stub factory: always returns the no-op inhibitor.
std::unique_ptr<SuspendInhibitor> create_suspend_inhibitor() {
    std::unique_ptr<SuspendInhibitor> inhibitor = std::make_unique<NoopSuspendInhibitor>();
    return inhibitor;
}

} // namespace lemon
27 changes: 27 additions & 0 deletions src/cpp/server/router.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,33 @@
#include "lemon/utils/aixlog.hpp"
#include "lemon/global_vram_monitor.h"
#include "lemon/eviction_engine.h"
#include "lemon/suspend_inhibitor.h"
#include "lemon/utils/http_client.h"

namespace lemon {

namespace {

// Scoped holder of one suspend-inhibitor reference for a single inference.
// The reference is taken in the constructor only when the feature is enabled
// and an inhibitor was supplied, and is returned in the destructor, so every
// early-return and exception path releases it.
class InhibitGuard {
public:
    InhibitGuard(SuspendInhibitor* inhibitor, bool enabled) {
        if (enabled && inhibitor != nullptr) {
            inhibitor_ = inhibitor;
            inhibitor_->acquire();
        }
    }

    ~InhibitGuard() {
        if (inhibitor_ != nullptr) {
            inhibitor_->release();
        }
    }

    // Non-copyable: a copy would release the same reference twice.
    InhibitGuard(const InhibitGuard&) = delete;
    InhibitGuard& operator=(const InhibitGuard&) = delete;

private:
    // Null when the guard is inactive (feature disabled or no inhibitor).
    SuspendInhibitor* inhibitor_ = nullptr;
};

} // namespace

Router::Router(RuntimeConfig* config, ModelManager* model_manager, BackendManager* backend_manager)
: config_(config), model_manager_(model_manager), backend_manager_(backend_manager) {
Expand All @@ -43,6 +65,7 @@ Router::Router(RuntimeConfig* config, ModelManager* model_manager, BackendManage

vram_monitor_ = std::make_unique<GlobalVramMonitor>();
eviction_engine_ = std::make_unique<EvictionEngine>(this, vram_monitor_.get());
suspend_inhibitor_ = create_suspend_inhibitor();

// Always start the monitor/engine threads; they are cheap no-ops until the
// user opts in. The monitor skips the VRAM poll when auto_evict is disabled,
Expand Down Expand Up @@ -864,6 +887,8 @@ auto Router::execute_inference(const json& request, Func&& inference_func) -> de
);
}

InhibitGuard inhibit_guard(suspend_inhibitor_.get(), config_->inhibit_suspend());

try {
auto response = inference_func(server);
const bool watchdog_reset =
Expand Down Expand Up @@ -975,6 +1000,8 @@ void Router::execute_streaming(const std::string& request_body, httplib::DataSin
return;
}

InhibitGuard inhibit_guard(suspend_inhibitor_.get(), config_->inhibit_suspend());

try {
streaming_func(server);
const bool watchdog_reset = server->was_watchdog_triggered();
Expand Down
12 changes: 12 additions & 0 deletions src/cpp/server/runtime_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,14 @@ bool RuntimeConfig::auto_evict() const {
return false;
}

// Reports whether inference should hold a suspend inhibitor. Reads the
// "inhibit_suspend" key while holding a std::shared_lock on mutex_; when the
// key is absent the setting defaults to true.
bool RuntimeConfig::inhibit_suspend() const {
    std::shared_lock lock(mutex_);
    if (!config_.contains("inhibit_suspend")) {
        return true;
    }
    return config_["inhibit_suspend"].get<bool>();
}

double RuntimeConfig::auto_evict_threshold_pct() const {
std::shared_lock lock(mutex_);
if (config_.contains("auto_evict_threshold_pct")) {
Expand Down Expand Up @@ -557,6 +565,10 @@ void RuntimeConfig::validate(const std::string& key, const json& value) const {
if (!value.is_boolean()) {
throw std::invalid_argument("'auto_evict' must be a boolean");
}
} else if (key == "inhibit_suspend") {
if (!value.is_boolean()) {
throw std::invalid_argument("'inhibit_suspend' must be a boolean");
}
} else if (key == "auto_evict_threshold_pct") {
if (!value.is_number()) {
throw std::invalid_argument("'auto_evict_threshold_pct' must be a number");
Expand Down
Loading
Loading