Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Node Drain/resume event #436

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions protos/Plugin.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ package crane.grpc.plugin;
option go_package = "/protos";

import "PublicDefs.proto";
import "google/protobuf/timestamp.proto";

message StartHookRequest {
repeated TaskInfo task_info_list = 1;
Expand Down Expand Up @@ -70,10 +71,28 @@ message DestroyCgroupHookReply {
bool ok = 1;
}

message CranedEventInfo {
google.protobuf.Timestamp start_time = 1;
string node_name = 2;
string cluster_name = 3;
string reason = 4;
CranedControlState state = 5;
uint32 uid = 6;
}

message NodeEventHookRequest {
repeated CranedEventInfo event_info_list = 1;
}

message NodeEventHookReply {
bool ok = 1;
}

service CranePluginD {
/* ----------------------------------- Called from CraneCtld ---------------------------------------------------- */
rpc StartHook(StartHookRequest) returns (StartHookReply);
rpc EndHook(EndHookRequest) returns (EndHookReply);
rpc NodeEventHook(NodeEventHookRequest) returns (NodeEventHookReply);

/* ----------------------------------- Called from Craned ---------------------------------------------------- */
rpc CreateCgroupHook(CreateCgroupHookRequest) returns (CreateCgroupHookReply);
Expand Down
3 changes: 3 additions & 0 deletions src/CraneCtld/CraneCtld.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ void ParseConfig(int argc, char** argv) {
try {
YAML::Node config = YAML::LoadFile(config_path);

if (config["ClusterName"])
g_config.CraneClusterName = config["ClusterName"].as<std::string>();

if (config["CraneBaseDir"])
g_config.CraneBaseDir = config["CraneBaseDir"].as<std::string>();
else
Expand Down
43 changes: 43 additions & 0 deletions src/CraneCtld/CranedMetaContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include "CranedKeeper.h"
#include "protos/PublicDefs.pb.h"
#include "crane/PluginClient.h"

namespace Ctld {

Expand Down Expand Up @@ -542,6 +543,14 @@ crane::grpc::QueryClusterInfoReply CranedMetaContainer::QueryClusterInfo(
crane::grpc::ModifyCranedStateReply CranedMetaContainer::ChangeNodeState(
const crane::grpc::ModifyCranedStateRequest& request) {
crane::grpc::ModifyCranedStateReply reply;
std::vector<crane::grpc::plugin::CranedEventInfo> event_list;
crane::grpc::plugin::CranedEventInfo event;

if (g_config.Plugin.Enabled) {
event.set_cluster_name(g_config.CraneClusterName);
event.set_uid(request.uid());
event.set_reason(request.reason());
}

for (auto craned_id : request.craned_ids()) {
if (!craned_meta_map_.Contains(craned_id)) {
Expand All @@ -554,11 +563,41 @@ crane::grpc::ModifyCranedStateReply CranedMetaContainer::ChangeNodeState(

if (craned_meta->alive) {
if (request.new_state() == crane::grpc::CranedControlState::CRANE_DRAIN) {
if (craned_meta->drain != true && g_config.Plugin.Enabled) {
//insert event info
event.set_node_name(craned_id);
event.set_state(crane::grpc::CranedControlState::CRANE_DRAIN);
//set now time
absl::Time now = absl::Now();
int64_t seconds = absl::ToUnixSeconds(now);
int32_t nanos = static_cast<int32_t>(absl::ToUnixNanos(now) % 1000000000);

auto timestamp = std::make_unique<::google::protobuf::Timestamp>();
timestamp->set_seconds(seconds);
timestamp->set_nanos(nanos);
event.set_allocated_start_time(timestamp.release());
event_list.emplace_back(event);
}
craned_meta->drain = true;
craned_meta->state_reason = request.reason();
reply.add_modified_nodes(craned_id);
} else if (request.new_state() ==
crane::grpc::CranedControlState::CRANE_NONE) {
if (craned_meta->drain != false && g_config.Plugin.Enabled) {
//insert event info
event.set_node_name(craned_id);
event.set_state(crane::grpc::CranedControlState::CRANE_NONE);
//set now time
absl::Time now = absl::Now();
int64_t seconds = absl::ToUnixSeconds(now);
int32_t nanos = static_cast<int32_t>(absl::ToUnixNanos(now) % 1000000000);

auto timestamp = std::make_unique<::google::protobuf::Timestamp>();
timestamp->set_seconds(seconds);
timestamp->set_nanos(nanos);
event.set_allocated_start_time(timestamp.release());
event_list.emplace_back(event);
}
craned_meta->drain = false;
craned_meta->state_reason.clear();
reply.add_modified_nodes(craned_id);
Expand All @@ -571,6 +610,10 @@ crane::grpc::ModifyCranedStateReply CranedMetaContainer::ChangeNodeState(
reply.add_not_modified_reasons("Can't change the state of a DOWN node!");
}
}

if (g_config.Plugin.Enabled && !event_list.empty()) {
g_plugin_client->NodeEventHookAsync(std::move(event_list));
}
return reply;
}

Expand Down
1 change: 1 addition & 0 deletions src/CraneCtld/CtldPublicDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ struct Config {

bool CompressedRpc{};

std::string CraneClusterName;
std::string CraneCtldDebugLevel;
std::string CraneCtldLogFile;

Expand Down
27 changes: 27 additions & 0 deletions src/Utilities/PluginClient/PluginClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,20 @@ grpc::Status PluginClient::SendDestroyCgroupHook_(
return m_stub_->DestroyCgroupHook(context, *request, &reply);
}

grpc::Status PluginClient::NodeEventHook_(grpc::ClientContext* context,
google::protobuf::Message* msg) {
using crane::grpc::plugin::NodeEventHookReply;
using crane::grpc::plugin::NodeEventHookRequest;

auto* request = dynamic_cast<NodeEventHookRequest*>(msg);
CRANE_ASSERT(request != nullptr);

NodeEventHookReply reply;

CRANE_TRACE("[Plugin] Sending NodeEventHook");
return m_stub_->NodeEventHook(context, *request, &reply);
}

void PluginClient::StartHookAsync(std::vector<crane::grpc::TaskInfo> tasks) {
auto request = std::make_unique<crane::grpc::plugin::StartHookRequest>();
auto* task_list = request->mutable_task_info_list();
Expand Down Expand Up @@ -237,4 +251,17 @@ void PluginClient::DestroyCgroupHookAsync(task_id_t task_id,
m_event_queue_.enqueue(std::move(e));
}

void PluginClient::NodeEventHookAsync(std::vector<crane::grpc::plugin::CranedEventInfo> events) {
auto request = std::make_unique<crane::grpc::plugin::NodeEventHookRequest>();
auto* event_list = request->mutable_event_info_list();
for (auto& event : events) {
auto* event_it = event_list->Add();
event_it->CopyFrom(event);
}

HookEvent e{HookType::INSERT_EVENT,
std::unique_ptr<google::protobuf::Message>(std::move(request))};
m_event_queue_.enqueue(std::move(e));
}

} // namespace plugin
7 changes: 6 additions & 1 deletion src/Utilities/PluginClient/include/crane/PluginClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class PluginClient {
END,
CREATE_CGROUP,
DESTROY_CGROUP,
INSERT_EVENT,
HookTypeCount,
};

Expand All @@ -66,6 +67,7 @@ class PluginClient {
// Launched by Ctld
void StartHookAsync(std::vector<crane::grpc::TaskInfo> tasks);
void EndHookAsync(std::vector<crane::grpc::TaskInfo> tasks);
void NodeEventHookAsync(std::vector<crane::grpc::plugin::CranedEventInfo> events);

// Launched by Craned
void CreateCgroupHookAsync(
Expand All @@ -86,6 +88,8 @@ class PluginClient {
google::protobuf::Message* msg);
grpc::Status SendDestroyCgroupHook_(grpc::ClientContext* context,
google::protobuf::Message* msg);
grpc::Status NodeEventHook_(grpc::ClientContext* context,
google::protobuf::Message* msg);

void AsyncSendThread_();

Expand All @@ -103,7 +107,8 @@ class PluginClient {
s_hook_dispatch_funcs_{{&PluginClient::SendStartHook_,
&PluginClient::SendEndHook_,
&PluginClient::SendCreateCgroupHook_,
&PluginClient::SendDestroyCgroupHook_}};
&PluginClient::SendDestroyCgroupHook_,
&PluginClient::NodeEventHook_}};
};

} // namespace plugin
Expand Down