Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TLS/SSL Security Framework #349

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dependencies/cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ if(CRANE_USE_MIMALLOC)
add_subdirectory(mimalloc)
endif()
add_subdirectory(BSThreadPool)
add_subdirectory(nlohmann_json)
add_subdirectory(jwt-cpp)
add_subdirectory(yaml-cpp)
add_subdirectory(fmt)
add_subdirectory(googletest)
Expand All @@ -29,6 +29,7 @@ add_subdirectory(ranges-v3)
add_subdirectory(backward-cpp)
add_subdirectory(fpm)


#add_subdirectory(mariadb-connector-c)

include(${CMAKE_SOURCE_DIR}/CMakeModule/SuppressHeaderWarning.cmake)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ else ()
set(JSON_SRC_URL "https://github.com/nlohmann/json/archive/refs/tags/v3.11.2.tar.gz")
endif ()

set(JWT_CPP_SRC_URL "https://github.com/Thalhammer/jwt-cpp/archive/refs/tags/v0.7.0.tar.gz")

FetchContent_Declare(json
URL ${JSON_SRC_URL}
URL_HASH SHA256=d69f9deb6a75e2580465c6c4c5111b89c4dc2fa94e3a85fcd2ffcd9a143d9273
Expand All @@ -16,4 +18,15 @@ FetchContent_GetProperties(json)
if (NOT json_POPULATED)
FetchContent_Populate(json)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif ()
endif ()

set(nlohmann_json_DIR "${json_BINARY_DIR}")

fetchcontent_declare(jwt-cpp
URL ${JWT_CPP_SRC_URL}
URL_HASH SHA256=b9eb270e3ba8221e4b2bc38723c9a1cb4fa6c241a42908b9a334daff31137406
INACTIVITY_TIMEOUT 5
)
Copy link
Collaborator

@L-Xiafeng L-Xiafeng Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个需要cache吗?
Crane动态链接需不需要修改参数?需要确认一下

set(JWT_BUILD_EXAMPLES OFF CACHE BOOL "disable building examples" FORCE)

fetchcontent_makeavailable(jwt-cpp)
24 changes: 19 additions & 5 deletions etc/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,31 @@ DbConfigPath: /etc/crane/database.yaml
CraneBaseDir: /var/crane/

# Tls settings
UseTls: false
ServerCertFilePath: /etc/crane/server.crt
ServerKeyFilePath: /etc/crane/server.key
CaCertFilePath: /etc/crane/ca.crt
DomainSuffix: riley.local
UseTls: true
SSL:
CranectldExternalCertFilePath: /etc/crane/cranectld_external.pem
CranectldExternalKeyFilePath: /etc/crane/cranectld_external.key
ExternalCaFilePath: /etc/crane/external_ca.pem

CranectldInternalCertFilePath: /etc/crane/cranectld_internal.pem
CranectldInternalKeyFilePath: /etc/crane/cranectld_internal.key
CranedCertFilePath: /etc/crane/craned.pem
CranedKeyFilePath: /etc/crane/craned.key
CforedCertFilePath: /etc/crane/cfored.pem
CforedKeyFilePath: /etc/crane/cfored.key
InternalCaFilePath: /etc/crane/internal_ca.pem

DomainSuffix: crane.com

JwtCertFilePath: /etc/crane/jwt.pem
Nativu5 marked this conversation as resolved.
Show resolved Hide resolved

# Ctld settings
# the listening address of control machine
CraneCtldListenAddr: 0.0.0.0
# the port of control machine to listen
CraneCtldListenPort: 10011
CraneCtldForCranedListenPort: 10013
CraneCtldForCforedListenPort: 10014
# debug level of cranectld
CraneCtldDebugLevel: trace
# file path of cranectld log file (relative to CraneBaseDir)
Expand Down
33 changes: 26 additions & 7 deletions protos/Crane.proto
Original file line number Diff line number Diff line change
Expand Up @@ -744,17 +744,21 @@ message StreamCforedTaskIOReply {
}
}

// Request message of the `Login` RPC (listed under "RPCs called from clogin").
message LoginRequest {
// Uid of the user who is logging in.
uint32 uid = 1;
// Password supplied for that uid.
// NOTE(review): carried as a plain string field; presumably it is only
// protected in transit when TLS is enabled - confirm.
string password = 2;
}

// Reply message of the `Login` RPC.
message LoginReply {
// Outcome flag of the login attempt.
bool ok = 1;
// Token returned to the caller.
// NOTE(review): presumably only meaningful when `ok` is true - confirm
// against the server-side handler.
string token = 2;
// Error code of the attempt.
// NOTE(review): presumably only meaningful when `ok` is false - confirm.
ErrCode reason = 3;
}

// Todo: Divide service into two parts: one for Craned and one for Crun
// We need to distinguish the message sender
// and have some kind of authentication
service CraneCtld {
/* RPCs called from Craned */
rpc TaskStatusChange(TaskStatusChangeRequest) returns (TaskStatusChangeReply);
rpc CranedRegister(CranedRegisterRequest) returns (CranedRegisterReply);

/* RPCs called from Cfored */
rpc CforedStream(stream StreamCforedRequest) returns(stream StreamCtldReply);

/* RPCs called from ccancel */
rpc CancelTask(CancelTaskRequest) returns (CancelTaskReply);

Expand Down Expand Up @@ -790,10 +794,25 @@ service CraneCtld {
/* RPCs called from cinfo */
rpc QueryClusterInfo(QueryClusterInfoRequest) returns (QueryClusterInfoReply);

/* RPCs called from clogin */
rpc Login(LoginRequest) returns (LoginReply);

/* common RPCs */
rpc QueryTasksInfo(QueryTasksInfoRequest) returns (QueryTasksInfoReply);
}

// Service that groups the RPCs Craned calls on CraneCtld.
// NOTE(review): presumably exposed on its own listening port
// (see `CraneCtldForCranedListenPort` in the config) - confirm against the
// server setup code.
service CraneCtldForCraned {
/* RPCs called from Craned */
rpc TaskStatusChange(TaskStatusChangeRequest) returns (TaskStatusChangeReply);
rpc CranedRegister(CranedRegisterRequest) returns (CranedRegisterReply);
}

// Service that groups the RPCs Cfored calls on CraneCtld.
// NOTE(review): presumably exposed on its own listening port
// (see `CraneCtldForCforedListenPort` in the config) - confirm against the
// server setup code.
service CraneCtldForCfored {
/* RPCs called from Cfored */
// Bidirectional stream: Cfored sends StreamCforedRequest messages and
// receives StreamCtldReply messages.
rpc CforedStream(stream StreamCforedRequest) returns(stream StreamCtldReply);
}


service Craned {
/* ----------------------------------- Called from CraneCtld ---------------------------------------------------- */
rpc ExecuteTask(ExecuteTasksRequest) returns(ExecuteTasksReply);
Expand Down
103 changes: 54 additions & 49 deletions protos/PublicDefs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ message TaskToCtld {
string extra_attr = 23;

string cmd_line = 31;
string cwd = 32; // Current working directory
string cwd = 32; // Current working directory
map<string, string> env = 33;

string excludes = 34;
Expand Down Expand Up @@ -182,7 +182,7 @@ message TaskToD {
// If this task is PENDING, start_time is either not set (default constructed)
// or an estimated start time.
// If this task is RUNNING, start_time is the actual starting time.
google.protobuf.Timestamp start_time = 5; // Currently Only used in CraneCtld
google.protobuf.Timestamp start_time = 5; // Currently Only used in CraneCtld
google.protobuf.Duration time_limit = 6;

string partition = 8;
Expand Down Expand Up @@ -263,10 +263,11 @@ message TaskInfo {
string craned_list = 36;
}

// The time of different nodes across the whole cluster might not always be synchronized.
// If the time on the front end node is more than several seconds ahead of the CraneCtld node,
// a negative elapsed time might occur.
// To avoid this, the elapsed time of a task is calculated on the CraneCtld side.
// The time of different nodes across the whole cluster might not always be
// synchronized. If the time on the front end node is more than several
// seconds ahead of the CraneCtld node, a negative elapsed time might occur.
// To avoid this, the elapsed time of a task is calculated on the CraneCtld
// side.
google.protobuf.Duration elapsed_time = 37;
repeated string execution_node = 38;
}
Expand Down Expand Up @@ -328,36 +329,37 @@ enum ErrCode {
ERR_INVALID_ADMIN_LEVEL = 10007;
ERR_USER_ACCOUNT_MISMATCH = 10008;
ERR_NO_ACCOUNT_SPECIFIED = 10009;

ERR_INVALID_ACCOUNT = 10010;
ERR_DUPLICATE_ACCOUNT = 10011;
ERR_INVALID_PARENTACCOUNT = 10012;
ERR_DELETE_ACCOUNT = 10013;

ERR_INVALID_PARTITION = 10014;
ERR_ALLOWED_PARTITION = 10015;
ERR_DUPLICATE_PARTITION = 10016;
ERR_PARENT_ALLOWED_PARTITION = 10017;
ERR_USER_EMPTY_PARTITION = 10018;
ERR_CHILD_HAS_PARTITION = 10019;

ERR_INVALID_QOS = 10020;
ERR_DB_DUPLICATE_QOS = 10021;
ERR_DELETE_QOS = 10022;
ERR_CONVERT_TO_INTERGER = 10023;
ERR_TIME_LIMIT = 10024;
ERR_ALLOWED_QOS = 10025;
ERR_DUPLICATE_QOS = 10026;
ERR_PARENT_ALLOWED_QOS = 10027;
ERR_SET_ALLOWED_QOS = 10028;
ERR_ALLOWED_DEFAULT_QOS = 10029;
ERR_DUPLICATE_DEFAULT_QOS = 10030;
ERR_CHILD_HAS_DEFAULT_QOS = 10031;
ERR_SET_ACCOUNT_QOS = 10032;
ERR_SET_DEFAULT_QOS = 10033;
ERR_IS_DEFAULT_QOS = 10034;

ERR_UPDATE_DATABASE = 10035;
// The password supplied at login differs from the stored one.
// Uses 10010, the slot between ERR_NO_ACCOUNT_SPECIFIED (10009) and
// ERR_INVALID_ACCOUNT (10011); the previous literal 100010 had an extra digit.
ERR_PASSWORD_MISMATCH = 10010;

ERR_INVALID_ACCOUNT = 10011;
ERR_DUPLICATE_ACCOUNT = 10012;
ERR_INVALID_PARENTACCOUNT = 10013;
ERR_DELETE_ACCOUNT = 10014;

ERR_INVALID_PARTITION = 10015;
ERR_ALLOWED_PARTITION = 10016;
ERR_DUPLICATE_PARTITION = 10017;
ERR_PARENT_ALLOWED_PARTITION = 10018;
ERR_USER_EMPTY_PARTITION = 10019;
ERR_CHILD_HAS_PARTITION = 10020;

ERR_INVALID_QOS = 10021;
ERR_DB_DUPLICATE_QOS = 10022;
ERR_DELETE_QOS = 10023;
ERR_CONVERT_TO_INTERGER = 10024;
ERR_TIME_LIMIT = 10025;
ERR_ALLOWED_QOS = 10026;
ERR_DUPLICATE_QOS = 10027;
ERR_PARENT_ALLOWED_QOS = 10028;
ERR_SET_ALLOWED_QOS = 10029;
ERR_ALLOWED_DEFAULT_QOS = 10030;
ERR_DUPLICATE_DEFAULT_QOS = 10031;
ERR_CHILD_HAS_DEFAULT_QOS = 10032;
ERR_SET_ACCOUNT_QOS = 10033;
ERR_SET_DEFAULT_QOS = 10034;
ERR_IS_DEFAULT_QOS = 10035;

ERR_UPDATE_DATABASE = 10036;

ERR_GENERIC_FAILURE = 10100;
ERR_NO_RESOURCE = 10101;
Expand Down Expand Up @@ -420,15 +422,17 @@ message AccountInfo {
bool blocked = 10;
}

// Note: UserInfo DIFFERS from the `User` struct in C++ code and database representation
// and is ONLY used for communication between CraneCtld and cacctmgr command.
// If an user belongs to multiple accounts, There will be multiple `UserInfo`
// messages with `account` pointing to each account.
// Note: UserInfo DIFFERS from the `User` struct in C++ code and database
// representation and is ONLY used for communication between CraneCtld and
// the cacctmgr command. If a user belongs to multiple accounts, there will
// be multiple `UserInfo` messages with `account` pointing to each account.
// For example, if a user (uid=1) belongs to accounts `1,2,3`,
// there will be three `UserInfo` messages: (uid=1, account=1), (uid=1, account=2),
// (uid=1, account=3).
// The c++ code and database representation use a Map<account name, AttrsInAccount> to contain
// in ONE UserInfo message all the information belonging to different accounts.
// there will be three `UserInfo` messages: (uid=1, account=1),
// (uid=1, account=2), (uid=1, account=3).
// The C++ code and database representation use a
// Map<account name, AttrsInAccount> to contain in ONE UserInfo message all
// the information belonging to different accounts.
message UserInfo {
enum AdminLevel {
None = 0;
Expand All @@ -444,11 +448,12 @@ message UserInfo {

uint32 uid = 1;
string name = 2;
string account = 3;
bool blocked = 4;
repeated AllowedPartitionQos allowed_partition_qos_list = 5;
repeated string coordinator_accounts = 6;
AdminLevel admin_level = 7;
string password = 3;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 明文?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

有SSL的话,前端加密应该没必要了吧

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SSL不一定强制启用 可能还是考虑Hash 存储再加一个Salt

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@RileyWen @Nativu5 @L-Xiafeng 密码这块感觉还得对一下 今天马老师说集群已经认证过账号密码了,所以鹤思本身不需要密码,直接clogin获取token即可。现在的实现是clogin通过uid和密码登录(密码可为空),所以目前的安全框架是否需要对鹤思系统加入密码,保留现在的实现?还是说现在不需要密码,只依赖集群认证登录?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个应该还是需要的:

  1. 之前的规划中有提到前端在客户主机而不只是在登录节点上,那么自然需要密码;
  2. 从 JWT 的原理来讲,如果调用 clogin 传递 UID 就能使服务端签发 JWT,那么冒充者可以直接仿造一个 clogin 请求,传递一个受害者的 UID 来拿到 JWT。此时 JWT 防冒充的作用就完全失效了。

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

确实是的

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@RileyWen @Nativu5 @L-Xiafeng 马老师认为鹤思不能有输入密码这个步骤,但要保证获取token是安全的。感觉有点麻烦,目前我只能想到AddUser时传回密码保存文件用于登录自动读取,但文件权限保证不了,感觉不太行,或者手动在登录节点配置类似于公私钥这种,你们有没有什么更好的想法

string account = 4;
bool blocked = 5;
repeated AllowedPartitionQos allowed_partition_qos_list = 6;
repeated string coordinator_accounts = 7;
AdminLevel admin_level = 8;
}

message QosInfo {
Expand Down
19 changes: 19 additions & 0 deletions src/CraneCtld/AccountManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,25 @@ namespace Ctld {

AccountManager::AccountManager() { InitDataMap_(); }

/**
 * @brief Checks a user's password and, on success, generates a login token.
 *
 * The user is looked up by uid while the user read-lock guard is held. If the
 * supplied password equals the stored one, a token carrying the single claim
 * "UID" (the decimal uid) is generated from the configured JWT secret content.
 *
 * @param uid      Uid of the user who is logging in.
 * @param password Password supplied by the caller.
 * @return The generated token on success; the error reported by the user
 *         lookup if it fails; ERR_PASSWORD_MISMATCH if the passwords differ.
 */
AccountManager::CraneExpected<std::string> AccountManager::Login(
    uint32_t uid, const std::string& password) {
  util::read_lock_guard user_guard(m_rw_user_mutex_);

  auto user_result = GetUserInfoByUidNoLock_(uid);
  if (!user_result) return std::unexpected(user_result.error());
  const User* user = user_result.value();

  // NOTE(review): the password is compared in plaintext with a
  // short-circuiting string comparison. Consider storing a salted hash and
  // using a constant-time comparison before this is exposed to untrusted
  // clients.
  if (password != user->password) {
    return std::unexpected(CraneErrCode::ERR_PASSWORD_MISMATCH);
  }

  std::unordered_map<std::string, std::string> claims{
      {"UID", std::to_string(uid)}};

  // Hold the token by value (not by const reference) so that the return
  // statement can move it into the result instead of copying it.
  std::string token =
      util::GenerateToken(g_config.ListenConf.JwtSecretContent, claims);

  return token;
}

AccountManager::CraneExpected<void> AccountManager::AddUser(
uint32_t uid, const User& new_user) {
CraneExpected<void> result;
Expand Down
2 changes: 2 additions & 0 deletions src/CraneCtld/AccountManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class AccountManager {

~AccountManager() = default;

CraneExpected<std::string> Login(uint32_t uid, const std::string& password);

CraneExpected<void> AddUser(uint32_t uid, const User& new_user);

CraneExpected<void> AddAccount(uint32_t uid, const Account& new_account);
Expand Down
8 changes: 4 additions & 4 deletions src/CraneCtld/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@

add_subdirectory(RpcService)

add_executable(cranectld
CtldPublicDefs.h
CtldGrpcServer.h
CtldGrpcServer.cpp
DbClient.h
DbClient.cpp
TaskScheduler.h
TaskScheduler.cpp
CranedKeeper.h
CranedKeeper.cpp
CranedMetaContainer.h
CranedMetaContainer.cpp
AccountManager.h
Expand All @@ -26,6 +25,7 @@ target_link_libraries(cranectld PRIVATE
spdlog::spdlog
concurrentqueue

RpcService
Utility_PublicHeader
Utility_PluginClient

Expand Down
Loading
Loading