Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ AC_CHECK_LIB(aio, io_setup, , missing="yes")
AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set, , missing="yes")
AC_CHECK_LIB(cib, cib_new, , missing="yes")
AC_CHECK_LIB(crmcommon, set_crm_log_level, , missing="yes")
AC_CHECK_LIB(quorum, quorum_initialize, , missing="yes")
AC_CHECK_LIB(pe_status, pe_find_node, , missing="yes")
AC_CHECK_LIB(pe_rules, test_rule, , missing="yes")
AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes")
Expand Down
54 changes: 53 additions & 1 deletion src/sbd-cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
#include <crm/cluster.h>
#include <crm/common/mainloop.h>

#if SUPPORT_COROSYNC
#include <corosync/quorum.h>
#endif

#include "sbd.h"

//undef SUPPORT_PLUGIN
Expand Down Expand Up @@ -85,9 +89,18 @@ sbd_cpg_membership_dispatch(cpg_handle_t handle,
}
#endif

#if SUPPORT_COROSYNC
static quorum_handle_t q_handle;
static uint32_t q_type;
#endif

static gboolean
notify_timer_cb(gpointer data)
{
int is_quorate;
int err;
static int ever_had_quorum = FALSE;

cl_log(LOG_DEBUG, "Refreshing %sstate", remote_node?"remote ":"");

if(remote_node) {
Expand All @@ -102,7 +115,20 @@ notify_timer_cb(gpointer data)

case pcmk_cluster_corosync:
case pcmk_cluster_cman:
/* TODO - Make a CPG call and only call notify_parent() when we get a reply */
#if SUPPORT_COROSYNC
/* Report healthy if we're quorate or we've never seen quorum */
err = quorum_getquorate(q_handle, &is_quorate);
if (err != CS_OK) {
set_servant_health(pcmk_health_transient, LOG_INFO, "Unable to dispatch quorum status: %d", err);
} else if (is_quorate) {
set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online");
ever_had_quorum = TRUE;
} else if (ever_had_quorum) {
set_servant_health(pcmk_health_noquorum, LOG_WARNING, "Quorum lost");
} else {
set_servant_health(pcmk_health_online, LOG_INFO, "We do not have quorum yet");
}
#endif
notify_parent();
break;

Expand All @@ -117,6 +143,7 @@ static void
sbd_membership_connect(void)
{
bool connected = false;
int err;

cl_log(LOG_NOTICE, "Attempting cluster connection");

Expand All @@ -128,6 +155,7 @@ sbd_membership_connect(void)

#if SUPPORT_COROSYNC
cluster.cpg.cpg_confchg_fn = sbd_cpg_membership_dispatch;
q_handle = 0;
#endif

while(connected == false) {
Expand All @@ -146,6 +174,18 @@ sbd_membership_connect(void)
if(crm_cluster_connect(&cluster)) {
connected = true;
}

#if SUPPORT_COROSYNC
/* Connect to quorum service so we can use q_handle */
cl_log(LOG_INFO, "Attempting quorum connection");
err = quorum_initialize(&q_handle, NULL, &q_type);
if (err != CS_OK) {
cl_log(LOG_ERR, "Cannot initialize QUORUM service: %d\n", err);
q_handle = 0;
crm_cluster_disconnect(&cluster);
connected = false;
}
#endif
}

if(connected == false) {
Expand All @@ -163,11 +203,23 @@ sbd_membership_connect(void)
static void
sbd_membership_destroy(gpointer user_data)
{
int err;

cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type()));

set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated");
notify_parent();

#if SUPPORT_COROSYNC
/* Best effort attempt to disconnect from quorum service */
cl_log(LOG_INFO, "Attempting quorum disconnection");
err = quorum_finalize(q_handle);
if (err != CS_OK) {
cl_log(LOG_ERR, "Cannot finalize QUORUM service: %d\n", err);
q_handle = 0;
}
#endif

/* Attempt to reconnect, the watchdog will take the node down if the problem isn't transient */
sbd_membership_connect();
}
Expand Down