From cb0f29d437ded2557d8ae35970fdadf9da7392c1 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 6 Mar 2015 13:19:43 -0500 Subject: [PATCH 01/22] Fix: systemd: fix crash caused when canceling in-flight operation --- lib/services/dbus.c | 8 ++++++-- lib/services/pcmk-dbus.h | 3 ++- lib/services/systemd.c | 21 +++++++++++++++++---- lib/services/upstart.c | 14 ++++++++++++-- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/lib/services/dbus.c b/lib/services/dbus.c index f44b59056f0..637f7499867 100644 --- a/lib/services/dbus.c +++ b/lib/services/dbus.c @@ -325,7 +325,7 @@ pcmk_dbus_lookup_cb(DBusPendingCall *pending, void *user_data) char * pcmk_dbus_get_property( DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name, - void (*callback)(const char *name, const char *value, void *userdata), void *userdata) + void (*callback)(const char *name, const char *value, void *userdata), void *userdata, DBusPendingCall **pending) { DBusMessage *msg; const char *method = "GetAll"; @@ -365,7 +365,11 @@ pcmk_dbus_get_property( } if(query_data->callback) { - pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, query_data); + DBusPendingCall* _pending; + _pending = pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, query_data); + if (pending != NULL) { + *pending = _pending; + } } else { DBusMessage *reply = pcmk_dbus_send_recv(msg, connection, NULL); diff --git a/lib/services/pcmk-dbus.h b/lib/services/pcmk-dbus.h index 468020eeab9..63910f621ea 100644 --- a/lib/services/pcmk-dbus.h +++ b/lib/services/pcmk-dbus.h @@ -8,7 +8,8 @@ DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, D bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line); char *pcmk_dbus_get_property( DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name, - void (*callback)(const char *name, const char *value, void 
*userdata), void *userdata); + void (*callback)(const char *name, const char *value, void *userdata), void *userdata, + DBusPendingCall **pending); bool pcmk_dbus_find_error(const char *method, DBusPendingCall* pending, DBusMessage *reply, DBusError *error); diff --git a/lib/services/systemd.c b/lib/services/systemd.c index c0a1721b967..10c605aae28 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -363,7 +363,7 @@ systemd_unit_metadata(const char *name) if (path) { /* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */ - desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description", NULL, NULL); + desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description", NULL, NULL, NULL); } else { desc = g_strdup_printf("Systemd unit file for %s", name); } @@ -499,6 +499,9 @@ systemd_unit_check(const char *name, const char *state, void *userdata) } if (op->synchronous == FALSE) { + if (op->opaque->pending) { + dbus_pending_call_unref(op->opaque->pending); + } op->opaque->pending = NULL; operation_finalize(op); } @@ -521,14 +524,24 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) } if (safe_str_eq(op->action, "monitor") || safe_str_eq(method, "status")) { - char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState", - op->synchronous?NULL:systemd_unit_check, op); + DBusPendingCall *pending = NULL; + char *state; + + state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, + BUS_NAME ".Unit", "ActiveState", + op->synchronous?NULL:systemd_unit_check, + op, op->synchronous?NULL:&pending); if (op->synchronous) { systemd_unit_check("ActiveState", state, op); free(state); return op->rc == PCMK_OCF_OK; + } else if (pending) { + dbus_pending_call_ref(pending); + op->opaque->pending = pending; + return TRUE; } - return TRUE; + + return FALSE; } else if (g_strcmp0(method, "start") == 0) { FILE *file_strm = NULL; diff 
--git a/lib/services/upstart.c b/lib/services/upstart.c index 01ff817a6b0..98944307273 100644 --- a/lib/services/upstart.c +++ b/lib/services/upstart.c @@ -322,6 +322,10 @@ upstart_job_check(const char *name, const char *state, void *userdata) } if (op->synchronous == FALSE) { + if (op->opaque->pending) { + dbus_pending_call_unref(op->opaque->pending); + } + op->opaque->pending = NULL; operation_finalize(op); } } @@ -465,9 +469,11 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) op->rc = PCMK_OCF_NOT_RUNNING; if(path) { + DBusPendingCall *pending = NULL; char *state = pcmk_dbus_get_property( upstart_proxy, BUS_NAME, path, UPSTART_06_API ".Instance", "state", - op->synchronous?NULL:upstart_job_check, op); + op->synchronous?NULL:upstart_job_check, op, + op->synchronous?NULL:&pending); free(job); free(path); @@ -476,8 +482,12 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) upstart_job_check("state", state, op); free(state); return op->rc == PCMK_OCF_OK; + } else if (pending) { + dbus_pending_call_ref(pending); + op->opaque->pending = pending; + return TRUE; } - return TRUE; + return FALSE; } goto cleanup; From 53d7d54d5a33857c3331f0dbc44eadf6d092c90c Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Tue, 10 Mar 2015 16:02:33 +0100 Subject: [PATCH 02/22] Fix: crmd: Reset stonith failcount to recover transitioner when the node rejoins CRMd transitioner could not recover from "Too many failures to fence". Steps to reproduce: 1. Two-node cluster with stonith, for example using IPMI. 2. Node-1 has a complete power outage for a couple of minutes. The IPMI device is also without power, which causes the fencing to fail. 3. Node-2 tries to fence node-1 several times but fails. 4. Node-2 reports "Too many failures to fence node-1 (11), giving up". 5. The power returns and node-1 boots up normally. 6. Node-1 rejoins the cluster, but resources are not started on it. 
Expected result: The stonith failcount for node-1 should be reset and resources should be started on node-1. Actual result: Node-2 still logs "Too many failures to fence" and resources are not started on node-1. --- crmd/callbacks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crmd/callbacks.c b/crmd/callbacks.c index eae222324b3..cb1134e9be8 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -204,6 +204,9 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d if (alive && safe_str_eq(task, CRM_OP_FENCE)) { crm_info("Node return implies stonith of %s (action %d) completed", node->uname, down->id); + + st_fail_count_reset(node->uname); + erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */ From ff7bf79feec3c43045663fc3564507a3f3d7b3c4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 11 Mar 2015 12:14:07 -0400 Subject: [PATCH 03/22] Fix: lrmd, services: interpret LSB status codes properly LSB status exit code 4 is now treated as "status unknown" (increase fail count) rather than "not installed" (fail over immediately). Also, custom LSB status exit codes are used internally when the LSB agent could not be executed, so that "not installed" and "insufficient privileges" can be handled as fatal. 
--- include/crm/services.h | 18 +++++++-- lib/services/services_linux.c | 72 +++++++++++++++++++---------------- lrmd/lrmd.c | 4 +- 3 files changed, 55 insertions(+), 39 deletions(-) diff --git a/include/crm/services.h b/include/crm/services.h index fd4697ec273..71a630e4858 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -78,7 +78,11 @@ enum lsb_status_exitcode { PCMK_LSB_STATUS_VAR_PID = 1, PCMK_LSB_STATUS_VAR_LOCK = 2, PCMK_LSB_STATUS_NOT_RUNNING = 3, - PCMK_LSB_STATUS_NOT_INSTALLED = 4, + PCMK_LSB_STATUS_UNKNOWN = 4, + + /* custom codes should be in the 150-199 range reserved for application use */ + PCMK_LSB_STATUS_NOT_INSTALLED = 150, + PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151, }; /* Uniform exit codes @@ -302,7 +306,10 @@ enum nagios_exitcode { case PCMK_LRM_OP_ERROR:return "Error"; case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed"; default:return "UNKNOWN!"; - }} static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { + } + } + + static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { switch (code) { case PCMK_OCF_OK: return "ok"; @@ -342,7 +349,7 @@ enum nagios_exitcode { } static inline enum ocf_exitcode - services_get_ocf_exitcode(char *action, int lsb_exitcode) { + services_get_ocf_exitcode(char *action, int lsb_exitcode) { if (action != NULL && strcmp("status", action) == 0) { switch (lsb_exitcode) { case PCMK_LSB_STATUS_OK: @@ -354,7 +361,10 @@ enum nagios_exitcode { case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; case PCMK_LSB_STATUS_NOT_INSTALLED: - return PCMK_OCF_UNKNOWN_ERROR; + return PCMK_OCF_NOT_INSTALLED; + case PCMK_LSB_STATUS_INSUFFICIENT_PRIV: + return PCMK_OCF_INSUFFICIENT_PRIV; + case PCMK_LSB_STATUS_UNKNOWN: default: return PCMK_OCF_UNKNOWN_ERROR; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index b1244e934d9..dcdb7ec0fa2 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -332,50 +332,56 @@ 
operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exi operation_finalize(op); } +/*! + * \internal + * \brief Set operation rc and status per errno from stat(), fork() or execvp() + * + * \param[in,out] op Operation to set rc and status for + * \param[in] error Value of errno after system call + * + * \return void + */ static void services_handle_exec_error(svc_action_t * op, int error) { - op->rc = PCMK_OCF_EXEC_ERROR; - op->status = PCMK_LRM_OP_ERROR; + int rc_not_installed, rc_insufficient_priv, rc_exec_error; - /* Need to mimic the return codes for each standard as thats what we'll convert back from in get_uniform_rc() */ + /* Mimic the return codes for each standard as that's what we'll convert back from in get_uniform_rc() */ if (safe_str_eq(op->standard, "lsb") && safe_str_eq(op->action, "status")) { - switch (error) { /* see execve(2) */ - case ENOENT: /* No such file or directory */ - case EISDIR: /* Is a directory */ - op->rc = PCMK_LSB_STATUS_NOT_INSTALLED; - op->status = PCMK_LRM_OP_NOT_INSTALLED; - break; - case EACCES: /* permission denied (various errors) */ - /* LSB status ops don't support 'not installed' */ - break; - } + rc_not_installed = PCMK_LSB_STATUS_NOT_INSTALLED; + rc_insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV; + rc_exec_error = PCMK_LSB_STATUS_UNKNOWN; #if SUPPORT_NAGIOS } else if (safe_str_eq(op->standard, "nagios")) { - switch (error) { - case ENOENT: /* No such file or directory */ - case EISDIR: /* Is a directory */ - op->rc = NAGIOS_NOT_INSTALLED; - op->status = PCMK_LRM_OP_NOT_INSTALLED; - break; - case EACCES: /* permission denied (various errors) */ - op->rc = NAGIOS_INSUFFICIENT_PRIV; - break; - } + rc_not_installed = NAGIOS_NOT_INSTALLED; + rc_insufficient_priv = NAGIOS_INSUFFICIENT_PRIV; + rc_exec_error = PCMK_OCF_EXEC_ERROR; #endif } else { - switch (error) { - case ENOENT: /* No such file or directory */ - case EISDIR: /* Is a directory */ - op->rc = PCMK_OCF_NOT_INSTALLED; /* Valid for 
LSB */ - op->status = PCMK_LRM_OP_NOT_INSTALLED; - break; - case EACCES: /* permission denied (various errors) */ - op->rc = PCMK_OCF_INSUFFICIENT_PRIV; /* Valid for LSB */ - break; - } + rc_not_installed = PCMK_OCF_NOT_INSTALLED; + rc_insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV; + rc_exec_error = PCMK_OCF_EXEC_ERROR; + } + + switch (error) { /* see execve(2), stat(2) and fork(2) */ + case ENOENT: /* No such file or directory */ + case EISDIR: /* Is a directory */ + case ENOTDIR: /* Path component is not a directory */ + case EINVAL: /* Invalid executable format */ + case ENOEXEC: /* Invalid executable format */ + op->rc = rc_not_installed; + op->status = PCMK_LRM_OP_NOT_INSTALLED; + break; + case EACCES: /* permission denied (various errors) */ + case EPERM: /* permission denied (various errors) */ + op->rc = rc_insufficient_priv; + op->status = PCMK_LRM_OP_ERROR; + break; + default: + op->rc = rc_exec_error; + op->status = PCMK_LRM_OP_ERROR; } } diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index f40710460f1..b74ece407e8 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -576,12 +576,12 @@ lsb2uniform_rc(const char *action, int rc) return PCMK_OCF_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; + case PCMK_LSB_STATUS_INSUFFICIENT_PRIV: + return PCMK_OCF_INSUFFICIENT_PRIV; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; - default: - return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; From a7a09a27e8a4511f0bc3174ce2e5270c60c44f49 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 13 Mar 2015 18:18:58 -0400 Subject: [PATCH 04/22] Feature: remote: pcmk remote client tool for use with container wrapper script --- lrmd/Makefile.am | 11 +- lrmd/remote_ctl.c | 350 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 lrmd/remote_ctl.c diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am index 
67f6329bd56..9addd1bd56c 100644 --- a/lrmd/Makefile.am +++ b/lrmd/Makefile.am @@ -21,7 +21,7 @@ testdir = $(datadir)/$(PACKAGE)/tests/lrmd test_SCRIPTS = regression.py lrmdlibdir = $(CRM_DAEMON_DIR) -lrmdlib_PROGRAMS = lrmd lrmd_test +lrmdlib_PROGRAMS = lrmd lrmd_test pacemaker_remote_ctl initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote @@ -43,6 +43,15 @@ pacemaker_remoted_CFLAGS = -DSUPPORT_REMOTE pacemaker_remoted_LDADD = $(lrmd_LDADD) +pacemaker_remote_ctl_SOURCES = remote_ctl.c +pacemaker_remote_ctl_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/lrmd/liblrmd.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/pengine/libpe_status.la \ + $(top_builddir)/pengine/libpengine.la + + lrmd_test_SOURCES = test.c lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c new file mode 100644 index 00000000000..d2ab9eb4eb1 --- /dev/null +++ b/lrmd/remote_ctl.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2015 David Vossel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +/* *INDENT-OFF* */ +static struct crm_option long_options[] = { + {"help", 0, 0, '?'}, + {"verbose", 0, 0, 'V', "\t\tPrint out logs and events to screen"}, + {"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"}, + {"tls", 1, 0, 'S', "\t\tSet tls host to contact"}, + {"tls-port", 1, 0, 'p', "\t\tUse custom tls port"}, + {"api-call", 1, 0, 'c', "\tDirectly relates to lrmd api functions"}, + {"-spacer-", 1, 0, '-', "\nParameters for api-call option"}, + {"action", 1, 0, 'a'}, + {"rsc-id", 1, 0, 'r'}, + {"provider", 1, 0, 'P'}, + {"class", 1, 0, 'C'}, + {"type", 1, 0, 'T'}, + {"timeout", 1, 0, 't'}, + {"param-key", 1, 0, 'k'}, + {"param-val", 1, 0, 'v'}, + + {"-spacer-", 1, 0, '-'}, + {0, 0, 0, 0} +}; +/* *INDENT-ON* */ + +static int wait_poke = 0; +static int exec_call_id = 0; +static gboolean client_start(gpointer user_data); +static void try_connect(void); + +static struct { + int verbose; + int quiet; + int print; + int interval; + int timeout; + int port; + const char *api_call; + const char *rsc_id; + const char *provider; + const char *class; + const char *type; + const char *action; + const char *listen; + const char *tls_host; + lrmd_key_value_t *params; +} options; + +GMainLoop *mainloop = NULL; +lrmd_t *lrmd_conn = NULL; + +static void +client_exit(int rc) +{ + lrmd_api_delete(lrmd_conn); + exit(rc); +} + +#define print_result(result) \ + if (!options.quiet) { \ + result; \ + } \ + +static void +client_shutdown(int nsig) +{ + lrmd_api_delete(lrmd_conn); + lrmd_conn = NULL; +} + +static void +read_events(lrmd_event_data_t * event) +{ + if (wait_poke && event->type == lrmd_event_poke) { + client_exit(PCMK_OCF_OK); + } + if 
((event->call_id == exec_call_id) && (event->type == lrmd_event_exec_complete)) { + if (event->output) { + printf("%s", event->output); + } + + client_exit(event->rc); + } +} + +static gboolean +timeout_err(gpointer data) +{ + print_result(printf("timed out in remote_client\n")); + client_exit(PCMK_OCF_TIMEOUT); + + return FALSE; +} + +static void +connection_events(lrmd_event_data_t * event) +{ + int rc = event->connection_rc; + + if (event->type != lrmd_event_connect) { + /* ignore */ + return; + } + + if (!rc) { + client_start(NULL); + return; + } else { + sleep(1); + try_connect(); + } +} + +static void +try_connect(void) +{ + int tries = 10; + static int num_tries = 0; + int rc = 0; + + lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); + for (; num_tries < tries; num_tries++) { + rc = lrmd_conn->cmds->connect_async(lrmd_conn, "lrmd", 10000); + + if (!rc) { + num_tries++; + return; /* we'll hear back in async callback */ + } + sleep(1); + } + + print_result(printf("Failed to connect to pacemaker remote.\n")); + client_exit(PCMK_OCF_UNKNOWN_ERROR); +} + +static gboolean +client_start(gpointer user_data) +{ + int rc = 0; + + if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { + try_connect(); + /* async connect, this funciton will get called back into. 
*/ + return 0; + } + + lrmd_conn->cmds->set_callback(lrmd_conn, read_events); + + if (options.timeout) { + g_timeout_add(options.timeout, timeout_err, NULL); + } + + if (safe_str_eq(options.api_call, "metadata")) { + char *output = NULL; + + rc = lrmd_conn->cmds->get_metadata(lrmd_conn, + options.class, + options.provider, options.type, &output, 0); + if (rc == pcmk_ok) { + printf("%s", output); + free(output); + client_exit(PCMK_OCF_OK); + } + client_exit(PCMK_OCF_UNKNOWN_ERROR); + + } else if (safe_str_eq(options.api_call, "poke")) { + rc = lrmd_conn->cmds->poke_connection(lrmd_conn); + if (rc != pcmk_ok) { + client_exit(PCMK_OCF_UNKNOWN_ERROR); + } + wait_poke = 1; + } else { + lrmd_rsc_info_t *rsc_info = NULL; + + rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); + if (rsc_info == NULL) { + rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, + options.class, options.provider, options.type, 0); + + if (rc != 0){ + print_result(printf("failed to register resource %s with pacemaker_remote. rc: %d\n", options.rsc_id, rc)); + client_exit(1); + } + } + lrmd_free_rsc_info(rsc_info); + + rc = lrmd_conn->cmds->exec(lrmd_conn, + options.rsc_id, + options.action, + NULL, + options.interval, + options.timeout, + 0, 0, options.params); + + if (rc > 0) { + exec_call_id = rc; + } else { + print_result(printf("execution of rsc %s failed. 
rc = %d\n", options.rsc_id, rc)); + client_exit(PCMK_OCF_UNKNOWN_ERROR); + } + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int option_index = 0; + int argerr = 0; + int flag; + char *key = NULL; + char *val = NULL; + gboolean use_tls = FALSE; + crm_trigger_t *trig; + + crm_set_options(NULL, "mode [options]", long_options, + "Inject commands into the lrmd and watch for events\n"); + + while (1) { + flag = crm_get_option(argc, argv, &option_index); + if (flag == -1) + break; + + switch (flag) { + case '?': + crm_help(flag, EX_OK); + break; + case 'V': + options.verbose = 1; + break; + case 'Q': + options.quiet = 1; + options.verbose = 0; + break; + case 'c': + options.api_call = optarg; + break; + case 'a': + options.action = optarg; + break; + case 'r': + options.rsc_id = optarg; + break; + case 'P': + options.provider = optarg; + break; + case 'C': + options.class = optarg; + break; + case 'T': + options.type = optarg; + break; + case 't': + if(optarg) { + options.timeout = atoi(optarg); + } + break; + case 'k': + key = optarg; + if (key && val) { + options.params = lrmd_key_value_add(options.params, key, val); + key = val = NULL; + } + break; + case 'v': + val = optarg; + if (key && val) { + options.params = lrmd_key_value_add(options.params, key, val); + key = val = NULL; + } + break; + case 'S': + options.tls_host = optarg; + use_tls = TRUE; + break; + case 'p': + if(optarg) { + options.port = atoi(optarg); + } + use_tls = TRUE; + break; + default: + ++argerr; + break; + } + } + + if (argerr) { + crm_help('?', EX_USAGE); + } + if (optind > argc) { + ++argerr; + } + + /* if we can't perform an api_call or listen for events, + * there is nothing to do */ + if (!options.api_call ) { + print_result(printf("Nothing to be done. Please specify 'api-call'\n")); + return PCMK_OCF_UNKNOWN_ERROR; + } + + if (!options.timeout ) { + options.timeout = 20000; + } + if (use_tls) { + lrmd_conn = lrmd_remote_api_new(NULL, options.tls_host ? 
options.tls_host : "localhost", options.port); + } else { + lrmd_conn = lrmd_api_new(); + } + trig = mainloop_add_trigger(G_PRIORITY_HIGH, client_start, NULL); + mainloop_set_trigger(trig); + mainloop_add_signal(SIGTERM, client_shutdown); + + mainloop = g_main_new(FALSE); + g_main_run(mainloop); + + client_exit(0); + return 0; +} From a5c5caa3ce8ae94a48030e625b1d0f0d7d0d053e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 18 Mar 2015 10:06:55 -0400 Subject: [PATCH 05/22] Fix: tools: make crm_mon last updated header consistent across formats This refactors a new function crm_now_string() so that the plain text, XML and HTML output formats use the same logic for displaying the Last Updated header. This fixes potential segmentation faults in the extremely unlikely case of system time functions failing. --- tools/crm_mon.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/tools/crm_mon.c b/tools/crm_mon.c index fd112cbeac7..66210b2f9b3 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1184,6 +1184,28 @@ crm_mon_get_parameters(resource_t *rsc, pe_working_set_t * data_set) } } +/*! + * \internal + * \brief Return human-friendly string representing current time + * + * \return Current time as string (as by ctime() but without newline) on success + * or "Could not determine current time" on error + * \note The return value points to a statically allocated string which might be + * overwritten by subsequent calls to any of the C library date and time functions. 
+ */ +static const char * +crm_now_string(void) +{ + time_t a_time = time(NULL); + char *since_epoch = ctime(&a_time); + + if ((a_time == (time_t) -1) || (since_epoch == NULL)) { + return "Could not determine current time"; + } + since_epoch[strlen(since_epoch) - 1] = EOS; /* trim newline */ + return (since_epoch); +} + static int print_status(pe_working_set_t * data_set) { @@ -1191,7 +1213,6 @@ print_status(pe_working_set_t * data_set) GListPtr gIter = NULL; node_t *dc = NULL; - char *since_epoch = NULL; char *online_nodes = NULL; char *online_remote_nodes = NULL; char *online_remote_containers = NULL; @@ -1201,7 +1222,6 @@ print_status(pe_working_set_t * data_set) xmlNode *dc_version = NULL; xmlNode *quorum_node = NULL; xmlNode *stack = NULL; - time_t a_time = time(NULL); int print_opts = pe_print_ncurses; const char *quorum_votes = "unknown"; @@ -1223,14 +1243,9 @@ print_status(pe_working_set_t * data_set) updates++; dc = data_set->dc_node; - if (a_time == (time_t) - 1) { - crm_perror(LOG_ERR, "set_node_tstamp(): Invalid time returned"); - return 1; - } - since_epoch = ctime(&a_time); - if (since_epoch != NULL && print_last_updated && !hide_headers) { - print_as("Last updated: %s", since_epoch); + if (print_last_updated && !hide_headers) { + print_as("Last updated: %s\n", crm_now_string()); } if (print_last_change && !hide_headers) { @@ -1575,11 +1590,7 @@ print_xml_status(pe_working_set_t * data_set) fprintf(stream, " \n"); if (print_last_updated) { - time_t now = time(NULL); - char *now_str = ctime(&now); - - now_str[24] = EOS; /* replace the newline */ - fprintf(stream, " \n", now_str); + fprintf(stream, " \n", crm_now_string()); } if (print_last_change) { @@ -1812,14 +1823,7 @@ print_html_status(pe_working_set_t * data_set, const char *filename, gboolean we /*** SUMMARY ***/ fprintf(stream, "

Cluster summary

"); - { - char *now_str = NULL; - time_t now = time(NULL); - - now_str = ctime(&now); - now_str[24] = EOS; /* replace the newline */ - fprintf(stream, "Last updated: %s
\n", now_str); - } + fprintf(stream, "Last updated: %s
\n", crm_now_string()); if (dc == NULL) { fprintf(stream, "Current DC: NONE
"); From d5934b96ea518c888010b2ae7a20f4755c1441cf Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 18 Mar 2015 12:02:31 -0400 Subject: [PATCH 06/22] Fix: tools: Improve crm_mon output with certain option combinations This refactors a new function get_resource_display_options() to use the same logic to set the pe_print_options bitmask for plain, XML and HTML output formats. The only behavioral changes this makes are: * If --show-detail and --as-html are both specified, clone details are now shown. * If --group-by-node, --inactive and --brief are all specified, an inactive group will now correctly be displayed briefly rather than in long format. --- tools/crm_mon.c | 60 +++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 66210b2f9b3..274477aa07c 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1184,6 +1184,41 @@ crm_mon_get_parameters(resource_t *rsc, pe_working_set_t * data_set) } } +/*! + * \internal + * \brief Return resource display options corresponding to command-line choices + * + * \return Bitmask of pe_print_options suitable for resource print functions + */ +static int +get_resource_display_options(void) +{ + int print_opts = as_console? pe_print_ncurses : pe_print_printf; + + /* Determine basic output format */ + if (as_xml) { + print_opts = pe_print_xml; + } else if (as_html_file || web_cgi) { + print_opts = pe_print_html; + } else if (as_console) { + print_opts = pe_print_ncurses; + } else { + print_opts = pe_print_printf; + } + + /* Add optional display elements */ + if (print_pending) { + print_opts |= pe_print_pending; + } + if (print_clone_detail) { + print_opts |= pe_print_clone_details; + } + if (print_brief) { + print_opts |= pe_print_brief; + } + return print_opts; +} + /*! 
* \internal * \brief Return human-friendly string representing current time @@ -1223,22 +1258,13 @@ print_status(pe_working_set_t * data_set) xmlNode *quorum_node = NULL; xmlNode *stack = NULL; - int print_opts = pe_print_ncurses; + int print_opts = get_resource_display_options(); const char *quorum_votes = "unknown"; if (as_console) { blank_screen(); - } else { - print_opts = pe_print_printf; } - if (print_pending) { - print_opts |= pe_print_pending; - } - - if (print_clone_detail) { - print_opts |= pe_print_clone_details; - } updates++; dc = data_set->dc_node; @@ -1437,7 +1463,6 @@ print_status(pe_working_set_t * data_set) print_as("\n"); if (print_brief && group_by_node == FALSE) { - print_opts |= pe_print_brief; print_rscs_brief(data_set->resources, NULL, print_opts, stdout, inactive_resources); } @@ -1575,14 +1600,10 @@ print_xml_status(pe_working_set_t * data_set) xmlNode *stack = NULL; xmlNode *quorum_node = NULL; const char *quorum_votes = "unknown"; - int print_opts = pe_print_xml; + int print_opts = get_resource_display_options(); dc = data_set->dc_node; - if (print_pending) { - print_opts |= pe_print_pending; - } - fprintf(stream, "\n"); fprintf(stream, "\n", VERSION); @@ -1790,11 +1811,7 @@ print_html_status(pe_working_set_t * data_set, const char *filename, gboolean we node_t *dc = NULL; static int updates = 0; char *filename_tmp = NULL; - int print_opts = pe_print_html; - - if (print_pending) { - print_opts |= pe_print_pending; - } + int print_opts = get_resource_display_options(); if (web_cgi) { stream = stdout; @@ -1923,7 +1940,6 @@ print_html_status(pe_working_set_t * data_set, const char *filename, gboolean we if (group_by_node == FALSE || inactive_resources) { if (print_brief && group_by_node == FALSE) { - print_opts |= pe_print_brief; print_rscs_brief(data_set->resources, NULL, print_opts, stream, inactive_resources); } From 1c761c38ca45a9719bb3cac64c66d5630a7f2223 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 19 Mar 2015 11:14:06 -0400 
Subject: [PATCH 07/22] tools: display node names more consistently in crm_mon output This refactors a new function get_node_display_name() to display a node name (with node ID and/or container ID when appropriate) and modifies the plaintext and HTML outputs to use it when displaying node names. Also, the XML output will now add a container_id attribute to node tags when appropriate. --- tools/crm_mon.c | 143 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 43 deletions(-) diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 274477aa07c..3e050c056ca 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -345,7 +345,7 @@ static struct crm_option long_options[] = { {"neg-locations", 2, 0, 'L', "Display negative location constraints [optionally filtered by id prefix]"}, {"show-node-attributes", 0, 0, 'A', "Display node attributes" }, {"hide-headers", 0, 0, 'D', "\tHide all headers" }, - {"show-detail", 0, 0, 'R', "\tShow more details of cloned resources" }, + {"show-detail", 0, 0, 'R', "\tShow more details (node IDs, individual clone instances)" }, {"brief", 0, 0, 'b', "\t\tBrief output" }, {"pending", 0, 0, 'j', "\t\tDisplay pending state if 'record-pending' is enabled" }, @@ -1149,23 +1149,86 @@ print_cluster_tickets(pe_working_set_t * data_set) return; } +/*! + * \internal + * \brief Return human-friendly string representing node name + * + * The returned string will be in the format + * uname[:containerID] [(nodeID)] + * ":containerID" will be printed if the node is a remote container node. + * "(nodeID)" will be printed if the node ID is different from the node uname, + * and detailed output has been requested. + * + * \param[in] node Node to represent + * \return Newly allocated string with representation of node name + * \note It is the caller's responsibility to free the result with free(). 
+ */ +static char * +get_node_display_name(node_t *node) +{ + char *node_name; + const char *node_container_id = NULL; + const char *node_id = NULL; + int name_len; + + CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL)); + + /* Container ID is displayed only if this is a remote container node */ + if (is_container_remote_node(node)) { + node_container_id = node->details->remote_rsc->container->id; + } + + /* Node ID is displayed if different from uname and detail is requested */ + if (print_clone_detail && safe_str_neq(node->details->uname, node->details->id)) { + node_id = node->details->id; + } + + /* Determine name length */ + name_len = strlen(node->details->uname) + 1; + if (node_container_id) { + name_len += strlen(node_container_id) + 1; /* ":node_container_id" */ + } + if (node_id) { + name_len += strlen(node_id) + 3; /* + " (node_id)" */ + } + + /* Allocate and populate display name */ + node_name = malloc(name_len); + CRM_ASSERT(node_name != NULL); + strcpy(node_name, node->details->uname); + if (node_container_id) { + strcat(node_name, ":"); + strcat(node_name, node_container_id); + } + if (node_id) { + strcat(node_name, " ("); + strcat(node_name, node_id); + strcat(node_name, ")"); + } + return node_name; +} + static void print_neg_locations(pe_working_set_t *data_set) { GListPtr gIter, gIter2; - print_as("\nFencing constraints:\n"); + print_as("\nNegative location constraints:\n"); for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { rsc_to_node_t *location = (rsc_to_node_t *) gIter->data; if (!g_str_has_prefix(location->id, print_neg_location_prefix)) continue; for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { node_t *node = (node_t *) gIter2->data; - if (node->weight >= 0) /* != -INFINITY ??? */ - continue; - print_as(" %s\tprevents %s from running %son %s\n", - location->id, location->rsc_lh->id, - location->role_filter == RSC_ROLE_MASTER ? 
"as Master " : "", - node->details->uname); + + if (node->weight < 0) { + char *node_name = get_node_display_name(node); + + print_as(" %s\tprevents %s from running %son %s\n", + location->id, location->rsc_lh->id, + location->role_filter == RSC_ROLE_MASTER ? "as Master " : "", + node_name); + free(node_name); + } } } } @@ -1307,16 +1370,14 @@ print_status(pe_working_set_t * data_set) print_as("Current DC: NONE\n"); } else if (!hide_headers) { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); + char *dc_name = get_node_display_name(dc); - if (safe_str_neq(dc->details->uname, dc->details->id)) { - print_as("Current DC: %s (%s)", dc->details->uname, dc->details->id); - } else { - print_as("Current DC: %s", dc->details->uname); - } + print_as("Current DC: %s", dc_name); print_as(" - partition %s quorum\n", crm_is_true(quorum) ? "with" : "WITHOUT"); if (dc_version) { print_as("Version: %s\n", crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)); } + free(dc_name); } quorum_node = @@ -1340,13 +1401,7 @@ print_status(pe_working_set_t * data_set) for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_mode = NULL; - char *node_name = NULL; - - if (is_container_remote_node(node)) { - node_name = crm_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); - } else { - node_name = crm_strdup_printf("%s", node->details->uname); - } + char *node_name = get_node_display_name(node); if (node->details->unclean) { if (node->details->online && node->details->unclean) { @@ -1406,14 +1461,11 @@ print_status(pe_working_set_t * data_set) } if (is_container_remote_node(node)) { - print_as("ContainerNode %s: %s\n", node_name, node_mode); + print_as("Container"); } else if (is_baremetal_remote_node(node)) { - print_as("RemoteNode %s: %s\n", node_name, node_mode); - } else if (safe_str_eq(node->details->uname, node->details->id)) { - print_as("Node %s: %s\n", 
node_name, node_mode); - } else { - print_as("Node %s (%s): %s\n", node_name, node->details->id, node_mode); + print_as("Remote"); } + print_as("Node %s: %s\n", node_name, node_mode); if (print_brief && group_by_node) { print_rscs_brief(node->details->running_rsc, "\t", print_opts | pe_print_rsconly, @@ -1501,11 +1553,16 @@ print_status(pe_working_set_t * data_set) for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; + char *node_name; if (node == NULL || node->details->online == FALSE) { continue; } - print_as("* Node %s:\n", node->details->uname); + + node_name = get_node_display_name(node); + print_as("* Node %s:\n", node_name); + free(node_name); + g_hash_table_foreach(node->details->attrs, create_attr_list, NULL); g_list_foreach(attr_list, print_node_attribute, node); g_list_free(attr_list); @@ -1693,6 +1750,9 @@ print_xml_status(pe_working_set_t * data_set) fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false"); fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc)); fprintf(stream, "type=\"%s\" ", node_type); + if (is_container_remote_node(node)) { + fprintf(stream, "container_id=\"%s\" ", node->details->remote_rsc->container->id); + } if (group_by_node) { GListPtr lpc2 = NULL; @@ -1845,7 +1905,10 @@ print_html_status(pe_working_set_t * data_set, const char *filename, gboolean we if (dc == NULL) { fprintf(stream, "Current DC: NONE
"); } else { - fprintf(stream, "Current DC: %s (%s)
", dc->details->uname, dc->details->id); + char *dc_name = get_node_display_name(dc); + + fprintf(stream, "Current DC: %s
\n", dc_name); + free(dc_name); } fprintf(stream, "%d Nodes configured.
", g_list_length(data_set->nodes)); fprintf(stream, "%d Resources configured.
", count_resources(data_set, NULL)); @@ -1884,29 +1947,23 @@ print_html_status(pe_working_set_t * data_set, const char *filename, gboolean we fprintf(stream, "
    \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; + char *node_name = get_node_display_name(node); - fprintf(stream, "
  • "); + fprintf(stream, "
  • Node: %s: ", node_name); if (node->details->standby_onfail && node->details->online) { - fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, - "standby (on-fail)\n"); + fprintf(stream, "standby (on-fail)\n"); } else if (node->details->standby && node->details->online) { - fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, - "standby\n"); + fprintf(stream, "standby\n"); } else if (node->details->standby) { - fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, - "OFFLINE (standby)\n"); + fprintf(stream, "OFFLINE (standby)\n"); } else if (node->details->maintenance && node->details->online) { - fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, - "maintenance\n"); + fprintf(stream, "maintenance\n"); } else if (node->details->maintenance) { - fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, - "OFFLINE (maintenance)\n"); + fprintf(stream, "OFFLINE (maintenance)\n"); } else if (node->details->online) { - fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, - "online\n"); + fprintf(stream, "online\n"); } else { - fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, - "OFFLINE\n"); + fprintf(stream, "OFFLINE\n"); } if (print_brief && group_by_node) { fprintf(stream, "
      \n"); From b2ae244cafeef69e2e4e1675cfe1f3fa45852867 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 19 Mar 2015 16:04:20 -0400 Subject: [PATCH 08/22] tools: crm_mon prints Stopped clones only if --inactive was specified This introduces a new pe_print_options flag pe_print_clone_active; if set, the print method for clone resources will print only instances that are active. crm_mon sets this new flag unless the --inactive command-line option was used. --- include/crm/pengine/common.h | 1 + lib/pengine/clone.c | 48 +++++++++++++++++++----------------- tools/crm_mon.c | 3 +++ 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h index 0d853c0b97f..cc9347a7aa1 100644 --- a/include/crm/pengine/common.h +++ b/include/crm/pengine/common.h @@ -111,6 +111,7 @@ enum pe_print_options { pe_print_brief = 0x0800, pe_print_pending = 0x1000, pe_print_clone_details = 0x2000, + pe_print_clone_active = 0x4000, /* print clone instances only if active */ }; /* *INDENT-ON* */ diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index ea1a9b23ea2..6213694a7bf 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -426,7 +426,8 @@ clone_print(resource_t * rsc, const char *pre_text, long options, void *print_da } else if (is_set(rsc->flags, pe_rsc_unique)) { print_full = TRUE; - } else { + + } else if (is_not_set(options, pe_print_clone_active)) { stopped_list = add_list_element(stopped_list, child_rsc->id); } @@ -508,37 +509,38 @@ clone_print(resource_t * rsc, const char *pre_text, long options, void *print_da free(list_text); list_text = NULL; - /* Stopped */ - if(is_not_set(rsc->flags, pe_rsc_unique) - && (clone_data->clone_max > active_instances)) { + if (is_not_set(options, pe_print_clone_active)) { + /* Stopped */ + if (is_not_set(rsc->flags, pe_rsc_unique) + && (clone_data->clone_max > active_instances)) { - GListPtr nIter; - GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); + 
GListPtr nIter; + GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); - /* Custom stopped list for non-unique clones */ - free(stopped_list); stopped_list = NULL; + /* Custom stopped list for non-unique clones */ + free(stopped_list); stopped_list = NULL; - if(g_list_length(list) == 0) { - /* Clusters with symmetrical=false haven't calculated allowed_nodes yet - * If we've not probed for them yet, the Stopped list will be empty - */ - list = g_hash_table_get_values(rsc->known_on); - } + if (g_list_length(list) == 0) { + /* Clusters with symmetrical=false haven't calculated allowed_nodes yet + * If we've not probed for them yet, the Stopped list will be empty + */ + list = g_hash_table_get_values(rsc->known_on); + } - list = g_list_sort(list, sort_node_uname); - for (nIter = list; nIter != NULL; nIter = nIter->next) { - node_t *node = (node_t *)nIter->data; + list = g_list_sort(list, sort_node_uname); + for (nIter = list; nIter != NULL; nIter = nIter->next) { + node_t *node = (node_t *)nIter->data; - if(pe_find_node(rsc->running_on, node->details->uname) == NULL) { - stopped_list = add_list_element(stopped_list, node->details->uname); + if (pe_find_node(rsc->running_on, node->details->uname) == NULL) { + stopped_list = add_list_element(stopped_list, node->details->uname); + } } + g_list_free(list); } - g_list_free(list); + short_print(stopped_list, child_text, "Stopped", options, print_data); + free(stopped_list); } - short_print(stopped_list, child_text, "Stopped", options, print_data); - free(stopped_list); - if (options & pe_print_html) { status_print("
    \n"); } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 3e050c056ca..52343a28015 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1276,6 +1276,9 @@ get_resource_display_options(void) if (print_clone_detail) { print_opts |= pe_print_clone_details; } + if (!inactive_resources) { + print_opts |= pe_print_clone_active; + } if (print_brief) { print_opts |= pe_print_brief; } From c7d3f0c8c5b2cdc385d480ba050dc276dee9aa4a Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 13 Mar 2015 19:06:07 -0400 Subject: [PATCH 09/22] High: extra: docker container tech wrapper script for pcmk remote --- extra/resources/Makefile.am | 5 + extra/resources/docker-wrapper | 492 +++++++++++++++++++++++++++++++++ 2 files changed, 497 insertions(+) create mode 100755 extra/resources/docker-wrapper diff --git a/extra/resources/Makefile.am b/extra/resources/Makefile.am index cc162e5e6ec..955e233a1b8 100644 --- a/extra/resources/Makefile.am +++ b/extra/resources/Makefile.am @@ -21,6 +21,9 @@ include $(top_srcdir)/Makefile.common EXTRA_DIST = $(ocf_SCRIPTS) + +containertechdir = @OCF_RA_DIR@/containers + ocfdir = @OCF_RA_DIR@/pacemaker ocf_SCRIPTS = ClusterMon \ @@ -36,6 +39,8 @@ ocf_SCRIPTS = ClusterMon \ SystemHealth \ remote +containertech_SCRIPTS = docker-wrapper + if BUILD_XML_HELP man7_MANS = $(ocf_SCRIPTS:%=ocf_pacemaker_%.7) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper new file mode 100755 index 00000000000..c67022ff2be --- /dev/null +++ b/extra/resources/docker-wrapper @@ -0,0 +1,492 @@ +#!/bin/sh +# +# Copyright (c) 2015 David Vossel +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +Docker technology wrapper for pacemaker remote. + +docker wrapper + + + + +Docker image to run resources within + +docker image + + + + + +Give resources within container access to cluster resources +such as the CIB and the ability to manage cluster attributes. + +is privileged + + + + + +Add options to be appended to the 'docker run' command which is used +when creating the container during the start action. This option allows +users to do things such as setting a custom entry point and injecting +environment variables into the newly created container. Note the '-d' +option is supplied regardless of this value to force containers to run +in the background. + +NOTE: Do not explicitly specify the --name argument in the run_opts. 
This +agent will set --name using the resource's instance name + + +run options + + + + + + + + + + + + + + +END +} + +####################################################################### + + +CLIENT="/usr/libexec/pacemaker/pacemaker_remote_ctl" +DOCKER_AGENT="/usr/lib/ocf/resource.d/heartbeat/docker" +KEY_VAL_STR="" +PROVIDER=$OCF_RESKEY_CRM_meta_provider +CLASS=$OCF_RESKEY_CRM_meta_class +TYPE=$OCF_RESKEY_CRM_meta_type + +CONTAINER=$OCF_RESKEY_CRM_meta_isolation_instance +if [ -z "$CONTAINER" ]; then + CONTAINER=$OCF_RESOURCE_INSTANCE +fi + +RSC_STATE_DIR="${HA_RSCTMP}/docker-wrapper/${CONTAINER}-data/" +RSC_STATE_FILE="$RSC_STATE_DIR/$OCF_RESOURCE_INSTANCE.state" +CONNECTION_FAILURE=0 + +pcmk_docker_wrapper_usage() { + cat < $RSC_STATE_FILE + fi +} + +clear_state_file() +{ + if [ -f "$RSC_STATE_FILE" ]; then + rm -f $RSC_STATE_FILE + fi +} + +clear_state_dir() +{ + [ -d "$RSC_STATE_DIR" ] || return 0 + + rm -rf $RSC_STATE_DIR +} + +num_active_resources() +{ + local count + + [ -d "$RSC_STATE_DIR" ] || return 0 + + count="$(ls $RSC_STATE_DIR | wc -w)" + if [ $? -ne 0 ] || [ -z "$count" ]; then + return 0 + fi + return $count +} + +random_port() +{ + local port=$(python -c 'import socket; s=socket.socket(); s.bind(("localhost", 0)); print(s.getsockname()[1]); s.close()') + if [ $? -eq 0 ] && [ -n "$port" ]; then + echo "$port" + fi +} + +get_active_port() +{ + PORT="$(docker port $CONTAINER 3121 | awk -F: '{ print $2 }')" +} + +# separate docker args from ocf resource args. 
+separate_args() +{ + local env key value + + # write out arguments to key value string for ocf agent + for item in $(printenv | grep "^OCF.*" | grep -v "^OCF_RESKEY_pcmk_docker_.*"); + do + key="$(echo $item | awk -F= '{print $1}')" + val="$(echo $item | awk -F= '{print $2}')" + KEY_VAL_STR="$KEY_VAL_STR -k \"$key\" -v \"$val\"" + done + + # sanitize args for DOCKER agent's consumption + for item in $(printenv | grep "^OCF_RESKEY_pcmk_docker_.*"); + do + env="$(echo $item | awk -F= '{print $1}')" + val="$(echo $item | awk -F= '{print $2}')" + key="$(echo "$env" | sed 's/^OCF_RESKEY_pcmk_docker/OCF_RESKEY/g')" + export ${key}=$(echo $val) + done + + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + export OCF_RESKEY_run_cmd="/usr/sbin/pacemaker_remoted" + # on start set random port to run_opts + # write port to state file... or potentially get from ps? maybe docker info or inspect as well? + + else + export OCF_RESKEY_run_cmd="/usr/libexec/pacemaker/lrmd" + fi + export OCF_RESKEY_name="$CONTAINER" +} + +monitor_container() +{ + local rc + + $DOCKER_AGENT monitor + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + clear_state_dir + return $rc + fi + + poke_remote + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + # container is up without an active daemon. this is bad + ocf_log err "Container, $CONTAINER, is active without a responsive pacemaker_remote instance" + CONNECTION_FAILURE=1 + return $OCF_ERR_GENERIC + fi + CONNECTION_FAILURE=0 + + return $rc +} + +pcmk_docker_wrapper_monitor() { + local rc + + monitor_container + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + client_action "monitor" + rc=$? 
+ if [ $rc -eq $OCF_SUCCESS ]; then + write_state_file + elif [ $rc -eq $OCF_NOT_RUNNING ]; then + clear_state_file + fi + + return $rc +} + +client_action() +{ + local action=$1 + local agent_type="-T $TYPE -C $CLASS" + local rc=0 + + if [ -n "$PROVIDER" ]; then + agent_type="$agent_type -P $PROVIDER" + fi + + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + if [ -z "$PORT" ]; then + get_active_port + fi + ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" + $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" $agent_type $KEY_VAL_STR >/dev/null 2>&1 + else + echo "$CLIENT -c \"exec\" -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1 + + + fi + rc=$? + + ocf_log debug "Client action $action with result $rc" + return $rc +} + +poke_remote() +{ + # verifies daemon in container is active + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + get_active_port + ocf_log debug "Attempting to contect $CONTAINER on port $PORT" + $CLIENT -c "poke" -S "127.0.0.1" -p $PORT >/dev/null 2>&1 + fi + # no op for non privileged containers since we handed the + # client monitor action as the monitor_cmd for the docker agent +} + +pcmk_docker_wrapper_reload() +{ + local rc + + monitor_container + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + client_action "reload" +} + +start_container() +{ + local rc + + monitor_container + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + return $rc + fi + + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + if ! [ -f "/etc/pacemaker/authkey" ]; then + # generate an authkey if it doesn't exist. 
+ mkdir -p /etc/pacemaker/ + dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 > /dev/null 2>&1 + fi + + PORT=$(random_port) + if [ -z "$PORT" ]; then + ocf_exit_reason "Unable to assign random port for pacemaker remote" + return $OCF_ERR_GENERIC + fi + export OCF_RESKEY_run_opts="-p 127.0.0.1:${PORT}:3121 $OCF_RESKEY_run_opts" + export OCF_RESKEY_run_opts="-v /etc/pacemaker/authkey:/etc/pacemaker/authkey $OCF_RESKEY_run_opts" + ocf_log debug "using privileged mode: run_opts=$OCF_RESKEY_run_opts" + else + export OCF_RESKEY_monitor_cmd="$CLIENT -c poke" + fi + + $DOCKER_AGENT start + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Docker container failed to start" + return $rc + fi + + monitor_container +} + +pcmk_docker_wrapper_start() { + local rc + + start_container + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + client_action "start" + rc=$? + if [ $rc -ne "$OCF_SUCCESS" ]; then + ocf_exit_reason "Failed to start agent within container" + return $rc + fi + + pcmk_docker_wrapper_monitor + return $? +} + +stop_container() +{ + local rc + local count + + num_active_resources + count=$? + if [ $count -ne 0 ]; then + ocf_log err "Failed to stop agent within container. Killing container $CONTAINER with $count active resources" + fi + + $DOCKER_AGENT "stop" + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Docker container failed to stop" + return $rc + fi + clear_state_dir + return $rc +} + +stop_resource() +{ + local rc + + client_action "stop" + rc=$? + if [ $rc -ne "$OCF_SUCCESS" ]; then + export OCF_RESKEY_force_stop="true" + kill_now=1 + else + clear_state_file + fi +} + +pcmk_docker_wrapper_stop() { + local rc + local kill_now=0 + local all_stopped=0 + + pcmk_docker_wrapper_monitor + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + rc=$OCF_SUCCESS + num_active_resources + if [ $? 
-eq 0 ]; then + # stop container if no more resources are running + ocf_log info "Gracefully stopping container $CONTAINER because no resources are left running." + stop_container + rc=$? + fi + return $rc + fi + + # if we can't talk to the remote daemon but the container is + # active, we have to force kill the container. + if [ $CONNECTION_FAILURE -eq 1 ]; then + export OCF_RESKEY_force_kill="true" + stop_container + return $? + fi + + + # If we've gotten this far, the container is up, and we + # need to gracefully stop a resource within the container. + client_action "stop" + rc=$? + if [ $rc -ne "$OCF_SUCCESS" ]; then + export OCF_RESKEY_force_stop="true" + # force kill the container if we fail to stop a resource. + stop_container + rc=$? + else + clear_state_file + num_active_resources + if [ $? -eq 0 ]; then + # stop container if no more resources are running + ocf_log info "Gracefully stopping container $CONTAINER because last resource has stopped" + stop_container + rc=$? + fi + fi + + return $rc +} + +pcmk_docker_wrapper_validate() { + + if [ -z "$CLASS" ] || [ -z "$TYPE" ]; then + ocf_exit_reason "Update pacemaker to a version that supports container wrappers." + return $OCF_ERR_CONFIGURED + fi + + if ! [ -f "$DOCKER_AGENT" ]; then + ocf_exit_reason "Requires $DOCKER_AGENT to be installed. update the resource-agents package" + return $OCF_ERR_INSTALLED + fi + $DOCKER_AGENT validate-all + return $? +} + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +usage|help) pcmk_docker_wrapper_usage + exit $OCF_SUCCESS + ;; +esac + +separate_args +pcmk_docker_wrapper_validate +rc=$? 
+if [ $rc -ne 0 ]; then + case $__OCF_ACTION in + stop) exit $OCF_SUCCESS;; + monitor) exit $OCF_NOT_RUNNING;; + *) exit $rc;; + esac +fi + +case $__OCF_ACTION in + start) pcmk_docker_wrapper_start;; + stop) pcmk_docker_wrapper_stop;; + monitor) pcmk_docker_wrapper_monitor;; + reload) pcmk_docker_wrapper_reload;; + validate-all) pcmk_docker_wrapper_validate;; + *) pcmk_docker_wrapper_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? +ocf_log debug "Docker-wrapper ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc + From eb74798c41f7b97b061d8990a93f58b9043c543e Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 20 Mar 2015 17:06:28 -0400 Subject: [PATCH 10/22] High: pengine: ability to launch resources in isolated containers --- include/crm/msg_xml.h | 3 +++ include/crm/pengine/status.h | 2 ++ lib/pengine/complex.c | 16 ++++++++++++-- lrmd/lrmd.c | 41 +++++++++++++++++++++++++++++------- lrmd/lrmd_private.h | 3 +++ pengine/native.c | 37 ++++++++++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 10 deletions(-) diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index d3c2643fc5d..ed680e9430f 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -185,6 +185,9 @@ # define XML_CIB_TAG_RSC_TEMPLATE "template" +# define XML_RSC_ATTR_ISOLATION_INSTANCE "isolation-instance" +# define XML_RSC_ATTR_ISOLATION_WRAPPER "isolation-wrapper" +# define XML_RSC_ATTR_ISOLATION "isolation" # define XML_RSC_ATTR_RESTART "restart-type" # define XML_RSC_ATTR_ORDERED "ordered" # define XML_RSC_ATTR_INTERLEAVE "interleave" diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index 0460767c879..f7c8fac239d 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -290,6 +290,8 @@ struct resource_s { GListPtr fillers; char *pending_task; + + const char *isolation_wrapper; }; struct pe_action_s { diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 99571b8b1e0..9c876959d75 100644 --- 
a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -492,10 +492,22 @@ common_unpack(xmlNode * xml_obj, resource_t ** rsc, } pe_rsc_trace((*rsc), "Options for %s", (*rsc)->id); - value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); top = uber_parent(*rsc); - if (crm_is_true(value) || top->variant < pe_clone) { + + /* check for isolation wrapper mapping if the parent doesn't have one set + * isolation mapping is enabled by default. For safety, we are allowing isolation + * to be disabled by setting the meta attr, isolation=false. */ + value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_ISOLATION); + if (top->isolation_wrapper == NULL && (value == NULL || crm_is_true(value))) { + if (g_hash_table_lookup((*rsc)->meta, "pcmk_docker_image")) { + (*rsc)->isolation_wrapper = "docker-wrapper"; + } + /* add more isolation technologies here as we expand */ + } + + value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); + if (crm_is_true(value) || top->variant < pe_clone || (*rsc)->isolation_wrapper) { set_bit((*rsc)->flags, pe_rsc_unique); } diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 3fe56ab61b6..30b7926a33e 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -65,6 +65,9 @@ typedef struct lrmd_cmd_s { char *output; char *userdata_str; + /* when set, this cmd should go through a container wrapper */ + const char *isolation_wrapper; + #ifdef HAVE_SYS_TIMEB_H /* Timestamp of when op first ran */ struct timeb t_first_run; @@ -156,7 +159,7 @@ build_rsc_from_xml(xmlNode * msg) } static lrmd_cmd_t * -create_lrmd_cmd(xmlNode * msg, crm_client_t * client) +create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); @@ -180,6 +183,18 @@ create_lrmd_cmd(xmlNode * msg, crm_client_t * client) cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); + cmd->isolation_wrapper = g_hash_table_lookup(cmd->params, 
"CRM_meta_isolation_wrapper"); + + if (cmd->isolation_wrapper) { + if (g_hash_table_lookup(cmd->params, "CRM_meta_isolation_instance") == NULL) { + g_hash_table_insert(cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id)); + } + if (rsc->provider) { + g_hash_table_insert(cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider)); + } + g_hash_table_insert(cmd->params, strdup("CRM_meta_class"), strdup(rsc->class)); + g_hash_table_insert(cmd->params, strdup("CRM_meta_type"), strdup(rsc->type)); + } return cmd; } @@ -1161,12 +1176,22 @@ lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) } } - action = resources_action_create(rsc->rsc_id, - rsc->class, - rsc->provider, - rsc->type, - normalize_action_name(rsc, cmd->action), - cmd->interval, cmd->timeout, params_copy); + if (cmd->isolation_wrapper) { + action = resources_action_create(rsc->rsc_id, + "ocf", + CONTAINER_PROVIDER, + cmd->isolation_wrapper, + cmd->action, /*action will be normalized in wrapper*/ + cmd->interval, cmd->timeout, params_copy); + } else { + action = resources_action_create(rsc->rsc_id, + rsc->class, + rsc->provider, + rsc->type, + normalize_action_name(rsc, cmd->action), + cmd->interval, cmd->timeout, params_copy); + + } if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); @@ -1450,7 +1475,7 @@ process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) return -ENODEV; } - cmd = create_lrmd_cmd(request, client); + cmd = create_lrmd_cmd(request, client, rsc); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. 
diff --git a/lrmd/lrmd_private.h b/lrmd/lrmd_private.h index 02593d39d3f..340f150a381 100644 --- a/lrmd/lrmd_private.h +++ b/lrmd/lrmd_private.h @@ -29,6 +29,9 @@ # undef KEYFILE # include # endif + +#define CONTAINER_PROVIDER "containers" + GHashTable *rsc_list; typedef struct lrmd_rsc_s { diff --git a/pengine/native.c b/pengine/native.c index 8e30b9423b1..7faf692798b 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -3267,6 +3267,7 @@ void native_append_meta(resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); + resource_t *parent, *last_parent; if (value) { char *name = NULL; @@ -3284,4 +3285,40 @@ native_append_meta(resource_t * rsc, xmlNode * xml) crm_xml_add(xml, name, value); free(name); } + + last_parent = parent = rsc; + while (parent != NULL) { + char *name = NULL; + + if (parent->isolation_wrapper == NULL) { + last_parent = parent; + parent = parent->parent; + continue; + } + + /* name of wrapper script this resource is routed through. 
*/ + name = crm_meta_name(XML_RSC_ATTR_ISOLATION_WRAPPER); + crm_xml_add(xml, name, parent->isolation_wrapper); + free(name); + + /* instance name for isolated environment */ + name = crm_meta_name(XML_RSC_ATTR_ISOLATION_INSTANCE); + if (parent->variant < pe_clone) { + crm_xml_add(xml, name, parent->id); + } else { + char *iso = NULL; + /* if isolation is set at the clone/master level, we have to + * give this resource the unique isolation instance associated + * with the matching clone child */ + value = g_hash_table_lookup(last_parent->meta, XML_RSC_ATTR_INCARNATION); + CRM_ASSERT(value != NULL); + + iso = crm_concat(parent->id, value, '_'); + crm_xml_add(xml, name, iso); + free(iso); + } + free(name); + + break; + } } From 5cdb4bb9ed361b2399632d111cba821ba00a6b71 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 20 Mar 2015 17:11:40 -0400 Subject: [PATCH 11/22] Low: pengine: isolation regression tests --- pengine/regression.sh | 3 + pengine/test10/isolation-start-all.dot | 81 ++++ pengine/test10/isolation-start-all.exp | 460 +++++++++++++++++++++ pengine/test10/isolation-start-all.scores | 67 +++ pengine/test10/isolation-start-all.summary | 94 +++++ pengine/test10/isolation-start-all.xml | 189 +++++++++ 6 files changed, 894 insertions(+) create mode 100644 pengine/test10/isolation-start-all.dot create mode 100644 pengine/test10/isolation-start-all.exp create mode 100644 pengine/test10/isolation-start-all.scores create mode 100644 pengine/test10/isolation-start-all.summary create mode 100644 pengine/test10/isolation-start-all.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index f2dbef13348..58ff4d476e5 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -798,5 +798,8 @@ echo "" do_test resource-discovery "Exercises resource-discovery location constraint option." do_test rsc-discovery-per-node "Disable resource discovery per node" +echo "" +do_test isolation-start-all "Start docker isolated resources." 
+ echo "" test_results diff --git a/pengine/test10/isolation-start-all.dot b/pengine/test10/isolation-start-all.dot new file mode 100644 index 00000000000..b6fbe164ddf --- /dev/null +++ b/pengine/test10/isolation-start-all.dot @@ -0,0 +1,81 @@ + digraph "g" { +"fake_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"fake_start_0 rhel7-auto5" -> "fake_monitor_60000 rhel7-auto5" [ style = bold] +"fake_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_start_0 rhel7-auto5" -> "g1:0_monitor_10000 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g1:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_start_0 rhel7-auto5" -> "g1:1_monitor_10000 rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g1:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_start_0 rhel7-auto5" -> "g2:0_monitor_10000 rhel7-auto5" [ style = bold] +"g2:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g2:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_start_0 rhel7-auto5" -> "g2:1_monitor_10000 rhel7-auto5" [ style = bold] +"g2:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g2:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"group_is_container_running_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_start_0" -> "group_is_container_running_0" [ 
style = bold] +"group_is_container_start_0" -> "s1_start_0 rhel7-auto5" [ style = bold] +"group_is_container_start_0" -> "s2_start_0 rhel7-auto5" [ style = bold] +"group_is_container_start_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_running_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_start_0" -> "group_of_containers_running_0" [ style = bold] +"group_of_containers_start_0" -> "iso_mem1_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" [ style=bold color="green" fontcolor="orange"] +"iso_mem1_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem1_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem1_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" -> "iso_mem2_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"mygroup-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_start_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:0_start_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:1_start_0" [ style = bold] +"mygroup-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:0_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_start_0" -> "g1:0_start_0 rhel7-auto5" [ style = bold] +"mygroup:0_start_0" -> "g2:0_start_0 rhel7-auto5" [ style = 
bold] +"mygroup:0_start_0" -> "mygroup:0_running_0" [ style = bold] +"mygroup:0_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:1_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_start_0" -> "g1:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "mygroup:1_running_0" [ style = bold] +"mygroup:1_start_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_start_0" -> "replicated-clone_running_0" [ style = bold] +"replicated-clone_start_0" -> "replicated:0_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" -> "replicated:1_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"replicated:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:0_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:0_start_0 rhel7-auto5" -> "replicated:0_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:1_start_0 rhel7-auto5" -> "replicated:1_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s1_monitor_10000 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s2_start_0 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" [ style=bold color="green" 
fontcolor="black"] +"s2_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s2_start_0 rhel7-auto5" -> "s2_monitor_10000 rhel7-auto5" [ style = bold] +"s2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/isolation-start-all.exp b/pengine/test10/isolation-start-all.exp new file mode 100644 index 00000000000..8b059da1a9c --- /dev/null +++ b/pengine/test10/isolation-start-all.exp @@ -0,0 +1,460 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/isolation-start-all.scores b/pengine/test10/isolation-start-all.scores new file mode 100644 index 00000000000..56d40a70012 --- /dev/null +++ b/pengine/test10/isolation-start-all.scores @@ -0,0 +1,67 @@ +Allocation scores: +clone_color: g1:0 allocation score on rhel7-auto4: 0 +clone_color: g1:0 allocation score on rhel7-auto5: 0 +clone_color: g1:1 allocation score on rhel7-auto4: 0 +clone_color: g1:1 allocation score on rhel7-auto5: 0 +clone_color: g2:0 allocation score on rhel7-auto4: 0 +clone_color: 
g2:0 allocation score on rhel7-auto5: 0 +clone_color: g2:1 allocation score on rhel7-auto4: 0 +clone_color: g2:1 allocation score on rhel7-auto5: 0 +clone_color: mygroup-clone allocation score on rhel7-auto4: 0 +clone_color: mygroup-clone allocation score on rhel7-auto5: 0 +clone_color: mygroup:0 allocation score on rhel7-auto4: 0 +clone_color: mygroup:0 allocation score on rhel7-auto5: 0 +clone_color: mygroup:1 allocation score on rhel7-auto4: 0 +clone_color: mygroup:1 allocation score on rhel7-auto5: 0 +clone_color: replicated-clone allocation score on rhel7-auto4: 0 +clone_color: replicated-clone allocation score on rhel7-auto5: 0 +clone_color: replicated:0 allocation score on rhel7-auto4: 0 +clone_color: replicated:0 allocation score on rhel7-auto5: 0 +clone_color: replicated:1 allocation score on rhel7-auto4: 0 +clone_color: replicated:1 allocation score on rhel7-auto5: 0 +group_color: g1:0 allocation score on rhel7-auto4: -INFINITY +group_color: g1:0 allocation score on rhel7-auto5: 0 +group_color: g1:1 allocation score on rhel7-auto4: -INFINITY +group_color: g1:1 allocation score on rhel7-auto5: 0 +group_color: g2:0 allocation score on rhel7-auto4: -INFINITY +group_color: g2:0 allocation score on rhel7-auto5: 0 +group_color: g2:1 allocation score on rhel7-auto4: -INFINITY +group_color: g2:1 allocation score on rhel7-auto5: 0 +group_color: group_is_container allocation score on rhel7-auto4: 0 +group_color: group_is_container allocation score on rhel7-auto5: 0 +group_color: group_of_containers allocation score on rhel7-auto4: 0 +group_color: group_of_containers allocation score on rhel7-auto5: 0 +group_color: iso_mem1 allocation score on rhel7-auto4: 0 +group_color: iso_mem1 allocation score on rhel7-auto5: 0 +group_color: iso_mem2 allocation score on rhel7-auto4: 0 +group_color: iso_mem2 allocation score on rhel7-auto5: 0 +group_color: mygroup:0 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:0 allocation score on rhel7-auto5: 0 +group_color: 
mygroup:1 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:1 allocation score on rhel7-auto5: 0 +group_color: s1 allocation score on rhel7-auto4: 0 +group_color: s1 allocation score on rhel7-auto5: 0 +group_color: s2 allocation score on rhel7-auto4: 0 +group_color: s2 allocation score on rhel7-auto5: 0 +native_color: fake allocation score on rhel7-auto4: 0 +native_color: fake allocation score on rhel7-auto5: 0 +native_color: g1:0 allocation score on rhel7-auto4: -INFINITY +native_color: g1:0 allocation score on rhel7-auto5: 0 +native_color: g1:1 allocation score on rhel7-auto4: -INFINITY +native_color: g1:1 allocation score on rhel7-auto5: 0 +native_color: g2:0 allocation score on rhel7-auto4: -INFINITY +native_color: g2:0 allocation score on rhel7-auto5: 0 +native_color: g2:1 allocation score on rhel7-auto4: -INFINITY +native_color: g2:1 allocation score on rhel7-auto5: 0 +native_color: iso_mem1 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem1 allocation score on rhel7-auto5: 0 +native_color: iso_mem2 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem2 allocation score on rhel7-auto5: 0 +native_color: replicated:0 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:0 allocation score on rhel7-auto5: 0 +native_color: replicated:1 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:1 allocation score on rhel7-auto5: 0 +native_color: s1 allocation score on rhel7-auto4: -INFINITY +native_color: s1 allocation score on rhel7-auto5: 0 +native_color: s2 allocation score on rhel7-auto4: -INFINITY +native_color: s2 allocation score on rhel7-auto5: 0 diff --git a/pengine/test10/isolation-start-all.summary b/pengine/test10/isolation-start-all.summary new file mode 100644 index 00000000000..5f777727485 --- /dev/null +++ b/pengine/test10/isolation-start-all.summary @@ -0,0 +1,94 @@ + +Current cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): 
Stopped + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Stopped + replicated:1 (ocf::heartbeat:Dummy): Stopped + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Stopped + s2 (ocf::heartbeat:Dummy): Stopped + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Stopped + g2:0 (ocf::heartbeat:Dummy): Stopped + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Stopped + g2:1 (ocf::heartbeat:Dummy): Stopped + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Stopped + iso_mem2 (ocf::heartbeat:Dummy): Stopped + +Transition Summary: + * Start fake (rhel7-auto5) + * Start replicated:0 (rhel7-auto5) + * Start replicated:1 (rhel7-auto5) + * Start s1 (rhel7-auto5) + * Start s2 (rhel7-auto5) + * Start g1:0 (rhel7-auto5) + * Start g2:0 (rhel7-auto5) + * Start g1:1 (rhel7-auto5) + * Start g2:1 (rhel7-auto5) + * Start iso_mem1 (rhel7-auto5) + * Start iso_mem2 (rhel7-auto5) + +Executing cluster transition: + * Resource action: fake start on rhel7-auto5 + * Pseudo action: replicated-clone_start_0 + * Pseudo action: group_is_container_start_0 + * Resource action: s1 start on rhel7-auto5 + * Resource action: s2 start on rhel7-auto5 + * Pseudo action: mygroup-clone_start_0 + * Pseudo action: group_of_containers_start_0 + * Resource action: iso_mem1 start on rhel7-auto5 + * Resource action: iso_mem2 start on rhel7-auto5 + * Resource action: fake monitor=60000 on rhel7-auto5 + * Resource action: replicated:0 start on rhel7-auto5 + * Resource action: replicated:1 start on rhel7-auto5 + * Pseudo action: replicated-clone_running_0 + * Pseudo action: group_is_container_running_0 + * Resource action: s1 monitor=10000 on rhel7-auto5 + * Resource action: s2 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:0_start_0 + * Resource action: g1:0 start on rhel7-auto5 + * Resource action: g2:0 start on rhel7-auto5 + * Pseudo action: mygroup:1_start_0 + * 
Resource action: g1:1 start on rhel7-auto5 + * Resource action: g2:1 start on rhel7-auto5 + * Pseudo action: group_of_containers_running_0 + * Resource action: iso_mem1 monitor=60000 on rhel7-auto5 + * Resource action: iso_mem2 monitor=60000 on rhel7-auto5 + * Resource action: replicated:0 monitor=10000 on rhel7-auto5 + * Resource action: replicated:1 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:0_running_0 + * Resource action: g1:0 monitor=10000 on rhel7-auto5 + * Resource action: g2:0 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:1_running_0 + * Resource action: g1:1 monitor=10000 on rhel7-auto5 + * Resource action: g2:1 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup-clone_running_0 + +Revised cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + replicated:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + s2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + iso_mem2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + diff --git a/pengine/test10/isolation-start-all.xml b/pengine/test10/isolation-start-all.xml new file mode 100644 index 00000000000..98580ab3e2e --- /dev/null +++ b/pengine/test10/isolation-start-all.xml @@ -0,0 +1,189 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From a736fbaa367e93c0f1d416b4fb2e19d2c182eb9e Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 16:46:51 -0400 Subject: [PATCH 12/22] High: pengine: disable reloading of resources within isolated container wrappers --- pengine/allocate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pengine/allocate.c b/pengine/allocate.c index c9e446a9cbf..6c5d581367e 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -286,7 +286,7 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op set_bit(op->flags, pe_action_reschedule); #endif - } else if (digest_restart) { + } else if (digest_restart && rsc->isolation_wrapper == NULL && (uber_parent(rsc))->isolation_wrapper == NULL) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Allow this resource to reload - unless something else causes a full restart */ From 7770e0a57df18a7e493bc0a74d97519ce0298b63 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 16:49:18 -0400 Subject: [PATCH 13/22] Low: pengine: regression tests for disable reload during isolation --- pengine/regression.sh | 1 + pengine/test10/isolation-restart-all.dot | 167 ++++ pengine/test10/isolation-restart-all.exp | 873 +++++++++++++++++++ pengine/test10/isolation-restart-all.scores | 67 ++ pengine/test10/isolation-restart-all.summary | 118 +++ pengine/test10/isolation-restart-all.xml | 184 ++++ 6 files changed, 1410 insertions(+) create mode 100644 pengine/test10/isolation-restart-all.dot create mode 100644 pengine/test10/isolation-restart-all.exp create mode 100644 pengine/test10/isolation-restart-all.scores create mode 100644 pengine/test10/isolation-restart-all.summary create mode 100644 pengine/test10/isolation-restart-all.xml diff --git 
a/pengine/regression.sh b/pengine/regression.sh index 58ff4d476e5..59e511e488d 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -800,6 +800,7 @@ do_test rsc-discovery-per-node "Disable resource discovery per node" echo "" do_test isolation-start-all "Start docker isolated resources." +do_test isolation-restart-all "Restart docker isolated resources." echo "" test_results diff --git a/pengine/test10/isolation-restart-all.dot b/pengine/test10/isolation-restart-all.dot new file mode 100644 index 00000000000..4c0df1213d2 --- /dev/null +++ b/pengine/test10/isolation-restart-all.dot @@ -0,0 +1,167 @@ + digraph "g" { +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"fake_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"fake_start_0 rhel7-auto5" -> "fake_monitor_60000 rhel7-auto5" [ style = bold] +"fake_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"fake_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"fake_stop_0 rhel7-auto5" -> "fake_start_0 rhel7-auto5" [ style = bold] +"fake_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_start_0 rhel7-auto5" -> "g1:0_monitor_10000 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g1:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g1:0_stop_0 rhel7-auto5" -> "g1:0_start_0 rhel7-auto5" [ style = bold] +"g1:0_stop_0 rhel7-auto5" -> "mygroup:0_stopped_0" [ style = bold] +"g1:0_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_start_0 rhel7-auto5" -> "g1:1_monitor_10000 rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "g2:1_start_0 
rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g1:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g1:1_stop_0 rhel7-auto5" -> "g1:1_start_0 rhel7-auto5" [ style = bold] +"g1:1_stop_0 rhel7-auto5" -> "mygroup:1_stopped_0" [ style = bold] +"g1:1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_start_0 rhel7-auto5" -> "g2:0_monitor_10000 rhel7-auto5" [ style = bold] +"g2:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g2:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g2:0_stop_0 rhel7-auto5" -> "g1:0_stop_0 rhel7-auto5" [ style = bold] +"g2:0_stop_0 rhel7-auto5" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"g2:0_stop_0 rhel7-auto5" -> "mygroup:0_stopped_0" [ style = bold] +"g2:0_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_start_0 rhel7-auto5" -> "g2:1_monitor_10000 rhel7-auto5" [ style = bold] +"g2:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g2:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g2:1_stop_0 rhel7-auto5" -> "g1:1_stop_0 rhel7-auto5" [ style = bold] +"g2:1_stop_0 rhel7-auto5" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"g2:1_stop_0 rhel7-auto5" -> "mygroup:1_stopped_0" [ style = bold] +"g2:1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"group_is_container_running_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_start_0" -> "group_is_container_running_0" [ style = bold] +"group_is_container_start_0" -> "s1_start_0 rhel7-auto5" [ style = bold] 
+"group_is_container_start_0" -> "s2_start_0 rhel7-auto5" [ style = bold] +"group_is_container_start_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_stop_0" -> "group_is_container_stopped_0" [ style = bold] +"group_is_container_stop_0" -> "s1_stop_0 rhel7-auto5" [ style = bold] +"group_is_container_stop_0" -> "s2_stop_0 rhel7-auto5" [ style = bold] +"group_is_container_stop_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_stopped_0" -> "group_is_container_start_0" [ style = bold] +"group_is_container_stopped_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_running_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_start_0" -> "group_of_containers_running_0" [ style = bold] +"group_of_containers_start_0" -> "iso_mem1_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_stop_0" -> "group_of_containers_stopped_0" [ style = bold] +"group_of_containers_stop_0" -> "iso_mem1_stop_0 rhel7-auto5" [ style = bold] +"group_of_containers_stop_0" -> "iso_mem2_stop_0 rhel7-auto5" [ style = bold] +"group_of_containers_stop_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_stopped_0" -> "group_of_containers_start_0" [ style = bold] +"group_of_containers_stopped_0" [ style=bold color="green" fontcolor="orange"] +"iso_mem1_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem1_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem1_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"iso_mem1_stop_0 rhel7-auto5" 
-> "group_of_containers_stopped_0" [ style = bold] +"iso_mem1_stop_0 rhel7-auto5" -> "iso_mem1_start_0 rhel7-auto5" [ style = bold] +"iso_mem1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" -> "iso_mem2_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" -> "group_of_containers_stopped_0" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" -> "iso_mem1_stop_0 rhel7-auto5" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"mygroup-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_start_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:0_start_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:1_start_0" [ style = bold] +"mygroup-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_stop_0" -> "mygroup-clone_stopped_0" [ style = bold] +"mygroup-clone_stop_0" -> "mygroup:0_stop_0" [ style = bold] +"mygroup-clone_stop_0" -> "mygroup:1_stop_0" [ style = bold] +"mygroup-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_stopped_0" -> "mygroup-clone_start_0" [ style = bold] +"mygroup-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:0_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_start_0" -> "g1:0_start_0 rhel7-auto5" [ style = bold] +"mygroup:0_start_0" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"mygroup:0_start_0" -> "mygroup:0_running_0" [ 
style = bold] +"mygroup:0_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_stop_0" -> "g1:0_stop_0 rhel7-auto5" [ style = bold] +"mygroup:0_stop_0" -> "g2:0_stop_0 rhel7-auto5" [ style = bold] +"mygroup:0_stop_0" -> "mygroup:0_stopped_0" [ style = bold] +"mygroup:0_stop_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_stopped_0" -> "mygroup-clone_stopped_0" [ style = bold] +"mygroup:0_stopped_0" -> "mygroup:0_start_0" [ style = bold] +"mygroup:0_stopped_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:1_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_start_0" -> "g1:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "mygroup:1_running_0" [ style = bold] +"mygroup:1_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_stop_0" -> "g1:1_stop_0 rhel7-auto5" [ style = bold] +"mygroup:1_stop_0" -> "g2:1_stop_0 rhel7-auto5" [ style = bold] +"mygroup:1_stop_0" -> "mygroup:1_stopped_0" [ style = bold] +"mygroup:1_stop_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_stopped_0" -> "mygroup-clone_stopped_0" [ style = bold] +"mygroup:1_stopped_0" -> "mygroup:1_start_0" [ style = bold] +"mygroup:1_stopped_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_start_0" -> "replicated-clone_running_0" [ style = bold] +"replicated-clone_start_0" -> "replicated:0_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" -> "replicated:1_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_stop_0" -> "replicated-clone_stopped_0" [ style = bold] +"replicated-clone_stop_0" -> "replicated:0_stop_0 rhel7-auto5" [ style = bold] +"replicated-clone_stop_0" -> "replicated:1_stop_0 
rhel7-auto5" [ style = bold] +"replicated-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_stopped_0" -> "replicated-clone_start_0" [ style = bold] +"replicated-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +"replicated:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:0_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:0_start_0 rhel7-auto5" -> "replicated:0_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:0_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"replicated:0_stop_0 rhel7-auto5" -> "replicated-clone_stopped_0" [ style = bold] +"replicated:0_stop_0 rhel7-auto5" -> "replicated:0_start_0 rhel7-auto5" [ style = bold] +"replicated:0_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:1_start_0 rhel7-auto5" -> "replicated:1_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"replicated:1_stop_0 rhel7-auto5" -> "replicated-clone_stopped_0" [ style = bold] +"replicated:1_stop_0 rhel7-auto5" -> "replicated:1_start_0 rhel7-auto5" [ style = bold] +"replicated:1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s1_monitor_10000 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s2_start_0 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_stop_0 rhel7-auto5" -> 
"all_stopped" [ style = bold] +"s1_stop_0 rhel7-auto5" -> "group_is_container_stopped_0" [ style = bold] +"s1_stop_0 rhel7-auto5" -> "s1_start_0 rhel7-auto5" [ style = bold] +"s1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s2_start_0 rhel7-auto5" -> "s2_monitor_10000 rhel7-auto5" [ style = bold] +"s2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"s2_stop_0 rhel7-auto5" -> "group_is_container_stopped_0" [ style = bold] +"s2_stop_0 rhel7-auto5" -> "s1_stop_0 rhel7-auto5" [ style = bold] +"s2_stop_0 rhel7-auto5" -> "s2_start_0 rhel7-auto5" [ style = bold] +"s2_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/isolation-restart-all.exp b/pengine/test10/isolation-restart-all.exp new file mode 100644 index 00000000000..ba2ccca2113 --- /dev/null +++ b/pengine/test10/isolation-restart-all.exp @@ -0,0 +1,873 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/isolation-restart-all.scores b/pengine/test10/isolation-restart-all.scores new file mode 100644 index 00000000000..188aaab6308 --- /dev/null +++ b/pengine/test10/isolation-restart-all.scores @@ -0,0 +1,67 @@ +Allocation scores: +clone_color: g1:0 allocation score on rhel7-auto4: 0 +clone_color: g1:0 allocation score on rhel7-auto5: 1 +clone_color: g1:1 allocation score on rhel7-auto4: 0 +clone_color: g1:1 allocation score on rhel7-auto5: 1 +clone_color: g2:0 allocation score on rhel7-auto4: 0 +clone_color: g2:0 allocation score on rhel7-auto5: 1 +clone_color: g2:1 allocation score on rhel7-auto4: 0 +clone_color: g2:1 allocation score on rhel7-auto5: 1 +clone_color: mygroup-clone allocation score on rhel7-auto4: 0 +clone_color: mygroup-clone allocation score on rhel7-auto5: 0 +clone_color: mygroup:0 allocation score on rhel7-auto4: 0 +clone_color: mygroup:0 allocation score on rhel7-auto5: 0 +clone_color: mygroup:1 allocation score on rhel7-auto4: 0 +clone_color: mygroup:1 allocation score on rhel7-auto5: 0 +clone_color: replicated-clone allocation score on rhel7-auto4: 0 +clone_color: 
replicated-clone allocation score on rhel7-auto5: 0 +clone_color: replicated:0 allocation score on rhel7-auto4: 0 +clone_color: replicated:0 allocation score on rhel7-auto5: 1 +clone_color: replicated:1 allocation score on rhel7-auto4: 0 +clone_color: replicated:1 allocation score on rhel7-auto5: 1 +group_color: g1:0 allocation score on rhel7-auto4: -INFINITY +group_color: g1:0 allocation score on rhel7-auto5: 1 +group_color: g1:1 allocation score on rhel7-auto4: -INFINITY +group_color: g1:1 allocation score on rhel7-auto5: 1 +group_color: g2:0 allocation score on rhel7-auto4: -INFINITY +group_color: g2:0 allocation score on rhel7-auto5: 1 +group_color: g2:1 allocation score on rhel7-auto4: -INFINITY +group_color: g2:1 allocation score on rhel7-auto5: 1 +group_color: group_is_container allocation score on rhel7-auto4: 0 +group_color: group_is_container allocation score on rhel7-auto5: 0 +group_color: group_of_containers allocation score on rhel7-auto4: 0 +group_color: group_of_containers allocation score on rhel7-auto5: 0 +group_color: iso_mem1 allocation score on rhel7-auto4: 0 +group_color: iso_mem1 allocation score on rhel7-auto5: 0 +group_color: iso_mem2 allocation score on rhel7-auto4: 0 +group_color: iso_mem2 allocation score on rhel7-auto5: 0 +group_color: mygroup:0 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:0 allocation score on rhel7-auto5: 0 +group_color: mygroup:1 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:1 allocation score on rhel7-auto5: 0 +group_color: s1 allocation score on rhel7-auto4: 0 +group_color: s1 allocation score on rhel7-auto5: 0 +group_color: s2 allocation score on rhel7-auto4: 0 +group_color: s2 allocation score on rhel7-auto5: 0 +native_color: fake allocation score on rhel7-auto4: 0 +native_color: fake allocation score on rhel7-auto5: 0 +native_color: g1:0 allocation score on rhel7-auto4: -INFINITY +native_color: g1:0 allocation score on rhel7-auto5: 2 +native_color: g1:1 allocation score on 
rhel7-auto4: -INFINITY +native_color: g1:1 allocation score on rhel7-auto5: 2 +native_color: g2:0 allocation score on rhel7-auto4: -INFINITY +native_color: g2:0 allocation score on rhel7-auto5: 1 +native_color: g2:1 allocation score on rhel7-auto4: -INFINITY +native_color: g2:1 allocation score on rhel7-auto5: 1 +native_color: iso_mem1 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem1 allocation score on rhel7-auto5: 0 +native_color: iso_mem2 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem2 allocation score on rhel7-auto5: 0 +native_color: replicated:0 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:0 allocation score on rhel7-auto5: 1 +native_color: replicated:1 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:1 allocation score on rhel7-auto5: 1 +native_color: s1 allocation score on rhel7-auto4: -INFINITY +native_color: s1 allocation score on rhel7-auto5: 0 +native_color: s2 allocation score on rhel7-auto4: -INFINITY +native_color: s2 allocation score on rhel7-auto5: 0 diff --git a/pengine/test10/isolation-restart-all.summary b/pengine/test10/isolation-restart-all.summary new file mode 100644 index 00000000000..a2939f104c6 --- /dev/null +++ b/pengine/test10/isolation-restart-all.summary @@ -0,0 +1,118 @@ + +Current cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + replicated:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + s2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:1 
(ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + iso_mem2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + +Transition Summary: + * Restart fake (Started rhel7-auto5) + * Restart replicated:0 (Started rhel7-auto5) + * Restart replicated:1 (Started rhel7-auto5) + * Restart s1 (Started rhel7-auto5) + * Restart s2 (Started rhel7-auto5) + * Restart g1:0 (Started rhel7-auto5) + * Restart g2:0 (Started rhel7-auto5) + * Restart g1:1 (Started rhel7-auto5) + * Restart g2:1 (Started rhel7-auto5) + * Restart iso_mem1 (Started rhel7-auto5) + * Restart iso_mem2 (Started rhel7-auto5) + +Executing cluster transition: + * Resource action: fake stop on rhel7-auto5 + * Resource action: fake start on rhel7-auto5 + * Resource action: fake monitor=60000 on rhel7-auto5 + * Pseudo action: replicated-clone_stop_0 + * Pseudo action: group_is_container_stop_0 + * Resource action: s2 stop on rhel7-auto5 + * Pseudo action: mygroup-clone_stop_0 + * Pseudo action: group_of_containers_stop_0 + * Resource action: iso_mem2 stop on rhel7-auto5 + * Resource action: replicated:0 stop on rhel7-auto5 + * Resource action: replicated:1 stop on rhel7-auto5 + * Pseudo action: replicated-clone_stopped_0 + * Pseudo action: replicated-clone_start_0 + * Resource action: s1 stop on rhel7-auto5 + * Pseudo action: mygroup:0_stop_0 + * Resource action: g2:0 stop on rhel7-auto5 + * Pseudo action: mygroup:1_stop_0 + * Resource action: g2:1 stop on rhel7-auto5 + * Resource action: iso_mem1 stop on rhel7-auto5 + * Resource action: replicated:0 start on rhel7-auto5 + * Resource action: replicated:0 monitor=10000 on rhel7-auto5 + * Resource action: replicated:1 start on rhel7-auto5 + * Resource action: replicated:1 monitor=10000 on rhel7-auto5 + * Pseudo action: replicated-clone_running_0 + * Pseudo action: group_is_container_stopped_0 + * Pseudo action: group_is_container_start_0 + * Resource action: s1 start on rhel7-auto5 + * 
Resource action: s1 monitor=10000 on rhel7-auto5 + * Resource action: s2 start on rhel7-auto5 + * Resource action: s2 monitor=10000 on rhel7-auto5 + * Resource action: g1:0 stop on rhel7-auto5 + * Resource action: g1:1 stop on rhel7-auto5 + * Pseudo action: group_of_containers_stopped_0 + * Pseudo action: group_of_containers_start_0 + * Resource action: iso_mem1 start on rhel7-auto5 + * Resource action: iso_mem1 monitor=60000 on rhel7-auto5 + * Resource action: iso_mem2 start on rhel7-auto5 + * Resource action: iso_mem2 monitor=60000 on rhel7-auto5 + * Pseudo action: all_stopped + * Pseudo action: group_is_container_running_0 + * Pseudo action: mygroup:0_stopped_0 + * Pseudo action: mygroup:1_stopped_0 + * Pseudo action: mygroup-clone_stopped_0 + * Pseudo action: mygroup-clone_start_0 + * Pseudo action: group_of_containers_running_0 + * Pseudo action: mygroup:0_start_0 + * Resource action: g1:0 start on rhel7-auto5 + * Resource action: g1:0 monitor=10000 on rhel7-auto5 + * Resource action: g2:0 start on rhel7-auto5 + * Resource action: g2:0 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:1_start_0 + * Resource action: g1:1 start on rhel7-auto5 + * Resource action: g1:1 monitor=10000 on rhel7-auto5 + * Resource action: g2:1 start on rhel7-auto5 + * Resource action: g2:1 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:0_running_0 + * Pseudo action: mygroup:1_running_0 + * Pseudo action: mygroup-clone_running_0 + +Revised cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + replicated:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + s2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Started 
rhel7-auto5 + g2:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + iso_mem2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + diff --git a/pengine/test10/isolation-restart-all.xml b/pengine/test10/isolation-restart-all.xml new file mode 100644 index 00000000000..124f524f58e --- /dev/null +++ b/pengine/test10/isolation-restart-all.xml @@ -0,0 +1,184 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 2244aafa3798d30be32c2187f70506d9246bccb3 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 17:27:23 -0400 Subject: [PATCH 14/22] High: pengine: disable migrations for resources with isolation containers --- lib/pengine/complex.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 9c876959d75..fe03d447b24 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -502,9 +502,14 @@ common_unpack(xmlNode * xml_obj, resource_t ** rsc, if (top->isolation_wrapper == NULL && (value == NULL || crm_is_true(value))) { if (g_hash_table_lookup((*rsc)->meta, "pcmk_docker_image")) { (*rsc)->isolation_wrapper = "docker-wrapper"; + clear_bit((*rsc)->flags, pe_rsc_allow_migrate); } /* add more isolation technologies here as we expand */ } + if (top->isolation_wrapper) { + /* never allow resources with an isolation wrapper migrate */ + clear_bit((*rsc)->flags, pe_rsc_allow_migrate); + } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); if (crm_is_true(value) || 
top->variant < pe_clone || (*rsc)->isolation_wrapper) { From f2e957ba710a4119dbe5a9a426eb424342b93788 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 17:27:50 -0400 Subject: [PATCH 15/22] Low: spec: add docker-wrapper directory to spec file --- pacemaker.spec.in | 1 + 1 file changed, 1 insertion(+) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 597fb3ae3a7..267baf1d856 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -444,6 +444,7 @@ exit 0 %dir /usr/lib/ocf %dir /usr/lib/ocf/resource.d /usr/lib/ocf/resource.d/pacemaker +/usr/lib/ocf/resource.d/containers %if "%{?cs_version}" != "UNKNOWN" %if 0%{?cs_version} < 2 From 757f110d4dbdb4a3ce372ae38d6d311c9a30246e Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 18:37:47 -0400 Subject: [PATCH 16/22] Low: lrmd: properly handle poke requests in lrmd client when using ipc --- lib/lrmd/lrmd_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index eace0dd5f85..bf45822652b 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -868,13 +868,14 @@ static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; + lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); - rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, FALSE); + rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); - return rc; + return rc < 0 ? 
rc : pcmk_ok; } static int From 23ce0371879c51200a11c90e629359114ee8b7e3 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 18:38:20 -0400 Subject: [PATCH 17/22] Low: enable logging in remote_ctl.c --- lrmd/remote_ctl.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c index d2ab9eb4eb1..c9fbe93e287 100644 --- a/lrmd/remote_ctl.c +++ b/lrmd/remote_ctl.c @@ -86,11 +86,6 @@ client_exit(int rc) exit(rc); } -#define print_result(result) \ - if (!options.quiet) { \ - result; \ - } \ - static void client_shutdown(int nsig) { @@ -116,7 +111,7 @@ read_events(lrmd_event_data_t * event) static gboolean timeout_err(gpointer data) { - print_result(printf("timed out in remote_client\n")); + crm_err("timed out in remote_client\n"); client_exit(PCMK_OCF_TIMEOUT); return FALSE; @@ -159,7 +154,7 @@ try_connect(void) sleep(1); } - print_result(printf("Failed to connect to pacemaker remote.\n")); + crm_err("Failed to connect to pacemaker remote.\n"); client_exit(PCMK_OCF_UNKNOWN_ERROR); } @@ -208,7 +203,7 @@ client_start(gpointer user_data) options.class, options.provider, options.type, 0); if (rc != 0){ - print_result(printf("failed to register resource %s with pacemaker_remote. rc: %d\n", options.rsc_id, rc)); + crm_err("failed to register resource %s with pacemaker_remote. rc: %d\n", options.rsc_id, rc); client_exit(1); } } @@ -225,7 +220,7 @@ client_start(gpointer user_data) if (rc > 0) { exec_call_id = rc; } else { - print_result(printf("execution of rsc %s failed. rc = %d\n", options.rsc_id, rc)); + crm_err("execution of rsc %s failed. rc = %d\n", options.rsc_id, rc); client_exit(PCMK_OCF_UNKNOWN_ERROR); } } @@ -322,11 +317,12 @@ main(int argc, char **argv) if (optind > argc) { ++argerr; } + crm_log_init("remote_client", LOG_INFO, TRUE, options.verbose ? 
TRUE : FALSE, argc, argv, FALSE); /* if we can't perform an api_call or listen for events, * there is nothing to do */ if (!options.api_call ) { - print_result(printf("Nothing to be done. Please specify 'api-call'\n")); + crm_err("Nothing to be done. Please specify 'api-call'\n"); return PCMK_OCF_UNKNOWN_ERROR; } From 338311455b6404a14fbb517283ec94e176e9e26b Mon Sep 17 00:00:00 2001 From: David Vossel Date: Tue, 24 Mar 2015 17:36:25 -0400 Subject: [PATCH 18/22] High: lrmd: enable ipc proxy for docker-wrapper privileged mode --- crmd/lrm_state.c | 77 +------------- extra/resources/docker-wrapper | 4 +- include/crm_internal.h | 18 ++++ lib/lrmd/Makefile.am | 2 +- lib/lrmd/proxy_common.c | 100 ++++++++++++++++++ lrmd/remote_ctl.c | 178 +++++++++++++++++++++++++++++++++ 6 files changed, 300 insertions(+), 79 deletions(-) create mode 100644 lib/lrmd/proxy_common.c diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c index efd061cb53b..31ca7ee7d1c 100644 --- a/crmd/lrm_state.c +++ b/crmd/lrm_state.c @@ -27,22 +27,10 @@ #include GHashTable *lrm_state_table = NULL; -GHashTable *proxy_table = NULL; +extern GHashTable *proxy_table; int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); -typedef struct remote_proxy_s { - char *node_name; - char *session_id; - - gboolean is_local; - - crm_ipc_t *ipc; - mainloop_io_t *source; - uint32_t last_request_id; - -} remote_proxy_t; - static void history_cache_destroy(gpointer data) { @@ -218,32 +206,6 @@ lrm_state_reset_tables(lrm_state_t * lrm_state) } } -static void -remote_proxy_end_session(const char *session) -{ - remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); - - if (proxy == NULL) { - return; - } - crm_trace("ending session ID %s", proxy->session_id); - - if (proxy->source) { - mainloop_del_ipc_client(proxy->source); - } -} - -static void -remote_proxy_free(gpointer data) -{ - 
remote_proxy_t *proxy = data; - - crm_trace("freed proxy session ID %s", proxy->session_id); - free(proxy->node_name); - free(proxy->session_id); - free(proxy); -} - gboolean lrm_state_init_local(void) { @@ -359,43 +321,6 @@ lrm_state_ipc_connect(lrm_state_t * lrm_state) return ret; } -static void -remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) -{ - /* sending to the remote node that an ipc connection has been destroyed */ - xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); - crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); - crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); - lrmd_internal_proxy_send(lrmd, msg); - free_xml(msg); -} - -static void -remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg) -{ - /* sending to the remote node an event msg. */ - xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); - crm_xml_add(event, F_LRMD_IPC_OP, "event"); - crm_xml_add(event, F_LRMD_IPC_SESSION, session_id); - add_message_xml(event, F_LRMD_IPC_MSG, msg); - crm_log_xml_explicit(event, "EventForProxy"); - lrmd_internal_proxy_send(lrmd, event); - free_xml(event); -} - -static void -remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id) -{ - /* sending to the remote node a response msg. 
*/ - xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); - crm_xml_add(response, F_LRMD_IPC_OP, "response"); - crm_xml_add(response, F_LRMD_IPC_SESSION, session_id); - crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); - add_message_xml(response, F_LRMD_IPC_MSG, msg); - lrmd_internal_proxy_send(lrmd, response); - free_xml(response); -} - static int remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index c67022ff2be..5dc5ba8a452 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -258,7 +258,7 @@ client_action() if [ -z "$PORT" ]; then get_active_port fi - ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" + ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" -n $CONTAINER $agent_type $KEY_VAL_STR >/dev/null 2>&1" $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" $agent_type $KEY_VAL_STR >/dev/null 2>&1 else echo "$CLIENT -c \"exec\" -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1 @@ -277,7 +277,7 @@ poke_remote() if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then get_active_port ocf_log debug "Attempting to contect $CONTAINER on port $PORT" - $CLIENT -c "poke" -S "127.0.0.1" -p $PORT >/dev/null 2>&1 + $CLIENT -c "poke" -S "127.0.0.1" -p $PORT -n $CONTAINER >/dev/null 2>&1 fi # no op for non privileged containers since we handed the # client monitor action as the monitor_cmd for the docker agent diff --git a/include/crm_internal.h b/include/crm_internal.h index ed07dc96ddf..ca4d416fc97 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -366,4 +366,22 @@ gboolean 
crm_digest_verify(xmlNode *input, const char *expected); /* cross-platform compatibility functions */ char *crm_compat_realpath(const char *path); +/* IPC Proxy Backend Shared Functions */ +typedef struct remote_proxy_s { + char *node_name; + char *session_id; + + gboolean is_local; + + crm_ipc_t *ipc; + mainloop_io_t *source; + uint32_t last_request_id; + +} remote_proxy_t; +void remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id); +void remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg); +void remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id); +void remote_proxy_end_session(const char *session); +void remote_proxy_free(gpointer data); + #endif /* CRM_INTERNAL__H */ diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am index bd62676efec..107598dbdcb 100644 --- a/lib/lrmd/Makefile.am +++ b/lib/lrmd/Makefile.am @@ -24,7 +24,7 @@ AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ lib_LTLIBRARIES = liblrmd.la -liblrmd_la_SOURCES = lrmd_client.c +liblrmd_la_SOURCES = lrmd_client.c proxy_common.c liblrmd_la_LDFLAGS = -version-info 2:2:1 liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ diff --git a/lib/lrmd/proxy_common.c b/lib/lrmd/proxy_common.c new file mode 100644 index 00000000000..3026227eed6 --- /dev/null +++ b/lib/lrmd/proxy_common.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2015 David Vossel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); +GHashTable *proxy_table = NULL; + +void +remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) +{ + /* sending to the remote node that an ipc connection has been destroyed */ + xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); + crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); + lrmd_internal_proxy_send(lrmd, msg); + free_xml(msg); +} + +void +remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg) +{ + /* sending to the remote node an event msg. */ + xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(event, F_LRMD_IPC_OP, "event"); + crm_xml_add(event, F_LRMD_IPC_SESSION, session_id); + add_message_xml(event, F_LRMD_IPC_MSG, msg); + crm_log_xml_explicit(event, "EventForProxy"); + lrmd_internal_proxy_send(lrmd, event); + free_xml(event); +} + +void +remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id) +{ + /* sending to the remote node a response msg. 
*/ + xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(response, F_LRMD_IPC_OP, "response"); + crm_xml_add(response, F_LRMD_IPC_SESSION, session_id); + crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); + add_message_xml(response, F_LRMD_IPC_MSG, msg); + lrmd_internal_proxy_send(lrmd, response); + free_xml(response); +} + +void +remote_proxy_end_session(const char *session) +{ + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + + if (proxy == NULL) { + return; + } + crm_trace("ending session ID %s", proxy->session_id); + + if (proxy->source) { + mainloop_del_ipc_client(proxy->source); + } +} + +void +remote_proxy_free(gpointer data) +{ + remote_proxy_t *proxy = data; + + crm_trace("freed proxy session ID %s", proxy->session_id); + free(proxy->node_name); + free(proxy->session_id); + free(proxy); +} + + diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c index c9fbe93e287..db8d8048305 100644 --- a/lrmd/remote_ctl.c +++ b/lrmd/remote_ctl.c @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -30,6 +31,9 @@ #include #include +extern GHashTable *proxy_table; +void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); + /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"help", 0, 0, '?'}, @@ -37,6 +41,7 @@ static struct crm_option long_options[] = { {"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"}, {"tls", 1, 0, 'S', "\t\tSet tls host to contact"}, {"tls-port", 1, 0, 'p', "\t\tUse custom tls port"}, + {"node", 1, 0, 'n', "\tNode name to use for ipc proxy"}, {"api-call", 1, 0, 'c', "\tDirectly relates to lrmd api functions"}, {"-spacer-", 1, 0, '-', "\nParameters for api-call option"}, {"action", 1, 0, 'a'}, @@ -65,6 +70,7 @@ static struct { int interval; int timeout; int port; + const char *node_name; const char *api_call; const char *rsc_id; const char *provider; @@ -83,6 +89,9 @@ static void client_exit(int rc) { 
lrmd_api_delete(lrmd_conn); + if (proxy_table) { + g_hash_table_destroy(proxy_table); proxy_table = NULL; + } exit(rc); } @@ -171,6 +180,11 @@ client_start(gpointer user_data) lrmd_conn->cmds->set_callback(lrmd_conn, read_events); + + if (safe_str_eq(options.api_call, "ipc_debug")) { + /* Do nothing, leave connection up just for debugging ipc proxy */ + return 0; + } if (options.timeout) { g_timeout_add(options.timeout, timeout_err, NULL); } @@ -228,6 +242,159 @@ client_start(gpointer user_data) return 0; } +static int +remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) +{ + /* Async responses from cib and friends back to clients via pacemaker_remoted */ + xmlNode *xml = NULL; + remote_proxy_t *proxy = userdata; + uint32_t flags; + + xml = string2xml(buffer); + if (xml == NULL) { + crm_warn("Received a NULL msg from IPC service."); + return 1; + } + + flags = crm_ipc_buffer_flags(proxy->ipc); + if (flags & crm_ipc_proxied_relay_response) { + crm_trace("Passing response back to %.8s on %s: %.200s - request id: %d", proxy->session_id, proxy->node_name, buffer, proxy->last_request_id); + remote_proxy_relay_response(lrmd_conn, proxy->session_id, xml, proxy->last_request_id); + proxy->last_request_id = 0; + + } else { + crm_trace("Passing event back to %.8s on %s: %.200s", proxy->session_id, proxy->node_name, buffer); + remote_proxy_relay_event(lrmd_conn, proxy->session_id, xml); + } + free_xml(xml); + return 1; +} + +static void +remote_proxy_disconnected(void *userdata) +{ + remote_proxy_t *proxy = userdata; + + crm_trace("destroying %p", userdata); + + proxy->source = NULL; + proxy->ipc = NULL; + + remote_proxy_notify_destroy(lrmd_conn, proxy->session_id); + g_hash_table_remove(proxy_table, proxy->session_id); +} + +static remote_proxy_t * +remote_proxy_new(const char *node_name, const char *session_id, const char *channel) +{ + static struct ipc_client_callbacks proxy_callbacks = { + .dispatch = remote_proxy_dispatch_internal, + 
.destroy = remote_proxy_disconnected + }; + remote_proxy_t *proxy = calloc(1, sizeof(remote_proxy_t)); + + proxy->node_name = strdup(node_name); + proxy->session_id = strdup(session_id); + + if (safe_str_eq(channel, CRM_SYSTEM_CRMD)) { + proxy->is_local = TRUE; + } else { + proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 0, proxy, &proxy_callbacks); + proxy->ipc = mainloop_get_ipc_client(proxy->source); + + if (proxy->source == NULL) { + remote_proxy_free(proxy); + return NULL; + } + } + + crm_trace("created proxy session ID %s", proxy->session_id); + g_hash_table_insert(proxy_table, proxy->session_id, proxy); + + return proxy; +} + +static void +remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) +{ + const char *op = crm_element_value(msg, F_LRMD_IPC_OP); + const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); + int msg_id = 0; + + /* sessions are raw ipc connections to IPC, + * all we do is proxy requests/responses exactly + * like they are given to us at the ipc level. 
*/ + + CRM_CHECK(op != NULL, return); + CRM_CHECK(session != NULL, return); + + crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); + + /* This is msg from remote ipc client going to real ipc server */ + if (safe_str_eq(op, "new")) { + const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); + + CRM_CHECK(channel != NULL, return); + + if (remote_proxy_new(options.node_name, session, channel) == NULL) { + remote_proxy_notify_destroy(lrmd, session); + } + crm_info("new remote proxy client established to %s, session id %s", channel, session); + } else if (safe_str_eq(op, "destroy")) { + remote_proxy_end_session(session); + + } else if (safe_str_eq(op, "request")) { + int flags = 0; + xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); + const char *name = crm_element_value(msg, F_LRMD_IPC_CLIENT); + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + + CRM_CHECK(request != NULL, return); + + if (proxy == NULL) { + /* proxy connection no longer exists */ + remote_proxy_notify_destroy(lrmd, session); + return; + } else if ((proxy->is_local == FALSE) && (crm_ipc_connected(proxy->ipc) == FALSE)) { + remote_proxy_end_session(session); + return; + } + proxy->last_request_id = 0; + crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); + crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); + +#if ENABLE_ACL + CRM_ASSERT(options.node_name); + crm_acl_get_set_user(request, F_LRMD_IPC_USER, options.node_name); +#endif + + if (is_set(flags, crm_ipc_proxied)) { + int rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); + + if(rc < 0) { + xmlNode *op_reply = create_xml_node(NULL, "nack"); + + crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", + op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); + + /* Send a n'ack so the caller doesn't block */ + crm_xml_add(op_reply, "function", __FUNCTION__); + crm_xml_add_int(op_reply, "line", __LINE__); + crm_xml_add_int(op_reply, "rc", 
rc); + remote_proxy_relay_response(lrmd, session, op_reply, msg_id); + free_xml(op_reply); + + } else { + crm_trace("Relayed %s request %d from %s to %s for %s", + op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); + proxy->last_request_id = msg_id; + } + } + } else { + crm_err("Unknown proxy operation: %s", op); + } +} + int main(int argc, char **argv) { @@ -258,6 +425,9 @@ main(int argc, char **argv) options.quiet = 1; options.verbose = 0; break; + case 'n': + options.node_name = optarg; + break; case 'c': options.api_call = optarg; break; @@ -329,8 +499,16 @@ main(int argc, char **argv) if (!options.timeout ) { options.timeout = 20000; } + if (use_tls) { + if (options.node_name == NULL) { + crm_err("\"node\" option required when tls is in use.\n"); + return PCMK_OCF_UNKNOWN_ERROR; + } + proxy_table = + g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free); lrmd_conn = lrmd_remote_api_new(NULL, options.tls_host ? options.tls_host : "localhost", options.port); + lrmd_internal_set_proxy_callback(lrmd_conn, NULL, remote_proxy_cb); } else { lrmd_conn = lrmd_api_new(); } From 83c3a49aeb53f997425b4f0f576b9ec4405a5151 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Tue, 24 Mar 2015 17:54:16 -0400 Subject: [PATCH 19/22] Low: rename pacemaker_remote_ctl to lrmd_interal_ctl --- extra/resources/docker-wrapper | 2 +- lrmd/Makefile.am | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index 5dc5ba8a452..11fa4d83d9e 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -95,7 +95,7 @@ END ####################################################################### -CLIENT="/usr/libexec/pacemaker/pacemaker_remote_ctl" +CLIENT="/usr/libexec/pacemaker/lrmd_internal_ctl" DOCKER_AGENT="/usr/lib/ocf/resource.d/heartbeat/docker" KEY_VAL_STR="" PROVIDER=$OCF_RESKEY_CRM_meta_provider diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am 
index 9addd1bd56c..57d7810fcfe 100644 --- a/lrmd/Makefile.am +++ b/lrmd/Makefile.am @@ -21,7 +21,7 @@ testdir = $(datadir)/$(PACKAGE)/tests/lrmd test_SCRIPTS = regression.py lrmdlibdir = $(CRM_DAEMON_DIR) -lrmdlib_PROGRAMS = lrmd lrmd_test pacemaker_remote_ctl +lrmdlib_PROGRAMS = lrmd lrmd_test lrmd_internal_ctl initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote @@ -43,8 +43,8 @@ pacemaker_remoted_CFLAGS = -DSUPPORT_REMOTE pacemaker_remoted_LDADD = $(lrmd_LDADD) -pacemaker_remote_ctl_SOURCES = remote_ctl.c -pacemaker_remote_ctl_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ +lrmd_internal_ctl_SOURCES = remote_ctl.c +lrmd_internal_ctl_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/services/libcrmservice.la \ From 9726f76889f1f5a7afd2f8523ef731cb507df8cf Mon Sep 17 00:00:00 2001 From: David Vossel Date: Wed, 25 Mar 2015 12:25:23 -0400 Subject: [PATCH 20/22] Low: docker-wrapper: set authkey file permissions and properly set container 'node name' during start --- extra/resources/docker-wrapper | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index 11fa4d83d9e..eb3520e25c3 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -258,12 +258,10 @@ client_action() if [ -z "$PORT" ]; then get_active_port fi - ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" -n $CONTAINER $agent_type $KEY_VAL_STR >/dev/null 2>&1" - $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" $agent_type $KEY_VAL_STR >/dev/null 2>&1 + ocf_log info "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" -n $CONTAINER $agent_type $KEY_VAL_STR >/dev/null 2>&1" + $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" -n $CONTAINER $agent_type 
$KEY_VAL_STR >/dev/null 2>&1 else echo "$CLIENT -c \"exec\" -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1 - - fi rc=$? @@ -276,7 +274,7 @@ poke_remote() # verifies daemon in container is active if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then get_active_port - ocf_log debug "Attempting to contect $CONTAINER on port $PORT" + ocf_log info "Attempting to contect $CONTAINER on port $PORT" $CLIENT -c "poke" -S "127.0.0.1" -p $PORT -n $CONTAINER >/dev/null 2>&1 fi # no op for non privileged containers since we handed the @@ -311,6 +309,7 @@ start_container() # generate an authkey if it doesn't exist. mkdir -p /etc/pacemaker/ dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 > /dev/null 2>&1 + chmod 600 /etc/pacemaker/authkey fi PORT=$(random_port) From 48838111981b417fc5132d36eb4899dba03199d2 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Wed, 25 Mar 2015 17:38:12 -0400 Subject: [PATCH 21/22] Fix: docker-wrapper: properly separate docker and resource specific attributes --- extra/resources/docker-wrapper | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index eb3520e25c3..d4c48ea58dc 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # Copyright (c) 2015 David Vossel # All Rights Reserved. 
@@ -173,21 +173,21 @@ separate_args() local env key value # write out arguments to key value string for ocf agent - for item in $(printenv | grep "^OCF.*" | grep -v "^OCF_RESKEY_pcmk_docker_.*"); + while read -r line; do - key="$(echo $item | awk -F= '{print $1}')" - val="$(echo $item | awk -F= '{print $2}')" + key="$(echo $line | awk -F= '{print $1}')" + val="$(echo $line | awk -F= '{print $2}')" KEY_VAL_STR="$KEY_VAL_STR -k \"$key\" -v \"$val\"" - done + done < <(printenv | grep "^OCF.*" | grep -v "^OCF_RESKEY_pcmk_docker_.*") # sanitize args for DOCKER agent's consumption - for item in $(printenv | grep "^OCF_RESKEY_pcmk_docker_.*"); + while read -r line; do - env="$(echo $item | awk -F= '{print $1}')" - val="$(echo $item | awk -F= '{print $2}')" + env="$(echo $line | awk -F= '{print $1}')" + val="$(echo $line | awk -F= '{print $2}')" key="$(echo "$env" | sed 's/^OCF_RESKEY_pcmk_docker/OCF_RESKEY/g')" - export ${key}=$(echo $val) - done + export $key="$val" + done < <(printenv | grep "^OCF_RESKEY_pcmk_docker_.*") if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then export OCF_RESKEY_run_cmd="/usr/sbin/pacemaker_remoted" From 7302cd7e63774cea45f990271379b44192048af7 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Wed, 25 Mar 2015 17:38:55 -0400 Subject: [PATCH 22/22] Low: lrmd: preserve exit reason string when isolation wrappers are in use --- lrmd/remote_ctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c index db8d8048305..32151d744f4 100644 --- a/lrmd/remote_ctl.c +++ b/lrmd/remote_ctl.c @@ -110,9 +110,11 @@ read_events(lrmd_event_data_t * event) } if ((event->call_id == exec_call_id) && (event->type == lrmd_event_exec_complete)) { if (event->output) { - printf("%s", event->output); + crm_info("%s", event->output); + } + if (event->exit_reason) { + fprintf(stderr, "%s%s\n", PCMK_OCF_REASON_PREFIX, event->exit_reason); } - client_exit(event->rc); } }