From a7a09a27e8a4511f0bc3174ce2e5270c60c44f49 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 13 Mar 2015 18:18:58 -0400 Subject: [PATCH 01/15] Feature: remote: pcmk remote client tool for use with container wrapper script --- lrmd/Makefile.am | 11 +- lrmd/remote_ctl.c | 350 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 lrmd/remote_ctl.c diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am index 67f6329bd56..9addd1bd56c 100644 --- a/lrmd/Makefile.am +++ b/lrmd/Makefile.am @@ -21,7 +21,7 @@ testdir = $(datadir)/$(PACKAGE)/tests/lrmd test_SCRIPTS = regression.py lrmdlibdir = $(CRM_DAEMON_DIR) -lrmdlib_PROGRAMS = lrmd lrmd_test +lrmdlib_PROGRAMS = lrmd lrmd_test pacemaker_remote_ctl initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote @@ -43,6 +43,15 @@ pacemaker_remoted_CFLAGS = -DSUPPORT_REMOTE pacemaker_remoted_LDADD = $(lrmd_LDADD) +pacemaker_remote_ctl_SOURCES = remote_ctl.c +pacemaker_remote_ctl_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/lrmd/liblrmd.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/pengine/libpe_status.la \ + $(top_builddir)/pengine/libpengine.la + + lrmd_test_SOURCES = test.c lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c new file mode 100644 index 00000000000..d2ab9eb4eb1 --- /dev/null +++ b/lrmd/remote_ctl.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2015 David Vossel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +/* *INDENT-OFF* */ +static struct crm_option long_options[] = { + {"help", 0, 0, '?'}, + {"verbose", 0, 0, 'V', "\t\tPrint out logs and events to screen"}, + {"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"}, + {"tls", 1, 0, 'S', "\t\tSet tls host to contact"}, + {"tls-port", 1, 0, 'p', "\t\tUse custom tls port"}, + {"api-call", 1, 0, 'c', "\tDirectly relates to lrmd api functions"}, + {"-spacer-", 1, 0, '-', "\nParameters for api-call option"}, + {"action", 1, 0, 'a'}, + {"rsc-id", 1, 0, 'r'}, + {"provider", 1, 0, 'P'}, + {"class", 1, 0, 'C'}, + {"type", 1, 0, 'T'}, + {"timeout", 1, 0, 't'}, + {"param-key", 1, 0, 'k'}, + {"param-val", 1, 0, 'v'}, + + {"-spacer-", 1, 0, '-'}, + {0, 0, 0, 0} +}; +/* *INDENT-ON* */ + +static int wait_poke = 0; +static int exec_call_id = 0; +static gboolean client_start(gpointer user_data); +static void try_connect(void); + +static struct { + int verbose; + int quiet; + int print; + int interval; + int timeout; + int port; + const char *api_call; + const char *rsc_id; + const char *provider; + const char *class; + const char *type; + const char *action; + const char *listen; + const char *tls_host; + lrmd_key_value_t *params; +} options; + +GMainLoop *mainloop = NULL; +lrmd_t *lrmd_conn = NULL; + +static void +client_exit(int rc) +{ + lrmd_api_delete(lrmd_conn); + exit(rc); +} + +#define print_result(result) \ + if (!options.quiet) { \ + result; 
\ + } \ + +static void +client_shutdown(int nsig) +{ + lrmd_api_delete(lrmd_conn); + lrmd_conn = NULL; +} + +static void +read_events(lrmd_event_data_t * event) +{ + if (wait_poke && event->type == lrmd_event_poke) { + client_exit(PCMK_OCF_OK); + } + if ((event->call_id == exec_call_id) && (event->type == lrmd_event_exec_complete)) { + if (event->output) { + printf("%s", event->output); + } + + client_exit(event->rc); + } +} + +static gboolean +timeout_err(gpointer data) +{ + print_result(printf("timed out in remote_client\n")); + client_exit(PCMK_OCF_TIMEOUT); + + return FALSE; +} + +static void +connection_events(lrmd_event_data_t * event) +{ + int rc = event->connection_rc; + + if (event->type != lrmd_event_connect) { + /* ignore */ + return; + } + + if (!rc) { + client_start(NULL); + return; + } else { + sleep(1); + try_connect(); + } +} + +static void +try_connect(void) +{ + int tries = 10; + static int num_tries = 0; + int rc = 0; + + lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); + for (; num_tries < tries; num_tries++) { + rc = lrmd_conn->cmds->connect_async(lrmd_conn, "lrmd", 10000); + + if (!rc) { + num_tries++; + return; /* we'll hear back in async callback */ + } + sleep(1); + } + + print_result(printf("Failed to connect to pacemaker remote.\n")); + client_exit(PCMK_OCF_UNKNOWN_ERROR); +} + +static gboolean +client_start(gpointer user_data) +{ + int rc = 0; + + if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { + try_connect(); + /* async connect, this funciton will get called back into. 
*/ + return 0; + } + + lrmd_conn->cmds->set_callback(lrmd_conn, read_events); + + if (options.timeout) { + g_timeout_add(options.timeout, timeout_err, NULL); + } + + if (safe_str_eq(options.api_call, "metadata")) { + char *output = NULL; + + rc = lrmd_conn->cmds->get_metadata(lrmd_conn, + options.class, + options.provider, options.type, &output, 0); + if (rc == pcmk_ok) { + printf("%s", output); + free(output); + client_exit(PCMK_OCF_OK); + } + client_exit(PCMK_OCF_UNKNOWN_ERROR); + + } else if (safe_str_eq(options.api_call, "poke")) { + rc = lrmd_conn->cmds->poke_connection(lrmd_conn); + if (rc != pcmk_ok) { + client_exit(PCMK_OCF_UNKNOWN_ERROR); + } + wait_poke = 1; + } else { + lrmd_rsc_info_t *rsc_info = NULL; + + rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); + if (rsc_info == NULL) { + rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, + options.class, options.provider, options.type, 0); + + if (rc != 0){ + print_result(printf("failed to register resource %s with pacemaker_remote. rc: %d\n", options.rsc_id, rc)); + client_exit(1); + } + } + lrmd_free_rsc_info(rsc_info); + + rc = lrmd_conn->cmds->exec(lrmd_conn, + options.rsc_id, + options.action, + NULL, + options.interval, + options.timeout, + 0, 0, options.params); + + if (rc > 0) { + exec_call_id = rc; + } else { + print_result(printf("execution of rsc %s failed. 
rc = %d\n", options.rsc_id, rc)); + client_exit(PCMK_OCF_UNKNOWN_ERROR); + } + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int option_index = 0; + int argerr = 0; + int flag; + char *key = NULL; + char *val = NULL; + gboolean use_tls = FALSE; + crm_trigger_t *trig; + + crm_set_options(NULL, "mode [options]", long_options, + "Inject commands into the lrmd and watch for events\n"); + + while (1) { + flag = crm_get_option(argc, argv, &option_index); + if (flag == -1) + break; + + switch (flag) { + case '?': + crm_help(flag, EX_OK); + break; + case 'V': + options.verbose = 1; + break; + case 'Q': + options.quiet = 1; + options.verbose = 0; + break; + case 'c': + options.api_call = optarg; + break; + case 'a': + options.action = optarg; + break; + case 'r': + options.rsc_id = optarg; + break; + case 'P': + options.provider = optarg; + break; + case 'C': + options.class = optarg; + break; + case 'T': + options.type = optarg; + break; + case 't': + if(optarg) { + options.timeout = atoi(optarg); + } + break; + case 'k': + key = optarg; + if (key && val) { + options.params = lrmd_key_value_add(options.params, key, val); + key = val = NULL; + } + break; + case 'v': + val = optarg; + if (key && val) { + options.params = lrmd_key_value_add(options.params, key, val); + key = val = NULL; + } + break; + case 'S': + options.tls_host = optarg; + use_tls = TRUE; + break; + case 'p': + if(optarg) { + options.port = atoi(optarg); + } + use_tls = TRUE; + break; + default: + ++argerr; + break; + } + } + + if (argerr) { + crm_help('?', EX_USAGE); + } + if (optind > argc) { + ++argerr; + } + + /* if we can't perform an api_call or listen for events, + * there is nothing to do */ + if (!options.api_call ) { + print_result(printf("Nothing to be done. Please specify 'api-call'\n")); + return PCMK_OCF_UNKNOWN_ERROR; + } + + if (!options.timeout ) { + options.timeout = 20000; + } + if (use_tls) { + lrmd_conn = lrmd_remote_api_new(NULL, options.tls_host ? 
options.tls_host : "localhost", options.port); + } else { + lrmd_conn = lrmd_api_new(); + } + trig = mainloop_add_trigger(G_PRIORITY_HIGH, client_start, NULL); + mainloop_set_trigger(trig); + mainloop_add_signal(SIGTERM, client_shutdown); + + mainloop = g_main_new(FALSE); + g_main_run(mainloop); + + client_exit(0); + return 0; +} From c7d3f0c8c5b2cdc385d480ba050dc276dee9aa4a Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 13 Mar 2015 19:06:07 -0400 Subject: [PATCH 02/15] High: extra: docker container tech wrapper script for pcmk remote --- extra/resources/Makefile.am | 5 + extra/resources/docker-wrapper | 492 +++++++++++++++++++++++++++++++++ 2 files changed, 497 insertions(+) create mode 100755 extra/resources/docker-wrapper diff --git a/extra/resources/Makefile.am b/extra/resources/Makefile.am index cc162e5e6ec..955e233a1b8 100644 --- a/extra/resources/Makefile.am +++ b/extra/resources/Makefile.am @@ -21,6 +21,9 @@ include $(top_srcdir)/Makefile.common EXTRA_DIST = $(ocf_SCRIPTS) + +containertechdir = @OCF_RA_DIR@/containers + ocfdir = @OCF_RA_DIR@/pacemaker ocf_SCRIPTS = ClusterMon \ @@ -36,6 +39,8 @@ ocf_SCRIPTS = ClusterMon \ SystemHealth \ remote +containertech_SCRIPTS = docker-wrapper + if BUILD_XML_HELP man7_MANS = $(ocf_SCRIPTS:%=ocf_pacemaker_%.7) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper new file mode 100755 index 00000000000..c67022ff2be --- /dev/null +++ b/extra/resources/docker-wrapper @@ -0,0 +1,492 @@ +#!/bin/sh +# +# Copyright (c) 2015 David Vossel +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +Docker technology wrapper for pacemaker remote. + +docker wrapper + + + + +Docker image to run resources within + +docker image + + + + + +Give resources within container access to cluster resources +such as the CIB and the ability to manage cluster attributes. + +is privileged + + + + + +Add options to be appended to the 'docker run' command which is used +when creating the container during the start action. This option allows +users to do things such as setting a custom entry point and injecting +environment variables into the newly created container. Note the '-d' +option is supplied regardless of this value to force containers to run +in the background. + +NOTE: Do not explicitly specify the --name argument in the run_opts. 
This +agent will set --name using the resource's instance name + + +run options + + + + + + + + + + + + + + +END +} + +####################################################################### + + +CLIENT="/usr/libexec/pacemaker/pacemaker_remote_ctl" +DOCKER_AGENT="/usr/lib/ocf/resource.d/heartbeat/docker" +KEY_VAL_STR="" +PROVIDER=$OCF_RESKEY_CRM_meta_provider +CLASS=$OCF_RESKEY_CRM_meta_class +TYPE=$OCF_RESKEY_CRM_meta_type + +CONTAINER=$OCF_RESKEY_CRM_meta_isolation_instance +if [ -z "$CONTAINER" ]; then + CONTAINER=$OCF_RESOURCE_INSTANCE +fi + +RSC_STATE_DIR="${HA_RSCTMP}/docker-wrapper/${CONTAINER}-data/" +RSC_STATE_FILE="$RSC_STATE_DIR/$OCF_RESOURCE_INSTANCE.state" +CONNECTION_FAILURE=0 + +pcmk_docker_wrapper_usage() { + cat < $RSC_STATE_FILE + fi +} + +clear_state_file() +{ + if [ -f "$RSC_STATE_FILE" ]; then + rm -f $RSC_STATE_FILE + fi +} + +clear_state_dir() +{ + [ -d "$RSC_STATE_DIR" ] || return 0 + + rm -rf $RSC_STATE_DIR +} + +num_active_resources() +{ + local count + + [ -d "$RSC_STATE_DIR" ] || return 0 + + count="$(ls $RSC_STATE_DIR | wc -w)" + if [ $? -ne 0 ] || [ -z "$count" ]; then + return 0 + fi + return $count +} + +random_port() +{ + local port=$(python -c 'import socket; s=socket.socket(); s.bind(("localhost", 0)); print(s.getsockname()[1]); s.close()') + if [ $? -eq 0 ] && [ -n "$port" ]; then + echo "$port" + fi +} + +get_active_port() +{ + PORT="$(docker port $CONTAINER 3121 | awk -F: '{ print $2 }')" +} + +# separate docker args from ocf resource args. 
+separate_args() +{ + local env key value + + # write out arguments to key value string for ocf agent + for item in $(printenv | grep "^OCF.*" | grep -v "^OCF_RESKEY_pcmk_docker_.*"); + do + key="$(echo $item | awk -F= '{print $1}')" + val="$(echo $item | awk -F= '{print $2}')" + KEY_VAL_STR="$KEY_VAL_STR -k \"$key\" -v \"$val\"" + done + + # sanitize args for DOCKER agent's consumption + for item in $(printenv | grep "^OCF_RESKEY_pcmk_docker_.*"); + do + env="$(echo $item | awk -F= '{print $1}')" + val="$(echo $item | awk -F= '{print $2}')" + key="$(echo "$env" | sed 's/^OCF_RESKEY_pcmk_docker/OCF_RESKEY/g')" + export ${key}=$(echo $val) + done + + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + export OCF_RESKEY_run_cmd="/usr/sbin/pacemaker_remoted" + # on start set random port to run_opts + # write port to state file... or potentially get from ps? maybe docker info or inspect as well? + + else + export OCF_RESKEY_run_cmd="/usr/libexec/pacemaker/lrmd" + fi + export OCF_RESKEY_name="$CONTAINER" +} + +monitor_container() +{ + local rc + + $DOCKER_AGENT monitor + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + clear_state_dir + return $rc + fi + + poke_remote + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + # container is up without an active daemon. this is bad + ocf_log err "Container, $CONTAINER, is active without a responsive pacemaker_remote instance" + CONNECTION_FAILURE=1 + return $OCF_ERR_GENERIC + fi + CONNECTION_FAILURE=0 + + return $rc +} + +pcmk_docker_wrapper_monitor() { + local rc + + monitor_container + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + client_action "monitor" + rc=$? 
+ if [ $rc -eq $OCF_SUCCESS ]; then + write_state_file + elif [ $rc -eq $OCF_NOT_RUNNING ]; then + clear_state_file + fi + + return $rc +} + +client_action() +{ + local action=$1 + local agent_type="-T $TYPE -C $CLASS" + local rc=0 + + if [ -n "$PROVIDER" ]; then + agent_type="$agent_type -P $PROVIDER" + fi + + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + if [ -z "$PORT" ]; then + get_active_port + fi + ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" + $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" $agent_type $KEY_VAL_STR >/dev/null 2>&1 + else + echo "$CLIENT -c \"exec\" -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1 + + + fi + rc=$? + + ocf_log debug "Client action $action with result $rc" + return $rc +} + +poke_remote() +{ + # verifies daemon in container is active + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + get_active_port + ocf_log debug "Attempting to contect $CONTAINER on port $PORT" + $CLIENT -c "poke" -S "127.0.0.1" -p $PORT >/dev/null 2>&1 + fi + # no op for non privileged containers since we handed the + # client monitor action as the monitor_cmd for the docker agent +} + +pcmk_docker_wrapper_reload() +{ + local rc + + monitor_container + rc=$? + if [ $? -ne $OCF_SUCCESS ]; then + return $rc + fi + + client_action "reload" +} + +start_container() +{ + local rc + + monitor_container + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + return $rc + fi + + if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then + if ! [ -f "/etc/pacemaker/authkey" ]; then + # generate an authkey if it doesn't exist. 
+ mkdir -p /etc/pacemaker/ + dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 > /dev/null 2>&1 + fi + + PORT=$(random_port) + if [ -z "$PORT" ]; then + ocf_exit_reason "Unable to assign random port for pacemaker remote" + return $OCF_ERR_GENERIC + fi + export OCF_RESKEY_run_opts="-p 127.0.0.1:${PORT}:3121 $OCF_RESKEY_run_opts" + export OCF_RESKEY_run_opts="-v /etc/pacemaker/authkey:/etc/pacemaker/authkey $OCF_RESKEY_run_opts" + ocf_log debug "using privileged mode: run_opts=$OCF_RESKEY_run_opts" + else + export OCF_RESKEY_monitor_cmd="$CLIENT -c poke" + fi + + $DOCKER_AGENT start + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Docker container failed to start" + return $rc + fi + + monitor_container +} + +pcmk_docker_wrapper_start() { + local rc + + start_container + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + client_action "start" + rc=$? + if [ $? -ne "$OCF_SUCCESS" ]; then + ocf_exit_reason "Failed to start agent within container" + return $rc + fi + + pcmk_docker_wrapper_monitor + return $? +} + +stop_container() +{ + local rc + local count + + num_active_resources + count=$? + if [ $count -ne 0 ]; then + ocf_log err "Failed to stop agent within container. Killing container $CONTAINER with $count active resources" + fi + + $DOCKER_AGENT "stop" + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Docker container failed to stop" + return $rc + fi + clear_state_dir + return $rc +} + +stop_resource() +{ + local rc + + client_action "stop" + rc=$? + if [ $? -ne "$OCF_SUCCESS" ]; then + export OCF_RESKEY_force_stop="true" + kill_now=1 + else + clear_state_file + fi +} + +pcmk_docker_wrapper_stop() { + local rc + local kill_now=0 + local all_stopped=0 + + pcmk_docker_wrapper_monitor + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + rc=$OCF_SUCCESS + num_active_resources + if [ $? 
-eq 0 ]; then + # stop container if no more resources are running + ocf_log info "Gracefully stopping container $CONTAINER because no resources are left running." + stop_container + rc=$? + fi + return $rc + fi + + # if we can't talk to the remote daemon but the container is + # active, we have to force kill the container. + if [ $CONNECTION_FAILURE -eq 1 ]; then + export OCF_RESKEY_force_kill="true" + stop_container + return $? + fi + + + # If we've gotten this far, the container is up, and we + # need to gracefully stop a resource within the container. + client_action "stop" + rc=$? + if [ $? -ne "$OCF_SUCCESS" ]; then + export OCF_RESKEY_force_stop="true" + # force kill the container if we fail to stop a resource. + stop_container + rc=$? + else + clear_state_file + num_active_resources + if [ $? -eq 0 ]; then + # stop container if no more resources are running + ocf_log info "Gracefully stopping container $CONTAINER because last resource has stopped" + stop_container + rc=$? + fi + fi + + return $rc +} + +pcmk_docker_wrapper_validate() { + + if [ -z "$CLASS" ] || [ -z "$TYPE" ]; then + ocf_exit_reason "Update pacemaker to a version that supports container wrappers." + return $OCF_ERR_CONFIGURED + fi + + if ! [ -f "$DOCKER_AGENT" ]; then + ocf_exit_reason "Requires $DOCKER_AGENT to be installed. update the resource-agents package" + return $OCF_ERR_INSTALLED + fi + $DOCKER_AGENT validate-all + return $? +} + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +usage|help) pcmk_docker_wrapper_usage + exit $OCF_SUCCESS + ;; +esac + +separate_args +pcmk_docker_wrapper_validate +rc=$? 
+if [ $rc -ne 0 ]; then + case $__OCF_ACTION in + stop) exit $OCF_SUCCESS;; + monitor) exit $OCF_NOT_RUNNING;; + *) exit $rc;; + esac +fi + +case $__OCF_ACTION in + start) pcmk_docker_wrapper_start;; + stop) pcmk_docker_wrapper_stop;; + monitor) pcmk_docker_wrapper_monitor;; + reload) pcmk_docker_wrapper_reload;; + validate-all) pcmk_docker_wrapper_validate;; + *) pcmk_docker_wrapper_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? +ocf_log debug "Docker-wrapper ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc + From eb74798c41f7b97b061d8990a93f58b9043c543e Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 20 Mar 2015 17:06:28 -0400 Subject: [PATCH 03/15] High: pengine: ability to launch resources in isolated containers --- include/crm/msg_xml.h | 3 +++ include/crm/pengine/status.h | 2 ++ lib/pengine/complex.c | 16 ++++++++++++-- lrmd/lrmd.c | 41 +++++++++++++++++++++++++++++------- lrmd/lrmd_private.h | 3 +++ pengine/native.c | 37 ++++++++++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 10 deletions(-) diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index d3c2643fc5d..ed680e9430f 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -185,6 +185,9 @@ # define XML_CIB_TAG_RSC_TEMPLATE "template" +# define XML_RSC_ATTR_ISOLATION_INSTANCE "isolation-instance" +# define XML_RSC_ATTR_ISOLATION_WRAPPER "isolation-wrapper" +# define XML_RSC_ATTR_ISOLATION "isolation" # define XML_RSC_ATTR_RESTART "restart-type" # define XML_RSC_ATTR_ORDERED "ordered" # define XML_RSC_ATTR_INTERLEAVE "interleave" diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index 0460767c879..f7c8fac239d 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -290,6 +290,8 @@ struct resource_s { GListPtr fillers; char *pending_task; + + const char *isolation_wrapper; }; struct pe_action_s { diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 99571b8b1e0..9c876959d75 100644 --- 
a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -492,10 +492,22 @@ common_unpack(xmlNode * xml_obj, resource_t ** rsc, } pe_rsc_trace((*rsc), "Options for %s", (*rsc)->id); - value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); top = uber_parent(*rsc); - if (crm_is_true(value) || top->variant < pe_clone) { + + /* check for isolation wrapper mapping if the parent doesn't have one set + * isolation mapping is enabled by default. For safety, we are allowing isolation + * to be disabled by setting the meta attr, isolation=false. */ + value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_ISOLATION); + if (top->isolation_wrapper == NULL && (value == NULL || crm_is_true(value))) { + if (g_hash_table_lookup((*rsc)->meta, "pcmk_docker_image")) { + (*rsc)->isolation_wrapper = "docker-wrapper"; + } + /* add more isolation technologies here as we expand */ + } + + value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); + if (crm_is_true(value) || top->variant < pe_clone || (*rsc)->isolation_wrapper) { set_bit((*rsc)->flags, pe_rsc_unique); } diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 3fe56ab61b6..30b7926a33e 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -65,6 +65,9 @@ typedef struct lrmd_cmd_s { char *output; char *userdata_str; + /* when set, this cmd should go through a container wrapper */ + const char *isolation_wrapper; + #ifdef HAVE_SYS_TIMEB_H /* Timestamp of when op first ran */ struct timeb t_first_run; @@ -156,7 +159,7 @@ build_rsc_from_xml(xmlNode * msg) } static lrmd_cmd_t * -create_lrmd_cmd(xmlNode * msg, crm_client_t * client) +create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); @@ -180,6 +183,18 @@ create_lrmd_cmd(xmlNode * msg, crm_client_t * client) cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); + cmd->isolation_wrapper = g_hash_table_lookup(cmd->params, 
"CRM_meta_isolation_wrapper"); + + if (cmd->isolation_wrapper) { + if (g_hash_table_lookup(cmd->params, "CRM_meta_isolation_instance") == NULL) { + g_hash_table_insert(cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id)); + } + if (rsc->provider) { + g_hash_table_insert(cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider)); + } + g_hash_table_insert(cmd->params, strdup("CRM_meta_class"), strdup(rsc->class)); + g_hash_table_insert(cmd->params, strdup("CRM_meta_type"), strdup(rsc->type)); + } return cmd; } @@ -1161,12 +1176,22 @@ lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) } } - action = resources_action_create(rsc->rsc_id, - rsc->class, - rsc->provider, - rsc->type, - normalize_action_name(rsc, cmd->action), - cmd->interval, cmd->timeout, params_copy); + if (cmd->isolation_wrapper) { + action = resources_action_create(rsc->rsc_id, + "ocf", + CONTAINER_PROVIDER, + cmd->isolation_wrapper, + cmd->action, /*action will be normalized in wrapper*/ + cmd->interval, cmd->timeout, params_copy); + } else { + action = resources_action_create(rsc->rsc_id, + rsc->class, + rsc->provider, + rsc->type, + normalize_action_name(rsc, cmd->action), + cmd->interval, cmd->timeout, params_copy); + + } if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); @@ -1450,7 +1475,7 @@ process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) return -ENODEV; } - cmd = create_lrmd_cmd(request, client); + cmd = create_lrmd_cmd(request, client, rsc); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. 
diff --git a/lrmd/lrmd_private.h b/lrmd/lrmd_private.h index 02593d39d3f..340f150a381 100644 --- a/lrmd/lrmd_private.h +++ b/lrmd/lrmd_private.h @@ -29,6 +29,9 @@ # undef KEYFILE # include # endif + +#define CONTAINER_PROVIDER "containers" + GHashTable *rsc_list; typedef struct lrmd_rsc_s { diff --git a/pengine/native.c b/pengine/native.c index 8e30b9423b1..7faf692798b 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -3267,6 +3267,7 @@ void native_append_meta(resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); + resource_t *parent, *last_parent; if (value) { char *name = NULL; @@ -3284,4 +3285,40 @@ native_append_meta(resource_t * rsc, xmlNode * xml) crm_xml_add(xml, name, value); free(name); } + + last_parent = parent = rsc; + while (parent != NULL) { + char *name = NULL; + + if (parent->isolation_wrapper == NULL) { + last_parent = parent; + parent = parent->parent; + continue; + } + + /* name of wrapper script this resource is routed through. 
*/ + name = crm_meta_name(XML_RSC_ATTR_ISOLATION_WRAPPER); + crm_xml_add(xml, name, parent->isolation_wrapper); + free(name); + + /* instance name for isolated environment */ + name = crm_meta_name(XML_RSC_ATTR_ISOLATION_INSTANCE); + if (parent->variant < pe_clone) { + crm_xml_add(xml, name, parent->id); + } else { + char *iso = NULL; + /* if isolation is set at the clone/master level, we have to + * give this resource the unique isolation instance associated + * with the matching clone child */ + value = g_hash_table_lookup(last_parent->meta, XML_RSC_ATTR_INCARNATION); + CRM_ASSERT(value != NULL); + + iso = crm_concat(parent->id, value, '_'); + crm_xml_add(xml, name, iso); + free(iso); + } + free(name); + + break; + } } From 5cdb4bb9ed361b2399632d111cba821ba00a6b71 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Fri, 20 Mar 2015 17:11:40 -0400 Subject: [PATCH 04/15] Low: pengine: isolation regression tests --- pengine/regression.sh | 3 + pengine/test10/isolation-start-all.dot | 81 ++++ pengine/test10/isolation-start-all.exp | 460 +++++++++++++++++++++ pengine/test10/isolation-start-all.scores | 67 +++ pengine/test10/isolation-start-all.summary | 94 +++++ pengine/test10/isolation-start-all.xml | 189 +++++++++ 6 files changed, 894 insertions(+) create mode 100644 pengine/test10/isolation-start-all.dot create mode 100644 pengine/test10/isolation-start-all.exp create mode 100644 pengine/test10/isolation-start-all.scores create mode 100644 pengine/test10/isolation-start-all.summary create mode 100644 pengine/test10/isolation-start-all.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index f2dbef13348..58ff4d476e5 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -798,5 +798,8 @@ echo "" do_test resource-discovery "Exercises resource-discovery location constraint option." do_test rsc-discovery-per-node "Disable resource discovery per node" +echo "" +do_test isolation-start-all "Start docker isolated resources." 
+ echo "" test_results diff --git a/pengine/test10/isolation-start-all.dot b/pengine/test10/isolation-start-all.dot new file mode 100644 index 00000000000..b6fbe164ddf --- /dev/null +++ b/pengine/test10/isolation-start-all.dot @@ -0,0 +1,81 @@ + digraph "g" { +"fake_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"fake_start_0 rhel7-auto5" -> "fake_monitor_60000 rhel7-auto5" [ style = bold] +"fake_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_start_0 rhel7-auto5" -> "g1:0_monitor_10000 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g1:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_start_0 rhel7-auto5" -> "g1:1_monitor_10000 rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g1:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_start_0 rhel7-auto5" -> "g2:0_monitor_10000 rhel7-auto5" [ style = bold] +"g2:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g2:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_start_0 rhel7-auto5" -> "g2:1_monitor_10000 rhel7-auto5" [ style = bold] +"g2:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g2:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"group_is_container_running_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_start_0" -> "group_is_container_running_0" [ 
style = bold] +"group_is_container_start_0" -> "s1_start_0 rhel7-auto5" [ style = bold] +"group_is_container_start_0" -> "s2_start_0 rhel7-auto5" [ style = bold] +"group_is_container_start_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_running_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_start_0" -> "group_of_containers_running_0" [ style = bold] +"group_of_containers_start_0" -> "iso_mem1_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" [ style=bold color="green" fontcolor="orange"] +"iso_mem1_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem1_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem1_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" -> "iso_mem2_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"mygroup-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_start_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:0_start_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:1_start_0" [ style = bold] +"mygroup-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:0_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_start_0" -> "g1:0_start_0 rhel7-auto5" [ style = bold] +"mygroup:0_start_0" -> "g2:0_start_0 rhel7-auto5" [ style = 
bold] +"mygroup:0_start_0" -> "mygroup:0_running_0" [ style = bold] +"mygroup:0_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:1_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_start_0" -> "g1:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "mygroup:1_running_0" [ style = bold] +"mygroup:1_start_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_start_0" -> "replicated-clone_running_0" [ style = bold] +"replicated-clone_start_0" -> "replicated:0_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" -> "replicated:1_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"replicated:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:0_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:0_start_0 rhel7-auto5" -> "replicated:0_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:1_start_0 rhel7-auto5" -> "replicated:1_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s1_monitor_10000 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s2_start_0 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" [ style=bold color="green" 
fontcolor="black"] +"s2_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s2_start_0 rhel7-auto5" -> "s2_monitor_10000 rhel7-auto5" [ style = bold] +"s2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/isolation-start-all.exp b/pengine/test10/isolation-start-all.exp new file mode 100644 index 00000000000..8b059da1a9c --- /dev/null +++ b/pengine/test10/isolation-start-all.exp @@ -0,0 +1,460 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/isolation-start-all.scores b/pengine/test10/isolation-start-all.scores new file mode 100644 index 00000000000..56d40a70012 --- /dev/null +++ b/pengine/test10/isolation-start-all.scores @@ -0,0 +1,67 @@ +Allocation scores: +clone_color: g1:0 allocation score on rhel7-auto4: 0 +clone_color: g1:0 allocation score on rhel7-auto5: 0 +clone_color: g1:1 allocation score on rhel7-auto4: 0 +clone_color: g1:1 allocation score on rhel7-auto5: 0 +clone_color: g2:0 allocation score on rhel7-auto4: 0 +clone_color: 
g2:0 allocation score on rhel7-auto5: 0 +clone_color: g2:1 allocation score on rhel7-auto4: 0 +clone_color: g2:1 allocation score on rhel7-auto5: 0 +clone_color: mygroup-clone allocation score on rhel7-auto4: 0 +clone_color: mygroup-clone allocation score on rhel7-auto5: 0 +clone_color: mygroup:0 allocation score on rhel7-auto4: 0 +clone_color: mygroup:0 allocation score on rhel7-auto5: 0 +clone_color: mygroup:1 allocation score on rhel7-auto4: 0 +clone_color: mygroup:1 allocation score on rhel7-auto5: 0 +clone_color: replicated-clone allocation score on rhel7-auto4: 0 +clone_color: replicated-clone allocation score on rhel7-auto5: 0 +clone_color: replicated:0 allocation score on rhel7-auto4: 0 +clone_color: replicated:0 allocation score on rhel7-auto5: 0 +clone_color: replicated:1 allocation score on rhel7-auto4: 0 +clone_color: replicated:1 allocation score on rhel7-auto5: 0 +group_color: g1:0 allocation score on rhel7-auto4: -INFINITY +group_color: g1:0 allocation score on rhel7-auto5: 0 +group_color: g1:1 allocation score on rhel7-auto4: -INFINITY +group_color: g1:1 allocation score on rhel7-auto5: 0 +group_color: g2:0 allocation score on rhel7-auto4: -INFINITY +group_color: g2:0 allocation score on rhel7-auto5: 0 +group_color: g2:1 allocation score on rhel7-auto4: -INFINITY +group_color: g2:1 allocation score on rhel7-auto5: 0 +group_color: group_is_container allocation score on rhel7-auto4: 0 +group_color: group_is_container allocation score on rhel7-auto5: 0 +group_color: group_of_containers allocation score on rhel7-auto4: 0 +group_color: group_of_containers allocation score on rhel7-auto5: 0 +group_color: iso_mem1 allocation score on rhel7-auto4: 0 +group_color: iso_mem1 allocation score on rhel7-auto5: 0 +group_color: iso_mem2 allocation score on rhel7-auto4: 0 +group_color: iso_mem2 allocation score on rhel7-auto5: 0 +group_color: mygroup:0 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:0 allocation score on rhel7-auto5: 0 +group_color: 
mygroup:1 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:1 allocation score on rhel7-auto5: 0 +group_color: s1 allocation score on rhel7-auto4: 0 +group_color: s1 allocation score on rhel7-auto5: 0 +group_color: s2 allocation score on rhel7-auto4: 0 +group_color: s2 allocation score on rhel7-auto5: 0 +native_color: fake allocation score on rhel7-auto4: 0 +native_color: fake allocation score on rhel7-auto5: 0 +native_color: g1:0 allocation score on rhel7-auto4: -INFINITY +native_color: g1:0 allocation score on rhel7-auto5: 0 +native_color: g1:1 allocation score on rhel7-auto4: -INFINITY +native_color: g1:1 allocation score on rhel7-auto5: 0 +native_color: g2:0 allocation score on rhel7-auto4: -INFINITY +native_color: g2:0 allocation score on rhel7-auto5: 0 +native_color: g2:1 allocation score on rhel7-auto4: -INFINITY +native_color: g2:1 allocation score on rhel7-auto5: 0 +native_color: iso_mem1 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem1 allocation score on rhel7-auto5: 0 +native_color: iso_mem2 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem2 allocation score on rhel7-auto5: 0 +native_color: replicated:0 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:0 allocation score on rhel7-auto5: 0 +native_color: replicated:1 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:1 allocation score on rhel7-auto5: 0 +native_color: s1 allocation score on rhel7-auto4: -INFINITY +native_color: s1 allocation score on rhel7-auto5: 0 +native_color: s2 allocation score on rhel7-auto4: -INFINITY +native_color: s2 allocation score on rhel7-auto5: 0 diff --git a/pengine/test10/isolation-start-all.summary b/pengine/test10/isolation-start-all.summary new file mode 100644 index 00000000000..5f777727485 --- /dev/null +++ b/pengine/test10/isolation-start-all.summary @@ -0,0 +1,94 @@ + +Current cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): 
Stopped + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Stopped + replicated:1 (ocf::heartbeat:Dummy): Stopped + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Stopped + s2 (ocf::heartbeat:Dummy): Stopped + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Stopped + g2:0 (ocf::heartbeat:Dummy): Stopped + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Stopped + g2:1 (ocf::heartbeat:Dummy): Stopped + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Stopped + iso_mem2 (ocf::heartbeat:Dummy): Stopped + +Transition Summary: + * Start fake (rhel7-auto5) + * Start replicated:0 (rhel7-auto5) + * Start replicated:1 (rhel7-auto5) + * Start s1 (rhel7-auto5) + * Start s2 (rhel7-auto5) + * Start g1:0 (rhel7-auto5) + * Start g2:0 (rhel7-auto5) + * Start g1:1 (rhel7-auto5) + * Start g2:1 (rhel7-auto5) + * Start iso_mem1 (rhel7-auto5) + * Start iso_mem2 (rhel7-auto5) + +Executing cluster transition: + * Resource action: fake start on rhel7-auto5 + * Pseudo action: replicated-clone_start_0 + * Pseudo action: group_is_container_start_0 + * Resource action: s1 start on rhel7-auto5 + * Resource action: s2 start on rhel7-auto5 + * Pseudo action: mygroup-clone_start_0 + * Pseudo action: group_of_containers_start_0 + * Resource action: iso_mem1 start on rhel7-auto5 + * Resource action: iso_mem2 start on rhel7-auto5 + * Resource action: fake monitor=60000 on rhel7-auto5 + * Resource action: replicated:0 start on rhel7-auto5 + * Resource action: replicated:1 start on rhel7-auto5 + * Pseudo action: replicated-clone_running_0 + * Pseudo action: group_is_container_running_0 + * Resource action: s1 monitor=10000 on rhel7-auto5 + * Resource action: s2 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:0_start_0 + * Resource action: g1:0 start on rhel7-auto5 + * Resource action: g2:0 start on rhel7-auto5 + * Pseudo action: mygroup:1_start_0 + * 
Resource action: g1:1 start on rhel7-auto5 + * Resource action: g2:1 start on rhel7-auto5 + * Pseudo action: group_of_containers_running_0 + * Resource action: iso_mem1 monitor=60000 on rhel7-auto5 + * Resource action: iso_mem2 monitor=60000 on rhel7-auto5 + * Resource action: replicated:0 monitor=10000 on rhel7-auto5 + * Resource action: replicated:1 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:0_running_0 + * Resource action: g1:0 monitor=10000 on rhel7-auto5 + * Resource action: g2:0 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:1_running_0 + * Resource action: g1:1 monitor=10000 on rhel7-auto5 + * Resource action: g2:1 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup-clone_running_0 + +Revised cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + replicated:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + s2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + iso_mem2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + diff --git a/pengine/test10/isolation-start-all.xml b/pengine/test10/isolation-start-all.xml new file mode 100644 index 00000000000..98580ab3e2e --- /dev/null +++ b/pengine/test10/isolation-start-all.xml @@ -0,0 +1,189 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From a736fbaa367e93c0f1d416b4fb2e19d2c182eb9e Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 16:46:51 -0400 Subject: [PATCH 05/15] High: pengine: disable reloading of resources within isolated container wrappers --- pengine/allocate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pengine/allocate.c b/pengine/allocate.c index c9e446a9cbf..6c5d581367e 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -286,7 +286,7 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op set_bit(op->flags, pe_action_reschedule); #endif - } else if (digest_restart) { + } else if (digest_restart && rsc->isolation_wrapper == NULL && (uber_parent(rsc))->isolation_wrapper == NULL) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Allow this resource to reload - unless something else causes a full restart */ From 7770e0a57df18a7e493bc0a74d97519ce0298b63 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 16:49:18 -0400 Subject: [PATCH 06/15] Low: pengine: regression tests for disable reload during isolation --- pengine/regression.sh | 1 + pengine/test10/isolation-restart-all.dot | 167 ++++ pengine/test10/isolation-restart-all.exp | 873 +++++++++++++++++++ pengine/test10/isolation-restart-all.scores | 67 ++ pengine/test10/isolation-restart-all.summary | 118 +++ pengine/test10/isolation-restart-all.xml | 184 ++++ 6 files changed, 1410 insertions(+) create mode 100644 pengine/test10/isolation-restart-all.dot create mode 100644 pengine/test10/isolation-restart-all.exp create mode 100644 pengine/test10/isolation-restart-all.scores create mode 100644 pengine/test10/isolation-restart-all.summary create mode 100644 pengine/test10/isolation-restart-all.xml diff --git 
a/pengine/regression.sh b/pengine/regression.sh index 58ff4d476e5..59e511e488d 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -800,6 +800,7 @@ do_test rsc-discovery-per-node "Disable resource discovery per node" echo "" do_test isolation-start-all "Start docker isolated resources." +do_test isolation-restart-all "Restart docker isolated resources." echo "" test_results diff --git a/pengine/test10/isolation-restart-all.dot b/pengine/test10/isolation-restart-all.dot new file mode 100644 index 00000000000..4c0df1213d2 --- /dev/null +++ b/pengine/test10/isolation-restart-all.dot @@ -0,0 +1,167 @@ + digraph "g" { +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"fake_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"fake_start_0 rhel7-auto5" -> "fake_monitor_60000 rhel7-auto5" [ style = bold] +"fake_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"fake_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"fake_stop_0 rhel7-auto5" -> "fake_start_0 rhel7-auto5" [ style = bold] +"fake_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_start_0 rhel7-auto5" -> "g1:0_monitor_10000 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"g1:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g1:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:0_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g1:0_stop_0 rhel7-auto5" -> "g1:0_start_0 rhel7-auto5" [ style = bold] +"g1:0_stop_0 rhel7-auto5" -> "mygroup:0_stopped_0" [ style = bold] +"g1:0_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_start_0 rhel7-auto5" -> "g1:1_monitor_10000 rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "g2:1_start_0 
rhel7-auto5" [ style = bold] +"g1:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g1:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g1:1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g1:1_stop_0 rhel7-auto5" -> "g1:1_start_0 rhel7-auto5" [ style = bold] +"g1:1_stop_0 rhel7-auto5" -> "mygroup:1_stopped_0" [ style = bold] +"g1:1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_start_0 rhel7-auto5" -> "g2:0_monitor_10000 rhel7-auto5" [ style = bold] +"g2:0_start_0 rhel7-auto5" -> "mygroup:0_running_0" [ style = bold] +"g2:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:0_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g2:0_stop_0 rhel7-auto5" -> "g1:0_stop_0 rhel7-auto5" [ style = bold] +"g2:0_stop_0 rhel7-auto5" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"g2:0_stop_0 rhel7-auto5" -> "mygroup:0_stopped_0" [ style = bold] +"g2:0_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_start_0 rhel7-auto5" -> "g2:1_monitor_10000 rhel7-auto5" [ style = bold] +"g2:1_start_0 rhel7-auto5" -> "mygroup:1_running_0" [ style = bold] +"g2:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"g2:1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"g2:1_stop_0 rhel7-auto5" -> "g1:1_stop_0 rhel7-auto5" [ style = bold] +"g2:1_stop_0 rhel7-auto5" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"g2:1_stop_0 rhel7-auto5" -> "mygroup:1_stopped_0" [ style = bold] +"g2:1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"group_is_container_running_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_start_0" -> "group_is_container_running_0" [ style = bold] +"group_is_container_start_0" -> "s1_start_0 rhel7-auto5" [ style = bold] 
+"group_is_container_start_0" -> "s2_start_0 rhel7-auto5" [ style = bold] +"group_is_container_start_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_stop_0" -> "group_is_container_stopped_0" [ style = bold] +"group_is_container_stop_0" -> "s1_stop_0 rhel7-auto5" [ style = bold] +"group_is_container_stop_0" -> "s2_stop_0 rhel7-auto5" [ style = bold] +"group_is_container_stop_0" [ style=bold color="green" fontcolor="orange"] +"group_is_container_stopped_0" -> "group_is_container_start_0" [ style = bold] +"group_is_container_stopped_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_running_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_start_0" -> "group_of_containers_running_0" [ style = bold] +"group_of_containers_start_0" -> "iso_mem1_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"group_of_containers_start_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_stop_0" -> "group_of_containers_stopped_0" [ style = bold] +"group_of_containers_stop_0" -> "iso_mem1_stop_0 rhel7-auto5" [ style = bold] +"group_of_containers_stop_0" -> "iso_mem2_stop_0 rhel7-auto5" [ style = bold] +"group_of_containers_stop_0" [ style=bold color="green" fontcolor="orange"] +"group_of_containers_stopped_0" -> "group_of_containers_start_0" [ style = bold] +"group_of_containers_stopped_0" [ style=bold color="green" fontcolor="orange"] +"iso_mem1_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem1_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem1_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"iso_mem1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"iso_mem1_stop_0 rhel7-auto5" 
-> "group_of_containers_stopped_0" [ style = bold] +"iso_mem1_stop_0 rhel7-auto5" -> "iso_mem1_start_0 rhel7-auto5" [ style = bold] +"iso_mem1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_monitor_60000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_start_0 rhel7-auto5" -> "group_of_containers_running_0" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" -> "iso_mem2_monitor_60000 rhel7-auto5" [ style = bold] +"iso_mem2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"iso_mem2_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" -> "group_of_containers_stopped_0" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" -> "iso_mem1_stop_0 rhel7-auto5" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" -> "iso_mem2_start_0 rhel7-auto5" [ style = bold] +"iso_mem2_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"mygroup-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_start_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:0_start_0" [ style = bold] +"mygroup-clone_start_0" -> "mygroup:1_start_0" [ style = bold] +"mygroup-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_stop_0" -> "mygroup-clone_stopped_0" [ style = bold] +"mygroup-clone_stop_0" -> "mygroup:0_stop_0" [ style = bold] +"mygroup-clone_stop_0" -> "mygroup:1_stop_0" [ style = bold] +"mygroup-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +"mygroup-clone_stopped_0" -> "mygroup-clone_start_0" [ style = bold] +"mygroup-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:0_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_start_0" -> "g1:0_start_0 rhel7-auto5" [ style = bold] +"mygroup:0_start_0" -> "g2:0_start_0 rhel7-auto5" [ style = bold] +"mygroup:0_start_0" -> "mygroup:0_running_0" [ 
style = bold] +"mygroup:0_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_stop_0" -> "g1:0_stop_0 rhel7-auto5" [ style = bold] +"mygroup:0_stop_0" -> "g2:0_stop_0 rhel7-auto5" [ style = bold] +"mygroup:0_stop_0" -> "mygroup:0_stopped_0" [ style = bold] +"mygroup:0_stop_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:0_stopped_0" -> "mygroup-clone_stopped_0" [ style = bold] +"mygroup:0_stopped_0" -> "mygroup:0_start_0" [ style = bold] +"mygroup:0_stopped_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_running_0" -> "mygroup-clone_running_0" [ style = bold] +"mygroup:1_running_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_start_0" -> "g1:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "g2:1_start_0 rhel7-auto5" [ style = bold] +"mygroup:1_start_0" -> "mygroup:1_running_0" [ style = bold] +"mygroup:1_start_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_stop_0" -> "g1:1_stop_0 rhel7-auto5" [ style = bold] +"mygroup:1_stop_0" -> "g2:1_stop_0 rhel7-auto5" [ style = bold] +"mygroup:1_stop_0" -> "mygroup:1_stopped_0" [ style = bold] +"mygroup:1_stop_0" [ style=bold color="green" fontcolor="orange"] +"mygroup:1_stopped_0" -> "mygroup-clone_stopped_0" [ style = bold] +"mygroup:1_stopped_0" -> "mygroup:1_start_0" [ style = bold] +"mygroup:1_stopped_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_start_0" -> "replicated-clone_running_0" [ style = bold] +"replicated-clone_start_0" -> "replicated:0_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" -> "replicated:1_start_0 rhel7-auto5" [ style = bold] +"replicated-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_stop_0" -> "replicated-clone_stopped_0" [ style = bold] +"replicated-clone_stop_0" -> "replicated:0_stop_0 rhel7-auto5" [ style = bold] +"replicated-clone_stop_0" -> "replicated:1_stop_0 
rhel7-auto5" [ style = bold] +"replicated-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +"replicated-clone_stopped_0" -> "replicated-clone_start_0" [ style = bold] +"replicated-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +"replicated:0_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:0_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:0_start_0 rhel7-auto5" -> "replicated:0_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:0_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:0_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"replicated:0_stop_0 rhel7-auto5" -> "replicated-clone_stopped_0" [ style = bold] +"replicated:0_stop_0 rhel7-auto5" -> "replicated:0_start_0 rhel7-auto5" [ style = bold] +"replicated:0_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_start_0 rhel7-auto5" -> "replicated-clone_running_0" [ style = bold] +"replicated:1_start_0 rhel7-auto5" -> "replicated:1_monitor_10000 rhel7-auto5" [ style = bold] +"replicated:1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"replicated:1_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"replicated:1_stop_0 rhel7-auto5" -> "replicated-clone_stopped_0" [ style = bold] +"replicated:1_stop_0 rhel7-auto5" -> "replicated:1_start_0 rhel7-auto5" [ style = bold] +"replicated:1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s1_monitor_10000 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" -> "s2_start_0 rhel7-auto5" [ style = bold] +"s1_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s1_stop_0 rhel7-auto5" -> 
"all_stopped" [ style = bold] +"s1_stop_0 rhel7-auto5" -> "group_is_container_stopped_0" [ style = bold] +"s1_stop_0 rhel7-auto5" -> "s1_start_0 rhel7-auto5" [ style = bold] +"s1_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_monitor_10000 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_start_0 rhel7-auto5" -> "group_is_container_running_0" [ style = bold] +"s2_start_0 rhel7-auto5" -> "s2_monitor_10000 rhel7-auto5" [ style = bold] +"s2_start_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +"s2_stop_0 rhel7-auto5" -> "all_stopped" [ style = bold] +"s2_stop_0 rhel7-auto5" -> "group_is_container_stopped_0" [ style = bold] +"s2_stop_0 rhel7-auto5" -> "s1_stop_0 rhel7-auto5" [ style = bold] +"s2_stop_0 rhel7-auto5" -> "s2_start_0 rhel7-auto5" [ style = bold] +"s2_stop_0 rhel7-auto5" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/isolation-restart-all.exp b/pengine/test10/isolation-restart-all.exp new file mode 100644 index 00000000000..ba2ccca2113 --- /dev/null +++ b/pengine/test10/isolation-restart-all.exp @@ -0,0 +1,873 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/isolation-restart-all.scores b/pengine/test10/isolation-restart-all.scores new file mode 100644 index 00000000000..188aaab6308 --- /dev/null +++ b/pengine/test10/isolation-restart-all.scores @@ -0,0 +1,67 @@ +Allocation scores: +clone_color: g1:0 allocation score on rhel7-auto4: 0 +clone_color: g1:0 allocation score on rhel7-auto5: 1 +clone_color: g1:1 allocation score on rhel7-auto4: 0 +clone_color: g1:1 allocation score on rhel7-auto5: 1 +clone_color: g2:0 allocation score on rhel7-auto4: 0 +clone_color: g2:0 allocation score on rhel7-auto5: 1 +clone_color: g2:1 allocation score on rhel7-auto4: 0 +clone_color: g2:1 allocation score on rhel7-auto5: 1 +clone_color: mygroup-clone allocation score on rhel7-auto4: 0 +clone_color: mygroup-clone allocation score on rhel7-auto5: 0 +clone_color: mygroup:0 allocation score on rhel7-auto4: 0 +clone_color: mygroup:0 allocation score on rhel7-auto5: 0 +clone_color: mygroup:1 allocation score on rhel7-auto4: 0 +clone_color: mygroup:1 allocation score on rhel7-auto5: 0 +clone_color: replicated-clone allocation score on rhel7-auto4: 0 +clone_color: 
replicated-clone allocation score on rhel7-auto5: 0 +clone_color: replicated:0 allocation score on rhel7-auto4: 0 +clone_color: replicated:0 allocation score on rhel7-auto5: 1 +clone_color: replicated:1 allocation score on rhel7-auto4: 0 +clone_color: replicated:1 allocation score on rhel7-auto5: 1 +group_color: g1:0 allocation score on rhel7-auto4: -INFINITY +group_color: g1:0 allocation score on rhel7-auto5: 1 +group_color: g1:1 allocation score on rhel7-auto4: -INFINITY +group_color: g1:1 allocation score on rhel7-auto5: 1 +group_color: g2:0 allocation score on rhel7-auto4: -INFINITY +group_color: g2:0 allocation score on rhel7-auto5: 1 +group_color: g2:1 allocation score on rhel7-auto4: -INFINITY +group_color: g2:1 allocation score on rhel7-auto5: 1 +group_color: group_is_container allocation score on rhel7-auto4: 0 +group_color: group_is_container allocation score on rhel7-auto5: 0 +group_color: group_of_containers allocation score on rhel7-auto4: 0 +group_color: group_of_containers allocation score on rhel7-auto5: 0 +group_color: iso_mem1 allocation score on rhel7-auto4: 0 +group_color: iso_mem1 allocation score on rhel7-auto5: 0 +group_color: iso_mem2 allocation score on rhel7-auto4: 0 +group_color: iso_mem2 allocation score on rhel7-auto5: 0 +group_color: mygroup:0 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:0 allocation score on rhel7-auto5: 0 +group_color: mygroup:1 allocation score on rhel7-auto4: -INFINITY +group_color: mygroup:1 allocation score on rhel7-auto5: 0 +group_color: s1 allocation score on rhel7-auto4: 0 +group_color: s1 allocation score on rhel7-auto5: 0 +group_color: s2 allocation score on rhel7-auto4: 0 +group_color: s2 allocation score on rhel7-auto5: 0 +native_color: fake allocation score on rhel7-auto4: 0 +native_color: fake allocation score on rhel7-auto5: 0 +native_color: g1:0 allocation score on rhel7-auto4: -INFINITY +native_color: g1:0 allocation score on rhel7-auto5: 2 +native_color: g1:1 allocation score on 
rhel7-auto4: -INFINITY +native_color: g1:1 allocation score on rhel7-auto5: 2 +native_color: g2:0 allocation score on rhel7-auto4: -INFINITY +native_color: g2:0 allocation score on rhel7-auto5: 1 +native_color: g2:1 allocation score on rhel7-auto4: -INFINITY +native_color: g2:1 allocation score on rhel7-auto5: 1 +native_color: iso_mem1 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem1 allocation score on rhel7-auto5: 0 +native_color: iso_mem2 allocation score on rhel7-auto4: -INFINITY +native_color: iso_mem2 allocation score on rhel7-auto5: 0 +native_color: replicated:0 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:0 allocation score on rhel7-auto5: 1 +native_color: replicated:1 allocation score on rhel7-auto4: -INFINITY +native_color: replicated:1 allocation score on rhel7-auto5: 1 +native_color: s1 allocation score on rhel7-auto4: -INFINITY +native_color: s1 allocation score on rhel7-auto5: 0 +native_color: s2 allocation score on rhel7-auto4: -INFINITY +native_color: s2 allocation score on rhel7-auto5: 0 diff --git a/pengine/test10/isolation-restart-all.summary b/pengine/test10/isolation-restart-all.summary new file mode 100644 index 00000000000..a2939f104c6 --- /dev/null +++ b/pengine/test10/isolation-restart-all.summary @@ -0,0 +1,118 @@ + +Current cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + replicated:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + s2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:1 
(ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + iso_mem2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + +Transition Summary: + * Restart fake (Started rhel7-auto5) + * Restart replicated:0 (Started rhel7-auto5) + * Restart replicated:1 (Started rhel7-auto5) + * Restart s1 (Started rhel7-auto5) + * Restart s2 (Started rhel7-auto5) + * Restart g1:0 (Started rhel7-auto5) + * Restart g2:0 (Started rhel7-auto5) + * Restart g1:1 (Started rhel7-auto5) + * Restart g2:1 (Started rhel7-auto5) + * Restart iso_mem1 (Started rhel7-auto5) + * Restart iso_mem2 (Started rhel7-auto5) + +Executing cluster transition: + * Resource action: fake stop on rhel7-auto5 + * Resource action: fake start on rhel7-auto5 + * Resource action: fake monitor=60000 on rhel7-auto5 + * Pseudo action: replicated-clone_stop_0 + * Pseudo action: group_is_container_stop_0 + * Resource action: s2 stop on rhel7-auto5 + * Pseudo action: mygroup-clone_stop_0 + * Pseudo action: group_of_containers_stop_0 + * Resource action: iso_mem2 stop on rhel7-auto5 + * Resource action: replicated:0 stop on rhel7-auto5 + * Resource action: replicated:1 stop on rhel7-auto5 + * Pseudo action: replicated-clone_stopped_0 + * Pseudo action: replicated-clone_start_0 + * Resource action: s1 stop on rhel7-auto5 + * Pseudo action: mygroup:0_stop_0 + * Resource action: g2:0 stop on rhel7-auto5 + * Pseudo action: mygroup:1_stop_0 + * Resource action: g2:1 stop on rhel7-auto5 + * Resource action: iso_mem1 stop on rhel7-auto5 + * Resource action: replicated:0 start on rhel7-auto5 + * Resource action: replicated:0 monitor=10000 on rhel7-auto5 + * Resource action: replicated:1 start on rhel7-auto5 + * Resource action: replicated:1 monitor=10000 on rhel7-auto5 + * Pseudo action: replicated-clone_running_0 + * Pseudo action: group_is_container_stopped_0 + * Pseudo action: group_is_container_start_0 + * Resource action: s1 start on rhel7-auto5 + * 
Resource action: s1 monitor=10000 on rhel7-auto5 + * Resource action: s2 start on rhel7-auto5 + * Resource action: s2 monitor=10000 on rhel7-auto5 + * Resource action: g1:0 stop on rhel7-auto5 + * Resource action: g1:1 stop on rhel7-auto5 + * Pseudo action: group_of_containers_stopped_0 + * Pseudo action: group_of_containers_start_0 + * Resource action: iso_mem1 start on rhel7-auto5 + * Resource action: iso_mem1 monitor=60000 on rhel7-auto5 + * Resource action: iso_mem2 start on rhel7-auto5 + * Resource action: iso_mem2 monitor=60000 on rhel7-auto5 + * Pseudo action: all_stopped + * Pseudo action: group_is_container_running_0 + * Pseudo action: mygroup:0_stopped_0 + * Pseudo action: mygroup:1_stopped_0 + * Pseudo action: mygroup-clone_stopped_0 + * Pseudo action: mygroup-clone_start_0 + * Pseudo action: group_of_containers_running_0 + * Pseudo action: mygroup:0_start_0 + * Resource action: g1:0 start on rhel7-auto5 + * Resource action: g1:0 monitor=10000 on rhel7-auto5 + * Resource action: g2:0 start on rhel7-auto5 + * Resource action: g2:0 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:1_start_0 + * Resource action: g1:1 start on rhel7-auto5 + * Resource action: g1:1 monitor=10000 on rhel7-auto5 + * Resource action: g2:1 start on rhel7-auto5 + * Resource action: g2:1 monitor=10000 on rhel7-auto5 + * Pseudo action: mygroup:0_running_0 + * Pseudo action: mygroup:1_running_0 + * Pseudo action: mygroup-clone_running_0 + +Revised cluster status: +Online: [ rhel7-auto5 ] +OFFLINE: [ rhel7-auto4 ] + + fake (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: replicated-clone [replicated] (unique) + replicated:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + replicated:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_is_container + s1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + s2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Clone Set: mygroup-clone [mygroup] (unique) + Resource Group: mygroup:0 + g1:0 (ocf::heartbeat:Dummy): Started 
rhel7-auto5 + g2:0 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: mygroup:1 + g1:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + g2:1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + Resource Group: group_of_containers + iso_mem1 (ocf::heartbeat:Dummy): Started rhel7-auto5 + iso_mem2 (ocf::heartbeat:Dummy): Started rhel7-auto5 + diff --git a/pengine/test10/isolation-restart-all.xml b/pengine/test10/isolation-restart-all.xml new file mode 100644 index 00000000000..124f524f58e --- /dev/null +++ b/pengine/test10/isolation-restart-all.xml @@ -0,0 +1,184 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 2244aafa3798d30be32c2187f70506d9246bccb3 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 17:27:23 -0400 Subject: [PATCH 07/15] High: pengine: disable migrations for resources with isolation containers --- lib/pengine/complex.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 9c876959d75..fe03d447b24 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -502,9 +502,14 @@ common_unpack(xmlNode * xml_obj, resource_t ** rsc, if (top->isolation_wrapper == NULL && (value == NULL || crm_is_true(value))) { if (g_hash_table_lookup((*rsc)->meta, "pcmk_docker_image")) { (*rsc)->isolation_wrapper = "docker-wrapper"; + clear_bit((*rsc)->flags, pe_rsc_allow_migrate); } /* add more isolation technologies here as we expand */ } + if (top->isolation_wrapper) { + /* never allow resources with an isolation wrapper migrate */ + clear_bit((*rsc)->flags, pe_rsc_allow_migrate); + } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); if (crm_is_true(value) || 
top->variant < pe_clone || (*rsc)->isolation_wrapper) { From f2e957ba710a4119dbe5a9a426eb424342b93788 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 17:27:50 -0400 Subject: [PATCH 08/15] Low: spec: add docker-wrapper directory to spec file --- pacemaker.spec.in | 1 + 1 file changed, 1 insertion(+) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 597fb3ae3a7..267baf1d856 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -444,6 +444,7 @@ exit 0 %dir /usr/lib/ocf %dir /usr/lib/ocf/resource.d /usr/lib/ocf/resource.d/pacemaker +/usr/lib/ocf/resource.d/containers %if "%{?cs_version}" != "UNKNOWN" %if 0%{?cs_version} < 2 From 757f110d4dbdb4a3ce372ae38d6d311c9a30246e Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 18:37:47 -0400 Subject: [PATCH 09/15] Low: lrmd: properly handle poke requests in lrmd client when using ipc --- lib/lrmd/lrmd_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index eace0dd5f85..bf45822652b 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -868,13 +868,14 @@ static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; + lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); - rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, FALSE); + rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); - return rc; + return rc < 0 ? 
rc : pcmk_ok; } static int From 23ce0371879c51200a11c90e629359114ee8b7e3 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Mon, 23 Mar 2015 18:38:20 -0400 Subject: [PATCH 10/15] Low: enable logging in remote_ctl.c --- lrmd/remote_ctl.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c index d2ab9eb4eb1..c9fbe93e287 100644 --- a/lrmd/remote_ctl.c +++ b/lrmd/remote_ctl.c @@ -86,11 +86,6 @@ client_exit(int rc) exit(rc); } -#define print_result(result) \ - if (!options.quiet) { \ - result; \ - } \ - static void client_shutdown(int nsig) { @@ -116,7 +111,7 @@ read_events(lrmd_event_data_t * event) static gboolean timeout_err(gpointer data) { - print_result(printf("timed out in remote_client\n")); + crm_err("timed out in remote_client\n"); client_exit(PCMK_OCF_TIMEOUT); return FALSE; @@ -159,7 +154,7 @@ try_connect(void) sleep(1); } - print_result(printf("Failed to connect to pacemaker remote.\n")); + crm_err("Failed to connect to pacemaker remote.\n"); client_exit(PCMK_OCF_UNKNOWN_ERROR); } @@ -208,7 +203,7 @@ client_start(gpointer user_data) options.class, options.provider, options.type, 0); if (rc != 0){ - print_result(printf("failed to register resource %s with pacemaker_remote. rc: %d\n", options.rsc_id, rc)); + crm_err("failed to register resource %s with pacemaker_remote. rc: %d\n", options.rsc_id, rc); client_exit(1); } } @@ -225,7 +220,7 @@ client_start(gpointer user_data) if (rc > 0) { exec_call_id = rc; } else { - print_result(printf("execution of rsc %s failed. rc = %d\n", options.rsc_id, rc)); + crm_err("execution of rsc %s failed. rc = %d\n", options.rsc_id, rc); client_exit(PCMK_OCF_UNKNOWN_ERROR); } } @@ -322,11 +317,12 @@ main(int argc, char **argv) if (optind > argc) { ++argerr; } + crm_log_init("remote_client", LOG_INFO, TRUE, options.verbose ? 
TRUE : FALSE, argc, argv, FALSE); /* if we can't perform an api_call or listen for events, * there is nothing to do */ if (!options.api_call ) { - print_result(printf("Nothing to be done. Please specify 'api-call'\n")); + crm_err("Nothing to be done. Please specify 'api-call'\n"); return PCMK_OCF_UNKNOWN_ERROR; } From 338311455b6404a14fbb517283ec94e176e9e26b Mon Sep 17 00:00:00 2001 From: David Vossel Date: Tue, 24 Mar 2015 17:36:25 -0400 Subject: [PATCH 11/15] High: lrmd: enable ipc proxy for docker-wrapper privileged mode --- crmd/lrm_state.c | 77 +------------- extra/resources/docker-wrapper | 4 +- include/crm_internal.h | 18 ++++ lib/lrmd/Makefile.am | 2 +- lib/lrmd/proxy_common.c | 100 ++++++++++++++++++ lrmd/remote_ctl.c | 178 +++++++++++++++++++++++++++++++++ 6 files changed, 300 insertions(+), 79 deletions(-) create mode 100644 lib/lrmd/proxy_common.c diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c index efd061cb53b..31ca7ee7d1c 100644 --- a/crmd/lrm_state.c +++ b/crmd/lrm_state.c @@ -27,22 +27,10 @@ #include GHashTable *lrm_state_table = NULL; -GHashTable *proxy_table = NULL; +extern GHashTable *proxy_table; int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); -typedef struct remote_proxy_s { - char *node_name; - char *session_id; - - gboolean is_local; - - crm_ipc_t *ipc; - mainloop_io_t *source; - uint32_t last_request_id; - -} remote_proxy_t; - static void history_cache_destroy(gpointer data) { @@ -218,32 +206,6 @@ lrm_state_reset_tables(lrm_state_t * lrm_state) } } -static void -remote_proxy_end_session(const char *session) -{ - remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); - - if (proxy == NULL) { - return; - } - crm_trace("ending session ID %s", proxy->session_id); - - if (proxy->source) { - mainloop_del_ipc_client(proxy->source); - } -} - -static void -remote_proxy_free(gpointer data) -{ - 
remote_proxy_t *proxy = data; - - crm_trace("freed proxy session ID %s", proxy->session_id); - free(proxy->node_name); - free(proxy->session_id); - free(proxy); -} - gboolean lrm_state_init_local(void) { @@ -359,43 +321,6 @@ lrm_state_ipc_connect(lrm_state_t * lrm_state) return ret; } -static void -remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) -{ - /* sending to the remote node that an ipc connection has been destroyed */ - xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); - crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); - crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); - lrmd_internal_proxy_send(lrmd, msg); - free_xml(msg); -} - -static void -remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg) -{ - /* sending to the remote node an event msg. */ - xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); - crm_xml_add(event, F_LRMD_IPC_OP, "event"); - crm_xml_add(event, F_LRMD_IPC_SESSION, session_id); - add_message_xml(event, F_LRMD_IPC_MSG, msg); - crm_log_xml_explicit(event, "EventForProxy"); - lrmd_internal_proxy_send(lrmd, event); - free_xml(event); -} - -static void -remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id) -{ - /* sending to the remote node a response msg. 
*/ - xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); - crm_xml_add(response, F_LRMD_IPC_OP, "response"); - crm_xml_add(response, F_LRMD_IPC_SESSION, session_id); - crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); - add_message_xml(response, F_LRMD_IPC_MSG, msg); - lrmd_internal_proxy_send(lrmd, response); - free_xml(response); -} - static int remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index c67022ff2be..5dc5ba8a452 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -258,7 +258,7 @@ client_action() if [ -z "$PORT" ]; then get_active_port fi - ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" + ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" -n $CONTAINER $agent_type $KEY_VAL_STR >/dev/null 2>&1" $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" $agent_type $KEY_VAL_STR >/dev/null 2>&1 else echo "$CLIENT -c \"exec\" -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1 @@ -277,7 +277,7 @@ poke_remote() if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then get_active_port ocf_log debug "Attempting to contect $CONTAINER on port $PORT" - $CLIENT -c "poke" -S "127.0.0.1" -p $PORT >/dev/null 2>&1 + $CLIENT -c "poke" -S "127.0.0.1" -p $PORT -n $CONTAINER >/dev/null 2>&1 fi # no op for non privileged containers since we handed the # client monitor action as the monitor_cmd for the docker agent diff --git a/include/crm_internal.h b/include/crm_internal.h index ed07dc96ddf..ca4d416fc97 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -366,4 +366,22 @@ gboolean 
crm_digest_verify(xmlNode *input, const char *expected); /* cross-platform compatibility functions */ char *crm_compat_realpath(const char *path); +/* IPC Proxy Backend Shared Functions */ +typedef struct remote_proxy_s { + char *node_name; + char *session_id; + + gboolean is_local; + + crm_ipc_t *ipc; + mainloop_io_t *source; + uint32_t last_request_id; + +} remote_proxy_t; +void remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id); +void remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg); +void remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id); +void remote_proxy_end_session(const char *session); +void remote_proxy_free(gpointer data); + #endif /* CRM_INTERNAL__H */ diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am index bd62676efec..107598dbdcb 100644 --- a/lib/lrmd/Makefile.am +++ b/lib/lrmd/Makefile.am @@ -24,7 +24,7 @@ AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ lib_LTLIBRARIES = liblrmd.la -liblrmd_la_SOURCES = lrmd_client.c +liblrmd_la_SOURCES = lrmd_client.c proxy_common.c liblrmd_la_LDFLAGS = -version-info 2:2:1 liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ diff --git a/lib/lrmd/proxy_common.c b/lib/lrmd/proxy_common.c new file mode 100644 index 00000000000..3026227eed6 --- /dev/null +++ b/lib/lrmd/proxy_common.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2015 David Vossel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); +GHashTable *proxy_table = NULL; + +void +remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) +{ + /* sending to the remote node that an ipc connection has been destroyed */ + xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); + crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); + lrmd_internal_proxy_send(lrmd, msg); + free_xml(msg); +} + +void +remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg) +{ + /* sending to the remote node an event msg. */ + xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(event, F_LRMD_IPC_OP, "event"); + crm_xml_add(event, F_LRMD_IPC_SESSION, session_id); + add_message_xml(event, F_LRMD_IPC_MSG, msg); + crm_log_xml_explicit(event, "EventForProxy"); + lrmd_internal_proxy_send(lrmd, event); + free_xml(event); +} + +void +remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id) +{ + /* sending to the remote node a response msg. 
*/ + xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(response, F_LRMD_IPC_OP, "response"); + crm_xml_add(response, F_LRMD_IPC_SESSION, session_id); + crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); + add_message_xml(response, F_LRMD_IPC_MSG, msg); + lrmd_internal_proxy_send(lrmd, response); + free_xml(response); +} + +void +remote_proxy_end_session(const char *session) +{ + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + + if (proxy == NULL) { + return; + } + crm_trace("ending session ID %s", proxy->session_id); + + if (proxy->source) { + mainloop_del_ipc_client(proxy->source); + } +} + +void +remote_proxy_free(gpointer data) +{ + remote_proxy_t *proxy = data; + + crm_trace("freed proxy session ID %s", proxy->session_id); + free(proxy->node_name); + free(proxy->session_id); + free(proxy); +} + + diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c index c9fbe93e287..db8d8048305 100644 --- a/lrmd/remote_ctl.c +++ b/lrmd/remote_ctl.c @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -30,6 +31,9 @@ #include #include +extern GHashTable *proxy_table; +void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); + /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"help", 0, 0, '?'}, @@ -37,6 +41,7 @@ static struct crm_option long_options[] = { {"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"}, {"tls", 1, 0, 'S', "\t\tSet tls host to contact"}, {"tls-port", 1, 0, 'p', "\t\tUse custom tls port"}, + {"node", 1, 0, 'n', "\tNode name to use for ipc proxy"}, {"api-call", 1, 0, 'c', "\tDirectly relates to lrmd api functions"}, {"-spacer-", 1, 0, '-', "\nParameters for api-call option"}, {"action", 1, 0, 'a'}, @@ -65,6 +70,7 @@ static struct { int interval; int timeout; int port; + const char *node_name; const char *api_call; const char *rsc_id; const char *provider; @@ -83,6 +89,9 @@ static void client_exit(int rc) { 
lrmd_api_delete(lrmd_conn); + if (proxy_table) { + g_hash_table_destroy(proxy_table); proxy_table = NULL; + } exit(rc); } @@ -171,6 +180,11 @@ client_start(gpointer user_data) lrmd_conn->cmds->set_callback(lrmd_conn, read_events); + + if (safe_str_eq(options.api_call, "ipc_debug")) { + /* Do nothing, leave connection up just for debugging ipc proxy */ + return 0; + } if (options.timeout) { g_timeout_add(options.timeout, timeout_err, NULL); } @@ -228,6 +242,159 @@ client_start(gpointer user_data) return 0; } +static int +remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) +{ + /* Async responses from cib and friends back to clients via pacemaker_remoted */ + xmlNode *xml = NULL; + remote_proxy_t *proxy = userdata; + uint32_t flags; + + xml = string2xml(buffer); + if (xml == NULL) { + crm_warn("Received a NULL msg from IPC service."); + return 1; + } + + flags = crm_ipc_buffer_flags(proxy->ipc); + if (flags & crm_ipc_proxied_relay_response) { + crm_trace("Passing response back to %.8s on %s: %.200s - request id: %d", proxy->session_id, proxy->node_name, buffer, proxy->last_request_id); + remote_proxy_relay_response(lrmd_conn, proxy->session_id, xml, proxy->last_request_id); + proxy->last_request_id = 0; + + } else { + crm_trace("Passing event back to %.8s on %s: %.200s", proxy->session_id, proxy->node_name, buffer); + remote_proxy_relay_event(lrmd_conn, proxy->session_id, xml); + } + free_xml(xml); + return 1; +} + +static void +remote_proxy_disconnected(void *userdata) +{ + remote_proxy_t *proxy = userdata; + + crm_trace("destroying %p", userdata); + + proxy->source = NULL; + proxy->ipc = NULL; + + remote_proxy_notify_destroy(lrmd_conn, proxy->session_id); + g_hash_table_remove(proxy_table, proxy->session_id); +} + +static remote_proxy_t * +remote_proxy_new(const char *node_name, const char *session_id, const char *channel) +{ + static struct ipc_client_callbacks proxy_callbacks = { + .dispatch = remote_proxy_dispatch_internal, + 
.destroy = remote_proxy_disconnected + }; + remote_proxy_t *proxy = calloc(1, sizeof(remote_proxy_t)); + + proxy->node_name = strdup(node_name); + proxy->session_id = strdup(session_id); + + if (safe_str_eq(channel, CRM_SYSTEM_CRMD)) { + proxy->is_local = TRUE; + } else { + proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 0, proxy, &proxy_callbacks); + proxy->ipc = mainloop_get_ipc_client(proxy->source); + + if (proxy->source == NULL) { + remote_proxy_free(proxy); + return NULL; + } + } + + crm_trace("created proxy session ID %s", proxy->session_id); + g_hash_table_insert(proxy_table, proxy->session_id, proxy); + + return proxy; +} + +static void +remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) +{ + const char *op = crm_element_value(msg, F_LRMD_IPC_OP); + const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); + int msg_id = 0; + + /* sessions are raw ipc connections to IPC, + * all we do is proxy requests/responses exactly + * like they are given to us at the ipc level. 
*/ + + CRM_CHECK(op != NULL, return); + CRM_CHECK(session != NULL, return); + + crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); + + /* This is msg from remote ipc client going to real ipc server */ + if (safe_str_eq(op, "new")) { + const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); + + CRM_CHECK(channel != NULL, return); + + if (remote_proxy_new(options.node_name, session, channel) == NULL) { + remote_proxy_notify_destroy(lrmd, session); + } + crm_info("new remote proxy client established to %s, session id %s", channel, session); + } else if (safe_str_eq(op, "destroy")) { + remote_proxy_end_session(session); + + } else if (safe_str_eq(op, "request")) { + int flags = 0; + xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); + const char *name = crm_element_value(msg, F_LRMD_IPC_CLIENT); + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + + CRM_CHECK(request != NULL, return); + + if (proxy == NULL) { + /* proxy connection no longer exists */ + remote_proxy_notify_destroy(lrmd, session); + return; + } else if ((proxy->is_local == FALSE) && (crm_ipc_connected(proxy->ipc) == FALSE)) { + remote_proxy_end_session(session); + return; + } + proxy->last_request_id = 0; + crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); + crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); + +#if ENABLE_ACL + CRM_ASSERT(options.node_name); + crm_acl_get_set_user(request, F_LRMD_IPC_USER, options.node_name); +#endif + + if (is_set(flags, crm_ipc_proxied)) { + int rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); + + if(rc < 0) { + xmlNode *op_reply = create_xml_node(NULL, "nack"); + + crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", + op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); + + /* Send a n'ack so the caller doesn't block */ + crm_xml_add(op_reply, "function", __FUNCTION__); + crm_xml_add_int(op_reply, "line", __LINE__); + crm_xml_add_int(op_reply, "rc", 
rc); + remote_proxy_relay_response(lrmd, session, op_reply, msg_id); + free_xml(op_reply); + + } else { + crm_trace("Relayed %s request %d from %s to %s for %s", + op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); + proxy->last_request_id = msg_id; + } + } + } else { + crm_err("Unknown proxy operation: %s", op); + } +} + int main(int argc, char **argv) { @@ -258,6 +425,9 @@ main(int argc, char **argv) options.quiet = 1; options.verbose = 0; break; + case 'n': + options.node_name = optarg; + break; case 'c': options.api_call = optarg; break; @@ -329,8 +499,16 @@ main(int argc, char **argv) if (!options.timeout ) { options.timeout = 20000; } + if (use_tls) { + if (options.node_name == NULL) { + crm_err("\"node\" option required when tls is in use.\n"); + return PCMK_OCF_UNKNOWN_ERROR; + } + proxy_table = + g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free); lrmd_conn = lrmd_remote_api_new(NULL, options.tls_host ? options.tls_host : "localhost", options.port); + lrmd_internal_set_proxy_callback(lrmd_conn, NULL, remote_proxy_cb); } else { lrmd_conn = lrmd_api_new(); } From 83c3a49aeb53f997425b4f0f576b9ec4405a5151 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Tue, 24 Mar 2015 17:54:16 -0400 Subject: [PATCH 12/15] Low: rename pacemaker_remote_ctl to lrmd_internal_ctl --- extra/resources/docker-wrapper | 2 +- lrmd/Makefile.am | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index 5dc5ba8a452..11fa4d83d9e 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -95,7 +95,7 @@ END ####################################################################### -CLIENT="/usr/libexec/pacemaker/pacemaker_remote_ctl" +CLIENT="/usr/libexec/pacemaker/lrmd_internal_ctl" DOCKER_AGENT="/usr/lib/ocf/resource.d/heartbeat/docker" KEY_VAL_STR="" PROVIDER=$OCF_RESKEY_CRM_meta_provider diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am
index 9addd1bd56c..57d7810fcfe 100644 --- a/lrmd/Makefile.am +++ b/lrmd/Makefile.am @@ -21,7 +21,7 @@ testdir = $(datadir)/$(PACKAGE)/tests/lrmd test_SCRIPTS = regression.py lrmdlibdir = $(CRM_DAEMON_DIR) -lrmdlib_PROGRAMS = lrmd lrmd_test pacemaker_remote_ctl +lrmdlib_PROGRAMS = lrmd lrmd_test lrmd_internal_ctl initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote @@ -43,8 +43,8 @@ pacemaker_remoted_CFLAGS = -DSUPPORT_REMOTE pacemaker_remoted_LDADD = $(lrmd_LDADD) -pacemaker_remote_ctl_SOURCES = remote_ctl.c -pacemaker_remote_ctl_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ +lrmd_internal_ctl_SOURCES = remote_ctl.c +lrmd_internal_ctl_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/services/libcrmservice.la \ From 9726f76889f1f5a7afd2f8523ef731cb507df8cf Mon Sep 17 00:00:00 2001 From: David Vossel Date: Wed, 25 Mar 2015 12:25:23 -0400 Subject: [PATCH 13/15] Low: docker-wrapper: set authkey file permissions and properly set container 'node name' during start --- extra/resources/docker-wrapper | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index 11fa4d83d9e..eb3520e25c3 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -258,12 +258,10 @@ client_action() if [ -z "$PORT" ]; then get_active_port fi - ocf_log debug "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" -n $CONTAINER $agent_type $KEY_VAL_STR >/dev/null 2>&1" - $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" $agent_type $KEY_VAL_STR >/dev/null 2>&1 + ocf_log info "$CLIENT -c \"exec\" -S \"127.0.0.1\" -p $PORT -a $action -r \"$OCF_RESOURCE_INSTANCE\" -n $CONTAINER $agent_type $KEY_VAL_STR >/dev/null 2>&1" + $CLIENT -c "exec" -S "127.0.0.1" -p $PORT -a $action -r "$OCF_RESOURCE_INSTANCE" -n $CONTAINER $agent_type 
$KEY_VAL_STR >/dev/null 2>&1 else echo "$CLIENT -c \"exec\" -a $action -r \"$OCF_RESOURCE_INSTANCE\" $agent_type $KEY_VAL_STR >/dev/null 2>&1" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1 - - fi rc=$? @@ -276,7 +274,7 @@ poke_remote() # verifies daemon in container is active if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then get_active_port - ocf_log debug "Attempting to contect $CONTAINER on port $PORT" + ocf_log info "Attempting to contect $CONTAINER on port $PORT" $CLIENT -c "poke" -S "127.0.0.1" -p $PORT -n $CONTAINER >/dev/null 2>&1 fi # no op for non privileged containers since we handed the @@ -311,6 +309,7 @@ start_container() # generate an authkey if it doesn't exist. mkdir -p /etc/pacemaker/ dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 > /dev/null 2>&1 + chmod 600 /etc/pacemaker/authkey fi PORT=$(random_port) From 48838111981b417fc5132d36eb4899dba03199d2 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Wed, 25 Mar 2015 17:38:12 -0400 Subject: [PATCH 14/15] Fix: docker-wrapper: properly separate docker and resource specific attributes --- extra/resources/docker-wrapper | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/extra/resources/docker-wrapper b/extra/resources/docker-wrapper index eb3520e25c3..d4c48ea58dc 100755 --- a/extra/resources/docker-wrapper +++ b/extra/resources/docker-wrapper @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # Copyright (c) 2015 David Vossel # All Rights Reserved. 
@@ -173,21 +173,21 @@ separate_args() local env key value # write out arguments to key value string for ocf agent - for item in $(printenv | grep "^OCF.*" | grep -v "^OCF_RESKEY_pcmk_docker_.*"); + while read -r line; do - key="$(echo $item | awk -F= '{print $1}')" - val="$(echo $item | awk -F= '{print $2}')" + key="$(echo $line | awk -F= '{print $1}')" + val="$(echo $line | awk -F= '{print $2}')" KEY_VAL_STR="$KEY_VAL_STR -k \"$key\" -v \"$val\"" - done + done < <(printenv | grep "^OCF.*" | grep -v "^OCF_RESKEY_pcmk_docker_.*") # sanitize args for DOCKER agent's consumption - for item in $(printenv | grep "^OCF_RESKEY_pcmk_docker_.*"); + while read -r line; do - env="$(echo $item | awk -F= '{print $1}')" - val="$(echo $item | awk -F= '{print $2}')" + env="$(echo $line | awk -F= '{print $1}')" + val="$(echo $line | awk -F= '{print $2}')" key="$(echo "$env" | sed 's/^OCF_RESKEY_pcmk_docker/OCF_RESKEY/g')" - export ${key}=$(echo $val) - done + export $key="$val" + done < <(printenv | grep "^OCF_RESKEY_pcmk_docker_.*") if ocf_is_true $OCF_RESKEY_pcmk_docker_privileged ; then export OCF_RESKEY_run_cmd="/usr/sbin/pacemaker_remoted" From 7302cd7e63774cea45f990271379b44192048af7 Mon Sep 17 00:00:00 2001 From: David Vossel Date: Wed, 25 Mar 2015 17:38:55 -0400 Subject: [PATCH 15/15] Low: lrmd: preserve exit reason string when isolation wrappers are in use --- lrmd/remote_ctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lrmd/remote_ctl.c b/lrmd/remote_ctl.c index db8d8048305..32151d744f4 100644 --- a/lrmd/remote_ctl.c +++ b/lrmd/remote_ctl.c @@ -110,9 +110,11 @@ read_events(lrmd_event_data_t * event) } if ((event->call_id == exec_call_id) && (event->type == lrmd_event_exec_complete)) { if (event->output) { - printf("%s", event->output); + crm_info("%s", event->output); + } + if (event->exit_reason) { + fprintf(stderr, "%s%s\n", PCMK_OCF_REASON_PREFIX, event->exit_reason); } - client_exit(event->rc); } }