diff --git a/.gitignore b/.gitignore
index 561ef6d..1e3c89c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,16 +1,3 @@
-/Pacemaker-1.1.4.tar.bz2
-/Pacemaker-1.1.5.tar.bz2
-/Pacemaker-1.1.6.tar.bz2
-/ClusterLabs-pacemaker-89678d4.tar.gz
-/ClusterLabs-pacemaker-bc7c125.tar.gz
-/ClusterLabs-pacemaker-148fccf.tar.gz
-/ClusterLabs-pacemaker-7742926.tar.gz
-/ClusterLabs-pacemaker-Pacemaker-1.1.7.tar.gz
-/ClusterLabs-pacemaker-b5b0a7b.tar.gz
-/ClusterLabs-pacemaker-c72d970.tar.gz
-/ClusterLabs-pacemaker-394e906.tar.gz
-/ClusterLabs-pacemaker-70ad9fa.tar.gz
-/ClusterLabs-pacemaker-781a388.tar.gz
-/ClusterLabs-pacemaker-9d39a6b.tar.gz
-/pacemaker-a9c81774b89f21f990be255f9862446d1a38afee.tar.gz
-/pacemaker-6052cd16c2f455809f8088af76ce86483bf98353.tar.gz
+/ClusterLabs-pacemaker-*.tar.gz
+/pacemaker-*.tar.gz
+/nagios-agents-metadata-*.tar.gz
diff --git a/0004-Fix-crm_resource-Correctly-check-if-a-resource-is-un.patch b/0004-Fix-crm_resource-Correctly-check-if-a-resource-is-un.patch
new file mode 100644
index 0000000..1ef6a11
--- /dev/null
+++ b/0004-Fix-crm_resource-Correctly-check-if-a-resource-is-un.patch
@@ -0,0 +1,82 @@
+From: Andrew Beekhof
+Date: Fri, 14 Aug 2015 09:43:32 +1000
+Subject: [PATCH] Fix: crm_resource: Correctly check if a resource is unmanaged
+ or has a target-role
+
+(cherry picked from commit 3ff29dbe2cab872b452c4580736d23d1f69736fa)
+---
+ tools/crm_resource.c         |  2 +-
+ tools/crm_resource_runtime.c | 31 ++++++++++++++++++-------------
+ 2 files changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/tools/crm_resource.c b/tools/crm_resource.c
+index 2fce3b7..156bbea 100644
+--- a/tools/crm_resource.c
++++ b/tools/crm_resource.c
+@@ -888,7 +888,7 @@ main(int argc, char **argv)
+             rsc = uber_parent(rsc);
+         }
+
+-        crm_debug("Re-checking the state of %s on %s", rsc_id, host_uname);
++        crm_debug("Re-checking the state of %s for %s on %s", rsc->id, rsc_id, host_uname);
+         if(rsc) {
+             crmd_replies_needed = 0;
+             rc = cli_resource_delete(cib_conn, crmd_channel, host_uname, rsc, &data_set);
+diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
+index a270cbf..f260e19 100644
+--- a/tools/crm_resource_runtime.c
++++ b/tools/crm_resource_runtime.c
+@@ -616,35 +616,40 @@ cli_resource_delete(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_
+ void
+ cli_resource_check(cib_t * cib_conn, resource_t *rsc)
+ {
+-
++    int need_nl = 0;
+     char *role_s = NULL;
+     char *managed = NULL;
+     resource_t *parent = uber_parent(rsc);
+
+-    find_resource_attr(cib_conn, XML_ATTR_ID, parent->id,
+-                       XML_TAG_META_SETS, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed);
++    find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id,
++                       NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed);
+
+-    find_resource_attr(cib_conn, XML_ATTR_ID, parent->id,
+-                       XML_TAG_META_SETS, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s);
++    find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id,
++                       NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s);
+
+-    if(managed == NULL) {
+-        managed = strdup("1");
+-    }
+-    if(crm_is_true(managed) == FALSE) {
+-        printf("\n\t*Resource %s is configured to not be managed by the cluster\n", parent->id);
+-    }
+     if(role_s) {
+         enum rsc_role_e role = text2role(role_s);
+         if(role == RSC_ROLE_UNKNOWN) {
+             // Treated as if unset
+
+         } else if(role == RSC_ROLE_STOPPED) {
+-            printf("\n\t* The configuration specifies that '%s' should remain stopped\n", parent->id);
++            printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id);
++            need_nl++;
+
+         } else if(parent->variant > pe_clone && role != RSC_ROLE_MASTER) {
+-            printf("\n\t* The configuration specifies that '%s' should not be promoted\n", parent->id);
++            printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id);
++            need_nl++;
+         }
+     }
++
++    if(managed && crm_is_true(managed) == FALSE) {
++        printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id);
++        need_nl++;
++    }
++
++    if(need_nl) {
++        printf("\n");
++    }
+ }
+
+ int
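Functionally, cli_resource_check() now looks the attributes up by value (so any attribute set is searched, not just meta_attributes), and the unmanaged warning fires only when is-managed is explicitly present and false; the old strdup("1") placeholder, which also leaked, is gone. A small standalone sketch of the new guard, where is_true() is a hypothetical stand-in for Pacemaker's crm_is_true():

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for crm_is_true() */
    static bool is_true(const char *s)
    {
        return s && (strcmp(s, "1") == 0 || strcmp(s, "true") == 0
                     || strcmp(s, "on") == 0 || strcmp(s, "yes") == 0);
    }

    static void check_managed(const char *managed)
    {
        /* New logic: warn only if the attribute is actually set and false;
         * an unset attribute no longer needs a heap-allocated "1" default. */
        if (managed && !is_true(managed)) {
            printf(" * resource is unmanaged\n");
        }
    }

    int main(void)
    {
        check_managed(NULL);     /* attribute unset: no warning */
        check_managed("false");  /* explicitly false: warning */
        return 0;
    }
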
diff --git a/0005-Fix-PE-Bug-cl-5247-Imply-resources-running-on-a-cont.patch b/0005-Fix-PE-Bug-cl-5247-Imply-resources-running-on-a-cont.patch
new file mode 100644
index 0000000..cf19707
--- /dev/null
+++ b/0005-Fix-PE-Bug-cl-5247-Imply-resources-running-on-a-cont.patch
@@ -0,0 +1,328 @@
+From: Andrew Beekhof
+Date: Tue, 18 Aug 2015 10:30:49 +1000
+Subject: [PATCH] Fix: PE: Bug cl#5247 - Imply resources running on a container
+ are stopped when the container is stopped
+
+(cherry picked from commit e10eff1902d5b451454e2d467ee337c964f536ab)
+---
+ lib/pengine/unpack.c                  | 29 ++++++++++++++++++---------
+ pengine/allocate.c                    | 17 +++++++++++++++++
+ pengine/graph.c                       |  7 ++++++-
+ pengine/test10/bug-rh-1097457.dot     |  2 ++
+ pengine/test10/bug-rh-1097457.exp     | 12 ++++++++++--
+ pengine/test10/bug-rh-1097457.summary | 10 +++++-----
+ pengine/test10/whitebox-fail1.dot     |  1 +
+ pengine/test10/whitebox-fail1.exp     |  6 +++++-
+ pengine/test10/whitebox-fail1.summary |  8 ++++----
+ pengine/test10/whitebox-fail2.dot     |  1 +
+ pengine/test10/whitebox-fail2.exp     |  6 +++++-
+ pengine/test10/whitebox-fail2.summary |  8 ++++----
+ 12 files changed, 80 insertions(+), 27 deletions(-)
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index 106c674..0f83be4 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -44,7 +44,7 @@ CRM_TRACE_INIT_DATA(pe_status);
+
+ gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
+                        enum action_fail_response *failed, pe_working_set_t * data_set);
+-static gboolean determine_remote_online_status(node_t * this_node);
++static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
+
+ static gboolean
+ is_dangling_container_remote_node(node_t *node)
+@@ -73,6 +73,8 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
+         if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
+             crm_warn("Remote node %s will be fenced by recovering container resource %s",
+                      node->details->uname, rsc->id, reason);
++            /* node->details->unclean = TRUE; */
++            node->details->remote_requires_reset = TRUE;
+             set_bit(rsc->flags, pe_rsc_failed);
+         }
+     } else if (is_dangling_container_remote_node(node)) {
+@@ -1157,7 +1159,7 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
+         if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
+             continue;
+         }
+-        determine_remote_online_status(this_node);
++        determine_remote_online_status(data_set, this_node);
+     }
+
+     /* process attributes */
+@@ -1366,7 +1368,7 @@ determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_stat
+ }
+
+ static gboolean
+-determine_remote_online_status(node_t * this_node)
++determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
+ {
+     resource_t *rsc = this_node->details->remote_rsc;
+     resource_t *container = NULL;
+@@ -1393,13 +1395,21 @@ determine_remote_online_status(node_t * this_node)
+     }
+
+     /* Now check all the failure conditions. */
+-    if (is_set(rsc->flags, pe_rsc_failed) ||
+-        (rsc->role == RSC_ROLE_STOPPED) ||
+-        (container && is_set(container->flags, pe_rsc_failed)) ||
+-        (container && container->role == RSC_ROLE_STOPPED)) {
++    if(container && is_set(container->flags, pe_rsc_failed)) {
++        crm_trace("Remote node %s is set to UNCLEAN. rsc failed.", this_node->details->id);
++        this_node->details->online = FALSE;
++        this_node->details->remote_requires_reset = TRUE;
+
+-        crm_trace("Remote node %s is set to OFFLINE. node is stopped or rsc failed.", this_node->details->id);
++    } else if(is_set(rsc->flags, pe_rsc_failed)) {
++        crm_trace("Remote node %s is set to OFFLINE. rsc failed.", this_node->details->id);
+         this_node->details->online = FALSE;
++
++    } else if (rsc->role == RSC_ROLE_STOPPED
++               || (container && container->role == RSC_ROLE_STOPPED)) {
++
++        crm_trace("Remote node %s is set to OFFLINE. node is stopped.", this_node->details->id);
++        this_node->details->online = FALSE;
++        this_node->details->remote_requires_reset = FALSE;
+     }
+
+ remote_online_done:
+@@ -3375,7 +3385,8 @@ find_operations(const char *rsc, const char *node, gboolean active_filter,
+             continue;
+
+         } else if (is_remote_node(this_node)) {
+-            determine_remote_online_status(this_node);
++            determine_remote_online_status(data_set, this_node);
++
+         } else {
+             determine_online_status(node_state, this_node, data_set);
+         }
+diff --git a/pengine/allocate.c b/pengine/allocate.c
+index c2e56f9..65ae05d 100644
+--- a/pengine/allocate.c
++++ b/pengine/allocate.c
+@@ -1406,6 +1406,23 @@ stage6(pe_working_set_t * data_set)
+
+         /* remote-nodes associated with a container resource (such as a vm) are not fenced */
+         if (is_container_remote_node(node)) {
++            /* Guest */
++            if (need_stonith
++                && node->details->remote_requires_reset
++                && pe_can_fence(data_set, node)) {
++                resource_t *container = node->details->remote_rsc->container;
++                char *key = stop_key(container);
++                GListPtr stop_list = find_actions(container->actions, key, NULL);
++
++                crm_info("Implying node %s is down when container %s is stopped (%p)",
++                         node->details->uname, container->id, stop_list);
++                if(stop_list) {
++                    stonith_constraints(node, stop_list->data, data_set);
++                }
++
++                g_list_free(stop_list);
++                free(key);
++            }
+             continue;
+         }
+
+diff --git a/pengine/graph.c b/pengine/graph.c
+index 3d832f0..a50f15b 100644
+--- a/pengine/graph.c
++++ b/pengine/graph.c
+@@ -697,7 +697,12 @@ stonith_constraints(node_t * node, action_t * stonith_op, pe_working_set_t * dat
+     for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
+         resource_t *rsc = (resource_t *) lpc->data;
+
+-        rsc_stonith_ordering(rsc, stonith_op, data_set);
++        if(stonith_op->rsc == NULL) {
++            rsc_stonith_ordering(rsc, stonith_op, data_set);
++
++        } else if(stonith_op->rsc != rsc && stonith_op->rsc != rsc->container) {
++            rsc_stonith_ordering(rsc, stonith_op, data_set);
++        }
+     }
+ }
+
+diff --git a/pengine/test10/bug-rh-1097457.dot b/pengine/test10/bug-rh-1097457.dot
+index 666099c..078d177 100644
+--- a/pengine/test10/bug-rh-1097457.dot
++++ b/pengine/test10/bug-rh-1097457.dot
+@@ -49,10 +49,12 @@ digraph "g" {
+ "VM2_start_0 lama3" [ style=bold color="green" fontcolor="black"]
+ "VM2_stop_0 lama3" -> "FAKE4-IP_stop_0 lamaVM2" [ style = bold]
+ "VM2_stop_0 lama3" -> "FAKE4_stop_0 lamaVM2" [ style = bold]
++"VM2_stop_0 lama3" -> "FAKE6-clone_stop_0" [ style = bold]
+ "VM2_stop_0 lama3" -> "FAKE6_stop_0 lamaVM2" [ style = bold]
+ "VM2_stop_0 lama3" -> "FSlun3_stop_0 lamaVM2" [ style = bold]
+ "VM2_stop_0 lama3" -> "VM2_start_0 lama3" [ style = bold]
+ "VM2_stop_0 lama3" -> "all_stopped" [ style = bold]
++"VM2_stop_0 lama3" -> "lamaVM2-G4_stop_0" [ style = bold]
+ "VM2_stop_0 lama3" [ style=bold color="green" fontcolor="black"]
+ "all_stopped" [ style=bold color="green" fontcolor="orange"]
+ "lamaVM2-G4_running_0" [ style=bold color="green" fontcolor="orange"]
+diff --git a/pengine/test10/bug-rh-1097457.exp b/pengine/test10/bug-rh-1097457.exp
+index 36af9f3..175f413 100644
+--- a/pengine/test10/bug-rh-1097457.exp
++++ b/pengine/test10/bug-rh-1097457.exp
+@@ -119,7 +119,11 @@
+
+
+
+-
++
++
++
++
++
+
+
+
+@@ -331,7 +335,11 @@
+
+
+
+-
++
++
++
++
++
+
+
+
+diff --git a/pengine/test10/bug-rh-1097457.summary b/pengine/test10/bug-rh-1097457.summary
+index e2f235d..c8751ae 100644
+--- a/pengine/test10/bug-rh-1097457.summary
++++ b/pengine/test10/bug-rh-1097457.summary
+@@ -39,17 +39,17 @@ Transition Summary:
+  * Restart lamaVM2 (Started lama3)
+
+ Executing cluster transition:
+- * Pseudo action: lamaVM2-G4_stop_0
+- * Pseudo action: FAKE6-clone_stop_0
+  * Resource action: lamaVM2 stop on lama3
+  * Resource action: VM2 stop on lama3
++ * Pseudo action: lamaVM2-G4_stop_0
+  * Pseudo action: FAKE4-IP_stop_0
+- * Pseudo action: FAKE6_stop_0
+- * Pseudo action: FAKE6-clone_stopped_0
+- * Pseudo action: FAKE6-clone_start_0
++ * Pseudo action: FAKE6-clone_stop_0
+  * Resource action: VM2 start on lama3
+  * Resource action: VM2 monitor=10000 on lama3
+  * Pseudo action: FAKE4_stop_0
++ * Pseudo action: FAKE6_stop_0
++ * Pseudo action: FAKE6-clone_stopped_0
++ * Pseudo action: FAKE6-clone_start_0
+  * Resource action: lamaVM2 start on lama3
+  * Resource action: lamaVM2 monitor=30000 on lama3
+  * Resource action: FSlun3 monitor=10000 on lamaVM2
+diff --git a/pengine/test10/whitebox-fail1.dot b/pengine/test10/whitebox-fail1.dot
+index b595015..0f0fe26 100644
+--- a/pengine/test10/whitebox-fail1.dot
++++ b/pengine/test10/whitebox-fail1.dot
+@@ -26,6 +26,7 @@ digraph "g" {
+ "container1_start_0 18node2" -> "lxc1_start_0 18node2" [ style = bold]
+ "container1_start_0 18node2" [ style=bold color="green" fontcolor="black"]
+ "container1_stop_0 18node2" -> "B_stop_0 lxc1" [ style = bold]
++"container1_stop_0 18node2" -> "M-clone_stop_0" [ style = bold]
+ "container1_stop_0 18node2" -> "M_stop_0 lxc1" [ style = bold]
+ "container1_stop_0 18node2" -> "all_stopped" [ style = bold]
+ "container1_stop_0 18node2" -> "container1_start_0 18node2" [ style = bold]
+diff --git a/pengine/test10/whitebox-fail1.exp b/pengine/test10/whitebox-fail1.exp
+index 834b231..01bb142 100644
+--- a/pengine/test10/whitebox-fail1.exp
++++ b/pengine/test10/whitebox-fail1.exp
+@@ -96,7 +96,11 @@
+
+
+
+-
++
++
++
++
++
+
+
+
+diff --git a/pengine/test10/whitebox-fail1.summary b/pengine/test10/whitebox-fail1.summary
+index 5e5887b..1586407 100644
+--- a/pengine/test10/whitebox-fail1.summary
++++ b/pengine/test10/whitebox-fail1.summary
+@@ -20,17 +20,17 @@ Transition Summary:
+  * Restart lxc1 (Started 18node2)
+
+ Executing cluster transition:
+- * Pseudo action: M-clone_stop_0
+  * Resource action: lxc1 stop on 18node2
+  * Resource action: container1 stop on 18node2
++ * Pseudo action: M-clone_stop_0
++ * Pseudo action: B_stop_0
++ * Resource action: container1 start on 18node2
+  * Pseudo action: M_stop_0
+  * Pseudo action: M-clone_stopped_0
+  * Pseudo action: M-clone_start_0
+- * Pseudo action: B_stop_0
+- * Pseudo action: all_stopped
+- * Resource action: container1 start on 18node2
+  * Resource action: lxc1 start on 18node2
+  * Resource action: lxc1 monitor=30000 on 18node2
++ * Pseudo action: all_stopped
+  * Resource action: M start on lxc1
+  * Pseudo action: M-clone_running_0
+  * Resource action: B start on lxc1
+diff --git a/pengine/test10/whitebox-fail2.dot b/pengine/test10/whitebox-fail2.dot
+index b595015..0f0fe26 100644
+--- a/pengine/test10/whitebox-fail2.dot
++++ b/pengine/test10/whitebox-fail2.dot
+@@ -26,6 +26,7 @@ digraph "g" {
+ "container1_start_0 18node2" -> "lxc1_start_0 18node2" [ style = bold]
+ "container1_start_0 18node2" [ style=bold color="green" fontcolor="black"]
+ "container1_stop_0 18node2" -> "B_stop_0 lxc1" [ style = bold]
++"container1_stop_0 18node2" -> "M-clone_stop_0" [ style = bold]
+ "container1_stop_0 18node2" -> "M_stop_0 lxc1" [ style = bold]
+ "container1_stop_0 18node2" -> "all_stopped" [ style = bold]
+ "container1_stop_0 18node2" -> "container1_start_0 18node2" [ style = bold]
+diff --git a/pengine/test10/whitebox-fail2.exp b/pengine/test10/whitebox-fail2.exp
+index 834b231..01bb142 100644
+--- a/pengine/test10/whitebox-fail2.exp
++++ b/pengine/test10/whitebox-fail2.exp
+@@ -96,7 +96,11 @@
+
+
+
+-
++
++
++
++
++
+
+
+
+diff --git a/pengine/test10/whitebox-fail2.summary b/pengine/test10/whitebox-fail2.summary
+index 338173d..ab40d99 100644
+--- a/pengine/test10/whitebox-fail2.summary
++++ b/pengine/test10/whitebox-fail2.summary
+@@ -20,17 +20,17 @@ Transition Summary:
+  * Recover lxc1 (Started 18node2)
+
+ Executing cluster transition:
+- * Pseudo action: M-clone_stop_0
+  * Resource action: lxc1 stop on 18node2
+  * Resource action: container1 stop on 18node2
++ * Pseudo action: M-clone_stop_0
++ * Pseudo action: B_stop_0
++ * Resource action: container1 start on 18node2
+  * Pseudo action: M_stop_0
+  * Pseudo action: M-clone_stopped_0
+  * Pseudo action: M-clone_start_0
+- * Pseudo action: B_stop_0
+- * Pseudo action: all_stopped
+- * Resource action: container1 start on 18node2
+  * Resource action: lxc1 start on 18node2
+  * Resource action: lxc1 monitor=30000 on 18node2
++ * Pseudo action: all_stopped
+  * Resource action: M start on lxc1
+  * Pseudo action: M-clone_running_0
+  * Resource action: B start on lxc1
diff --git a/0006-Fix-Date-Correctly-set-time-from-seconds-since-epoch.patch b/0006-Fix-Date-Correctly-set-time-from-seconds-since-epoch.patch
new file mode 100644
index 0000000..ea40f7e
--- /dev/null
+++ b/0006-Fix-Date-Correctly-set-time-from-seconds-since-epoch.patch
@@ -0,0 +1,21 @@
+From: Andrew Beekhof
+Date: Tue, 18 Aug 2015 11:06:13 +1000
+Subject: [PATCH] Fix: Date: Correctly set time from seconds-since-epoch
+
+(cherry picked from commit efa318114d0b2124cc82fe143403e6de502e0134)
+---
+ lib/common/iso8601.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/lib/common/iso8601.c b/lib/common/iso8601.c
+index 769e01b..5f4a73d 100644
+--- a/lib/common/iso8601.c
++++ b/lib/common/iso8601.c
+@@ -1011,6 +1011,7 @@ ha_set_tm_time(crm_time_t * target, struct tm *source)
+         target->days = 1 + source->tm_yday;
+     }
+
++    target->seconds = 0;
+     if (source->tm_hour >= 0) {
+         target->seconds += 60 * 60 * source->tm_hour;
+     }
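ha_set_tm_time() accumulates hours, minutes and seconds into target->seconds with +=, so a crm_time_t that was reused or not zero-initialised kept stale seconds; that is all the one-line fix addresses. A self-contained sketch of the failure mode, using simplified stand-in types rather than the real crm_time_t:

    #include <stdio.h>

    struct tm_like       { int tm_hour, tm_min, tm_sec; };
    struct crm_time_like { int seconds; };

    static void set_time(struct crm_time_like *target, const struct tm_like *source)
    {
        target->seconds = 0;              /* the fix: reset before accumulating */
        if (source->tm_hour >= 0) {
            target->seconds += 60 * 60 * source->tm_hour;
        }
        if (source->tm_min >= 0) {
            target->seconds += 60 * source->tm_min;
        }
        if (source->tm_sec >= 0) {
            target->seconds += source->tm_sec;
        }
    }

    int main(void)
    {
        struct crm_time_like t = { 0 };
        struct tm_like noon = { 12, 0, 0 };

        set_time(&t, &noon);
        set_time(&t, &noon);              /* reused target: still 43200, not 86400 */
        printf("%d\n", t.seconds);
        return 0;
    }
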
diff --git a/0007-Test-PE-Bug-cl-5247-Imply-resources-running-on-a-con.patch b/0007-Test-PE-Bug-cl-5247-Imply-resources-running-on-a-con.patch
new file mode 100644
index 0000000..74aa4b1
--- /dev/null
+++ b/0007-Test-PE-Bug-cl-5247-Imply-resources-running-on-a-con.patch
@@ -0,0 +1,1419 @@
+From: Andrew Beekhof
+Date: Tue, 18 Aug 2015 10:31:06 +1000
+Subject: [PATCH] Test: PE: Bug cl#5247 - Imply resources running on a
+ container are stopped when the container is stopped
+
+(cherry picked from commit 825e82a5098bde0412944c7d4f54c3d825ddff08)
+---
+ pengine/regression.sh              |  29 +-
+ pengine/test10/bug-cl-5247.dot     | 136 +++++++
+ pengine/test10/bug-cl-5247.exp     | 704 +++++++++++++++++++++++++++++++++++++
+ pengine/test10/bug-cl-5247.scores  |  84 +++++
+ pengine/test10/bug-cl-5247.summary |  96 +++++
+ pengine/test10/bug-cl-5247.xml     | 295 ++++++++++++++++
+ 6 files changed, 1331 insertions(+), 13 deletions(-)
+ create mode 100644 pengine/test10/bug-cl-5247.dot
+ create mode 100644 pengine/test10/bug-cl-5247.exp
+ create mode 100644 pengine/test10/bug-cl-5247.scores
+ create mode 100644 pengine/test10/bug-cl-5247.summary
+ create mode 100644 pengine/test10/bug-cl-5247.xml
+
+diff --git a/pengine/regression.sh b/pengine/regression.sh
+index 7f73f92..1517e3d 100755
+--- a/pengine/regression.sh
++++ b/pengine/regression.sh
+@@ -31,19 +31,6 @@ info Performing the following tests from $io_dir
+ create_mode="false"
+
+ echo ""
+-do_test cloned_start_one "order first clone then clone... first clone_min=2"
+-do_test cloned_start_two "order first clone then clone... first clone_min=2"
+-do_test cloned_stop_one "order first clone then clone... first clone_min=2"
+-do_test cloned_stop_two "order first clone then clone... first clone_min=2"
+-do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true"
+-do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true"
+-do_test clone_min_interleave_stop_one "order first clone then clone... first clone_min=2 and then has interleave=true"
+-do_test clone_min_interleave_stop_two "order first clone then clone... first clone_min=2 and then has interleave=true"
+-do_test clone_min_start_one "order first clone then primitive... first clone_min=2"
+-do_test clone_min_start_two "order first clone then primitive... first clone_min=2"
+-do_test clone_min_stop_all "order first clone then primitive... first clone_min=2"
+-do_test clone_min_stop_one "order first clone then primitive... first clone_min=2"
+-do_test clone_min_stop_two "order first clone then primitive... first clone_min=2"
+
+ do_test simple1 "Offline     "
+ do_test simple2 "Start       "
+@@ -373,6 +360,21 @@ do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved order
+ do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)"
+
+ echo ""
++do_test cloned_start_one "order first clone then clone... first clone_min=2"
++do_test cloned_start_two "order first clone then clone... first clone_min=2"
++do_test cloned_stop_one "order first clone then clone... first clone_min=2"
++do_test cloned_stop_two "order first clone then clone... first clone_min=2"
++do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true"
++do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true"
++do_test clone_min_interleave_stop_one "order first clone then clone... first clone_min=2 and then has interleave=true"
++do_test clone_min_interleave_stop_two "order first clone then clone... first clone_min=2 and then has interleave=true"
++do_test clone_min_start_one "order first clone then primitive... first clone_min=2"
++do_test clone_min_start_two "order first clone then primitive... first clone_min=2"
++do_test clone_min_stop_all "order first clone then primitive... first clone_min=2"
++do_test clone_min_stop_one "order first clone then primitive... first clone_min=2"
++do_test clone_min_stop_two "order first clone then primitive... first clone_min=2"
++
++echo ""
+ do_test unfence-startup "Clean unfencing"
+ do_test unfence-definition "Unfencing when the agent changes"
+ do_test unfence-parameters "Unfencing when the agent parameters changes"
+@@ -785,6 +787,7 @@ do_test container-group-3 "Container in group - stop failed"
+ do_test container-group-4 "Container in group - reached migration-threshold"
+ do_test container-is-remote-node "Place resource within container when container is remote-node"
+ do_test bug-rh-1097457 "Kill user defined container/contents ordering"
++do_test bug-cl-5247 "Graph loop when recovering m/s resource in a container"
+
+ echo ""
+ do_test whitebox-fail1 "Fail whitebox container rsc."
+diff --git a/pengine/test10/bug-cl-5247.dot b/pengine/test10/bug-cl-5247.dot
+new file mode 100644
+index 0000000..ed728ac
+--- /dev/null
++++ b/pengine/test10/bug-cl-5247.dot
+@@ -0,0 +1,136 @@
++digraph "g" {
++"all_stopped" [ style=bold color="green" fontcolor="orange"]
++"grpStonith1_running_0" [ style=bold color="green" fontcolor="orange"]
++"grpStonith1_start_0" -> "grpStonith1_running_0" [ style = bold]
++"grpStonith1_start_0" -> "prmStonith1-2_start_0 bl460g8n4" [ style = bold]
++"grpStonith1_start_0" [ style=bold color="green" fontcolor="orange"]
++"grpStonith1_stop_0" -> "grpStonith1_stopped_0" [ style = bold]
++"grpStonith1_stop_0" -> "prmStonith1-2_stop_0 bl460g8n4" [ style = bold]
++"grpStonith1_stop_0" [ style=bold color="green" fontcolor="orange"]
++"grpStonith1_stopped_0" -> "grpStonith1_start_0" [ style = bold]
++"grpStonith1_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"grpStonith2_running_0" [ style=bold color="green" fontcolor="orange"]
++"grpStonith2_start_0" -> "grpStonith2_running_0" [ style = bold]
++"grpStonith2_start_0" -> "prmStonith2-2_start_0 bl460g8n3" [ style = bold]
++"grpStonith2_start_0" [ style=bold color="green" fontcolor="orange"]
++"grpStonith2_stop_0" -> "grpStonith2_stopped_0" [ style = bold]
++"grpStonith2_stop_0" -> "prmStonith2-2_stop_0 bl460g8n3" [ style = bold]
++"grpStonith2_stop_0" [ style=bold color="green" fontcolor="orange"]
++"grpStonith2_stopped_0" -> "grpStonith2_start_0" [ style = bold]
++"grpStonith2_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"master-group_running_0" [ style=bold color="green" fontcolor="orange"]
++"master-group_start_0" -> "master-group_running_0" [ style = bold]
++"master-group_start_0" -> "vip-master_start_0 pgsr01" [ style = bold]
++"master-group_start_0" -> "vip-rep_start_0 pgsr01" [ style = bold]
++"master-group_start_0" [ style=bold color="green" fontcolor="orange"]
++"master-group_stop_0" -> "master-group_stopped_0" [ style = bold]
++"master-group_stop_0" -> "vip-master_stop_0 pgsr02" [ style = bold]
++"master-group_stop_0" -> "vip-rep_stop_0 pgsr02" [ style = bold]
++"master-group_stop_0" [ style=bold color="green" fontcolor="orange"]
++"master-group_stopped_0" -> "master-group_start_0" [ style = bold]
++"master-group_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_confirmed-post_notify_demoted_0" -> "master-group_stop_0" [ style = bold]
++"msPostgresql_confirmed-post_notify_demoted_0" -> "msPostgresql_pre_notify_stop_0" [ style = bold]
++"msPostgresql_confirmed-post_notify_demoted_0" -> "pgsql_monitor_9000 pgsr01" [ style = bold]
++"msPostgresql_confirmed-post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold]
++"msPostgresql_confirmed-post_notify_stopped_0" -> "pgsql_monitor_9000 pgsr01" [ style = bold]
++"msPostgresql_confirmed-post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_confirmed-pre_notify_demote_0" -> "msPostgresql_demote_0" [ style = bold]
++"msPostgresql_confirmed-pre_notify_demote_0" -> "msPostgresql_post_notify_demoted_0" [ style = bold]
++"msPostgresql_confirmed-pre_notify_demote_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_confirmed-pre_notify_stop_0" -> "msPostgresql_post_notify_stopped_0" [ style = bold]
++"msPostgresql_confirmed-pre_notify_stop_0" -> "msPostgresql_stop_0" [ style = bold]
++"msPostgresql_confirmed-pre_notify_stop_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_demote_0" -> "msPostgresql_demoted_0" [ style = bold]
++"msPostgresql_demote_0" -> "pgsql_demote_0 pgsr02" [ style = bold]
++"msPostgresql_demote_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_demoted_0" -> "msPostgresql_post_notify_demoted_0" [ style = bold]
++"msPostgresql_demoted_0" -> "msPostgresql_stop_0" [ style = bold]
++"msPostgresql_demoted_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_post_notify_demoted_0" -> "msPostgresql_confirmed-post_notify_demoted_0" [ style = bold]
++"msPostgresql_post_notify_demoted_0" -> "pgsql_post_notify_demoted_0 pgsr01" [ style = bold]
++"msPostgresql_post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_post_notify_stopped_0" -> "msPostgresql_confirmed-post_notify_stopped_0" [ style = bold]
++"msPostgresql_post_notify_stopped_0" -> "pgsql_post_notify_stop_0 pgsr01" [ style = bold]
++"msPostgresql_post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_pre_notify_demote_0" -> "msPostgresql_confirmed-pre_notify_demote_0" [ style = bold]
++"msPostgresql_pre_notify_demote_0" -> "pgsql_pre_notify_demote_0 pgsr01" [ style = bold]
++"msPostgresql_pre_notify_demote_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_pre_notify_stop_0" -> "msPostgresql_confirmed-pre_notify_stop_0" [ style = bold]
++"msPostgresql_pre_notify_stop_0" -> "pgsql_pre_notify_stop_0 pgsr01" [ style = bold]
++"msPostgresql_pre_notify_stop_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_stop_0" -> "msPostgresql_stopped_0" [ style = bold]
++"msPostgresql_stop_0" -> "pgsql_stop_0 pgsr02" [ style = bold]
++"msPostgresql_stop_0" [ style=bold color="green" fontcolor="orange"]
++"msPostgresql_stopped_0" -> "msPostgresql_post_notify_stopped_0" [ style = bold]
++"msPostgresql_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"pgsql_confirmed-post_notify_stop_0" -> "all_stopped" [ style = bold]
++"pgsql_confirmed-post_notify_stop_0" -> "pgsql_monitor_9000 pgsr01" [ style = bold]
++"pgsql_confirmed-post_notify_stop_0" [ style=bold color="green" fontcolor="orange"]
++"pgsql_demote_0 pgsr02" -> "msPostgresql_demoted_0" [ style = bold]
++"pgsql_demote_0 pgsr02" -> "pgsql_stop_0 pgsr02" [ style = bold]
++"pgsql_demote_0 pgsr02" [ style=bold color="green" fontcolor="orange"]
++"pgsql_monitor_9000 pgsr01" [ style=bold color="green" fontcolor="black"]
++"pgsql_post_notify_demoted_0 pgsr01" -> "msPostgresql_confirmed-post_notify_demoted_0" [ style = bold]
++"pgsql_post_notify_demoted_0 pgsr01" [ style=bold color="green" fontcolor="black"]
++"pgsql_post_notify_stop_0 pgsr01" -> "msPostgresql_confirmed-post_notify_stopped_0" [ style = bold]
++"pgsql_post_notify_stop_0 pgsr01" -> "pgsql_confirmed-post_notify_stop_0" [ style = bold]
++"pgsql_post_notify_stop_0 pgsr01" [ style=bold color="green" fontcolor="black"]
++"pgsql_post_notify_stop_0" -> "pgsql_confirmed-post_notify_stop_0" [ style = bold]
++"pgsql_post_notify_stop_0" -> "pgsql_post_notify_stop_0 pgsr01" [ style = bold]
++"pgsql_post_notify_stop_0" [ style=bold color="green" fontcolor="orange"]
++"pgsql_pre_notify_demote_0 pgsr01" -> "msPostgresql_confirmed-pre_notify_demote_0" [ style = bold]
++"pgsql_pre_notify_demote_0 pgsr01" [ style=bold color="green" fontcolor="black"]
++"pgsql_pre_notify_stop_0 pgsr01" -> "msPostgresql_confirmed-pre_notify_stop_0" [ style = bold]
++"pgsql_pre_notify_stop_0 pgsr01" [ style=bold color="green" fontcolor="black"]
++"pgsql_stop_0 pgsr02" -> "all_stopped" [ style = bold]
++"pgsql_stop_0 pgsr02" -> "msPostgresql_stopped_0" [ style = bold]
++"pgsql_stop_0 pgsr02" [ style=bold color="green" fontcolor="orange"]
++"pgsr02_stop_0 bl460g8n4" -> "all_stopped" [ style = bold]
++"pgsr02_stop_0 bl460g8n4" -> "prmDB2_stop_0 bl460g8n4" [ style = bold]
++"pgsr02_stop_0 bl460g8n4" [ style=bold color="green" fontcolor="black"]
++"prmDB2_stop_0 bl460g8n4" -> "all_stopped" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" -> "master-group_stop_0" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" -> "msPostgresql_stop_0" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" -> "pgsql_demote_0 pgsr02" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" -> "pgsql_post_notify_stop_0" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" -> "pgsql_stop_0 pgsr02" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" -> "vip-master_stop_0 pgsr02" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" -> "vip-rep_stop_0 pgsr02" [ style = bold]
++"prmDB2_stop_0 bl460g8n4" [ style=bold color="green" fontcolor="black"]
++"prmStonith1-2_monitor_3600000 bl460g8n4" [ style=bold color="green" fontcolor="black"]
++"prmStonith1-2_start_0 bl460g8n4" -> "grpStonith1_running_0" [ style = bold]
++"prmStonith1-2_start_0 bl460g8n4" -> "prmStonith1-2_monitor_3600000 bl460g8n4" [ style = bold]
++"prmStonith1-2_start_0 bl460g8n4" [ style=bold color="green" fontcolor="black"]
++"prmStonith1-2_stop_0 bl460g8n4" -> "all_stopped" [ style = bold]
++"prmStonith1-2_stop_0 bl460g8n4" -> "grpStonith1_stopped_0" [ style = bold]
++"prmStonith1-2_stop_0 bl460g8n4" -> "prmStonith1-2_start_0 bl460g8n4" [ style = bold]
++"prmStonith1-2_stop_0 bl460g8n4" [ style=bold color="green" fontcolor="orange"]
++"prmStonith2-2_monitor_3600000 bl460g8n3" [ style=bold color="green" fontcolor="black"]
++"prmStonith2-2_start_0 bl460g8n3" -> "grpStonith2_running_0" [ style = bold]
++"prmStonith2-2_start_0 bl460g8n3" -> "prmStonith2-2_monitor_3600000 bl460g8n3" [ style = bold]
++"prmStonith2-2_start_0 bl460g8n3" [ style=bold color="green" fontcolor="black"]
++"prmStonith2-2_stop_0 bl460g8n3" -> "all_stopped" [ style = bold]
++"prmStonith2-2_stop_0 bl460g8n3" -> "grpStonith2_stopped_0" [ style = bold]
++"prmStonith2-2_stop_0 bl460g8n3" -> "prmStonith2-2_start_0 bl460g8n3" [ style = bold]
++"prmStonith2-2_stop_0 bl460g8n3" [ style=bold color="green" fontcolor="black"]
++"vip-master_monitor_10000 pgsr01" [ style=bold color="green" fontcolor="black"]
++"vip-master_start_0 pgsr01" -> "master-group_running_0" [ style = bold]
++"vip-master_start_0 pgsr01" -> "vip-master_monitor_10000 pgsr01" [ style = bold]
++"vip-master_start_0 pgsr01" -> "vip-rep_start_0 pgsr01" [ style = bold]
++"vip-master_start_0 pgsr01" [ style=bold color="green" fontcolor="black"]
++"vip-master_stop_0 pgsr02" -> "all_stopped" [ style = bold]
++"vip-master_stop_0 pgsr02" -> "master-group_stopped_0" [ style = bold]
++"vip-master_stop_0 pgsr02" -> "vip-master_start_0 pgsr01" [ style = bold]
++"vip-master_stop_0 pgsr02" [ style=bold color="green" fontcolor="orange"]
++"vip-rep_monitor_10000 pgsr01" [ style=bold color="green" fontcolor="black"]
++"vip-rep_start_0 pgsr01" -> "master-group_running_0" [ style = bold]
++"vip-rep_start_0 pgsr01" -> "vip-rep_monitor_10000 pgsr01" [ style = bold]
++"vip-rep_start_0 pgsr01" [ style=bold color="green" fontcolor="black"]
++"vip-rep_stop_0 pgsr02" -> "all_stopped" [ style = bold]
++"vip-rep_stop_0 pgsr02" -> "master-group_stopped_0" [ style = bold]
++"vip-rep_stop_0 pgsr02" -> "vip-master_stop_0 pgsr02" [ style = bold]
++"vip-rep_stop_0 pgsr02" -> "vip-rep_start_0 pgsr01" [ style = bold]
++"vip-rep_stop_0 pgsr02" [ style=bold color="green" fontcolor="orange"]
++}
+diff --git a/pengine/test10/bug-cl-5247.exp b/pengine/test10/bug-cl-5247.exp
+new file mode 100644
+index 0000000..5e36e84
+--- /dev/null
++++ b/pengine/test10/bug-cl-5247.exp
+@@ -0,0 +1,704 @@
++[ 704 lines of transition-graph XML; the element markup did not survive in this copy ]
+diff --git a/pengine/test10/bug-cl-5247.scores b/pengine/test10/bug-cl-5247.scores
+new file mode 100644
+index 0000000..e9e4709
+--- /dev/null
++++ b/pengine/test10/bug-cl-5247.scores
+@@ -0,0 +1,84 @@
++Allocation scores:
++Using the original execution date of: 2015-08-12 02:53:40Z
++clone_color: msPostgresql allocation score on bl460g8n3: -INFINITY
++clone_color: msPostgresql allocation score on bl460g8n4: -INFINITY
++clone_color: msPostgresql allocation score on pgsr01: 0
++clone_color: msPostgresql allocation score on pgsr02: 0
++clone_color: pgsql:0 allocation score on bl460g8n3: -INFINITY
++clone_color: pgsql:0 allocation score on bl460g8n4: -INFINITY
++clone_color: pgsql:0 allocation score on pgsr01: 0
++clone_color: pgsql:0 allocation score on pgsr02: INFINITY
++clone_color: pgsql:1 allocation score on bl460g8n3: -INFINITY
++clone_color: pgsql:1 allocation score on bl460g8n4: -INFINITY
++clone_color: pgsql:1 allocation score on pgsr01: INFINITY
++clone_color: pgsql:1 allocation score on pgsr02: 0
++group_color: grpStonith1 allocation score on bl460g8n3: -INFINITY
++group_color: grpStonith1 allocation score on bl460g8n4: 0
++group_color: grpStonith1 allocation score on pgsr01: -INFINITY
++group_color: grpStonith1 allocation score on pgsr02: -INFINITY
++group_color: grpStonith2 allocation score on bl460g8n3: 0
++group_color: grpStonith2 allocation score on bl460g8n4: -INFINITY
++group_color: grpStonith2 allocation score on pgsr01: -INFINITY
++group_color: grpStonith2 allocation score on pgsr02: -INFINITY
++group_color: master-group allocation score on bl460g8n3: 0
++group_color: master-group allocation score on bl460g8n4: 0
++group_color: master-group allocation score on pgsr01: 0
++group_color: master-group allocation score on pgsr02: 0
++group_color: prmStonith1-2 allocation score on bl460g8n3: -INFINITY
++group_color: prmStonith1-2 allocation score on bl460g8n4: INFINITY
++group_color: prmStonith1-2 allocation score on pgsr01: -INFINITY
++group_color: prmStonith1-2 allocation score on pgsr02: -INFINITY
++group_color: prmStonith2-2 allocation score on bl460g8n3: INFINITY
++group_color: prmStonith2-2 allocation score on bl460g8n4: -INFINITY
++group_color: prmStonith2-2 allocation score on pgsr01: -INFINITY
++group_color: prmStonith2-2 allocation score on pgsr02: -INFINITY
++group_color: vip-master allocation score on bl460g8n3: 0
++group_color: vip-master allocation score on bl460g8n4: 0
++group_color: vip-master allocation score on pgsr01: 0
++group_color: vip-master allocation score on pgsr02: INFINITY
++group_color: vip-rep allocation score on bl460g8n3: 0
++group_color: vip-rep allocation score on bl460g8n4: 0
++group_color: vip-rep allocation score on pgsr01: 0
++group_color: vip-rep allocation score on pgsr02: INFINITY
++native_color: pgsql:0 allocation score on bl460g8n3: -INFINITY
++native_color: pgsql:0 allocation score on bl460g8n4: -INFINITY
++native_color: pgsql:0 allocation score on pgsr01: -INFINITY
++native_color: pgsql:0 allocation score on pgsr02: -INFINITY
++native_color: pgsql:1 allocation score on bl460g8n3: -INFINITY
++native_color: pgsql:1 allocation score on bl460g8n4: -INFINITY
++native_color: pgsql:1 allocation score on pgsr01: INFINITY
++native_color: pgsql:1 allocation score on pgsr02: -INFINITY
++native_color: pgsr01 allocation score on bl460g8n3: INFINITY
++native_color: pgsr01 allocation score on bl460g8n4: -INFINITY
++native_color: pgsr01 allocation score on pgsr01: -INFINITY
++native_color: pgsr01 allocation score on pgsr02: -INFINITY
++native_color: pgsr02 allocation score on bl460g8n3: -INFINITY
++native_color: pgsr02 allocation score on bl460g8n4: -INFINITY
++native_color: pgsr02 allocation score on pgsr01: -INFINITY
++native_color: pgsr02 allocation score on pgsr02: -INFINITY
++native_color: prmDB1 allocation score on bl460g8n3: INFINITY
++native_color: prmDB1 allocation score on bl460g8n4: -INFINITY
++native_color: prmDB1 allocation score on pgsr01: -INFINITY
++native_color: prmDB1 allocation score on pgsr02: -INFINITY
++native_color: prmDB2 allocation score on bl460g8n3: -INFINITY
++native_color: prmDB2 allocation score on bl460g8n4: -INFINITY
++native_color: prmDB2 allocation score on pgsr01: -INFINITY
++native_color: prmDB2 allocation score on pgsr02: -INFINITY
++native_color: prmStonith1-2 allocation score on bl460g8n3: -INFINITY
++native_color: prmStonith1-2 allocation score on bl460g8n4: INFINITY
++native_color: prmStonith1-2 allocation score on pgsr01: -INFINITY
++native_color: prmStonith1-2 allocation score on pgsr02: -INFINITY
++native_color: prmStonith2-2 allocation score on bl460g8n3: INFINITY
++native_color: prmStonith2-2 allocation score on bl460g8n4: -INFINITY
++native_color: prmStonith2-2 allocation score on pgsr01: -INFINITY
++native_color: prmStonith2-2 allocation score on pgsr02: -INFINITY
++native_color: vip-master allocation score on bl460g8n3: -INFINITY
++native_color: vip-master allocation score on bl460g8n4: -INFINITY
++native_color: vip-master allocation score on pgsr01: INFINITY
++native_color: vip-master allocation score on pgsr02: -INFINITY
++native_color: vip-rep allocation score on bl460g8n3: -INFINITY
++native_color: vip-rep allocation score on bl460g8n4: -INFINITY
++native_color: vip-rep allocation score on pgsr01: 0
++native_color: vip-rep allocation score on pgsr02: -INFINITY
++pgsql:0 promotion score on none: 0
++pgsql:1 promotion score on pgsr01: 10
+diff --git a/pengine/test10/bug-cl-5247.summary b/pengine/test10/bug-cl-5247.summary
+new file mode 100644
+index 0000000..5564286
+--- /dev/null
++++ b/pengine/test10/bug-cl-5247.summary
+@@ -0,0 +1,96 @@
++Using the original execution date of: 2015-08-12 02:53:40Z
++
++Current cluster status:
++Online: [ bl460g8n3 bl460g8n4 ]
++Containers: [ pgsr01:prmDB1 ]
++
++ prmDB1 (ocf::heartbeat:VirtualDomain): Started bl460g8n3
++ prmDB2 (ocf::heartbeat:VirtualDomain): FAILED bl460g8n4
++ Resource Group: grpStonith1
++     prmStonith1-2 (stonith:external/ipmi): Started bl460g8n4
++ Resource Group: grpStonith2
++     prmStonith2-2 (stonith:external/ipmi): Started bl460g8n3
++ Resource Group: master-group
++     vip-master (ocf::heartbeat:Dummy): FAILED pgsr02
++     vip-rep (ocf::heartbeat:Dummy): FAILED pgsr02
++ Master/Slave Set: msPostgresql [pgsql]
++     Masters: [ pgsr01 ]
++     Stopped: [ bl460g8n3 bl460g8n4 ]
++
++Transition Summary:
++ * Stop    prmDB2 (bl460g8n4)
++ * Restart prmStonith1-2 (Started bl460g8n4)
++ * Restart prmStonith2-2 (Started bl460g8n3)
++ * Recover vip-master (Started pgsr02 -> pgsr01)
++ * Recover vip-rep (Started pgsr02 -> pgsr01)
++ * Demote  pgsql:0 (Master -> Stopped pgsr02)
++ * Stop    pgsr02 (bl460g8n4)
++
++Executing cluster transition:
++ * Pseudo action: grpStonith1_stop_0
++ * Pseudo action: prmStonith1-2_stop_0
++ * Pseudo action: grpStonith2_stop_0
++ * Resource action: prmStonith2-2 stop on bl460g8n3
++ * Pseudo action: msPostgresql_pre_notify_demote_0
++ * Resource action: pgsr02 stop on bl460g8n4
++ * Resource action: prmDB2 stop on bl460g8n4
++ * Pseudo action: grpStonith1_stopped_0
++ * Pseudo action: grpStonith1_start_0
++ * Resource action: prmStonith1-2 start on bl460g8n4
++ * Resource action: prmStonith1-2 monitor=3600000 on bl460g8n4
++ * Pseudo action: grpStonith2_stopped_0
++ * Pseudo action: grpStonith2_start_0
++ * Resource action: prmStonith2-2 start on bl460g8n3
++ * Resource action: prmStonith2-2 monitor=3600000 on bl460g8n3
++ * Pseudo action: pgsql_post_notify_stop_0
++ * Resource action: pgsql notify on pgsr01
++ * Pseudo action: msPostgresql_confirmed-pre_notify_demote_0
++ * Pseudo action: msPostgresql_demote_0
++ * Pseudo action: grpStonith1_running_0
++ * Pseudo action: grpStonith2_running_0
++ * Pseudo action: pgsql_demote_0
++ * Pseudo action: msPostgresql_demoted_0
++ * Pseudo action: msPostgresql_post_notify_demoted_0
++ * Resource action: pgsql notify on pgsr01
++ * Pseudo action: msPostgresql_confirmed-post_notify_demoted_0
++ * Pseudo action: msPostgresql_pre_notify_stop_0
++ * Pseudo action: master-group_stop_0
++ * Pseudo action: vip-rep_stop_0
++ * Resource action: pgsql notify on pgsr01
++ * Pseudo action: msPostgresql_confirmed-pre_notify_stop_0
++ * Pseudo action: msPostgresql_stop_0
++ * Pseudo action: vip-master_stop_0
++ * Pseudo action: pgsql_stop_0
++ * Pseudo action: msPostgresql_stopped_0
++ * Pseudo action: master-group_stopped_0
++ * Pseudo action: master-group_start_0
++ * Resource action: vip-master start on pgsr01
++ * Resource action: vip-rep start on pgsr01
++ * Pseudo action: msPostgresql_post_notify_stopped_0
++ * Pseudo action: master-group_running_0
++ * Resource action: vip-master monitor=10000 on pgsr01
++ * Resource action: vip-rep monitor=10000 on pgsr01
++ * Resource action: pgsql notify on pgsr01
++ * Pseudo action: msPostgresql_confirmed-post_notify_stopped_0
++ * Pseudo action: pgsql_notified_0
++ * Resource action: pgsql monitor=9000 on pgsr01
++ * Pseudo action: all_stopped
++Using the original execution date of: 2015-08-12 02:53:40Z
++
++Revised cluster status:
++Online: [ bl460g8n3 bl460g8n4 ]
++Containers: [ pgsr01:prmDB1 ]
++
++ prmDB1 (ocf::heartbeat:VirtualDomain): Started bl460g8n3
++ prmDB2 (ocf::heartbeat:VirtualDomain): FAILED
++ Resource Group: grpStonith1
++     prmStonith1-2 (stonith:external/ipmi): Started bl460g8n4
++ Resource Group: grpStonith2
++     prmStonith2-2 (stonith:external/ipmi): Started bl460g8n3
++ Resource Group: master-group
++     vip-master (ocf::heartbeat:Dummy): FAILED[ pgsr02 pgsr01 ]
++     vip-rep (ocf::heartbeat:Dummy): FAILED[ pgsr02 pgsr01 ]
++ Master/Slave Set: msPostgresql [pgsql]
++     Masters: [ pgsr01 ]
++     Stopped: [ bl460g8n3 bl460g8n4 ]
++
+diff --git a/pengine/test10/bug-cl-5247.xml b/pengine/test10/bug-cl-5247.xml
+new file mode 100644
+index 0000000..c36ef40
+--- /dev/null
++++ b/pengine/test10/bug-cl-5247.xml
+@@ -0,0 +1,295 @@
++[ 295 lines of CIB XML; the element markup did not survive in this copy ]
diff --git a/0008-Fix-tools-memory-leak-in-crm_resource.patch b/0008-Fix-tools-memory-leak-in-crm_resource.patch
new file mode 100644
index 0000000..c29561f
--- /dev/null
+++ b/0008-Fix-tools-memory-leak-in-crm_resource.patch
@@ -0,0 +1,33 @@
+From: Ken Gaillot
+Date: Mon, 17 Aug 2015 10:28:19 -0500
+Subject: [PATCH] Fix: tools: memory leak in crm_resource
+
+(cherry picked from commit c11bc4b856b07d5ea5b8284a3d566dd782e6bb7c)
+---
+ tools/crm_resource_runtime.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
+index f260e19..b9427bc 100644
+--- a/tools/crm_resource_runtime.c
++++ b/tools/crm_resource_runtime.c
+@@ -399,9 +399,11 @@ cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const ch
+                             &local_attr_id);
+
+     if (rc == -ENXIO) {
++        free(lookup_id);
+         return pcmk_ok;
+
+     } else if (rc != pcmk_ok) {
++        free(lookup_id);
+         return rc;
+     }
+
+@@ -424,6 +426,7 @@ cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const ch
+                attr_name ? " name=" : "", attr_name ? attr_name : "");
+     }
+
++    free(lookup_id);
+     free_xml(xml_obj);
+     free(local_attr_id);
+     return rc;
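The pattern being fixed is the classic early-return leak: lookup_id is strdup()'d near the top of cli_resource_delete_attribute() and the -ENXIO and error paths returned without releasing it. A minimal self-contained illustration of the rule (names are illustrative, not the real crm_resource internals):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Toy stand-in for the CIB lookup; fails for unknown ids */
    static int lookup(const char *id)
    {
        return strcmp(id, "known") == 0 ? 0 : -1;
    }

    static int delete_attribute(const char *rsc_id)
    {
        char *lookup_id = strdup(rsc_id);   /* heap copy owned by this function */
        int rc = lookup(lookup_id);

        if (rc != 0) {
            free(lookup_id);                /* the fix: release on every early exit */
            return rc;
        }
        /* ... modify the attribute here ... */
        free(lookup_id);                    /* and on the normal path */
        return 0;
    }

    int main(void)
    {
        printf("%d %d\n", delete_attribute("known"), delete_attribute("other"));
        return 0;
    }
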
diff --git a/0009-Fix-pengine-The-failed-action-of-the-resource-that-o.patch b/0009-Fix-pengine-The-failed-action-of-the-resource-that-o.patch
new file mode 100644
index 0000000..1ddba9f
--- /dev/null
+++ b/0009-Fix-pengine-The-failed-action-of-the-resource-that-o.patch
@@ -0,0 +1,31 @@
+From: Hideo Yamauchi
+Date: Fri, 21 Aug 2015 14:12:33 +0900
+Subject: [PATCH] Fix: pengine: The failed action of the resource that occurred
+ in shutdown is not displayed.
+
+The problem was introduced when an older check was folded into the
+record_failed_op() function by the following commit:
+
+* https://github.com/ClusterLabs/pacemaker/commit/9cd666ac15a2998f4543e1dac33edea36bbcf930#diff-7dae505817fa61e544018e581ee45933
+
+(cherry picked from commit 119df5c0bd8fac02bd36e45a28288dcf4624b89d)
+---
+ lib/pengine/unpack.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index 0f83be4..156a192 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -2546,9 +2546,7 @@ record_failed_op(xmlNode *op, node_t* node, pe_working_set_t * data_set)
+     xmlNode *xIter = NULL;
+     const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
+
+-    if (node->details->shutdown) {
+-        return;
+-    } else if(node->details->online == FALSE) {
++    if ((node->details->shutdown) && (node->details->online == FALSE)) {
+         return;
+     }
+
diff --git a/0010-Log-services-Reduce-severity-of-noisy-log-messages.patch b/0010-Log-services-Reduce-severity-of-noisy-log-messages.patch
new file mode 100644
index 0000000..40aeb8b
--- /dev/null
+++ b/0010-Log-services-Reduce-severity-of-noisy-log-messages.patch
@@ -0,0 +1,34 @@
+From: "Gao,Yan"
+Date: Wed, 26 Aug 2015 18:12:56 +0200
+Subject: [PATCH] Log: services: Reduce severity of noisy log messages
+
+They occurred for every monitor operation of systemd resources.
+
+(cherry picked from commit a77c401a3fcdedec165c05d27a75d75abcebf4a1)
+---
+ lib/services/services.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/lib/services/services.c b/lib/services/services.c
+index 3f40078..abf1458 100644
+--- a/lib/services/services.c
++++ b/lib/services/services.c
+@@ -366,15 +366,15 @@ services_set_op_pending(svc_action_t *op, DBusPendingCall *pending)
+         if (pending) {
+             crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending);
+         } else {
+-            crm_info("Done with pending %s DBus call (%p)", op->id, op->opaque->pending);
++            crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending);
+         }
+         dbus_pending_call_unref(op->opaque->pending);
+     }
+     op->opaque->pending = pending;
+     if (pending) {
+-        crm_info("Updated pending %s DBus call (%p)", op->id, pending);
++        crm_trace("Updated pending %s DBus call (%p)", op->id, pending);
+     } else {
+-        crm_info("Cleared pending %s DBus call", op->id);
++        crm_trace("Cleared pending %s DBus call", op->id);
+     }
+ }
+ #endif
diff --git a/0011-Fix-xml-Mark-xml-nodes-as-dirty-if-any-children-move.patch b/0011-Fix-xml-Mark-xml-nodes-as-dirty-if-any-children-move.patch
new file mode 100644
index 0000000..c67a465
--- /dev/null
+++ b/0011-Fix-xml-Mark-xml-nodes-as-dirty-if-any-children-move.patch
@@ -0,0 +1,24 @@
+From: "Gao,Yan"
+Date: Wed, 26 Aug 2015 16:28:38 +0200
+Subject: [PATCH] Fix: xml: Mark xml nodes as dirty if any children move
+
+Otherwise, if nothing else changed in the new xml and not even the
+versions were bumped, crm_diff would output an empty xml diff.
+
+(cherry picked from commit 1073786ec24f3bbf26a0f6a5b0614a65edac4301)
+---
+ lib/common/xml.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/lib/common/xml.c b/lib/common/xml.c
+index 299c7bf..353eb4b 100644
+--- a/lib/common/xml.c
++++ b/lib/common/xml.c
+@@ -4275,6 +4275,7 @@ __xml_diff_object(xmlNode * old, xmlNode * new)
+             if(p_old != p_new) {
+                 crm_info("%s.%s moved from %d to %d - %d",
+                          new_child->name, ID(new_child), p_old, p_new);
++                __xml_node_dirty(new);
+                 p->flags |= xpf_moved;
+
+                 if(p_old > p_new) {
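The one-line __xml_node_dirty(new) call matters because change tracking only serializes subtrees flagged as changed, so a moved child under an un-flagged parent was invisible. A toy model of that invariant (nothing here is the real libxml2 or Pacemaker API; it only shows why the flag must propagate upward):

    #include <stdbool.h>
    #include <stdio.h>

    struct xnode {
        const char *name;
        struct xnode *parent;
        bool dirty;
    };

    /* Conceptual equivalent of __xml_node_dirty(): flag the node and all
     * ancestors so a serializer that prunes clean subtrees still descends
     * far enough to reach the change. */
    static void mark_dirty(struct xnode *n)
    {
        for (; n != NULL; n = n->parent) {
            n->dirty = true;
        }
    }

    int main(void)
    {
        struct xnode root  = { "cib", NULL, false };
        struct xnode child = { "rsc", &root, false };

        mark_dirty(&child);   /* child moved: its parent must be dirty too */
        printf("root dirty: %d\n", root.dirty);
        return 0;
    }
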
diff --git a/0012-Feature-crmd-Implement-reliable-event-notifications.patch b/0012-Feature-crmd-Implement-reliable-event-notifications.patch
new file mode 100644
index 0000000..94e3307
--- /dev/null
+++ b/0012-Feature-crmd-Implement-reliable-event-notifications.patch
@@ -0,0 +1,565 @@
+From: Andrew Beekhof
+Date: Tue, 1 Sep 2015 13:17:45 +1000
+Subject: [PATCH] Feature: crmd: Implement reliable event notifications
+
+(cherry picked from commit 0cd1b8f02b403976afe106e0ca3a8a8a16864c6c)
+---
+ crmd/Makefile.am            |   2 +-
+ crmd/callbacks.c            |   4 +
+ crmd/control.c              |  67 +++++++++++++---
+ crmd/crmd_utils.h           |   1 +
+ crmd/lrm.c                  |   2 +
+ crmd/notify.c               | 188 ++++++++++++++++++++++++++++++++++++++++++++
+ crmd/notify.h               |  30 +++++++
+ crmd/te_utils.c             |   2 +
+ cts/CIB.py                  |   2 +
+ extra/pcmk_notify_sample.sh |  68 ++++++++++++++++
+ include/crm_internal.h      |   1 +
+ lib/common/utils.c          |  27 +++++++
+ 12 files changed, 380 insertions(+), 14 deletions(-)
+ create mode 100644 crmd/notify.c
+ create mode 100644 crmd/notify.h
+ create mode 100755 extra/pcmk_notify_sample.sh
+
+diff --git a/crmd/Makefile.am b/crmd/Makefile.am
+index 8e5e1df..984f5d0 100644
+--- a/crmd/Makefile.am
++++ b/crmd/Makefile.am
+@@ -28,7 +28,7 @@ noinst_HEADERS = crmd.h crmd_fsa.h crmd_messages.h fsa_defines.h \
+         fsa_matrix.h fsa_proto.h crmd_utils.h crmd_callbacks.h \
+         crmd_lrm.h te_callbacks.h tengine.h
+
+-crmd_SOURCES = main.c crmd.c corosync.c \
++crmd_SOURCES = main.c crmd.c corosync.c notify.c \
+         fsa.c control.c messages.c membership.c callbacks.c \
+         election.c join_client.c join_dc.c subsystems.c throttle.c \
+         cib.c pengine.c tengine.c lrm.c lrm_state.c remote_lrmd_ra.c \
+diff --git a/crmd/callbacks.c b/crmd/callbacks.c
+index f646927..38fb30b 100644
+--- a/crmd/callbacks.c
++++ b/crmd/callbacks.c
+@@ -126,6 +126,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
+         case crm_status_nstate:
+             crm_info("%s is now %s (was %s)",
+                      node->uname, state_text(node->state), state_text(data));
++
+             if (safe_str_eq(data, node->state)) {
+                 /* State did not change */
+                 return;
+@@ -147,7 +148,10 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
+                     }
+                 }
+             }
++
++            crmd_notify_node_event(node);
+             break;
++
+         case crm_status_processes:
+             if (data) {
+                 old = *(const uint32_t *)data;
+diff --git a/crmd/control.c b/crmd/control.c
+index f4add49..d92f46b 100644
+--- a/crmd/control.c
++++ b/crmd/control.c
+@@ -873,28 +873,64 @@ do_recover(long long action,
+
+ /* *INDENT-OFF* */
+ pe_cluster_option crmd_opts[] = {
+-    /* name, old-name, validate, default, description */
+-    { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact Mercurial changeset it was built from.  Used for diagnostic purposes." },
+-    { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." },
+-    { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." },
++    /* name, old-name, validate, values, default, short description, long description */
++    { "dc-version", NULL, "string", NULL, "none", NULL,
++      "Version of Pacemaker on the cluster's DC.",
++      "Includes the hash which identifies the exact changeset it was built from.  Used for diagnostic purposes."
++    },
++    { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL,
++      "The messaging stack on which Pacemaker is currently running.",
++      "Used for informational and diagnostic purposes." },
++    { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time,
++      "How long to wait for a response from other nodes during startup.",
++      "The \"correct\" value will depend on the speed/load of your network and the type of switches used."
++    },
+     { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time",
+-      "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer,
++      "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
++      "15min", &check_timer,
+       "Polling interval for time based changes to options, resource parameters and constraints.",
+       "The Cluster is primarily event driven, however the configuration can have elements that change based on time."
+-      " To ensure these changes take effect, we can optionally poll the cluster's status for changes." },
++      " To ensure these changes take effect, we can optionally poll the cluster's status for changes."
++    },
++
++    { "notification-script", NULL, "string", NULL, "/dev/null", &check_script,
++      "Notification script to be called after significant cluster events",
++      "Full path to a script that will be invoked when resources start/stop/fail, fencing occurs or nodes join/leave the cluster.\n"
++      "Must exist on all nodes in the cluster."
++    },
++    { "notification-target", NULL, "string", NULL, "", NULL,
++      "Destination for notifications (Optional)",
++      "Where should the supplied script send notifications to. Useful to avoid hard-coding this in the script."
++    },
++
+     { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
+       "The maximum amount of system resources that should be used by nodes in the cluster",
+       "The cluster will slow down its recovery process when the amount of system resources used"
+-      " (currently CPU) approaches this limit", },
++      " (currently CPU) approaches this limit",
++    },
+     { "node-action-limit", NULL, "integer", NULL, "0", &check_number,
+       "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
+-    { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." },
+-    { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." },
+-    { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." },
+-    { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." },
+-    { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\nEnabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\nUseful if your configuration is sensitive to the order in which ping updates arrive." },
++    { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer,
++      "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
++    },
++    { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer,
++      "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
++    },
++    { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer,
++      "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
++    },
++    { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer,
++      "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
++    },
++    { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer,
++      "*** Advanced Use Only ***\n"
++      "Enabling this option will slow down cluster recovery under all conditions",
++      "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
++      "Useful if your configuration is sensitive to the order in which ping updates arrive."
++    },
+     { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_timer,
+-      "How long to wait before we can assume nodes are safely down", NULL },
++      "How long to wait before we can assume nodes are safely down", NULL
++    },
+     { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
+
+ #if SUPPORT_PLUGIN
+@@ -927,6 +963,7 @@ crmd_pref(GHashTable * options, const char *name)
+ static void
+ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+ {
++    const char *script = NULL;
+     const char *value = NULL;
+     GHashTable *config_hash = NULL;
+     crm_time_t *now = crm_time_new(NULL);
+@@ -955,6 +992,10 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
+
+     verify_crmd_options(config_hash);
+
++    script = crmd_pref(config_hash, "notification-script");
++    value = crmd_pref(config_hash, "notification-target");
++    crmd_enable_notifications(script, value);
++
+     value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
+     election_trigger->period_ms = crm_get_msec(value);
+
+diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
+index 78214bf..7e8c3e6 100644
+--- a/crmd/crmd_utils.h
++++ b/crmd/crmd_utils.h
+@@ -21,6 +21,7 @@
+ #  include
+ #  include
+ #  include    /* For CIB_OP_MODIFY */
++#  include "notify.h"
+
+ #  define CLIENT_EXIT_WAIT 30
+ #  define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+diff --git a/crmd/lrm.c b/crmd/lrm.c
+index 418e7cf..48195e8 100644
+--- a/crmd/lrm.c
++++ b/crmd/lrm.c
+@@ -2415,6 +2415,8 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr
+         free(prefix);
+     }
+
++    crmd_notify_resource_op(lrm_state->node_name, op);
++
+     if (op->rsc_deleted) {
+         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
+         delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
+diff --git a/crmd/notify.c b/crmd/notify.c
+new file mode 100644
+index 0000000..980bfa6
+--- /dev/null
++++ b/crmd/notify.c
+@@ -0,0 +1,188 @@
++/*
++ * Copyright (C) 2015 Andrew Beekhof
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This software is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ * ++ * You should have received a copy of the GNU General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++#include ++#include "notify.h" ++ ++char *notify_script = NULL; ++char *notify_target = NULL; ++ ++ ++static const char *notify_keys[] = ++{ ++ "CRM_notify_recipient", ++ "CRM_notify_node", ++ "CRM_notify_rsc", ++ "CRM_notify_task", ++ "CRM_notify_interval", ++ "CRM_notify_desc", ++ "CRM_notify_status", ++ "CRM_notify_target_rc", ++ "CRM_notify_rc", ++ "CRM_notify_kind", ++ "CRM_notify_version", ++}; ++ ++ ++void ++crmd_enable_notifications(const char *script, const char *target) ++{ ++ free(notify_script); ++ notify_script = NULL; ++ ++ free(notify_target); ++ notify_target = NULL; ++ ++ if(safe_str_eq(script, "/dev/null")) { ++ crm_notice("Notifications disabled"); ++ return; ++ } ++ ++ notify_script = strdup(script); ++ notify_target = strdup(target); ++ crm_notice("Notifications enabled"); ++} ++ ++static void ++set_notify_key(const char *name, const char *cvalue, char *value) ++{ ++ int lpc; ++ bool found = 0; ++ ++ if(cvalue == NULL) { ++ cvalue = value; ++ } ++ ++ for(lpc = 0; lpc < DIMOF(notify_keys); lpc++) { ++ if(safe_str_eq(name, notify_keys[lpc])) { ++ found = 1; ++ crm_trace("Setting notify key %s = '%s'", name, cvalue); ++ setenv(name, cvalue, 1); ++ break; ++ } ++ } ++ ++ CRM_ASSERT(found != 0); ++ free(value); ++} ++ ++ ++static void ++send_notification(const char *kind) ++{ ++ int lpc; ++ pid_t pid; ++ ++ crm_debug("Sending '%s' notification to '%s' via '%s'", kind, notify_target, notify_script); ++ ++ set_notify_key("CRM_notify_recipient", notify_target, NULL); ++ set_notify_key("CRM_notify_kind", kind, NULL); ++ set_notify_key("CRM_notify_version", VERSION, NULL); ++ ++ pid = fork(); ++ if (pid == -1) { ++ crm_perror(LOG_ERR, "notification failed"); ++ } ++ ++ if (pid == 0) { ++ /* crm_debug("notification: I am the child. Executing the nofitication program."); */ ++ execl(notify_script, notify_script, NULL); ++ exit(EXIT_FAILURE); ++ ++ } else { ++ for(lpc = 0; lpc < DIMOF(notify_keys); lpc++) { ++ unsetenv(notify_keys[lpc]); ++ } ++ } ++} ++ ++void crmd_notify_node_event(crm_node_t *node) ++{ ++ if(notify_script == NULL) { ++ return; ++ } ++ ++ set_notify_key("CRM_notify_node", node->uname, NULL); ++ set_notify_key("CRM_notify_desc", node->state, NULL); ++ ++ send_notification("node"); ++} ++ ++void ++crmd_notify_fencing_op(stonith_event_t * e) ++{ ++ char *desc = NULL; ++ ++ if(notify_script) { ++ return; ++ } ++ ++ desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)", ++ e->operation, e->origin, e->target, pcmk_strerror(e->result), ++ e->id); ++ ++ set_notify_key("CRM_notify_node", e->target, NULL); ++ set_notify_key("CRM_notify_task", e->operation, NULL); ++ set_notify_key("CRM_notify_desc", NULL, desc); ++ set_notify_key("CRM_notify_rc", NULL, crm_itoa(e->result)); ++ ++ send_notification("fencing"); ++} ++ ++void ++crmd_notify_resource_op(const char *node, lrmd_event_data_t * op) ++{ ++ int target_rc = 0; ++ ++ if(notify_script == NULL) { ++ return; ++ } ++ ++ target_rc = rsc_op_expected_rc(op); ++ if(op->interval == 0 && target_rc == op->rc && safe_str_eq(op->op_type, RSC_STATUS)) { ++ /* Leave it up to the script if they want to notify for ++ * 'failed' probes, only swallow ones for which the result was ++ * unexpected. 
++ * ++ * Even if we find a resource running, it was probably because ++ * someone erased the status section. ++ */ ++ return; ++ } ++ ++ set_notify_key("CRM_notify_node", node, NULL); ++ ++ set_notify_key("CRM_notify_rsc", op->rsc_id, NULL); ++ set_notify_key("CRM_notify_task", op->op_type, NULL); ++ set_notify_key("CRM_notify_interval", NULL, crm_itoa(op->interval)); ++ ++ set_notify_key("CRM_notify_target_rc", NULL, crm_itoa(target_rc)); ++ set_notify_key("CRM_notify_status", NULL, crm_itoa(op->op_status)); ++ set_notify_key("CRM_notify_rc", NULL, crm_itoa(op->rc)); ++ ++ if(op->op_status == PCMK_LRM_OP_DONE) { ++ set_notify_key("CRM_notify_desc", services_ocf_exitcode_str(op->rc), NULL); ++ } else { ++ set_notify_key("CRM_notify_desc", services_lrm_status_str(op->op_status), NULL); ++ } ++ ++ send_notification("resource"); ++} ++ +diff --git a/crmd/notify.h b/crmd/notify.h +new file mode 100644 +index 0000000..4b138ea +--- /dev/null ++++ b/crmd/notify.h +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2015 Andrew Beekhof ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This software is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++#ifndef CRMD_NOTIFY__H ++# define CRMD_NOTIFY__H ++ ++# include ++# include ++# include ++ ++void crmd_enable_notifications(const char *script, const char *target); ++void crmd_notify_node_event(crm_node_t *node); ++void crmd_notify_fencing_op(stonith_event_t * e); ++void crmd_notify_resource_op(const char *node, lrmd_event_data_t * op); ++ ++#endif +diff --git a/crmd/te_utils.c b/crmd/te_utils.c +index a1d29f6..22551ba 100644 +--- a/crmd/te_utils.c ++++ b/crmd/te_utils.c +@@ -124,6 +124,8 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) + return; + } + ++ crmd_notify_fencing_op(st_event); ++ + if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { + crm_notice("%s was successfully unfenced by %s (at the request of %s)", + st_event->target, st_event->executioner ? st_event->executioner : "", st_event->origin); +diff --git a/cts/CIB.py b/cts/CIB.py +index 8fbba6c..cd3a6a1 100644 +--- a/cts/CIB.py ++++ b/cts/CIB.py +@@ -219,6 +219,8 @@ class CIB11(ConfigBase): + o["dc-deadtime"] = "5s" + o["no-quorum-policy"] = no_quorum + o["expected-quorum-votes"] = self.num_nodes ++ o["notification-script"] = "/var/lib/pacemaker/notify.sh" ++ o["notification-target"] = "/var/lib/pacemaker/notify.log" + + if self.CM.Env["DoBSC"] == 1: + o["ident-string"] = "Linux-HA TEST configuration file - REMOVEME!!" 
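The hunks above define the crmd side of the notification feature: significant events export a fixed set of CRM_notify_* environment variables and the configured script is invoked asynchronously, while the CTS change exercises the feature during testing. As a hedged sketch of how the two options might be set on a live cluster (the option names shown are the ones introduced above; note that a later patch in this series renames them to notification-agent and notification-recipient):

    # Sketch only -- /usr/local/bin/notify.sh is a hypothetical handler;
    # per check_script(), it must exist as an executable regular file on
    # every node, and the default "/dev/null" keeps notifications disabled.
    crm_attribute --type crm_config --name notification-script \
        --update /usr/local/bin/notify.sh
    crm_attribute --type crm_config --name notification-target \
        --update /var/log/cluster-events.log

The sample handler added next (extra/pcmk_notify_sample.sh) shows the receiving side of the contract: it switches on CRM_notify_kind and appends a one-line summary of each node, fencing, or resource event to the file named by CRM_notify_recipient.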
+diff --git a/extra/pcmk_notify_sample.sh b/extra/pcmk_notify_sample.sh +new file mode 100755 +index 0000000..83cf8e9 +--- /dev/null ++++ b/extra/pcmk_notify_sample.sh +@@ -0,0 +1,68 @@ ++#!/bin/bash ++# ++# Copyright (C) 2015 Andrew Beekhof ++# ++# This program is free software; you can redistribute it and/or ++# modify it under the terms of the GNU General Public ++# License as published by the Free Software Foundation; either ++# version 2 of the License, or (at your option) any later version. ++# ++# This software is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public ++# License along with this library; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++if [ -z $CRM_notify_version ]; then ++ echo "Pacemaker version 1.1.14 is required" >> ${CRM_notify_recipient} ++ exit 0 ++fi ++ ++case $CRM_notify_kind in ++ node) ++ echo "Node '${CRM_notify_node}' is now '${CRM_notify_desc}'" >> ${CRM_notify_recipient} ++ ;; ++ fencing) ++ # Other keys: ++ # ++ # CRM_notify_node ++ # CRM_notify_task ++ # CRM_notify_rc ++ # ++ echo "Fencing ${CRM_notify_desc}" >> ${CRM_notify_recipient} ++ ;; ++ resource) ++ # Other keys: ++ # ++ # CRM_notify_target_rc ++ # CRM_notify_status ++ # CRM_notify_rc ++ # ++ if [ ${CRM_notify_interval} = "0" ]; then ++ CRM_notify_interval="" ++ else ++ CRM_notify_interval=" (${CRM_notify_interval})" ++ fi ++ ++ if [ ${CRM_notify_target_rc} = "0" ]; then ++ CRM_notify_target_rc="" ++ else ++ CRM_notify_target_rc=" (target: ${CRM_notify_target_rc})" ++ fi ++ ++ case ${CRM_notify_desc} in ++ Cancelled) ;; ++ *) ++ echo "Resource operation '${CRM_notify_task}${CRM_notify_interval}' for '${CRM_notify_rsc}' on '${CRM_notify_node}': ${CRM_notify_desc}${CRM_notify_target_rc}" >> ${CRM_notify_recipient} ++ ;; ++ esac ++ ;; ++ *) ++ echo "Unhandled $CRM_notify_kind notification" >> ${CRM_notify_recipient} ++ env | grep CRM_notify >> ${CRM_notify_recipient} ++ ;; ++ ++esac +diff --git a/include/crm_internal.h b/include/crm_internal.h +index c13bc7b..fb03537 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -127,6 +127,7 @@ gboolean check_timer(const char *value); + gboolean check_boolean(const char *value); + gboolean check_number(const char *value); + gboolean check_quorum(const char *value); ++gboolean check_script(const char *value); + gboolean check_utilization(const char *value); + + /* Shared PE/crmd functionality */ +diff --git a/lib/common/utils.c b/lib/common/utils.c +index 6a234dc..628cf2f 100644 +--- a/lib/common/utils.c ++++ b/lib/common/utils.c +@@ -180,6 +180,33 @@ check_quorum(const char *value) + } + + gboolean ++check_script(const char *value) ++{ ++ struct stat st; ++ ++ if(safe_str_eq(value, "/dev/null")) { ++ return TRUE; ++ } ++ ++ if(stat(value, &st) != 0) { ++ crm_err("Script %s does not exist", value); ++ return FALSE; ++ } ++ ++ if(S_ISREG(st.st_mode) == 0) { ++ crm_err("Script %s is not a regular file", value); ++ return FALSE; ++ } ++ ++ if( (st.st_mode & (S_IXUSR | S_IXGRP )) == 0) { ++ crm_err("Script %s is not executable", value); ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++gboolean + check_utilization(const char *value) + { + char *end = NULL; diff --git a/0013-Fix-cman-Suppress-implied-node-names.patch 
b/0013-Fix-cman-Suppress-implied-node-names.patch new file mode 100644 index 0000000..eb14b0d --- /dev/null +++ b/0013-Fix-cman-Suppress-implied-node-names.patch @@ -0,0 +1,47 @@ +From: Andrew Beekhof +Date: Wed, 2 Sep 2015 12:08:52 +1000 +Subject: [PATCH] Fix: cman: Suppress implied node names + +(cherry picked from commit e94fbcd0c49db9d3c69b7c0e478ba89a4d360dde) +--- + tools/crm_node.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index d0195e3..24cc4d7 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -434,6 +434,21 @@ try_heartbeat(int command, enum cluster_type_e stack) + #if SUPPORT_CMAN + # include + # define MAX_NODES 256 ++static bool valid_cman_name(const char *name, uint32_t nodeid) ++{ ++ bool rc = TRUE; ++ ++ /* Yes, %d, because that's what CMAN does */ ++ char *fakename = crm_strdup_printf("Node%d", nodeid); ++ ++ if(crm_str_eq(fakename, name, TRUE)) { ++ rc = FALSE; ++ crm_notice("Ignoring inferred name from cman: %s", fakename); ++ } ++ free(fakename); ++ return rc; ++} ++ + static gboolean + try_cman(int command, enum cluster_type_e stack) + { +@@ -478,7 +493,10 @@ try_cman(int command, enum cluster_type_e stack) + } + + for (lpc = 0; lpc < node_count; lpc++) { +- if (command == 'l') { ++ if(valid_cman_name(cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_nodeid) == FALSE) { ++ /* Do not print */ ++ ++ } if (command == 'l') { + printf("%s ", cman_nodes[lpc].cn_name); + + } else if (cman_nodes[lpc].cn_nodeid != 0 && cman_nodes[lpc].cn_member) { diff --git a/0014-Fix-crmd-Choose-more-appropriate-names-for-notificat.patch b/0014-Fix-crmd-Choose-more-appropriate-names-for-notificat.patch new file mode 100644 index 0000000..2a12849 --- /dev/null +++ b/0014-Fix-crmd-Choose-more-appropriate-names-for-notificat.patch @@ -0,0 +1,58 @@ +From: Andrew Beekhof +Date: Wed, 2 Sep 2015 14:32:40 +1000 +Subject: [PATCH] Fix: crmd: Choose more appropriate names for notification + options + +(cherry picked from commit 8971ef024ffebf3d0240b30e620697a7b58232c4) +--- + crmd/control.c | 12 ++++++------ + cts/CIB.py | 4 ++-- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/crmd/control.c b/crmd/control.c +index d92f46b..d1f9acd 100644 +--- a/crmd/control.c ++++ b/crmd/control.c +@@ -893,12 +893,12 @@ pe_cluster_option crmd_opts[] = { + " To ensure these changes take effect, we can optionally poll the cluster's status for changes." + }, + +- { "notification-script", NULL, "string", NULL, "/dev/null", &check_script, +- "Notification script to be called after significant cluster events", +- "Full path to a script that will be invoked when resources start/stop/fail, fencing occurs or nodes join/leave the cluster.\n" ++ { "notification-agent", NULL, "string", NULL, "/dev/null", &check_script, ++ "Notification script or tool to be called after significant cluster events", ++ "Full path to a script or binary that will be invoked when resources start/stop/fail, fencing occurs or nodes join/leave the cluster.\n" + "Must exist on all nodes in the cluster." + }, +- { "notification-target", NULL, "string", NULL, "", NULL, ++ { "notification-recipient", NULL, "string", NULL, "", NULL, + "Destination for notifications (Optional)", + "Where should the supplied script send notifications to. Useful to avoid hard-coding this in the script." 
+ }, +@@ -992,8 +992,8 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + + verify_crmd_options(config_hash); + +- script = crmd_pref(config_hash, "notification-script"); +- value = crmd_pref(config_hash, "notification-target"); ++ script = crmd_pref(config_hash, "notification-agent"); ++ value = crmd_pref(config_hash, "notification-recipient"); + crmd_enable_notifications(script, value); + + value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); +diff --git a/cts/CIB.py b/cts/CIB.py +index cd3a6a1..0933ccd 100644 +--- a/cts/CIB.py ++++ b/cts/CIB.py +@@ -219,8 +219,8 @@ class CIB11(ConfigBase): + o["dc-deadtime"] = "5s" + o["no-quorum-policy"] = no_quorum + o["expected-quorum-votes"] = self.num_nodes +- o["notification-script"] = "/var/lib/pacemaker/notify.sh" +- o["notification-target"] = "/var/lib/pacemaker/notify.log" ++ o["notification-agent"] = "/var/lib/pacemaker/notify.sh" ++ o["notification-recipient"] = "/var/lib/pacemaker/notify.log" + + if self.CM.Env["DoBSC"] == 1: + o["ident-string"] = "Linux-HA TEST configuration file - REMOVEME!!" diff --git a/0015-Fix-crmd-Correctly-enable-disable-notifications.patch b/0015-Fix-crmd-Correctly-enable-disable-notifications.patch new file mode 100644 index 0000000..575f6ea --- /dev/null +++ b/0015-Fix-crmd-Correctly-enable-disable-notifications.patch @@ -0,0 +1,22 @@ +From: Andrew Beekhof +Date: Wed, 2 Sep 2015 14:48:17 +1000 +Subject: [PATCH] Fix: crmd: Correctly enable/disable notifications + +(cherry picked from commit 7368cf120cd5ee848d2bdcd788497a3b89616b05) +--- + crmd/notify.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/crmd/notify.c b/crmd/notify.c +index 980bfa6..ccf5ea8 100644 +--- a/crmd/notify.c ++++ b/crmd/notify.c +@@ -50,7 +50,7 @@ crmd_enable_notifications(const char *script, const char *target) + free(notify_target); + notify_target = NULL; + +- if(safe_str_eq(script, "/dev/null")) { ++ if(script == NULL || safe_str_eq(script, "/dev/null")) { + crm_notice("Notifications disabled"); + return; + } diff --git a/0016-Fix-crmd-Report-the-completion-status-and-output-of-.patch b/0016-Fix-crmd-Report-the-completion-status-and-output-of-.patch new file mode 100644 index 0000000..e7bc0e3 --- /dev/null +++ b/0016-Fix-crmd-Report-the-completion-status-and-output-of-.patch @@ -0,0 +1,109 @@ +From: Andrew Beekhof +Date: Wed, 2 Sep 2015 14:34:04 +1000 +Subject: [PATCH] Fix: crmd: Report the completion status and output of + notifications + +(cherry picked from commit 0c303d8a6f9f9a9dbec9f6d2e9e799fe335f8eaa) +--- + crmd/notify.c | 37 ++++++++++++++++++++++++------------- + lib/services/services.c | 4 ++-- + 2 files changed, 26 insertions(+), 15 deletions(-) + +diff --git a/crmd/notify.c b/crmd/notify.c +index ccf5ea8..ca2be0f 100644 +--- a/crmd/notify.c ++++ b/crmd/notify.c +@@ -29,6 +29,7 @@ static const char *notify_keys[] = + { + "CRM_notify_recipient", + "CRM_notify_node", ++ "CRM_notify_nodeid", + "CRM_notify_rsc", + "CRM_notify_task", + "CRM_notify_interval", +@@ -83,12 +84,21 @@ set_notify_key(const char *name, const char *cvalue, char *value) + free(value); + } + ++static void crmd_notify_complete(svc_action_t *op) ++{ ++ if(op->rc == 0) { ++ crm_info("Notification %d (%s) complete", op->sequence, op->agent); ++ } else { ++ crm_warn("Notification %d (%s) failed: %d", op->sequence, op->agent, op->rc); ++ } ++} + + static void + send_notification(const char *kind) + { + int lpc; +- pid_t pid; ++ svc_action_t *notify = NULL; ++ static int operations = 0; + + 
crm_debug("Sending '%s' notification to '%s' via '%s'", kind, notify_target, notify_script); + +@@ -96,20 +106,20 @@ send_notification(const char *kind) + set_notify_key("CRM_notify_kind", kind, NULL); + set_notify_key("CRM_notify_version", VERSION, NULL); + +- pid = fork(); +- if (pid == -1) { +- crm_perror(LOG_ERR, "notification failed"); +- } ++ notify = services_action_create_generic(notify_script, NULL); + +- if (pid == 0) { +- /* crm_debug("notification: I am the child. Executing the nofitication program."); */ +- execl(notify_script, notify_script, NULL); +- exit(EXIT_FAILURE); ++ notify->timeout = 300; ++ notify->standard = strdup("event"); ++ notify->id = strdup(notify_script); ++ notify->agent = strdup(notify_script); ++ notify->sequence = ++operations; + +- } else { +- for(lpc = 0; lpc < DIMOF(notify_keys); lpc++) { +- unsetenv(notify_keys[lpc]); +- } ++ if(services_action_async(notify, &crmd_notify_complete) == FALSE) { ++ services_action_free(notify); ++ } ++ ++ for(lpc = 0; lpc < DIMOF(notify_keys); lpc++) { ++ unsetenv(notify_keys[lpc]); + } + } + +@@ -120,6 +130,7 @@ void crmd_notify_node_event(crm_node_t *node) + } + + set_notify_key("CRM_notify_node", node->uname, NULL); ++ set_notify_key("CRM_notify_nodeid", NULL, crm_itoa(node->id)); + set_notify_key("CRM_notify_desc", node->state, NULL); + + send_notification("node"); +diff --git a/lib/services/services.c b/lib/services/services.c +index abf1458..4609a7d 100644 +--- a/lib/services/services.c ++++ b/lib/services/services.c +@@ -598,7 +598,7 @@ action_async_helper(svc_action_t * op) { + } + + /* keep track of ops that are in-flight to avoid collisions in the same namespace */ +- if (res) { ++ if (op->rsc && res) { + inflight_ops = g_list_append(inflight_ops, op); + } + +@@ -622,7 +622,7 @@ services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t * + g_hash_table_replace(recurring_actions, op->id, op); + } + +- if (is_op_blocked(op->rsc)) { ++ if (op->rsc && is_op_blocked(op->rsc)) { + blocked_ops = g_list_append(blocked_ops, op); + return TRUE; + } diff --git a/0017-Fix-cman-Print-the-nodeid-of-nodes-with-fake-names.patch b/0017-Fix-cman-Print-the-nodeid-of-nodes-with-fake-names.patch new file mode 100644 index 0000000..b627349 --- /dev/null +++ b/0017-Fix-cman-Print-the-nodeid-of-nodes-with-fake-names.patch @@ -0,0 +1,23 @@ +From: Andrew Beekhof +Date: Thu, 3 Sep 2015 10:58:59 +1000 +Subject: [PATCH] Fix: cman: Print the nodeid of nodes with fake names + +(cherry picked from commit dd9a379408aa43b89c81d31ce7efa60b2e77f593) +--- + tools/crm_node.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index 24cc4d7..ed02ee7 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -494,7 +494,8 @@ try_cman(int command, enum cluster_type_e stack) + + for (lpc = 0; lpc < node_count; lpc++) { + if(valid_cman_name(cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_nodeid) == FALSE) { +- /* Do not print */ ++ /* The name was invented, but we need to print something, make it the id instead */ ++ printf("%u ", cman_nodes[lpc].cn_nodeid); + + } if (command == 'l') { + printf("%s ", cman_nodes[lpc].cn_name); diff --git a/0018-Refactor-Tools-Isolate-the-paths-which-truely-requir.patch b/0018-Refactor-Tools-Isolate-the-paths-which-truely-requir.patch new file mode 100644 index 0000000..2fbd35e --- /dev/null +++ b/0018-Refactor-Tools-Isolate-the-paths-which-truely-requir.patch @@ -0,0 +1,299 @@ +From: Andrew Beekhof +Date: Thu, 3 Sep 2015 11:36:21 +1000 +Subject: 
[PATCH] Refactor: Tools: Isolate the paths which truely require + corosync-2.x + +(cherry picked from commit 32c05b99f6a3e953668dcda71ce24e03927d83cb) +--- + tools/crm_node.c | 243 +++++++++++++++++++++++++++++++------------------------ + 1 file changed, 139 insertions(+), 104 deletions(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index ed02ee7..308d4f9 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -60,6 +60,9 @@ static struct crm_option long_options[] = { + #if SUPPORT_COROSYNC + {"openais", 0, 0, 'A', "\tOnly try connecting to an OpenAIS-based cluster"}, + #endif ++#ifdef SUPPORT_CS_QUORUM ++ {"corosync", 0, 0, 'C', "\tOnly try connecting to an Corosync-based cluster"}, ++#endif + #ifdef SUPPORT_HEARTBEAT + {"heartbeat", 0, 0, 'H', "Only try connecting to a Heartbeat-based cluster"}, + #endif +@@ -223,6 +226,138 @@ int tools_remove_node_cache(const char *node, const char *target) + return rc > 0 ? 0 : rc; + } + ++static gint ++compare_node_uname(gconstpointer a, gconstpointer b) ++{ ++ const crm_node_t *a_node = a; ++ const crm_node_t *b_node = b; ++ return strcmp(a_node->uname?a_node->uname:"", b_node->uname?b_node->uname:""); ++} ++ ++static int ++node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) ++{ ++ xmlNode *msg = string2xml(buffer); ++ ++ if (msg) { ++ xmlNode *node = NULL; ++ GListPtr nodes = NULL; ++ GListPtr iter = NULL; ++ ++ crm_log_xml_trace(msg, "message"); ++ ++ for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { ++ crm_node_t *peer = calloc(1, sizeof(crm_node_t)); ++ ++ nodes = g_list_insert_sorted(nodes, peer, compare_node_uname); ++ peer->uname = (char*)crm_element_value_copy(node, "uname"); ++ peer->state = (char*)crm_element_value_copy(node, "state"); ++ crm_element_value_int(node, "id", (int*)&peer->id); ++ } ++ ++ for(iter = nodes; iter; iter = iter->next) { ++ crm_node_t *peer = iter->data; ++ if (command == 'l') { ++ fprintf(stdout, "%u %s %s\n", peer->id, peer->uname, peer->state); ++ ++ } else if (command == 'p') { ++ if(safe_str_eq(peer->state, CRM_NODE_MEMBER)) { ++ fprintf(stdout, "%s ", peer->uname); ++ } ++ ++ } else if (command == 'i') { ++ if(safe_str_eq(peer->state, CRM_NODE_MEMBER)) { ++ fprintf(stdout, "%u ", peer->id); ++ } ++ } ++ } ++ ++ g_list_free_full(nodes, free); ++ free_xml(msg); ++ ++ if (command == 'p') { ++ fprintf(stdout, "\n"); ++ } ++ ++ crm_exit(pcmk_ok); ++ } ++ ++ return 0; ++} ++ ++static void ++node_mcp_destroy(gpointer user_data) ++{ ++ crm_exit(ENOTCONN); ++} ++ ++static gboolean ++try_pacemaker(int command, enum cluster_type_e stack) ++{ ++ struct ipc_client_callbacks node_callbacks = { ++ .dispatch = node_mcp_dispatch, ++ .destroy = node_mcp_destroy ++ }; ++ ++ if (stack == pcmk_cluster_heartbeat) { ++ /* Nothing to do for them */ ++ return FALSE; ++ } ++ ++ switch (command) { ++ case 'e': ++ /* Age only applies to heartbeat clusters */ ++ fprintf(stdout, "1\n"); ++ crm_exit(pcmk_ok); ++ ++ case 'q': ++ /* Implement one day? 
++ * Wouldn't be much for pacemakerd to track it and include in the poke reply ++ */ ++ return FALSE; ++ ++ case 'R': ++ { ++ int lpc = 0; ++ const char *daemons[] = { ++ CRM_SYSTEM_CRMD, ++ "stonith-ng", ++ T_ATTRD, ++ CRM_SYSTEM_MCP, ++ }; ++ ++ for(lpc = 0; lpc < DIMOF(daemons); lpc++) { ++ if (tools_remove_node_cache(target_uname, daemons[lpc])) { ++ crm_err("Failed to connect to %s to remove node '%s'", daemons[lpc], target_uname); ++ crm_exit(pcmk_err_generic); ++ } ++ } ++ crm_exit(pcmk_ok); ++ } ++ break; ++ ++ case 'i': ++ case 'l': ++ case 'p': ++ /* Go to pacemakerd */ ++ { ++ GMainLoop *amainloop = g_main_new(FALSE); ++ mainloop_io_t *ipc = ++ mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, NULL, &node_callbacks); ++ if (ipc != NULL) { ++ /* Sending anything will get us a list of nodes */ ++ xmlNode *poke = create_xml_node(NULL, "poke"); ++ ++ crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL); ++ free_xml(poke); ++ g_main_run(amainloop); ++ } ++ } ++ break; ++ } ++ return FALSE; ++} ++ + #if SUPPORT_HEARTBEAT + # include + # include +@@ -626,66 +761,6 @@ ais_membership_dispatch(cpg_handle_t handle, + # include + # include + +-static gint +-compare_node_uname(gconstpointer a, gconstpointer b) +-{ +- const crm_node_t *a_node = a; +- const crm_node_t *b_node = b; +- return strcmp(a_node->uname?a_node->uname:"", b_node->uname?b_node->uname:""); +-} +- +-static int +-node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) +-{ +- xmlNode *msg = string2xml(buffer); +- +- if (msg) { +- xmlNode *node = NULL; +- GListPtr nodes = NULL; +- GListPtr iter = NULL; +- +- crm_log_xml_trace(msg, "message"); +- +- for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { +- crm_node_t *peer = calloc(1, sizeof(crm_node_t)); +- +- nodes = g_list_insert_sorted(nodes, peer, compare_node_uname); +- peer->uname = (char*)crm_element_value_copy(node, "uname"); +- peer->state = (char*)crm_element_value_copy(node, "state"); +- crm_element_value_int(node, "id", (int*)&peer->id); +- } +- +- for(iter = nodes; iter; iter = iter->next) { +- crm_node_t *peer = iter->data; +- if (command == 'l') { +- fprintf(stdout, "%u %s\n", peer->id, peer->uname); +- +- } else if (command == 'p') { +- if(safe_str_eq(peer->state, CRM_NODE_MEMBER)) { +- fprintf(stdout, "%s ", peer->uname); +- } +- } +- } +- +- g_list_free_full(nodes, free); +- free_xml(msg); +- +- if (command == 'p') { +- fprintf(stdout, "\n"); +- } +- +- crm_exit(pcmk_ok); +- } +- +- return 0; +-} +- +-static void +-node_mcp_destroy(gpointer user_data) +-{ +- crm_exit(ENOTCONN); +-} +- + static gboolean + try_corosync(int command, enum cluster_type_e stack) + { +@@ -696,36 +771,7 @@ try_corosync(int command, enum cluster_type_e stack) + cpg_handle_t c_handle = 0; + quorum_handle_t q_handle = 0; + +- mainloop_io_t *ipc = NULL; +- GMainLoop *amainloop = NULL; +- const char *daemons[] = { +- CRM_SYSTEM_CRMD, +- "stonith-ng", +- T_ATTRD, +- CRM_SYSTEM_MCP, +- }; +- +- struct ipc_client_callbacks node_callbacks = { +- .dispatch = node_mcp_dispatch, +- .destroy = node_mcp_destroy +- }; +- + switch (command) { +- case 'R': +- for(rc = 0; rc < DIMOF(daemons); rc++) { +- if (tools_remove_node_cache(target_uname, daemons[rc])) { +- crm_err("Failed to connect to %s to remove node '%s'", daemons[rc], target_uname); +- crm_exit(pcmk_err_generic); +- } +- } +- crm_exit(pcmk_ok); +- break; +- +- case 'e': +- /* Age makes no sense (yet) in an AIS cluster */ +- fprintf(stdout, "1\n"); +- crm_exit(pcmk_ok); +- + case 
'q': + /* Go direct to the Quorum API */ + rc = quorum_initialize(&q_handle, NULL, &quorum_type); +@@ -766,21 +812,8 @@ try_corosync(int command, enum cluster_type_e stack) + cpg_finalize(c_handle); + crm_exit(pcmk_ok); + +- case 'l': +- case 'p': +- /* Go to pacemakerd */ +- amainloop = g_main_new(FALSE); +- ipc = +- mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, NULL, +- &node_callbacks); +- if (ipc != NULL) { +- /* Sending anything will get us a list of nodes */ +- xmlNode *poke = create_xml_node(NULL, "poke"); +- +- crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL); +- free_xml(poke); +- g_main_run(amainloop); +- } ++ default: ++ try_pacemaker(command, stack); + break; + } + return FALSE; +@@ -963,5 +996,7 @@ main(int argc, char **argv) + } + #endif + ++ try_pacemaker(command, try_stack); ++ + return (1); + } diff --git a/0019-Fix-corosync-Display-node-state-and-quorum-data-if-a.patch b/0019-Fix-corosync-Display-node-state-and-quorum-data-if-a.patch new file mode 100644 index 0000000..b7822e3 --- /dev/null +++ b/0019-Fix-corosync-Display-node-state-and-quorum-data-if-a.patch @@ -0,0 +1,94 @@ +From: Andrew Beekhof +Date: Thu, 3 Sep 2015 12:27:59 +1000 +Subject: [PATCH] Fix: corosync: Display node state and quorum data if + available + +(cherry picked from commit 4d4c92e515bbaf74917a311e19d5995b30c29430) +--- + mcp/pacemaker.c | 7 +++++++ + tools/crm_node.c | 17 ++++++++++------- + 2 files changed, 17 insertions(+), 7 deletions(-) + +diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c +index f9fc015..9c3195e 100644 +--- a/mcp/pacemaker.c ++++ b/mcp/pacemaker.c +@@ -35,6 +35,8 @@ + + #include + #include ++ ++gboolean pcmk_quorate = FALSE; + gboolean fatal_error = FALSE; + GMainLoop *mainloop = NULL; + +@@ -560,6 +562,10 @@ update_process_clients(crm_client_t *client) + crm_node_t *node = NULL; + xmlNode *update = create_xml_node(NULL, "nodes"); + ++ if (is_corosync_cluster()) { ++ crm_xml_add_int(update, "quorate", pcmk_quorate); ++ } ++ + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { + xmlNode *xml = create_xml_node(update, "node"); +@@ -896,6 +902,7 @@ static gboolean + mcp_quorum_callback(unsigned long long seq, gboolean quorate) + { + /* Nothing to do */ ++ pcmk_quorate = quorate; + return TRUE; + } + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index 308d4f9..9626120 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -243,8 +243,16 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) + xmlNode *node = NULL; + GListPtr nodes = NULL; + GListPtr iter = NULL; ++ const char *quorate = crm_element_value(msg, "quorate"); + + crm_log_xml_trace(msg, "message"); ++ if (command == 'q' && quorate != NULL) { ++ fprintf(stdout, "%s\n", quorate); ++ crm_exit(pcmk_ok); ++ ++ } else if(command == 'q') { ++ crm_exit(1); ++ } + + for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { + crm_node_t *peer = calloc(1, sizeof(crm_node_t)); +@@ -258,7 +266,7 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) + for(iter = nodes; iter; iter = iter->next) { + crm_node_t *peer = iter->data; + if (command == 'l') { +- fprintf(stdout, "%u %s %s\n", peer->id, peer->uname, peer->state); ++ fprintf(stdout, "%u %s %s\n", peer->id, peer->uname, peer->state?peer->state:""); + + } else if (command == 'p') { + if(safe_str_eq(peer->state, CRM_NODE_MEMBER)) { +@@ -310,12 +318,6 @@ try_pacemaker(int command, enum cluster_type_e stack) + fprintf(stdout, 
"1\n"); + crm_exit(pcmk_ok); + +- case 'q': +- /* Implement one day? +- * Wouldn't be much for pacemakerd to track it and include in the poke reply +- */ +- return FALSE; +- + case 'R': + { + int lpc = 0; +@@ -338,6 +340,7 @@ try_pacemaker(int command, enum cluster_type_e stack) + + case 'i': + case 'l': ++ case 'q': + case 'p': + /* Go to pacemakerd */ + { diff --git a/0020-Fix-pacemakerd-Do-not-forget-about-nodes-that-leave-.patch b/0020-Fix-pacemakerd-Do-not-forget-about-nodes-that-leave-.patch new file mode 100644 index 0000000..e2da8a5 --- /dev/null +++ b/0020-Fix-pacemakerd-Do-not-forget-about-nodes-that-leave-.patch @@ -0,0 +1,23 @@ +From: Andrew Beekhof +Date: Thu, 3 Sep 2015 13:27:57 +1000 +Subject: [PATCH] Fix: pacemakerd: Do not forget about nodes that leave the + cluster + +(cherry picked from commit 2ac396ae6f54c9437bcf786eeccf94d4e2fdd77a) +--- + mcp/pacemaker.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c +index 9c3195e..88a6a1f 100644 +--- a/mcp/pacemaker.c ++++ b/mcp/pacemaker.c +@@ -1108,6 +1108,8 @@ main(int argc, char **argv) + cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; + cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; + ++ crm_set_autoreap(FALSE); ++ + if(cluster_connect_cpg(&cluster) == FALSE) { + crm_err("Couldn't connect to Corosync's CPG service"); + rc = -ENOPROTOOPT; diff --git a/0021-Fix-pacemakerd-Track-node-state-in-pacemakerd.patch b/0021-Fix-pacemakerd-Track-node-state-in-pacemakerd.patch new file mode 100644 index 0000000..b2814a8 --- /dev/null +++ b/0021-Fix-pacemakerd-Track-node-state-in-pacemakerd.patch @@ -0,0 +1,58 @@ +From: Andrew Beekhof +Date: Thu, 3 Sep 2015 14:29:27 +1000 +Subject: [PATCH] Fix: pacemakerd: Track node state in pacemakerd + +(cherry picked from commit c186f54241c49bf20b1620767933b006063d613c) +--- + mcp/pacemaker.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c +index 88a6a1f..9f00a21 100644 +--- a/mcp/pacemaker.c ++++ b/mcp/pacemaker.c +@@ -901,7 +901,6 @@ mcp_cpg_membership(cpg_handle_t handle, + static gboolean + mcp_quorum_callback(unsigned long long seq, gboolean quorate) + { +- /* Nothing to do */ + pcmk_quorate = quorate; + return TRUE; + } +@@ -909,8 +908,23 @@ mcp_quorum_callback(unsigned long long seq, gboolean quorate) + static void + mcp_quorum_destroy(gpointer user_data) + { ++ crm_info("connection lost"); ++} ++ ++#if SUPPORT_CMAN ++static gboolean ++mcp_cman_dispatch(unsigned long long seq, gboolean quorate) ++{ ++ pcmk_quorate = quorate; ++ return TRUE; ++} ++ ++static void ++mcp_cman_destroy(gpointer user_data) ++{ + crm_info("connection closed"); + } ++#endif + + int + main(int argc, char **argv) +@@ -1122,6 +1136,12 @@ main(int argc, char **argv) + } + } + ++#if SUPPORT_CMAN ++ if (rc == pcmk_ok && is_cman_cluster()) { ++ init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy); ++ } ++#endif ++ + if(rc == pcmk_ok) { + local_name = get_local_node_name(); + update_node_processes(local_nodeid, local_name, get_process_list()); diff --git a/0022-Fix-PE-Resolve-memory-leak.patch b/0022-Fix-PE-Resolve-memory-leak.patch new file mode 100644 index 0000000..e7cd5b1 --- /dev/null +++ b/0022-Fix-PE-Resolve-memory-leak.patch @@ -0,0 +1,27 @@ +From: Andrew Beekhof +Date: Tue, 8 Sep 2015 12:02:54 +1000 +Subject: [PATCH] Fix: PE: Resolve memory leak + +(cherry picked from commit 4f48a79fd19be0e614716f0900e31985d4714ace) +--- + lib/pengine/unpack.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff 
--git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 156a192..c4f3134 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -276,9 +276,13 @@ destroy_digest_cache(gpointer ptr) + op_digest_cache_t *data = ptr; + + free_xml(data->params_all); ++ free_xml(data->params_secure); + free_xml(data->params_restart); ++ + free(data->digest_all_calc); + free(data->digest_restart_calc); ++ free(data->digest_secure_calc); ++ + free(data); + } + diff --git a/0023-Fix-cman-Purge-all-node-caches-for-crm_node-R.patch b/0023-Fix-cman-Purge-all-node-caches-for-crm_node-R.patch new file mode 100644 index 0000000..5ff7c08 --- /dev/null +++ b/0023-Fix-cman-Purge-all-node-caches-for-crm_node-R.patch @@ -0,0 +1,24 @@ +From: Andrew Beekhof +Date: Tue, 8 Sep 2015 12:03:56 +1000 +Subject: [PATCH] Fix: cman: Purge all node caches for crm_node -R + +(cherry picked from commit c445e135b6d52b1a5f3cfdacfa54a63b313c00d2) +--- + tools/crm_node.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index 9626120..48ee7c4 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -607,9 +607,7 @@ try_cman(int command, enum cluster_type_e stack) + + switch (command) { + case 'R': +- if (tools_remove_node_cache(target_uname, CRM_SYSTEM_CRMD)) { +- crm_err("Failed to connect to "CRM_SYSTEM_CRMD" to remove node '%s'", target_uname); +- } ++ try_pacemaker(command, stack); + break; + + case 'e': diff --git a/0024-Refactor-membership-Safely-autoreap-nodes-without-co.patch b/0024-Refactor-membership-Safely-autoreap-nodes-without-co.patch new file mode 100644 index 0000000..35617cc --- /dev/null +++ b/0024-Refactor-membership-Safely-autoreap-nodes-without-co.patch @@ -0,0 +1,92 @@ +From: Andrew Beekhof +Date: Tue, 8 Sep 2015 12:05:04 +1000 +Subject: [PATCH] Refactor: membership: Safely autoreap nodes without code + duplication + +(cherry picked from commit acd660a1bdf40ada599041cb14d2128632d2e7a5) +--- + lib/cluster/membership.c | 43 +++++++++++++++++++++---------------------- + 1 file changed, 21 insertions(+), 22 deletions(-) + +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index b7958eb..3081e54 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -795,8 +795,8 @@ crm_update_peer_expected(const char *source, crm_node_t * node, const char *expe + * called within a cache iteration if reaping is possible, + * otherwise reaping could invalidate the iterator. 
+ */ +-crm_node_t * +-crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership) ++static crm_node_t * ++crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter) + { + gboolean is_member; + +@@ -822,13 +822,19 @@ crm_update_peer_state(const char *source, crm_node_t * node, const char *state, + free(last); + + if (!is_member && crm_autoreap) { +- if (status_type == crm_status_rstate) { ++ if(iter) { ++ crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); ++ g_hash_table_iter_remove(iter); ++ ++ } else if (status_type == crm_status_rstate) { + crm_remote_peer_cache_remove(node->uname); ++ + } else { + reap_crm_member(node->id, node->uname); + } + node = NULL; + } ++ + } else { + crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id, + state); +@@ -836,6 +842,12 @@ crm_update_peer_state(const char *source, crm_node_t * node, const char *state, + return node; + } + ++crm_node_t * ++crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership) ++{ ++ return crm_update_peer_state_iter(source, node, state, membership, NULL); ++} ++ + /*! + * \internal + * \brief Reap all nodes from cache whose membership information does not match +@@ -853,26 +865,13 @@ crm_reap_unseen_nodes(uint64_t membership) + while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { + if (node->last_seen != membership) { + if (node->state) { +- /* crm_update_peer_state() cannot be called here, because that +- * might modify the peer cache, invalidating our iterator ++ /* ++ * Calling crm_update_peer_state_iter() allows us to ++ * remove the node from crm_peer_cache without ++ * invalidating our iterator + */ +- if (safe_str_eq(node->state, CRM_NODE_LOST)) { +- crm_trace("Node %s[%u] - state is unchanged (%s)", +- node->uname, node->id, CRM_NODE_LOST); +- } else { +- char *last = node->state; +- +- node->state = strdup(CRM_NODE_LOST); +- crm_notice("Node %s[%u] - state is now %s (was %s)", +- node->uname, node->id, CRM_NODE_LOST, last); +- if (crm_status_callback) { +- crm_status_callback(crm_status_nstate, node, last); +- } +- if (crm_autoreap) { +- g_hash_table_iter_remove(&iter); +- } +- free(last); +- } ++ crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter); ++ + } else { + crm_info("State of node %s[%u] is still unknown", + node->uname, node->id); diff --git a/0025-Fix-crmd-Prevent-segfault-by-correctly-detecting-whe.patch b/0025-Fix-crmd-Prevent-segfault-by-correctly-detecting-whe.patch new file mode 100644 index 0000000..a1797e9 --- /dev/null +++ b/0025-Fix-crmd-Prevent-segfault-by-correctly-detecting-whe.patch @@ -0,0 +1,23 @@ +From: Andrew Beekhof +Date: Wed, 9 Sep 2015 14:46:49 +1000 +Subject: [PATCH] Fix: crmd: Prevent segfault by correctly detecting when + notifications are not required + +(cherry picked from commit 5eb9f93ef666c75e5f32827a92b0a57ada063803) +--- + crmd/notify.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/crmd/notify.c b/crmd/notify.c +index ca2be0f..179af18 100644 +--- a/crmd/notify.c ++++ b/crmd/notify.c +@@ -141,7 +141,7 @@ crmd_notify_fencing_op(stonith_event_t * e) + { + char *desc = NULL; + +- if(notify_script) { ++ if(notify_script == NULL) { + return; + } + diff --git a/0026-Fix-crmd-don-t-add-node-ID-to-proxied-remote-node-re.patch b/0026-Fix-crmd-don-t-add-node-ID-to-proxied-remote-node-re.patch new file mode 
100644 index 0000000..ba29678 --- /dev/null +++ b/0026-Fix-crmd-don-t-add-node-ID-to-proxied-remote-node-re.patch @@ -0,0 +1,29 @@ +From: Ken Gaillot +Date: Thu, 27 Aug 2015 11:00:02 -0500 +Subject: [PATCH] Fix: crmd: don't add node ID to proxied remote node requests + for attrd + +446a1005 incorrectly set F_ATTRD_HOST_ID for proxied remote node requests to +attrd. Since attrd only uses F_ATTRD_HOST_ID to associate a cluster node name +with an ID, it doesn't ever need to be set for remote nodes. + +Additionally, that revision used the proxying cluster node's node ID, which can +lead to node ID conflicts in attrd. + +(cherry picked from commit 6af6da534646dbadf3d8d1d63d0edb2844c72073) +--- + crmd/lrm_state.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c +index c03fa0b..bea1027 100644 +--- a/crmd/lrm_state.c ++++ b/crmd/lrm_state.c +@@ -540,7 +540,6 @@ remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) + if (safe_str_eq(type, T_ATTRD) + && crm_element_value(request, F_ATTRD_HOST) == NULL) { + crm_xml_add(request, F_ATTRD_HOST, proxy->node_name); +- crm_xml_add_int(request, F_ATTRD_HOST_ID, get_local_nodeid(0)); + } + + rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); diff --git a/0027-Fix-pacemaker_remote-memory-leak-in-ipc_proxy_dispat.patch b/0027-Fix-pacemaker_remote-memory-leak-in-ipc_proxy_dispat.patch new file mode 100644 index 0000000..9dad48e --- /dev/null +++ b/0027-Fix-pacemaker_remote-memory-leak-in-ipc_proxy_dispat.patch @@ -0,0 +1,35 @@ +From: Ken Gaillot +Date: Mon, 14 Sep 2015 15:00:13 -0500 +Subject: [PATCH] Fix: pacemaker_remote: memory leak in ipc_proxy_dispatch() + +Detected via routine valgrind testing + +(cherry picked from commit 3bb439d1554cb5567b886c52107bd3bb6f27b696) +--- + lrmd/ipc_proxy.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c +index 9427393..2a5ad78 100644 +--- a/lrmd/ipc_proxy.c ++++ b/lrmd/ipc_proxy.c +@@ -223,9 +223,9 @@ ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) + } + + CRM_CHECK(client != NULL, crm_err("Invalid client"); +- return FALSE); ++ free_xml(request); return FALSE); + CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); +- return FALSE); ++ free_xml(request); return FALSE); + + /* this ensures that synced request/responses happen over the event channel + * in the crmd, allowing the crmd to process the messages async */ +@@ -241,6 +241,7 @@ ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) + crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags); + add_message_xml(msg, F_LRMD_IPC_MSG, request); + lrmd_server_send_notify(ipc_proxy, msg); ++ free_xml(request); + free_xml(msg); + + return 0; diff --git a/0028-Log-The-package-version-is-more-informative.patch b/0028-Log-The-package-version-is-more-informative.patch new file mode 100644 index 0000000..543d9ab --- /dev/null +++ b/0028-Log-The-package-version-is-more-informative.patch @@ -0,0 +1,115 @@ +From: Andrew Beekhof +Date: Wed, 16 Sep 2015 09:14:39 +1000 +Subject: [PATCH] Log: The package version is more informative + +(cherry picked from commit 2b4d195e9e94777fc1953832fcce3637ffa2f449) +--- + crmd/cib.c | 2 +- + crmd/election.c | 2 +- + crmd/main.c | 5 ++--- + lib/ais/plugin.c | 2 +- + lib/common/utils.c | 4 ++-- + mcp/pacemaker.c | 4 ++-- + 6 files changed, 9 insertions(+), 10 deletions(-) + +diff --git a/crmd/cib.c b/crmd/cib.c +index 7ec5eda..41e9efb 100644 +--- a/crmd/cib.c ++++ b/crmd/cib.c +@@ -113,7 
+113,7 @@ revision_check_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, vo + cmp = compare_version(revision, CRM_FEATURE_SET); + + if (cmp > 0) { +- crm_err("This build (%s) does not support the current resource configuration", VERSION); ++ crm_err("This build (%s) does not support the current resource configuration", PACEMAKER_VERSION); + crm_err("We can only support up to CRM feature set %s (current=%s)", + CRM_FEATURE_SET, revision); + crm_err("Shutting down the CRM"); +diff --git a/crmd/election.c b/crmd/election.c +index b542a66..adab4e3 100644 +--- a/crmd/election.c ++++ b/crmd/election.c +@@ -215,7 +215,7 @@ do_dc_takeover(long long action, + } + + update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, +- "dc-version", VERSION "-" BUILD_VERSION, FALSE, NULL, NULL); ++ "dc-version", PACEMAKER_VERSION "-" BUILD_VERSION, FALSE, NULL, NULL); + + update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, + "cluster-infrastructure", cluster_type, FALSE, NULL, NULL); +diff --git a/crmd/main.c b/crmd/main.c +index e9a69b4..75ed91c 100644 +--- a/crmd/main.c ++++ b/crmd/main.c +@@ -89,13 +89,12 @@ main(int argc, char **argv) + crmd_metadata(); + return 0; + } else if (argc - optind == 1 && safe_str_eq("version", argv[optind])) { +- fprintf(stdout, "CRM Version: "); +- fprintf(stdout, "%s (%s)\n", VERSION, BUILD_VERSION); ++ fprintf(stdout, "CRM Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); + return 0; + } + + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); +- crm_notice("CRM Git Version: %s\n", BUILD_VERSION); ++ crm_notice("CRM Git Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); + + if (optind > argc) { + ++argerr; +diff --git a/lib/ais/plugin.c b/lib/ais/plugin.c +index ab534fa..cf2a131 100644 +--- a/lib/ais/plugin.c ++++ b/lib/ais/plugin.c +@@ -201,7 +201,7 @@ static struct corosync_exec_handler pcmk_exec_service[] = { + */ + /* *INDENT-OFF* */ + struct corosync_service_engine pcmk_service_handler = { +- .name = (char *)"Pacemaker Cluster Manager "PACKAGE_VERSION, ++ .name = (char *)"Pacemaker Cluster Manager "PACEMAKER_VERSION, + .id = PCMK_SERVICE_ID, + .private_data_size = 0, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, +diff --git a/lib/common/utils.c b/lib/common/utils.c +index 628cf2f..2364f5c 100644 +--- a/lib/common/utils.c ++++ b/lib/common/utils.c +@@ -1603,13 +1603,13 @@ crm_help(char cmd, int exit_code) + FILE *stream = (exit_code ? 
stderr : stdout); + + if (cmd == 'v' || cmd == '$') { +- fprintf(stream, "Pacemaker %s\n", VERSION); ++ fprintf(stream, "Pacemaker %s\n", PACEMAKER_VERSION); + fprintf(stream, "Written by Andrew Beekhof\n"); + goto out; + } + + if (cmd == '!') { +- fprintf(stream, "Pacemaker %s (Build: %s): %s\n", VERSION, BUILD_VERSION, CRM_FEATURES); ++ fprintf(stream, "Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); + goto out; + } + +diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c +index 9f00a21..910d154 100644 +--- a/mcp/pacemaker.c ++++ b/mcp/pacemaker.c +@@ -972,7 +972,7 @@ main(int argc, char **argv) + shutdown = TRUE; + break; + case 'F': +- printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", VERSION, BUILD_VERSION, ++ printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, + CRM_FEATURE_SET, CRM_FEATURES); + crm_exit(pcmk_ok); + default: +@@ -1039,7 +1039,7 @@ main(int argc, char **argv) + crm_exit(ENODATA); + } + +- crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES); ++ crm_notice("Starting Pacemaker %s (Build: %s): %s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); + mainloop = g_main_new(FALSE); + sysrq_init(); + diff --git a/0029-Fix-crm_resource-Allow-the-resource-configuration-to.patch b/0029-Fix-crm_resource-Allow-the-resource-configuration-to.patch new file mode 100644 index 0000000..942b464 --- /dev/null +++ b/0029-Fix-crm_resource-Allow-the-resource-configuration-to.patch @@ -0,0 +1,127 @@ +From: Andrew Beekhof +Date: Thu, 17 Sep 2015 09:46:38 +1000 +Subject: [PATCH] Fix: crm_resource: Allow the resource configuration to be + modified for --force-{check,start,..} calls + +(cherry picked from commit 1206f735a8ddb33c77152c736828e823e7755c34) +--- + tools/crm_resource.c | 36 +++++++++++++++++++++++++++++++----- + tools/crm_resource.h | 2 +- + tools/crm_resource_runtime.c | 14 +++++++++++++- + 3 files changed, 45 insertions(+), 7 deletions(-) + +diff --git a/tools/crm_resource.c b/tools/crm_resource.c +index 156bbea..2a94362 100644 +--- a/tools/crm_resource.c ++++ b/tools/crm_resource.c +@@ -247,6 +247,7 @@ main(int argc, char **argv) + const char *prop_set = NULL; + const char *rsc_long_cmd = NULL; + const char *longname = NULL; ++ GHashTable *override_params = NULL; + + char *xml_file = NULL; + crm_ipc_t *crmd_channel = NULL; +@@ -503,11 +504,35 @@ main(int argc, char **argv) + } + } + +- if (optind < argc && argv[optind] != NULL) { ++ if (optind < argc ++ && argv[optind] != NULL ++ && rsc_cmd == 0 ++ && rsc_long_cmd) { ++ ++ override_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); ++ while (optind < argc && argv[optind] != NULL) { ++ char *name = calloc(1, strlen(argv[optind])); ++ char *value = calloc(1, strlen(argv[optind])); ++ int rc = sscanf(argv[optind], "%[^=]=%s", name, value); ++ ++ if(rc == 2) { ++ g_hash_table_replace(override_params, name, value); ++ ++ } else { ++ CMD_ERR("Error parsing '%s' as a name=value pair for --%s", argv[optind], rsc_long_cmd); ++ free(value); ++ free(name); ++ argerr++; ++ } ++ optind++; ++ } ++ ++ } else if (optind < argc && argv[optind] != NULL && rsc_cmd == 0) { + CMD_ERR("non-option ARGV-elements: "); + while (optind < argc && argv[optind] != NULL) { +- CMD_ERR("%s ", argv[optind++]); +- ++argerr; ++ CMD_ERR("[%d of %d] %s ", optind, argc, argv[optind]); ++ optind++; ++ argerr++; + } + } + +@@ -516,7 +541,8 @@ main(int argc, char **argv) + } + + if (argerr) { +- crm_help('?', 
EX_USAGE); ++ CMD_ERR("Invalid option(s) supplied, use --help for valid usage"); ++ return crm_exit(EX_USAGE); + } + + our_pid = calloc(1, 11); +@@ -631,7 +657,7 @@ main(int argc, char **argv) + rc = wait_till_stable(timeout_ms, cib_conn); + + } else if (rsc_cmd == 0 && rsc_long_cmd) { /* force-(stop|start|check) */ +- rc = cli_resource_execute(rsc_id, rsc_long_cmd, cib_conn, &data_set); ++ rc = cli_resource_execute(rsc_id, rsc_long_cmd, override_params, cib_conn, &data_set); + + } else if (rsc_cmd == 'A' || rsc_cmd == 'a') { + GListPtr lpc = NULL; +diff --git a/tools/crm_resource.h b/tools/crm_resource.h +index 5a206e0..d4c3b05 100644 +--- a/tools/crm_resource.h ++++ b/tools/crm_resource.h +@@ -74,7 +74,7 @@ int cli_resource_search(const char *rsc, pe_working_set_t * data_set); + int cli_resource_delete(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_uname, resource_t * rsc, pe_working_set_t * data_set); + int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib); + int cli_resource_move(const char *rsc_id, const char *host_name, cib_t * cib, pe_working_set_t *data_set); +-int cli_resource_execute(const char *rsc_id, const char *rsc_action, cib_t * cib, pe_working_set_t *data_set); ++int cli_resource_execute(const char *rsc_id, const char *rsc_action, GHashTable *override_hash, cib_t * cib, pe_working_set_t *data_set); + + int cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, + const char *attr_name, const char *attr_value, bool recursive, +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index b9427bc..ce9db01 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -1297,7 +1297,7 @@ wait_till_stable(int timeout_ms, cib_t * cib) + } + + int +-cli_resource_execute(const char *rsc_id, const char *rsc_action, cib_t * cib, pe_working_set_t *data_set) ++cli_resource_execute(const char *rsc_id, const char *rsc_action, GHashTable *override_hash, cib_t * cib, pe_working_set_t *data_set) + { + int rc = pcmk_ok; + svc_action_t *op = NULL; +@@ -1360,6 +1360,18 @@ cli_resource_execute(const char *rsc_id, const char *rsc_action, cib_t * cib, pe + setenv("OCF_TRACE_RA", "1", 1); + } + ++ if(op && override_hash) { ++ GHashTableIter iter; ++ char *name = NULL; ++ char *value = NULL; ++ ++ g_hash_table_iter_init(&iter, override_hash); ++ while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) { ++ printf("Overriding the cluser configuration for '%s' with '%s' = '%s'\n", rsc->id, name, value); ++ g_hash_table_replace(op->params, strdup(name), strdup(value)); ++ } ++ } ++ + if(op == NULL) { + /* Re-run but with stderr enabled so we can display a sane error message */ + crm_enable_stderr(TRUE); diff --git a/0030-Log-lrmd-Improved-logging-when-no-pacemaker-remote-a.patch b/0030-Log-lrmd-Improved-logging-when-no-pacemaker-remote-a.patch new file mode 100644 index 0000000..6bff962 --- /dev/null +++ b/0030-Log-lrmd-Improved-logging-when-no-pacemaker-remote-a.patch @@ -0,0 +1,34 @@ +From: Andrew Beekhof +Date: Thu, 17 Sep 2015 14:43:15 +1000 +Subject: [PATCH] Log: lrmd: Improved logging when no pacemaker remote authkey + is available + +(cherry picked from commit 20c2178f076ff32fdf9ba9a467c193b8dac2f9e5) +--- + lib/lrmd/lrmd_client.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 42bdf2b..1f1ffde 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ 
-1061,13 +1061,17 @@ lrmd_tls_set_key(gnutls_datum_t * key) + if (set_key(key, specific_location) == 0) { + crm_debug("Using custom authkey location %s", specific_location); + return 0; ++ ++ } else { ++ crm_err("No lrmd remote key found at %s, trying default locations", specific_location); + } + +- if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { ++ if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION) != 0) { + rc = set_key(key, ALT_REMOTE_KEY_LOCATION); + } ++ + if (rc) { +- crm_err("No lrmd remote key found"); ++ crm_err("No lrmd remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION); + return -1; + } + diff --git a/0031-Fix-liblrmd-don-t-print-error-if-remote-key-environm.patch b/0031-Fix-liblrmd-don-t-print-error-if-remote-key-environm.patch new file mode 100644 index 0000000..0210482 --- /dev/null +++ b/0031-Fix-liblrmd-don-t-print-error-if-remote-key-environm.patch @@ -0,0 +1,38 @@ +From: Ken Gaillot +Date: Wed, 23 Sep 2015 10:45:39 -0500 +Subject: [PATCH] Fix: liblrmd: don't print error if remote key environment + variable unset + +20c2178 added error logging if the remote key was unable to be read, +however it would also log an error in the usual case where the +environment variable was simply unset. + +(cherry picked from commit dec3349f1252e2c2c18ed110b8cc4a2b2212b613) +--- + lib/lrmd/lrmd_client.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 1f1ffde..f365e59 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -1062,8 +1062,8 @@ lrmd_tls_set_key(gnutls_datum_t * key) + crm_debug("Using custom authkey location %s", specific_location); + return 0; + +- } else { +- crm_err("No lrmd remote key found at %s, trying default locations", specific_location); ++ } else if (specific_location) { ++ crm_err("No valid lrmd remote key found at %s, trying default location", specific_location); + } + + if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION) != 0) { +@@ -1071,7 +1071,7 @@ lrmd_tls_set_key(gnutls_datum_t * key) + } + + if (rc) { +- crm_err("No lrmd remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION); ++ crm_err("No valid lrmd remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION); + return -1; + } + diff --git a/0032-Fix-Tools-Repair-the-logging-of-interesting-command-.patch b/0032-Fix-Tools-Repair-the-logging-of-interesting-command-.patch new file mode 100644 index 0000000..fda67b2 --- /dev/null +++ b/0032-Fix-Tools-Repair-the-logging-of-interesting-command-.patch @@ -0,0 +1,182 @@ +From: Andrew Beekhof +Date: Mon, 28 Sep 2015 14:54:28 +1000 +Subject: [PATCH] Fix: Tools: Repair the logging of 'interesting' command-lines + +(cherry picked from commit b7d6608d8b33b4e9580e04f25446176bac832fb7) +--- + tools/attrd_updater.c | 1 + + tools/cibadmin.c | 8 ++++++-- + tools/crm_attribute.c | 6 +++++- + tools/crm_resource.c | 30 +++++++++++++++++++++++------- + 4 files changed, 35 insertions(+), 10 deletions(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index 878dab5..11462ee 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -150,6 +150,7 @@ main(int argc, char **argv) + case 'v': + command = flag; + attr_value = optarg; ++ crm_log_args(argc, argv); /* Too much? 
*/ + break; + default: + ++argerr; +diff --git a/tools/cibadmin.c b/tools/cibadmin.c +index 6b90536..c16d3c7 100644 +--- a/tools/cibadmin.c ++++ b/tools/cibadmin.c +@@ -213,7 +213,7 @@ main(int argc, char **argv) + int option_index = 0; + + crm_xml_init(); /* Sets buffer allocation strategy */ +- crm_log_preinit(NULL, argc, argv); ++ crm_log_cli_init("cibadmin"); + crm_set_options(NULL, "command [options] [data]", long_options, + "Provides direct access to the cluster configuration." + "\n\nAllows the configuration, or sections of it, to be queried, modified, replaced and deleted." +@@ -286,6 +286,7 @@ main(int argc, char **argv) + break; + case 'B': + cib_action = CIB_OP_BUMP; ++ crm_log_args(argc, argv); + break; + case 'V': + command_options = command_options | cib_verbose; +@@ -303,13 +304,16 @@ main(int argc, char **argv) + case 'X': + crm_trace("Option %c => %s", flag, optarg); + admin_input_xml = optarg; ++ crm_log_args(argc, argv); + break; + case 'x': + crm_trace("Option %c => %s", flag, optarg); + admin_input_file = optarg; ++ crm_log_args(argc, argv); + break; + case 'p': + admin_input_stdin = TRUE; ++ crm_log_args(argc, argv); + break; + case 'N': + case 'h': +@@ -334,6 +338,7 @@ main(int argc, char **argv) + case 'f': + force_flag = TRUE; + command_options |= cib_quorum_override; ++ crm_log_args(argc, argv); + break; + case 'a': + output = createEmptyCib(1); +@@ -355,7 +360,6 @@ main(int argc, char **argv) + quiet = FALSE; + } + +- crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, quiet); + while (bump_log_num > 0) { + crm_bump_log_level(argc, argv); + bump_log_num--; +diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c +index c37b096..fc2f7c7 100644 +--- a/tools/crm_attribute.c ++++ b/tools/crm_attribute.c +@@ -146,11 +146,15 @@ main(int argc, char **argv) + case '?': + crm_help(flag, EX_OK); + break; +- case 'D': + case 'G': ++ command = flag; ++ attr_value = optarg; ++ break; ++ case 'D': + case 'v': + command = flag; + attr_value = optarg; ++ crm_log_args(argc, argv); + break; + case 'q': + case 'Q': +diff --git a/tools/crm_resource.c b/tools/crm_resource.c +index 2a94362..1b2976b 100644 +--- a/tools/crm_resource.c ++++ b/tools/crm_resource.c +@@ -304,6 +304,7 @@ main(int argc, char **argv) + || safe_str_eq("force-check", longname)) { + rsc_cmd = flag; + rsc_long_cmd = longname; ++ crm_log_args(argc, argv); + + } else if (safe_str_eq("list-ocf-providers", longname) + || safe_str_eq("list-ocf-alternatives", longname) +@@ -433,6 +434,7 @@ main(int argc, char **argv) + break; + case 'f': + do_force = TRUE; ++ crm_log_args(argc, argv); + break; + case 'i': + prop_id = optarg; +@@ -452,41 +454,55 @@ main(int argc, char **argv) + case 'T': + timeout_ms = crm_get_msec(optarg); + break; ++ + case 'C': + case 'R': + case 'P': +- rsc_cmd = 'C'; ++ crm_log_args(argc, argv); + require_resource = FALSE; + require_crmd = TRUE; ++ rsc_cmd = 'C'; + break; ++ + case 'F': +- rsc_cmd = flag; ++ crm_log_args(argc, argv); + require_crmd = TRUE; ++ rsc_cmd = flag; ++ break; ++ ++ case 'U': ++ case 'B': ++ case 'M': ++ case 'D': ++ crm_log_args(argc, argv); ++ rsc_cmd = flag; + break; ++ + case 'L': + case 'c': + case 'l': + case 'q': + case 'w': +- case 'D': + case 'W': +- case 'M': +- case 'U': +- case 'B': + case 'O': + case 'o': + case 'A': + case 'a': + rsc_cmd = flag; + break; ++ + case 'j': + print_pending = TRUE; + break; + case 'p': +- case 'g': + case 'd': + case 'S': ++ crm_log_args(argc, argv); ++ prop_name = optarg; ++ rsc_cmd = flag; ++ break; + case 'G': ++ case 'g': + 
prop_name = optarg; + rsc_cmd = flag; + break; diff --git a/0033-Feature-Tools-Do-not-send-command-lines-to-syslog.patch b/0033-Feature-Tools-Do-not-send-command-lines-to-syslog.patch new file mode 100644 index 0000000..c01d782 --- /dev/null +++ b/0033-Feature-Tools-Do-not-send-command-lines-to-syslog.patch @@ -0,0 +1,46 @@ +From: Andrew Beekhof +Date: Mon, 28 Sep 2015 15:02:10 +1000 +Subject: [PATCH] Feature: Tools: Do not send command lines to syslog + +(cherry picked from commit 8dae6838312c6a60c2e4b7ffa73a100fd5d0dce3) +--- + lib/common/logging.c | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/lib/common/logging.c b/lib/common/logging.c +index b18b841..6879023 100644 +--- a/lib/common/logging.c ++++ b/lib/common/logging.c +@@ -928,24 +928,17 @@ crm_log_args(int argc, char **argv) + { + int lpc = 0; + int len = 0; +- int restore = FALSE; + int existing_len = 0; + int line = __LINE__; + static int logged = 0; + + char *arg_string = NULL; +- struct qb_log_callsite *args_cs = +- qb_log_callsite_get(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0); + + if (argc == 0 || argv == NULL || logged) { + return; + } + + logged = 1; +- qb_bit_set(args_cs->targets, QB_LOG_SYSLOG); /* Turn on syslog too */ +- +- restore = qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_STATE_GET, 0); +- qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); + + for (; lpc < argc; lpc++) { + if (argv[lpc] == NULL) { +@@ -958,7 +951,6 @@ crm_log_args(int argc, char **argv) + } + + qb_log_from_external_source(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0, arg_string); +- qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, restore); + + free(arg_string); + } diff --git a/0034-Log-cibadmin-Default-once-again-to-LOG_CRIT.patch b/0034-Log-cibadmin-Default-once-again-to-LOG_CRIT.patch new file mode 100644 index 0000000..ccc3f1e --- /dev/null +++ b/0034-Log-cibadmin-Default-once-again-to-LOG_CRIT.patch @@ -0,0 +1,21 @@ +From: Andrew Beekhof +Date: Mon, 28 Sep 2015 18:45:32 +1000 +Subject: [PATCH] Log: cibadmin: Default once again to LOG_CRIT + +(cherry picked from commit d0d6118cbee3eccb3467058eadd91e08d3f4a42f) +--- + tools/cibadmin.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/cibadmin.c b/tools/cibadmin.c +index c16d3c7..84531f8 100644 +--- a/tools/cibadmin.c ++++ b/tools/cibadmin.c +@@ -214,6 +214,7 @@ main(int argc, char **argv) + + crm_xml_init(); /* Sets buffer allocation strategy */ + crm_log_cli_init("cibadmin"); ++ set_crm_log_level(LOG_CRIT); + crm_set_options(NULL, "command [options] [data]", long_options, + "Provides direct access to the cluster configuration." + "\n\nAllows the configuration, or sections of it, to be queried, modified, replaced and deleted." 
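For context on the two logging patches above: 0032 makes the tools call crm_log_args() only for "interesting" (state-changing) options, and 0033 stops crm_log_args() from forcing that record into syslog. As a standalone illustration of the underlying idea, joining argv into a single record and emitting it at most once per process, the following minimal C sketch may help. It is not pacemaker code; every name in it is local to the example, and the logging backend is stand-in stderr rather than libqb.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Sketch only: join argv into one space-separated string and log it
     * at most once, mirroring the shape of crm_log_args() (static guard,
     * two passes over argv, single formatted record). */
    static void log_args_once(int argc, char **argv)
    {
        static int logged = 0;
        size_t len = 0;
        int lpc;
        char *arg_string;

        if (argc == 0 || argv == NULL || logged) {
            return;
        }
        logged = 1;

        /* First pass: size the buffer (each argument plus one separator) */
        for (lpc = 0; lpc < argc; lpc++) {
            if (argv[lpc]) {
                len += strlen(argv[lpc]) + 1;
            }
        }

        arg_string = calloc(len + 1, sizeof(char));
        if (arg_string == NULL) {
            return;
        }

        /* Second pass: join the arguments into one record */
        for (lpc = 0; lpc < argc; lpc++) {
            if (argv[lpc]) {
                strcat(arg_string, argv[lpc]);
                strcat(arg_string, " ");
            }
        }

        fprintf(stderr, "Invoked: %s\n", arg_string);
        free(arg_string);
    }

    int main(int argc, char **argv)
    {
        log_args_once(argc, argv);
        log_args_once(argc, argv); /* no-op: already logged once */
        return 0;
    }

The static guard is what lets callers sprinkle the call across many option-handling branches, as 0032 does, without producing duplicate log entries.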
diff --git a/0035-Fix-crm_resource-Correctly-update-existing-meta-attr.patch b/0035-Fix-crm_resource-Correctly-update-existing-meta-attr.patch new file mode 100644 index 0000000..33670ac --- /dev/null +++ b/0035-Fix-crm_resource-Correctly-update-existing-meta-attr.patch @@ -0,0 +1,87 @@ +From: Andrew Beekhof +Date: Wed, 30 Sep 2015 17:33:00 +1000 +Subject: [PATCH] Fix: crm_resource: Correctly update existing meta attributes + regardless of their position in the hierarchy + +(cherry picked from commit f367348c832c64e2dc480dc96d2e0c2aa88639ba) + +Conflicts: + tools/crm_resource_runtime.c +--- + tools/crm_resource_runtime.c | 44 ++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 36 insertions(+), 8 deletions(-) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index ce9db01..a04adb9 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -213,10 +213,11 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + } + + if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { +- rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, +- attr_name, &local_attr_id); +- if(rc == pcmk_ok && do_force == FALSE) { +- if (BE_QUIET == FALSE) { ++ if (do_force == FALSE) { ++ rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, ++ XML_TAG_META_SETS, attr_set, attr_id, ++ attr_name, &local_attr_id); ++ if (rc == pcmk_ok && BE_QUIET == FALSE) { + printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", + uber_parent(rsc)->id, attr_name, local_attr_id); + printf(" Delete '%s' first or use --force to override\n", local_attr_id); +@@ -224,7 +225,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + return -ENOTUNIQ; + } + +- } else if(rsc->parent) { ++ } else if(rsc->parent && do_force == FALSE) { + + switch(rsc->parent->variant) { + case pe_group: +@@ -234,14 +235,41 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + break; + case pe_master: + case pe_clone: +- rsc = rsc->parent; +- if (BE_QUIET == FALSE) { +- printf("Updating '%s' for '%s'...\n", rsc->id, rsc_id); ++ ++ rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); ++ free(local_attr_id); ++ ++ if(rc != pcmk_ok) { ++ rsc = rsc->parent; ++ if (BE_QUIET == FALSE) { ++ printf("Updating '%s' on '%s', the parent of '%s'\n", attr_name, rsc->id, rsc_id); ++ } + } + break; + default: + break; + } ++ ++ } else if (rsc->parent && BE_QUIET == FALSE) { ++ printf("Forcing update of '%s' for '%s' instead of '%s'\n", attr_name, rsc_id, rsc->parent->id); ++ ++ } else if(rsc->parent == NULL && rsc->children) { ++ resource_t *child = rsc->children->data; ++ ++ if(child->variant == pe_native) { ++ lookup_id = clone_strip(child->id); /* Could be a cloned group! */ ++ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); ++ ++ if(rc == pcmk_ok) { ++ rsc = child; ++ if (BE_QUIET == FALSE) { ++ printf("A value for '%s' already exists in child '%s', updating that instead of '%s'\n", attr_name, lookup_id, rsc_id); ++ } ++ } ++ ++ free(local_attr_id); ++ free(lookup_id); ++ } + } + + lookup_id = clone_strip(rsc->id); /* Could be a cloned group! 
*/ diff --git a/0036-Log-crm_resource-restart-Improved-user-feedback-on-f.patch b/0036-Log-crm_resource-restart-Improved-user-feedback-on-f.patch new file mode 100644 index 0000000..4dded82 --- /dev/null +++ b/0036-Log-crm_resource-restart-Improved-user-feedback-on-f.patch @@ -0,0 +1,27 @@ +From: Andrew Beekhof +Date: Mon, 5 Oct 2015 12:27:59 +1100 +Subject: [PATCH] Log: crm_resource --restart: Improved user feedback on + failure + +(cherry picked from commit b557a39973a1fb85b2791be67dc03cfd32c22d89) +--- + tools/crm_resource_runtime.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index a04adb9..878fd0b 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -1040,6 +1040,12 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * + pe_working_set_t data_set; + + if(resource_is_running_on(rsc, host) == FALSE) { ++ const char *id = rsc->clone_name?rsc->clone_name:rsc->id; ++ if(host) { ++ printf("%s is not running on %s and so cannot be restarted\n", id, host); ++ } else { ++ printf("%s is not running anywhere and so cannot be restarted\n", id); ++ } + return -ENXIO; + } + diff --git a/0037-Fix-crm_resource-Correctly-delete-existing-meta-attr.patch b/0037-Fix-crm_resource-Correctly-delete-existing-meta-attr.patch new file mode 100644 index 0000000..5699706 --- /dev/null +++ b/0037-Fix-crm_resource-Correctly-delete-existing-meta-attr.patch @@ -0,0 +1,179 @@ +From: "Gao,Yan" +Date: Wed, 30 Sep 2015 16:59:43 +0200 +Subject: [PATCH] Fix: crm_resource: Correctly delete existing meta attributes + regardless of their position in the hierarchy + +Use the same logic as "--set-parameter" for "--delete-parameter". + +(cherry picked from commit cdee10c7310ab433b006126bc087f6b8dff3843e) + +Conflicts: + tools/crm_resource_runtime.c +--- + tools/crm_resource_runtime.c | 109 ++++++++++++++++++++++--------------------- + 1 file changed, 55 insertions(+), 54 deletions(-) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 878fd0b..2d51e88 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -190,47 +190,20 @@ find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const cha + return rc; + } + +-int +-cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, +- const char *attr_name, const char *attr_value, bool recursive, +- cib_t * cib, pe_working_set_t * data_set) ++static resource_t * ++find_matching_attr_resource(resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id, ++ const char * attr_name, cib_t * cib, const char * cmd) + { + int rc = pcmk_ok; +- static bool need_init = TRUE; +- + char *lookup_id = NULL; + char *local_attr_id = NULL; +- char *local_attr_set = NULL; +- +- xmlNode *xml_top = NULL; +- xmlNode *xml_obj = NULL; +- +- bool use_attributes_tag = FALSE; +- resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); +- +- if (rsc == NULL) { +- return -ENXIO; +- } +- +- if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { +- if (do_force == FALSE) { +- rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, +- XML_TAG_META_SETS, attr_set, attr_id, +- attr_name, &local_attr_id); +- if (rc == pcmk_ok && BE_QUIET == FALSE) { +- printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", +- uber_parent(rsc)->id, attr_name, local_attr_id); +- printf(" Delete '%s' first or use --force to override\n", 
local_attr_id); +- } +- return -ENOTUNIQ; +- } + +- } else if(rsc->parent && do_force == FALSE) { ++ if(rsc->parent && do_force == FALSE) { + + switch(rsc->parent->variant) { + case pe_group: + if (BE_QUIET == FALSE) { +- printf("Updating '%s' for '%s' will not apply to its peers in '%s'\n", attr_name, rsc_id, rsc->parent->id); ++ printf("Performing %s of '%s' for '%s' will not apply to its peers in '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); + } + break; + case pe_master: +@@ -242,7 +215,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + if(rc != pcmk_ok) { + rsc = rsc->parent; + if (BE_QUIET == FALSE) { +- printf("Updating '%s' on '%s', the parent of '%s'\n", attr_name, rsc->id, rsc_id); ++ printf("Performing %s of '%s' on '%s', the parent of '%s'\n", cmd, attr_name, rsc->id, rsc_id); + } + } + break; +@@ -251,7 +224,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + } + + } else if (rsc->parent && BE_QUIET == FALSE) { +- printf("Forcing update of '%s' for '%s' instead of '%s'\n", attr_name, rsc_id, rsc->parent->id); ++ printf("Forcing %s of '%s' for '%s' instead of '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); + + } else if(rsc->parent == NULL && rsc->children) { + resource_t *child = rsc->children->data; +@@ -263,7 +236,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + if(rc == pcmk_ok) { + rsc = child; + if (BE_QUIET == FALSE) { +- printf("A value for '%s' already exists in child '%s', updating that instead of '%s'\n", attr_name, lookup_id, rsc_id); ++ printf("A value for '%s' already exists in child '%s', performing %s on that instead of '%s'\n", attr_name, lookup_id, cmd, rsc_id); + } + } + +@@ -272,6 +245,51 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + } + } + ++ return rsc; ++} ++ ++int ++cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, ++ const char *attr_name, const char *attr_value, bool recursive, ++ cib_t * cib, pe_working_set_t * data_set) ++{ ++ int rc = pcmk_ok; ++ static bool need_init = TRUE; ++ ++ char *lookup_id = NULL; ++ char *local_attr_id = NULL; ++ char *local_attr_set = NULL; ++ ++ xmlNode *xml_top = NULL; ++ xmlNode *xml_obj = NULL; ++ ++ bool use_attributes_tag = FALSE; ++ resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); ++ ++ if (rsc == NULL) { ++ return -ENXIO; ++ } ++ ++ if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { ++ if (do_force == FALSE) { ++ rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, ++ XML_TAG_META_SETS, attr_set, attr_id, ++ attr_name, &local_attr_id); ++ if (rc == pcmk_ok && BE_QUIET == FALSE) { ++ printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", ++ uber_parent(rsc)->id, attr_name, local_attr_id); ++ printf(" Delete '%s' first or use --force to override\n", local_attr_id); ++ } ++ free(local_attr_id); ++ if (rc == pcmk_ok) { ++ return -ENOTUNIQ; ++ } ++ } ++ ++ } else { ++ rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "update"); ++ } ++ + lookup_id = clone_strip(rsc->id); /* Could be a cloned group! 
*/ + rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, + &local_attr_id); +@@ -401,25 +419,8 @@ cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const ch + return -ENXIO; + } + +- if(rsc->parent && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { +- +- switch(rsc->parent->variant) { +- case pe_group: +- if (BE_QUIET == FALSE) { +- printf("Removing '%s' for '%s' will not apply to its peers in '%s'\n", attr_name, rsc_id, rsc->parent->id); +- } +- break; +- case pe_master: +- case pe_clone: +- rsc = rsc->parent; +- if (BE_QUIET == FALSE) { +- printf("Removing '%s' from '%s' for '%s'...\n", attr_name, rsc->id, rsc_id); +- } +- break; +- default: +- break; +- } +- ++ if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { ++ rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "delete"); + } + + lookup_id = clone_strip(rsc->id); diff --git a/0038-Fix-crm_resource-Correctly-observe-force-when-deleti.patch b/0038-Fix-crm_resource-Correctly-observe-force-when-deleti.patch new file mode 100644 index 0000000..f5aaaea --- /dev/null +++ b/0038-Fix-crm_resource-Correctly-observe-force-when-deleti.patch @@ -0,0 +1,75 @@ +From: Andrew Beekhof +Date: Thu, 8 Oct 2015 13:38:07 +1100 +Subject: [PATCH] Fix: crm_resource: Correctly observe --force when deleting + and updating attributes + +(cherry picked from commit bd232e36403ea807635cabd336d8bb3101710891) +--- + tools/crm_resource_runtime.c | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 2d51e88..c3f5275 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -123,8 +123,9 @@ find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const cha + xmlNode *xml_search = NULL; + char *xpath_string = NULL; + +- CRM_ASSERT(value != NULL); +- *value = NULL; ++ if(value) { ++ *value = NULL; ++ } + + if(the_cib == NULL) { + return -ENOTCONN; +@@ -176,7 +177,7 @@ find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const cha + crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); + } + +- } else { ++ } else if(value) { + const char *tmp = crm_element_value(xml_search, attr); + + if (tmp) { +@@ -198,8 +199,10 @@ find_matching_attr_resource(resource_t * rsc, const char * rsc_id, const char * + char *lookup_id = NULL; + char *local_attr_id = NULL; + +- if(rsc->parent && do_force == FALSE) { ++ if(do_force == TRUE) { ++ return rsc; + ++ } else if(rsc->parent) { + switch(rsc->parent->variant) { + case pe_group: + if (BE_QUIET == FALSE) { +@@ -270,6 +273,13 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + return -ENXIO; + } + ++ if(attr_id == NULL ++ && do_force == FALSE ++ && pcmk_ok != find_resource_attr( ++ cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL)) { ++ printf("\n"); ++ } ++ + if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { + if (do_force == FALSE) { + rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, +@@ -419,6 +429,13 @@ cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const ch + return -ENXIO; + } + ++ if(attr_id == NULL ++ && do_force == FALSE ++ && find_resource_attr( ++ cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) != pcmk_ok) { ++ printf("\n"); ++ } ++ + if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { + rsc = find_matching_attr_resource(rsc, rsc_id, 
attr_set, attr_id, attr_name, cib, "delete"); + } diff --git a/pacemaker-1.1.8-cast-align.patch b/pacemaker-1.1.8-cast-align.patch deleted file mode 100644 index 634e576..0000000 --- a/pacemaker-1.1.8-cast-align.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/configure.ac b/configure.ac -index be8261a..60fe8ed 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -1681,6 +1681,7 @@ else - -Wall - -Waggregate-return - -Wbad-function-cast -+ -Wcast-align - -Wdeclaration-after-statement - -Wendif-labels - -Wfloat-equal diff --git a/pacemaker-63f8e9a-rollup.patch b/pacemaker-63f8e9a-rollup.patch new file mode 100644 index 0000000..ef14d87 100644 --- /dev/null +++ b/pacemaker-63f8e9a-rollup.patch @@ -0,0 +1,5904 @@ +diff --git a/ChangeLog b/ChangeLog +index d70edbd..e445890 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,4 +1,218 @@ + ++* Wed Jun 24 2015 Andrew Beekhof Pacemaker-1.1.13-1 ++- Update source tarball to revision: 2a1847e ++- Changesets: 750 ++- Diff: 156 files changed, 11323 insertions(+), 3725 deletions(-) ++ ++- Features added since Pacemaker-1.1.12 ++ + Allow fail-counts to be removed en masse when the new attrd is in operation ++ + attrd supports private attributes (not written to CIB) ++ + crmd: Ensure a watchdog device is in use if stonith-watchdog-timeout is configured ++ + crmd: If configured, trigger the watchdog immediately if we lose quorum and no-quorum-policy=suicide ++ + crm_diff: Support generating a difference without version details if --no-version/-u is supplied ++ + crm_resource: Implement an intelligent restart capability ++ + Fencing: Advertise the watchdog device for fencing operations ++ + Fencing: Allow the cluster to recover resources if the watchdog is in use ++ + fencing: cl#5134 - Support random fencing delay to avoid double fencing ++ + mcp: Allow orphan children to initiate node panic via SIGQUIT ++ + mcp: Turn on sbd integration if pacemakerd finds it running ++ + mcp: Two new error codes that result in machine reset or power off ++ + Officially support the resource-discovery attribute for location constraints ++ + PE: Allow natural ordering of colocation sets ++ + PE: Support non-actionable degraded mode for OCF ++ + pengine: cl#5207 - Display "UNCLEAN" for resources running on unclean offline nodes ++ + remote: pcmk remote client tool for use with container wrapper script ++ + Support machine panics for some kinds of errors (via sbd if available) ++ + tools: add crm_resource --wait option ++ + tools: attrd_updater supports --query and --all options ++ + tools: attrd_updater: Allow attributes to be set for other nodes ++ ++- Changes since Pacemaker-1.1.12 ++ + pengine: exclusive discovery implies rsc is only allowed on exclusive subset of nodes ++ + acl: Correctly implement the 'reference' acl directive ++ + acl: Do not delay evaluation of added nodes in some situations ++ + attrd: b22b1fe did uuid test too early ++ + attrd: Clean out the node cache when requested by the admin ++ + attrd: fixes double free in attrd legacy ++ + attrd: properly write attributes for peers once uuid is discovered ++ + attrd: refresh should force an immediate write-out of all attributes ++ + attrd: Simplify how node deletions happen ++ + Bug rhbz#1067544 - Tools: Correctly handle --ban, --move and --locate for master/slave groups ++ + Bug rhbz#1181824 - Ensure the DC can be reliably fenced ++ + cib: Ability to upgrade cib validation schema in legacy mode ++ + cib: Always generate digests for cib diffs in legacy mode ++ + cib: assignment where comparison intended ++ + cib: Avoid 
nodeid conflicts we don't care about ++ + cib: Correctly add "update-origin", "update-client" and "update-user" attributes for cib ++ + cib: Correctly set up signal handlers ++ + cib: Correctly track node state ++ + cib: Do not update on disk backups if we're just querying them ++ + cib: Enable cib legacy mode for plugin-based clusters ++ + cib: Ensure file-based backends treat '-o section' consistently with the native backend ++ + cib: Ensure upgrade operations from a non-DC get an acknowledgement ++ + cib: No need to enforce cib digests for v2 diffs in legacy mode ++ + cib: Revert d153b86 to instantly get cib synchronized in legacy mode ++ + cib: tls sock cleanup for remote cib connections ++ + cli: Ensure subsequent unknown long options are correctly detected ++ + cluster: Invoke crm_remove_conflicting_peer() only when the new node's uname is being assigned in the node cache ++ + common: Increment current and age for lib common as a result of APIs being added ++ + corosync: Bug cl#5232 - Somewhat gracefully handle nodes with invalid UUIDs ++ + corosync: Avoid unnecessary repeated CMAP API calls ++ + crmd/pengine: handle on-fail=ignore properly ++ + crmd: Add "on_node" attribute for *_last_failure_0 lrm resource operations ++ + crmd: All peers need to track node shutdown requests ++ + crmd: Cached copies of transient attributes cease to be valid once a node leaves the membership ++ + crmd: Correctly add the local option that validates against schema for pengine to calculate ++ + crmd: Disable debug logging that results in significant overhead ++ + crmd: do not remove connection resources during re-probe ++ + crmd: don't update fail count twice for same failure ++ + crmd: Ensure remote connection resources timeout properly during 'migrate_from' action ++ + crmd: Ensure throttle_mode() does something on Linux ++ + crmd: Fixes crash when remote connection migration fails ++ + crmd: gracefully handle remote node disconnects during op execution ++ + crmd: Handle remote connection failures while executing ops on remote connection ++ + crmd: include remote nodes when forcing cluster wide resource reprobe ++ + crmd: never stop recurring monitor ops for pcmk remote during incomplete migration ++ + crmd: Prevent the old version of DC from being fenced when it shuts down for rolling-upgrade ++ + crmd: Prevent use-of-NULL during reprobe ++ + crmd: properly update job limit for baremetal remote-nodes ++ + crmd: Remote-node throttle jobs count towards cluster-node hosting connection rsc ++ + crmd: Reset stonith failcount to recover transitioner when the node rejoins ++ + crmd: resolves memory leak in crmd. ++ + crmd: respect start-failure-is-fatal even for artificially injected events ++ + crmd: Wait for all pending operations to complete before poking the policy engine ++ + crmd: When container's host is fenced, cancel in-flight operations ++ + crm_attribute: Correctly update config options when -o crm_config is specified ++ + crm_failcount: Better error reporting when no resource is specified ++ + crm_mon: add exit reason to resource failure output ++ + crm_mon: Fill CRM_notify_node in traps with node's uname rather than node's id if possible ++ + crm_mon: Repair notification delivery when the v2 patch format is in use ++ + crm_node: Correctly remove nodes from the CIB by nodeid ++ + crm_report: More patterns for finding logs on non-DC nodes ++ + crm_resource: Allow resource restart operations to be node specific ++ + crm_resource: avoid deletion of lrm cache on node with resource discovery disabled. 
++ + crm_resource: Calculate how long to wait for a restart based on the resource timeouts ++ + crm_resource: Clean up memory in --restart error paths ++ + crm_resource: Display the locations of all anonymous clone children when supplying the children's common ID ++ + crm_resource: Ensure --restart sets/clears meta attributes ++ + crm_resource: Ensure fail-counts are purged when we redetect the state of all resources ++ + crm_resource: Implement --timeout for resource restart operations ++ + crm_resource: Include group members when calculating the next timeout ++ + crm_resource: Memory leak in error paths ++ + crm_resource: Prevent use-after-free ++ + crm_resource: Repair regression test outputs ++ + crm_resource: Use-after-free when restarting a resource ++ + dbus: ref count leaks ++ + dbus: Ensure both the read and write queues get dispatched ++ + dbus: Fail gracefully if malloc fails ++ + dbus: handle dispatch queue when multiple replies need to be processed ++ + dbus: Notice when dbus connections get disabled ++ + dbus: Remove double-free introduced while trying to make coverity shut up ++ + ensure if B is colocated with A, B can never run without A ++ + fence_legacy: Avoid passing 'port' to cluster-glue agents ++ + fencing: Allow nodes to be purged from the member cache ++ + fencing: Correctly make args for fencing agents ++ + fencing: Correctly wait for self-fencing to occur when the watchdog is in use ++ + fencing: Ensure the hostlist parameter is set for watchdog agents ++ + fencing: Force 'stonith-ng' as the system name ++ + fencing: Gracefully handle invalid metadata from agents ++ + fencing: If configured, wait stonith-watchdog-timer seconds for self-fencing to complete ++ + fencing: Reject actions for devices that haven't been explicitly registered yet ++ + ipc: properly allocate server enforced buffer size on client ++ + ipc: use server enforced buffer during ipc client send ++ + lrmd, services: interpret LSB status codes properly ++ + lrmd: add back support for class heartbeat agents ++ + lrmd: cancel pending async connection during disconnect ++ + lrmd: enable ipc proxy for docker-wrapper privileged mode ++ + lrmd: fix rescheduling of systemd monitor op during start ++ + lrmd: Handle systemd reporting 'done' before a resource is actually stopped ++ + lrmd: Hint to child processes that using sd_notify is not required ++ + lrmd: Log with the correct personality ++ + lrmd: Prevent glib assert triggered by timers being removed from mainloop more than once ++ + lrmd: report original timeout when systemd operation completes ++ + lrmd: store failed operation exit reason in cib ++ + mainloop: resolves race condition mainloop poll involving modification of ipc connections ++ + make targeted reprobe for remote node work, crm_resource -C -N ++ + mcp: Allow a configurable delay when debugging shutdown issues ++ + mcp: Avoid requiring 'export' for SYS-V sysconfig options ++ + Membership: Detect and resolve nodes that change their ID ++ + pacemakerd: resolves memory leak of xml structure in pacemakerd ++ + pengine: ability to launch resources in isolated containers ++ + pengine: add #kind=remote for baremetal remote-nodes ++ + pengine: allow baremetal remote-nodes to recover without requiring fencing when cluster-node fails ++ + pengine: allow remote-nodes to be placed in maintenance mode ++ + pengine: Avoid trailing whitespaces when printing resource state ++ + pengine: cl#5130 - Choose nodes capable of running all the colocated utilization resources ++ + pengine: cl#5130 - Only check 
the capacities of the nodes that are allowed to run the resource ++ + pengine: Correctly compare feature set to determine how to unpack meta attributes ++ + pengine: disable migrations for resources with isolation containers ++ + pengine: disable reloading of resources within isolated container wrappers ++ + pengine: Do not aggregate children in a pending state into the started/stopped/etc lists ++ + pengine: Do not record duplicate copies of the failed actions ++ + pengine: Do not reschedule monitors that are no longer needed while resource definitions have changed ++ + pengine: Fence baremetal remote when recurring monitor op fails ++ + pengine: Fix colocation with unmanaged resources ++ + pengine: Fix the behaviors of multi-state resources with asymmetrical ordering ++ + pengine: fixes pengine crash with orphaned remote node connection resource ++ + pengine: fixes segfault caused by malformed log warning ++ + pengine: handle cloned isolated resources in a sane way ++ + pengine: handle isolated resource scenario, cloned group of isolated resources ++ + pengine: Handle ordering between stateful and migratable resources ++ + pengine: imply stop in container node resources when host node is fenced ++ + pengine: only fence baremetal remote when connection fails or can not be recovered ++ + pengine: only kill process group on timeout when on-fail does not equal block. ++ + pengine: per-node control over resource discovery ++ + pengine: prefer migration target for remote node connections ++ + pengine: prevent disabling rsc discovery per node in certain situations ++ + pengine: Prevent use-after-free in sort_rsc_process_order() ++ + pengine: properly handle ordering during remote connection partial migration ++ + pengine: properly recover remote-nodes when cluster-node proxy goes offline ++ + pengine: remove unnecessary whitespace from notify environment variables ++ + pengine: require-all feature for ordered clones ++ + pengine: Resolve memory leaks ++ + pengine: resource discovery mode for location constraints ++ + pengine: restart master instances on instance attribute changes ++ + pengine: Turn off legacy unpacking of resource options into the meta hashtable ++ + pengine: Watchdog integration is sufficient for fencing ++ + Perform systemd reloads asynchronously ++ + ping: Correctly advertise multiplier default ++ + Prefer to inherit the watchdog timeout from SBD ++ + properly record stop args after reload ++ + provide fake meta data for ra class heartbeat ++ + remote: report timestamps for remote connection resource operations ++ + remote: Treat recv msg timeout as a disconnect ++ + service: Prevent potential use-of-NULL in metadata lookups ++ + solaris: Allow compilation when dirent.d_type is not available ++ + solaris: Correctly replace the linux swab functions ++ + solaris: Disable throttling since /proc doesn't exist ++ + stonith-ng: Correctly observe the watchdog completion timeout ++ + stonith-ng: Correctly track node state ++ + stonith-ng: Reset mainloop source IDs after removing them ++ + systemd: Correctly handle long running stop actions ++ + systemd: Ensure failed monitor operations always return ++ + systemd: Ensure we don't call dbus_message_unref() with NULL ++ + systemd: fix crash caused when canceling in-flight operation ++ + systemd: Kindly ask dbus NOT to kill the process if the dbus connection fails ++ + systemd: Perform actions asynchronously ++ + systemd: Perform monitor operations without blocking ++ + systemd: Tell systemd not to take DBus down from underneath us 
++ + systemd: Trick systemd into not stopping our services before us during shutdown ++ + tools: Improve crm_mon output with certain option combinations ++ + upstart: Monitor actions always return 'ok' or 'not running' ++ + upstart: Perform more parts of monitor operations without blocking ++ + xml: add 'require-all' to xml schema for constraints ++ + xml: cl#5231 - Unset the deleted attributes in the resulting diffs ++ + xml: Clone the latest constraint schema in preparation for changes ++ + xml: Correctly create v1 patchsets when deleting attributes ++ + xml: Do not change the ordering of properties when applying v1 cib diffs ++ + xml: Do not dump deleted attributes ++ + xml: Do not prune leaves from v1 cib diffs that are being created with digests ++ + xml: Ensure ACLs are reapplied before calculating what a replace operation changed ++ + xml: Fix upgrade-1.3.xsl to correctly transform ACL rules with "attribute" ++ + xml: Prevent assert errors in crm_element_value() on applying a patch without version information ++ + xml: Prevent potential use-of-NULL ++ ++ + * Tue Jul 22 2014 Andrew Beekhof Pacemaker-1.1.12-1 + - Update source tarball to revision: 93a037d + - Changesets: 795 +diff --git a/attrd/commands.c b/attrd/commands.c +index 442c5f8..18c0523 100644 +--- a/attrd/commands.c ++++ b/attrd/commands.c +@@ -289,6 +289,9 @@ attrd_client_update(xmlNode *xml) + + crm_info("Expanded %s=%s to %d", attr, value, int_value); + crm_xml_add_int(xml, F_ATTRD_VALUE, int_value); ++ ++ /* Replacing the value frees the previous memory, so re-query it */ ++ value = crm_element_value(xml, F_ATTRD_VALUE); + } + } + +diff --git a/cib/callbacks.c b/cib/callbacks.c +index 71c487e..1452ded 100644 +--- a/cib/callbacks.c ++++ b/cib/callbacks.c +@@ -40,6 +40,8 @@ + #include + #include "common.h" + ++static unsigned long cib_local_bcast_num = 0; ++ + typedef struct cib_local_notify_s { + xmlNode *notify_src; + char *client_id; +@@ -48,7 +50,13 @@ typedef struct cib_local_notify_s { + } cib_local_notify_t; + + int next_client_id = 0; ++ ++#if SUPPORT_PLUGIN ++gboolean legacy_mode = TRUE; ++#else + gboolean legacy_mode = FALSE; ++#endif ++ + qb_ipcs_service_t *ipcs_ro = NULL; + qb_ipcs_service_t *ipcs_rw = NULL; + qb_ipcs_service_t *ipcs_shm = NULL; +@@ -82,8 +90,12 @@ static gboolean cib_read_legacy_mode(void) + return legacy; + } + +-static gboolean cib_legacy_mode(void) ++gboolean cib_legacy_mode(void) + { ++#if SUPPORT_PLUGIN ++ return TRUE; ++#endif ++ + if(cib_read_legacy_mode()) { + return TRUE; + } +@@ -442,6 +454,54 @@ do_local_notify(xmlNode * notify_src, const char *client_id, + } + + static void ++local_notify_destroy_callback(gpointer data) ++{ ++ cib_local_notify_t *notify = data; ++ ++ free_xml(notify->notify_src); ++ free(notify->client_id); ++ free(notify); ++} ++ ++static void ++check_local_notify(int bcast_id) ++{ ++ cib_local_notify_t *notify = NULL; ++ ++ if (!local_notify_queue) { ++ return; ++ } ++ ++ notify = g_hash_table_lookup(local_notify_queue, GINT_TO_POINTER(bcast_id)); ++ ++ if (notify) { ++ do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, ++ notify->from_peer); ++ g_hash_table_remove(local_notify_queue, GINT_TO_POINTER(bcast_id)); ++ } ++} ++ ++static void ++queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, ++ gboolean from_peer) ++{ ++ cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); ++ ++ notify->notify_src = notify_src; ++ notify->client_id = strdup(client_id); ++ notify->sync_reply = sync_reply; ++ 
notify->from_peer = from_peer; ++ ++ if (!local_notify_queue) { ++ local_notify_queue = g_hash_table_new_full(g_direct_hash, ++ g_direct_equal, NULL, ++ local_notify_destroy_callback); ++ } ++ ++ g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), notify); ++} ++ ++static void + parse_local_options_v1(crm_client_t * cib_client, int call_type, int call_options, const char *host, + const char *op, gboolean * local_notify, gboolean * needs_reply, + gboolean * process, gboolean * needs_forward) +@@ -814,9 +874,12 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + int diff_del_admin_epoch = 0; + + const char *digest = NULL; ++ int format = 1; + + CRM_LOG_ASSERT(result_diff != NULL); + digest = crm_element_value(result_diff, XML_ATTR_DIGEST); ++ crm_element_value_int(result_diff, "format", &format); ++ + cib_diff_version_details(result_diff, + &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, + &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); +@@ -829,7 +892,9 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); + crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); + +- CRM_ASSERT(digest != NULL); ++ if (format == 1) { ++ CRM_ASSERT(digest != NULL); ++ } + + add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); + crm_log_xml_explicit(msg, "copy"); +@@ -1039,6 +1104,27 @@ cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean priv + */ + crm_trace("Completed slave update"); + ++ } else if (cib_legacy_mode() && ++ rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { ++ gboolean broadcast = FALSE; ++ ++ cib_local_bcast_num++; ++ crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); ++ broadcast = send_peer_reply(request, result_diff, originator, TRUE); ++ ++ if (broadcast && client_id && local_notify && op_reply) { ++ ++ /* If we have been asked to sync the reply, ++ * and a bcast msg has gone out, we queue the local notify ++ * until we know the bcast message has been received */ ++ local_notify = FALSE; ++ crm_trace("Queuing local %ssync notification for %s", ++ (call_options & cib_sync_call) ? 
"" : "a-", client_id); ++ ++ queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer); ++ op_reply = NULL; /* the reply is queued, so don't free here */ ++ } ++ + } else if (call_options & cib_discard_reply) { + crm_trace("Caller isn't interested in reply"); + +@@ -1322,6 +1408,11 @@ cib_peer_callback(xmlNode * msg, void *private_data) + + if (cib_legacy_mode() && (originator == NULL || crm_str_eq(originator, cib_our_uname, TRUE))) { + /* message is from ourselves */ ++ int bcast_id = 0; ++ ++ if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { ++ check_local_notify(bcast_id); ++ } + return; + + } else if (crm_peer_cache == NULL) { +diff --git a/cib/callbacks.h b/cib/callbacks.h +index 7549a6c..bca9992 100644 +--- a/cib/callbacks.h ++++ b/cib/callbacks.h +@@ -73,6 +73,8 @@ void cib_shutdown(int nsig); + void initiate_exit(void); + void terminate_cib(const char *caller, gboolean fast); + ++extern gboolean cib_legacy_mode(void); ++ + #if SUPPORT_HEARTBEAT + extern void cib_ha_peer_callback(HA_Message * msg, void *private_data); + extern int cib_ccm_dispatch(gpointer user_data); +diff --git a/cib/main.c b/cib/main.c +index 2a48054..e20a2b6 100644 +--- a/cib/main.c ++++ b/cib/main.c +@@ -438,6 +438,13 @@ cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const voi + + if (cib_shutdown_flag && crm_active_peers() < 2 && crm_hash_table_size(client_connections) == 0) { + crm_info("No more peers"); ++ /* @TODO ++ * terminate_cib() calls crm_cluster_disconnect() which calls ++ * crm_peer_destroy() which destroys the peer caches, which a peer ++ * status callback shouldn't do. For now, there is a workaround in ++ * crm_update_peer_proc(), but CIB should be refactored to avoid ++ * destroying the peer caches here. 
++ */ + terminate_cib(__FUNCTION__, FALSE); + } + } +diff --git a/cib/messages.c b/cib/messages.c +index 9c66349..363562c 100644 +--- a/cib/messages.c ++++ b/cib/messages.c +@@ -297,7 +297,14 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml + crm_xml_add(up, F_CIB_CALLOPTS, crm_element_value(req, F_CIB_CALLOPTS)); + crm_xml_add(up, F_CIB_CALLID, crm_element_value(req, F_CIB_CALLID)); + +- send_cluster_message(NULL, crm_msg_cib, up, FALSE); ++ if (cib_legacy_mode() && cib_is_master) { ++ rc = cib_process_upgrade( ++ op, options, section, up, input, existing_cib, result_cib, answer); ++ ++ } else { ++ send_cluster_message(NULL, crm_msg_cib, up, FALSE); ++ } ++ + free_xml(up); + + } else if(rc == pcmk_ok) { +diff --git a/crmd/lrm.c b/crmd/lrm.c +index 74fede4..062f769 100644 +--- a/crmd/lrm.c ++++ b/crmd/lrm.c +@@ -454,8 +454,6 @@ get_rsc_metadata(const char *type, const char *rclass, const char *provider, boo + + snprintf(key, len, "%s::%s:%s", type, rclass, provider); + if(force == FALSE) { +- snprintf(key, len, "%s::%s:%s", type, rclass, provider); +- + crm_trace("Retreiving cached metadata for %s", key); + metadata = g_hash_table_lookup(metadata_hash, key); + } +@@ -581,7 +579,7 @@ resource_supports_action(xmlNode *metadata, const char *name) + for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { + if (crm_str_eq((const char *)action->name, "action", TRUE)) { + value = crm_element_value(action, "name"); +- if (safe_str_eq("reload", value)) { ++ if (safe_str_eq(name, value)) { + return TRUE; + } + } +@@ -606,16 +604,18 @@ append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, + + if(resource_supports_action(metadata, "reload")) { + restart = create_xml_node(NULL, XML_TAG_PARAMS); +- list = build_parameter_list(op, metadata, restart, "unique", FALSE, FALSE); +- } ++ /* Any parameters with unique="1" should be added into the "op-force-restart" list. */ ++ list = build_parameter_list(op, metadata, restart, "unique", TRUE, FALSE); + +- if (list == NULL) { ++ } else { + /* Resource does not support reloads */ + return; + } + + digest = calculate_operation_digest(restart, version); +- crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list); ++ /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, ++ * no matter if it actually supports any parameters with unique="1". */ ++ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); + crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); + + crm_trace("%s: %s, %s", op->rsc_id, digest, list); +diff --git a/crmd/throttle.c b/crmd/throttle.c +index 165050c..169594b 100644 +--- a/crmd/throttle.c ++++ b/crmd/throttle.c +@@ -92,41 +92,60 @@ int throttle_num_cores(void) + return cores; + } + ++/* ++ * \internal ++ * \brief Return name of /proc file containing the CIB daemon's load statistics ++ * ++ * \return Newly allocated memory with file name on success, NULL otherwise ++ * ++ * \note It is the caller's responsibility to free the return value. ++ * This will return NULL if the daemon is being run via valgrind. ++ * This should be called only on Linux systems. 
++ */ + static char *find_cib_loadfile(void) + { + DIR *dp; + struct dirent *entry; + struct stat statbuf; + char *match = NULL; ++ char procpath[128]; ++ char value[64]; ++ char key[16]; + + dp = opendir("/proc"); + if (!dp) { + /* no proc directory to search through */ + crm_notice("Can not read /proc directory to track existing components"); +- return FALSE; ++ return NULL; + } + ++ /* Iterate through contents of /proc */ + while ((entry = readdir(dp)) != NULL) { +- char procpath[128]; +- char value[64]; +- char key[16]; + FILE *file; + int pid; + +- strcpy(procpath, "/proc/"); +- /* strlen("/proc/") + strlen("/status") + 1 = 14 +- * 128 - 14 = 114 */ +- strncat(procpath, entry->d_name, 114); +- +- if (lstat(procpath, &statbuf)) { ++ /* We're only interested in entries whose name is a PID, ++ * so skip anything non-numeric or that is too long. ++ * ++ * 114 = 128 - strlen("/proc/") - strlen("/status") - 1 ++ */ ++ pid = atoi(entry->d_name); ++ if ((pid <= 0) || (strlen(entry->d_name) > 114)) { + continue; + } +- if (!S_ISDIR(statbuf.st_mode) || !isdigit(entry->d_name[0])) { ++ ++ /* We're only interested in subdirectories */ ++ strcpy(procpath, "/proc/"); ++ strcat(procpath, entry->d_name); ++ if (lstat(procpath, &statbuf) || !S_ISDIR(statbuf.st_mode)) { + continue; + } + ++ /* Read the first entry ("Name:") from the process's status file. ++ * We could handle the valgrind case if we parsed the cmdline file ++ * instead, but that's more of a pain than it's worth. ++ */ + strcat(procpath, "/status"); +- + file = fopen(procpath, "r"); + if (!file) { + continue; +@@ -137,17 +156,11 @@ static char *find_cib_loadfile(void) + } + fclose(file); + +- if (safe_str_neq("cib", value)) { +- continue; +- } +- +- pid = atoi(entry->d_name); +- if (pid <= 0) { +- continue; ++ if (safe_str_eq("cib", value)) { ++ /* We found the CIB! 
*/ ++ match = crm_strdup_printf("/proc/%d/stat", pid); ++ break; + } +- +- match = crm_strdup_printf("/proc/%d/stat", pid); +- break; + } + + closedir(dp); +@@ -214,6 +227,10 @@ static bool throttle_cib_load(float *load) + last_utime = 0; + last_stime = 0; + loadfile = find_cib_loadfile(); ++ if (loadfile == NULL) { ++ crm_warn("Couldn't find CIB load file"); ++ return FALSE; ++ } + ticks_per_s = sysconf(_SC_CLK_TCK); + crm_trace("Found %s", loadfile); + } +diff --git a/cts/CIB.py b/cts/CIB.py +index cdfc7ca..82d02d7 100644 +--- a/cts/CIB.py ++++ b/cts/CIB.py +@@ -312,7 +312,7 @@ Description=Dummy resource that takes a while to start + Type=notify + ExecStart=/usr/bin/python -c 'import time, systemd.daemon; time.sleep(10); systemd.daemon.notify("READY=1"); time.sleep(86400)' + ExecStop=/bin/sleep 10 +-ExecStop=/bin/kill -s KILL $MAINPID ++ExecStop=/bin/kill -s KILL \$MAINPID + """ + + os.system("cat <<-END >/tmp/DummySD.service\n%s\nEND" % (dummy_service_file)) +diff --git a/cts/CTStests.py b/cts/CTStests.py +index 14ab4bf..f817004 100644 +--- a/cts/CTStests.py ++++ b/cts/CTStests.py +@@ -1105,7 +1105,7 @@ class MaintenanceMode(CTSTest): + # fail the resource right after turning Maintenance mode on + # verify it is not recovered until maintenance mode is turned off + if action == "On": +- pats.append("pengine.*: warning: Processing failed op %s for %s on" % (self.action, self.rid)) ++ pats.append("pengine.*: warning:.* Processing failed op %s for %s on" % (self.action, self.rid)) + else: + pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) + pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0")) +@@ -1314,7 +1314,8 @@ class ResourceRecover(CTSTest): + self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id)) + + pats = [] +- pats.append("pengine.*: warning: Processing failed op %s for %s on" % (self.action, self.rid)) ++ pats.append(r"pengine.*: warning:.* Processing failed op %s for (%s|%s) on" % (self.action, ++ rsc.id, rsc.clone_id)) + + if rsc.managed(): + pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) +@@ -2647,32 +2648,31 @@ class RemoteDriver(CTSTest): + self.remote_node_added = 0 + self.remote_rsc_added = 0 + self.remote_rsc = "remote-rsc" ++ self.remote_use_reconnect_interval = self.Env.RandomGen.choice(["true","false"]) + self.cib_cmd = """cibadmin -C -o %s -X '%s' """ + +- def del_rsc(self, node, rsc): +- ++ def get_othernode(self, node): + for othernode in self.Env["nodes"]: + if othernode == node: + # we don't want to try and use the cib that we just shutdown. + # find a cluster node that is not our soon to be remote-node. + continue +- rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc)) +- if rc != 0: +- self.fail_string = ("Removal of resource '%s' failed" % (rsc)) +- self.failed = 1 +- return ++ else: ++ return othernode ++ ++ def del_rsc(self, node, rsc): ++ othernode = self.get_othernode(node) ++ rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc)) ++ if rc != 0: ++ self.fail_string = ("Removal of resource '%s' failed" % (rsc)) ++ self.failed = 1 + + def add_rsc(self, node, rsc_xml): +- for othernode in self.CM.Env["nodes"]: +- if othernode == node: +- # we don't want to try and use the cib that we just shutdown. +- # find a cluster node that is not our soon to be remote-node. 
+- continue +- rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml)) +- if rc != 0: +- self.fail_string = "resource creation failed" +- self.failed = 1 +- return ++ othernode = self.get_othernode(node) ++ rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml)) ++ if rc != 0: ++ self.fail_string = "resource creation failed" ++ self.failed = 1 + + def add_primitive_rsc(self, node): + rsc_xml = """ +@@ -2687,7 +2687,24 @@ class RemoteDriver(CTSTest): + self.remote_rsc_added = 1 + + def add_connection_rsc(self, node): +- rsc_xml = """ ++ if self.remote_use_reconnect_interval == "true": ++ # use reconnect interval and make sure to set cluster-recheck-interval as well. ++ rsc_xml = """ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++""" % (self.remote_node, node) ++ self.rsh(self.get_othernode(node), self.templates["SetCheckInterval"] % ("45s")) ++ else: ++ # not using reconnect interval ++ rsc_xml = """ + + + +@@ -2698,6 +2715,7 @@ class RemoteDriver(CTSTest): + + + """ % (self.remote_node, node) ++ + self.add_rsc(node, rsc_xml) + if self.failed == 0: + self.remote_node_added = 1 +@@ -2836,7 +2854,7 @@ class RemoteDriver(CTSTest): + self.CM.ns.WaitForNodeToComeUp(node, 120); + + pats = [ ] +- watch = self.create_watch(pats, 120) ++ watch = self.create_watch(pats, 200) + watch.setwatch() + pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start")) + if self.remote_rsc_added == 1: +@@ -2927,12 +2945,19 @@ class RemoteDriver(CTSTest): + pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "stop")) + + self.set_timer("remoteMetalCleanup") ++ ++ if self.remote_use_reconnect_interval == "true": ++ self.debug("Cleaning up re-check interval") ++ self.rsh(self.get_othernode(node), self.templates["ClearCheckInterval"]) + if self.remote_rsc_added == 1: ++ self.debug("Cleaning up dummy rsc put on remote node") + self.rsh(node, "crm_resource -U -r %s -N %s" % (self.remote_rsc, self.remote_node)) + self.del_rsc(node, self.remote_rsc) + if self.remote_node_added == 1: ++ self.debug("Cleaning up remote node connection resource") + self.rsh(node, "crm_resource -U -r %s" % (self.remote_node)) + self.del_rsc(node, self.remote_node) ++ + watch.lookforall() + self.log_timer("remoteMetalCleanup") + +diff --git a/cts/environment.py b/cts/environment.py +index 6edf331..a3399c3 100644 +--- a/cts/environment.py ++++ b/cts/environment.py +@@ -160,7 +160,7 @@ class Environment: + self.data["Stack"] = "heartbeat" + + elif name == "openais" or name == "ais" or name == "whitetank": +- self.data["Stack"] = "openais (whitetank)" ++ self.data["Stack"] = "corosync (plugin v0)" + + elif name == "corosync" or name == "cs" or name == "mcp": + self.data["Stack"] = "corosync 2.x" +@@ -351,6 +351,10 @@ class Environment: + self["DoFencing"]=1 + elif args[i+1] == "0" or args[i+1] == "no": + self["DoFencing"]=0 ++ elif args[i+1] == "phd": ++ self["DoStonith"]=1 ++ self["stonith-type"] = "fence_phd_kvm" ++ self["stonith-params"] = "pcmk_arg_map=domain:uname,delay=0" + elif args[i+1] == "rhcs" or args[i+1] == "xvm" or args[i+1] == "virt": + self["DoStonith"]=1 + self["stonith-type"] = "fence_xvm" +diff --git a/cts/patterns.py b/cts/patterns.py +index 8398c7e..1bc05a6 100644 +--- a/cts/patterns.py ++++ b/cts/patterns.py +@@ -32,6 +32,9 @@ class BasePatterns: + + "UUIDQueryCmd" : "crmadmin -N", + ++ "SetCheckInterval" : "cibadmin --modify -c --xml-text ''", ++ "ClearCheckInterval" : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"", ++ + "MaintenanceModeOn" : "cibadmin --modify -c 
--xml-text ''", + "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", + +@@ -291,6 +294,9 @@ class crm_cs_v0(BasePatterns): + r"error:.*Connection to cib_shm failed", + r"error:.*Connection to cib_shm.* closed", + r"error:.*STONITH connection failed", ++ r"error: Connection to stonith-ng failed", ++ r"crit: Fencing daemon connection failed", ++ r"error: Connection to stonith-ng.* closed", + ] + + self.components["corosync"] = [ +diff --git a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt +index 02525d6..a3c02cb 100644 +--- a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt ++++ b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt +@@ -343,7 +343,7 @@ http://www.clusterlabs.org/doc/[Clusters from Scratch] guide for those details. + # cibadmin -C -o resources --xml-file stonith.xml + ---- + +-. Set stonith-enabled to true: ++. Set +stonith-enabled+ to true: + + + ---- + # crm_attribute -t crm_config -n stonith-enabled -v true +@@ -831,3 +831,29 @@ Put together, the configuration looks like this: + + + ---- ++ ++== Remapping Reboots == ++ ++When the cluster needs to reboot a node, whether because +stonith-action+ is +reboot+ or because ++a reboot was manually requested (such as by `stonith_admin --reboot`), it will remap that to ++other commands in two cases: ++ ++. If the chosen fencing device does not support the +reboot+ command, the cluster ++ will ask it to perform +off+ instead. ++ ++. If a fencing topology level with multiple devices must be executed, the cluster ++ will ask all the devices to perform +off+, then ask the devices to perform +on+. ++ ++To understand the second case, consider the example of a node with redundant ++power supplies connected to intelligent power switches. Rebooting one switch ++and then the other would have no effect on the node. Turning both switches off, ++and then on, actually reboots the node. ++ ++In such a case, the fencing operation will be treated as successful as long as ++the +off+ commands succeed, because then it is safe for the cluster to recover ++any resources that were on the node. Timeouts and errors in the +on+ phase will ++be logged but ignored. ++ ++When a reboot operation is remapped, any action-specific timeout for the ++remapped action will be used (for example, +pcmk_off_timeout+ will be used when ++executing the +off+ command, not +pcmk_reboot_timeout+). +diff --git a/doc/asciidoc.reference b/doc/asciidoc.reference +index a9a171b..9323864 100644 +--- a/doc/asciidoc.reference ++++ b/doc/asciidoc.reference +@@ -1,31 +1,49 @@ ++= Single-chapter part of the documentation = ++ ++== Go-to reference chapter for how we use AsciiDoc on this project == ++ ++[NOTE] ++====== ++This is *not* an attempt for fully self-hosted AsciiDoc document, ++consider it a plaintext full of AsciiDoc samples (it's up to the reader ++to recognize the borderline) at documentation writers' disposal ++to somewhat standardize the style{empty}footnote:[ ++ style of both source notation and final visual appearance ++]. 
++ + See also: + http://powerman.name/doc/asciidoc ++====== + -Commands: `some-tool --with option` -Files: '/tmp/file.name' -Italic: _some text_ ++Emphasis: _some text_ + Mono: +some text+ -Bold: *some text* -Super: ^some text^ -Sub: ~some text~ ++Strong: *some text* ++Super: ^some text^ ++Sub: ~some text~ + Quotes: + ``double quoted'' + `single quoted' + -Tool: command ++Command: `some-tool --with option` ++Newly introduced term: ++ 'some text' (another form of emphasis as of this edit) ++ ++File: mono + Literal: mono ++Tool: command ++Option: mono ++Replaceable: emphasis mono + Varname: mono +-Option: italic +-Emphasis: italic bold +-Replaceable: italic mono ++Term encountered on system (e.g., menu choice, hostname): ++ strong + + +-.Title for Eaxmple ++.Title for Example + ===== + Some text + ===== + +-.Title for Eaxmple with XML Listing ++.Title for Example with XML Listing + ===== + [source,XML] + ----- +@@ -49,4 +67,4 @@ Section anchors: + + References to section anchors: + +-<> or <> +\ No newline at end of file ++<> or <> +diff --git a/doc/shared/en-US/pacemaker-intro.txt b/doc/shared/en-US/pacemaker-intro.txt +index bf432fc..6b898c9 100644 +--- a/doc/shared/en-US/pacemaker-intro.txt ++++ b/doc/shared/en-US/pacemaker-intro.txt +@@ -1,41 +1,62 @@ + +-== What Is Pacemaker? == ++== What Is 'Pacemaker'? == + +-Pacemaker is a cluster resource manager. ++Pacemaker is a 'cluster resource manager', that is, a logic responsible ++for a life-cycle of deployed software -- indirectly perhaps even whole ++systems or their interconnections -- under its control within a set of ++computers (a.k.a. 'cluster nodes', 'nodes' for short) and driven by ++prescribed rules. + + It achieves maximum availability for your cluster services +-(aka. resources) by detecting and recovering from node- and ++(a.k.a. 'resources') by detecting and recovering from node- and + resource-level failures by making use of the messaging and membership + capabilities provided by your preferred cluster infrastructure (either + http://www.corosync.org/[Corosync] or +-http://linux-ha.org/wiki/Heartbeat[Heartbeat]). ++http://linux-ha.org/wiki/Heartbeat[Heartbeat]), and possibly by ++utilizing other parts of the overall cluster stack. ++ ++.High Availability Clusters ++[NOTE] ++For *the goal of minimal downtime* a term 'high availability' was coined ++and together with its acronym, 'HA', is well-established in the sector. ++To differentiate this sort of clusters from high performance computing ++('HPC') ones, should a context require it (apparently, not the case in ++this document), using 'HA cluster' is an option. + + Pacemaker's key features include: + + * Detection and recovery of node and service-level failures + * Storage agnostic, no requirement for shared storage + * Resource agnostic, anything that can be scripted can be clustered +- * Supports fencing (aka. 
STONITH) for ensuring data integrity
++ * Supports 'fencing' (also referred to as the 'STONITH' acronym,
++   <<s-intro-stonith>> later on) for ensuring data integrity
+  * Supports large and small clusters
+  * Supports both quorate and resource-driven clusters
+  * Supports practically any redundancy configuration
+- * Automatically replicated configuration that can be updated from any node
+- * Ability to specify cluster-wide service ordering, colocation and anti-colocation
++ * Automatically replicated configuration that can be updated
++   from any node
++ * Ability to specify cluster-wide service ordering,
++   colocation and anti-colocation
+  * Support for advanced service types
+  ** Clones: for services which need to be active on multiple nodes
+- ** Multi-state: for services with multiple modes (eg. master/slave, primary/secondary)
+- * Unified, scriptable, cluster management tools.
++ ** Multi-state: for services with multiple modes
++    (e.g. master/slave, primary/secondary)
++ * Unified, scriptable cluster management tools
+
+ == Pacemaker Architecture ==
+
+ At the highest level, the cluster is made up of three pieces:
+
+- * Non-cluster-aware components. These pieces
++ * *Non-cluster-aware components*. These pieces
+    include the resources themselves; scripts that start, stop and
+    monitor them; and a local daemon that masks the differences
+    between the different standards these scripts implement.
++   Even though interactions of these resources when run as multiple
++   instances can resemble a distributed system, they still lack
++   the proper HA mechanisms and/or autonomous cluster-wide governance
++   subsumed in the following item.
+
+- * Resource management. Pacemaker provides the brain that processes
++ * *Resource management*. Pacemaker provides the brain that processes
+    and reacts to events regarding the cluster. These events include
+    nodes joining or leaving the cluster; resource events caused by
+    failures, maintenance and scheduled activities; and other
+@@ -44,21 +65,24 @@ At the highest level, the cluster is made up of three pieces:
+    events. This may include moving resources, stopping nodes and even
+    forcing them offline with remote power switches.
+
+- * Low-level infrastructure. Projects like Corosync, CMAN and
+-   Heartbeat provide reliable messaging, membership and quorum
++ * *Low-level infrastructure*. Projects like 'Corosync', 'CMAN' and
++   'Heartbeat' provide reliable messaging, membership and quorum
+    information about the cluster.
+
+ When combined with Corosync, Pacemaker also supports popular open
+-source cluster filesystems.
+-footnote:[Even though Pacemaker also supports Heartbeat, the filesystems need
+-to use the stack for messaging and membership, and Corosync seems to be
+-what they're standardizing on. Technically, it would be possible for them to
+-support Heartbeat as well, but there seems little interest in this.]
++source cluster filesystems.{empty}footnote:[
++    Even though Pacemaker also supports Heartbeat, the filesystems need to
++    use the stack for messaging and membership, and Corosync seems to be
++    what they're standardizing on. Technically, it would be possible for
++    them to support Heartbeat as well, but there seems little interest
++    in this.
++]
+
+ Due to past standardization within the cluster filesystem community,
+-cluster filesystems make use of a common distributed lock manager, which makes
+-use of Corosync for its messaging and membership capabilities (which nodes
+-are up/down) and Pacemaker for fencing services.
++cluster filesystems make use of a common 'distributed lock manager', ++which makes use of Corosync for its messaging and membership ++capabilities (which nodes are up/down) and Pacemaker for fencing ++services. + + .The Pacemaker Stack + image::images/pcmk-stack.png["The Pacemaker stack",width="10cm",height="7.5cm",align="center"] +@@ -67,75 +91,79 @@ image::images/pcmk-stack.png["The Pacemaker stack",width="10cm",height="7.5cm",a + + Pacemaker itself is composed of five key components: + +- * Cluster Information Base (CIB) +- * Cluster Resource Management daemon (CRMd) +- * Local Resource Management daemon (LRMd) +- * Policy Engine (PEngine or PE) +- * Fencing daemon (STONITHd) ++ * 'Cluster Information Base' ('CIB') ++ * 'Cluster Resource Management daemon' ('CRMd') ++ * 'Local Resource Management daemon' ('LRMd') ++ * 'Policy Engine' ('PEngine' or 'PE') ++ * Fencing daemon ('STONITHd') + + .Internal Components + image::images/pcmk-internals.png["Subsystems of a Pacemaker cluster",align="center",scaledwidth="65%"] + + The CIB uses XML to represent both the cluster's configuration and + current state of all resources in the cluster. The contents of the CIB +-are automatically kept in sync across the entire cluster and are used +-by the PEngine to compute the ideal state of the cluster and how it +-should be achieved. ++are automatically kept in sync across the entire cluster and are used by ++the PEngine to compute the ideal state of the cluster and how it should ++be achieved. + +-This list of instructions is then fed to the Designated +-Controller (DC). Pacemaker centralizes all cluster decision making by +-electing one of the CRMd instances to act as a master. Should the +-elected CRMd process (or the node it is on) fail, a new one is +-quickly established. ++This list of instructions is then fed to the 'Designated Controller' ++('DC'). Pacemaker centralizes all cluster decision making by electing ++one of the CRMd instances to act as a master. Should the elected CRMd ++process (or the node it is on) fail, a new one is quickly established. + + The DC carries out the PEngine's instructions in the required order by + passing them to either the Local Resource Management daemon (LRMd) or + CRMd peers on other nodes via the cluster messaging infrastructure + (which in turn passes them on to their LRMd process). + +-The peer nodes all report the results of their operations back to the +-DC and, based on the expected and actual results, will either execute +-any actions that needed to wait for the previous one to complete, or +-abort processing and ask the PEngine to recalculate the ideal cluster +-state based on the unexpected results. ++The peer nodes all report the results of their operations back to the DC ++and, based on the expected and actual results, will either execute any ++actions that needed to wait for the previous one to complete, or abort ++processing and ask the PEngine to recalculate the ideal cluster state ++based on the unexpected results. + + In some cases, it may be necessary to power off nodes in order to + protect shared data or complete resource recovery. For this, Pacemaker + comes with STONITHd. + +-STONITH is an acronym for Shoot-The-Other-Node-In-The-Head and is +-usually implemented with a remote power switch. 
++[[s-intro-stonith]]
++.STONITH
++[NOTE]
++*STONITH* is an acronym for 'Shoot-The-Other-Node-In-The-Head',
++a recommended practice whereby a misbehaving node is promptly
++'fenced' (shut off, cut off from shared resources, or otherwise
++immobilized), and is usually implemented with a remote power switch.
+
+ In Pacemaker, STONITH devices are modeled as resources (and configured
+ in the CIB) to enable them to be easily monitored for failure, however
+-STONITHd takes care of understanding the STONITH topology such that
+-its clients simply request a node be fenced, and it does the rest.
++STONITHd takes care of understanding the STONITH topology such that its
++clients simply request a node be fenced, and it does the rest.
+
+ == Types of Pacemaker Clusters ==
+
+ Pacemaker makes no assumptions about your environment. This allows it
+ to support practically any
+ http://en.wikipedia.org/wiki/High-availability_cluster#Node_configurations[redundancy
+-configuration] including Active/Active, Active/Passive, N+1, N+M,
+-N-to-1 and N-to-N.
++configuration] including 'Active/Active', 'Active/Passive', 'N+1',
++'N+M', 'N-to-1' and 'N-to-N'.
+
+ .Active/Passive Redundancy
+ image::images/pcmk-active-passive.png["Active/Passive Redundancy",width="10cm",height="7.5cm",align="center"]
+
+-Two-node Active/Passive clusters using Pacemaker and DRBD are a
+-cost-effective solution for many High Availability situations.
++Two-node Active/Passive clusters using Pacemaker and 'DRBD' are
++a cost-effective solution for many High Availability situations.
+
+ .Shared Failover
+ image::images/pcmk-shared-failover.png["Shared Failover",width="10cm",height="7.5cm",align="center"]
+
+ By supporting many nodes, Pacemaker can dramatically reduce hardware
+ costs by allowing several active/passive clusters to be combined and
+-share a common backup node
++share a common backup node.
+
+ .N to N Redundancy
+ image::images/pcmk-active-active.png["N to N Redundancy",width="10cm",height="7.5cm",align="center"]
+
+-When shared storage is available, every node can potentially be used
+-for failover. Pacemaker can even run multiple copies of services to
+-spread out the workload.
++When shared storage is available, every node can potentially be used for
++failover. Pacemaker can even run multiple copies of services to spread
++out the workload.
+
+diff --git a/extra/resources/Dummy b/extra/resources/Dummy
+index aec2a0c..8a38ef5 100644
+--- a/extra/resources/Dummy
++++ b/extra/resources/Dummy
+@@ -137,7 +137,7 @@ dummy_stop() {
+     if [ $? = $OCF_SUCCESS ]; then
+         rm ${OCF_RESKEY_state}
+     fi
+-    rm ${VERIFY_SERIALIZED_FILE}
++    rm -f ${VERIFY_SERIALIZED_FILE}
+     return $OCF_SUCCESS
+ }
+
+diff --git a/extra/resources/ping b/extra/resources/ping
+index e7b9973..ca9db75 100755
+--- a/extra/resources/ping
++++ b/extra/resources/ping
+@@ -43,8 +43,7 @@ meta_data() {
+ 1.0
+
+
+-Every time the monitor action is run, this resource agent records (in the CIB) the current number of ping nodes the host can connect to.
+-It is essentially the same as pingd except that it uses the system ping tool to obtain the results.
++Every time the monitor action is run, this resource agent records (in the CIB) the current number of nodes the host can connect to using the system fping (preferred) or ping tool.
+
+ node connectivity
+
+diff --git a/fencing/README.md b/fencing/README.md
+new file mode 100644
+index 0000000..a50c69b
+--- /dev/null
++++ b/fencing/README.md
+@@ -0,0 +1,145 @@
++# Directory contents
++
++* `admin.c`, `stonith_admin.8`: `stonith_admin` command-line tool and its man
++  page
++* `commands.c`, `internal.h`, `main.c`, `remote.c`, `stonithd.7`: stonithd and
++  its man page
++* `fence_dummy`, `fence_legacy`, `fence_legacy.8`, `fence_pcmk`,
++  `fence_pcmk.8`: Pacemaker-supplied fence agents and their man pages
++* `regression.py(.in)`: regression tests for `stonithd`
++* `standalone_config.c`, `standalone_config.h`: abandoned project
++* `test.c`: `stonith-test` command-line tool
++
++# How fencing requests are handled
++
++## Bird's eye view
++
++In the broadest terms, stonith works like this:
++
++1. The initiator (an external program such as `stonith_admin`, or the cluster
++   itself via the `crmd`) asks the local `stonithd`, "Hey, can you fence this
++   node?"
++1. The local `stonithd` asks all the `stonithd's` in the cluster (including
++   itself), "Hey, what fencing devices do you have access to that can fence
++   this node?"
++1. Each `stonithd` in the cluster replies with a list of available devices that
++   it knows about.
++1. Once the original `stonithd` gets all the replies, it asks the most
++   appropriate `stonithd` peer to actually carry out the fencing. It may send
++   out more than one such request if the target node must be fenced with
++   multiple devices.
++1. The chosen `stonithd(s)` call the appropriate fencing resource agent(s) to
++   do the fencing, then reply to the original `stonithd` with the result.
++1. The original `stonithd` broadcasts the result to all `stonithd's`.
++1. Each `stonithd` sends the result to each of its local clients (including, at
++   some point, the initiator).
++
++## Detailed view
++
++### Initiating a fencing request
++
++A fencing request can be initiated by the cluster or externally, using the
++libfencing API.
++
++* The cluster always initiates fencing via `crmd/te_actions.c:te_fence_node()`
++  (which calls the `fence()` API). This occurs when a graph synapse contains a
++  `CRM_OP_FENCE` XML operation.
++* The main external clients are `stonith_admin` and `stonith-test`.
++
++Highlights of the fencing API:
++* `stonith_api_new()` creates and returns a new `stonith_t` object, whose
++  `cmds` member has methods for connect, disconnect, fence, etc.
++* the `fence()` method creates and sends a `STONITH_OP_FENCE` XML request with
++  the desired action and target node. Callers do not have to choose or even
++  have any knowledge about particular fencing devices.
++
++### Fencing queries
++
++The function calls for a stonith request go something like this as of this writing:
++
++The local `stonithd` receives the client's request via an IPC or messaging
++layer callback, which calls
++* `stonith_command()`, which (for requests) calls
++  * `handle_request()`, which (for `STONITH_OP_FENCE` from a client) calls
++    * `initiate_remote_stonith_op()`, which creates a `STONITH_OP_QUERY` XML
++      request with the target, desired action, timeout, etc., then broadcasts
++      the operation to the cluster group (i.e. all `stonithd` instances) and
++      starts a timer. The query is broadcast because (1) location constraints
++      might prevent the local node from accessing the stonith device directly,
++      and (2) even if the local node does have direct access, another node
++      might be preferred to carry out the fencing.
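To make the fan-out concrete, here is a minimal, self-contained C sketch of the
"broadcast a query, wait for all peers to answer" step described in the
bird's-eye view above. It is illustrative only, not Pacemaker code:
`struct toy_query_op`, `toy_broadcast_query()` and `toy_handle_reply()` are
invented stand-ins.

```c
#include <stdbool.h>
#include <stdio.h>

/* Invented stand-in for the per-operation bookkeeping stonithd keeps. */
struct toy_query_op {
    const char *target;     /* node to be fenced */
    const char *action;     /* "reboot", "off", ... */
    int replies_expected;   /* number of active stonithd peers */
    int replies_received;   /* counted as peer replies arrive */
};

/* Ask every peer (including ourselves) what devices it can use. */
static void toy_broadcast_query(struct toy_query_op *op, int active_peers)
{
    op->replies_expected = active_peers;
    op->replies_received = 0;
    printf("query: who can run '%s' on %s?\n", op->action, op->target);
}

/* Called once per peer reply; true means all replies are in and the most
 * appropriate peer can now be asked to carry out the fencing. */
static bool toy_handle_reply(struct toy_query_op *op)
{
    op->replies_received++;
    return op->replies_received >= op->replies_expected;
}

int main(void)
{
    struct toy_query_op op = { "node3", "reboot", 0, 0 };

    toy_broadcast_query(&op, 3);
    for (int peer = 0; peer < 3; peer++) {
        if (toy_handle_reply(&op)) {
            printf("all replies in; pick an executing peer\n");
        }
    }
    return 0;
}
```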
++
++Each `stonithd` receives the original `stonithd's STONITH_OP_QUERY` broadcast
++request via IPC or messaging layer callback, which calls:
++* `stonith_command()`, which (for requests) calls
++  * `handle_request()`, which (for `STONITH_OP_QUERY` from a peer) calls
++    * `stonith_query()`, which calls
++      * `get_capable_devices()` with `stonith_query_capable_device_cb()` to add
++        device information to an XML reply and send it. (A message is
++        considered a reply if it contains `T_STONITH_REPLY`, which is only set
++        by `stonithd` peers, not clients.)
++
++The original `stonithd` receives all peers' `STONITH_OP_QUERY` replies via IPC
++or messaging layer callback, which calls:
++* `stonith_command()`, which (for replies) calls
++  * `handle_reply()`, which (for `STONITH_OP_QUERY`) calls
++    * `process_remote_stonith_query()`, which allocates a new query result
++      structure, parses device information into it, and adds it to the
++      operation object. It increments the number of replies received for this
++      operation, and compares it against the expected number of replies (i.e.
++      the number of active peers), and if this is the last expected reply,
++      calls
++      * `call_remote_stonith()`, which calculates the timeout and sends
++        `STONITH_OP_FENCE` request(s) to carry out the fencing. If the target
++        node has a fencing "topology" (which allows specifications such as
++        "this node can be fenced either with device A, or devices B and C in
++        combination"), it will choose the device(s), and send out as many
++        requests as needed. If it chooses a device, it will choose the peer; a
++        peer is preferred if it has "verified" access to the desired device,
++        meaning that it has the device "running" on it and thus has a monitor
++        operation ensuring reachability.
++
++### Fencing operations
++
++Each `STONITH_OP_FENCE` request goes something like this as of this writing:
++
++The chosen peer `stonithd` receives the `STONITH_OP_FENCE` request via IPC or
++messaging layer callback, which calls:
++* `stonith_command()`, which (for requests) calls
++  * `handle_request()`, which (for `STONITH_OP_FENCE` from a peer) calls
++    * `stonith_fence()`, which calls
++      * `schedule_stonith_command()` (using supplied device if
++        `F_STONITH_DEVICE` was set, otherwise the highest-priority capable
++        device obtained via `get_capable_devices()` with
++        `stonith_fence_get_devices_cb()`), which adds the operation to the
++        device's pending operations list and triggers processing.
++
++The chosen peer `stonithd's` mainloop is triggered and calls
++* `stonith_device_dispatch()`, which calls
++  * `stonith_device_execute()`, which pops off the next item from the device's
++    pending operations list. If acting as the (internally implemented) watchdog
++    agent, it panics the node, otherwise it calls
++    * `stonith_action_create()` and `stonith_action_execute_async()` to call the fencing agent.
++
++The chosen peer `stonithd's` mainloop is triggered again once the fencing agent returns, and calls
++* `stonith_action_async_done()` which adds the results to an action object then calls its
++  * done callback (`st_child_done()`), which calls `schedule_stonith_command()`
++    for a new device if there are further required actions to execute or if the
++    original action failed, then builds and sends an XML reply to the original
++    `stonithd` (via `stonith_send_async_reply()`), then checks whether any
++    pending actions are the same as the one just executed and merges them if so.
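The "topology" mentioned above ("device A, or devices B and C in combination")
has simple semantics -- levels are tried in order (OR-ed) and every device
within a level must succeed (AND-ed) -- spelled out in a comment added to
`fencing/internal.h` further down. Below is a toy C sketch of just that
evaluation order, with invented names (`fence_via_topology()`,
`try_device_fn`); it is not the daemon's actual asynchronous implementation.

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for synchronously executing one fence agent. */
typedef bool (*try_device_fn)(const char *device);

/* Try levels in order until one succeeds; a level succeeds only if
 * every device in it succeeds. */
static bool fence_via_topology(const char *const *levels[], int n_levels,
                               try_device_fn try_device)
{
    for (int l = 0; l < n_levels; l++) {
        bool level_ok = true;

        for (const char *const *dev = levels[l]; *dev != NULL; dev++) {
            if (!try_device(*dev)) {
                level_ok = false;   /* one failure fails the whole level */
                break;
            }
        }
        if (level_ok) {
            return true;            /* first fully successful level wins */
        }
    }
    return false;                   /* every level exhausted */
}

static bool demo_try(const char *device)
{
    bool ok = (device[0] != 'f');   /* devices named "f..." fail here */
    printf("executing %s: %s\n", device, ok ? "OK" : "failed");
    return ok;
}

int main(void)
{
    const char *const level1[] = { "failing_switch", NULL };
    const char *const level2[] = { "psu_switch_A", "psu_switch_B", NULL };
    const char *const *levels[] = { level1, level2 };

    return fence_via_topology(levels, 2, demo_try) ? 0 : 1;
}
```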
++
++### Fencing replies
++
++The original `stonithd` receives the `STONITH_OP_FENCE` reply via IPC or
++messaging layer callback, which calls:
++* `stonith_command()`, which (for replies) calls
++  * `handle_reply()`, which calls
++    * `process_remote_stonith_exec()`, which calls either
++      `call_remote_stonith()` (to retry a failed operation, or try the next
++      device in a topology if appropriate, which issues a new
++      `STONITH_OP_FENCE` request, proceeding as before) or `remote_op_done()`
++      (if the operation has definitively failed or succeeded).
++  * `remote_op_done()` broadcasts the result to all peers.
++
++Finally, all peers receive the broadcast result and call
++* `remote_op_done()`, which sends the result to all local clients.
+diff --git a/fencing/commands.c b/fencing/commands.c
+index c9975d3..0d2d614 100644
+--- a/fencing/commands.c
++++ b/fencing/commands.c
+@@ -53,15 +53,24 @@ GHashTable *topology = NULL;
+ GList *cmd_list = NULL;
+
+ struct device_search_s {
++    /* target of fence action */
+     char *host;
++    /* requested fence action */
+     char *action;
++    /* timeout to use if a device is queried dynamically for possible targets */
+     int per_device_timeout;
++    /* number of registered fencing devices at time of request */
+     int replies_needed;
++    /* number of device replies received so far */
+     int replies_received;
++    /* whether the target is eligible to perform requested action (or off) */
+     bool allow_suicide;
+
++    /* private data to pass to search callback function */
+     void *user_data;
++    /* function to call when all replies have been received */
+     void (*callback) (GList * devices, void *user_data);
++    /* devices capable of performing requested action (or off if remapping) */
+     GListPtr capable;
+ };
+
+@@ -173,6 +182,17 @@ get_action_timeout(stonith_device_t * device, const char *action, int default_ti
+     char buffer[64] = { 0, };
+     const char *value = NULL;
+
++    /* If "reboot" was requested but the device does not support it,
++     * we will remap to "off", so check timeout for "off" instead
++     */
++    if (safe_str_eq(action, "reboot")
++        && is_not_set(device->flags, st_device_supports_reboot)) {
++        crm_trace("%s doesn't support reboot, using timeout for off instead",
++                  device->id);
++        action = "off";
++    }
++
++    /* If the device config specified an action-specific timeout, use it */
+     snprintf(buffer, sizeof(buffer) - 1, "pcmk_%s_timeout", action);
+     value = g_hash_table_lookup(device->params, buffer);
+     if (value) {
+@@ -1241,6 +1261,38 @@ search_devices_record_result(struct device_search_s *search, const char *device,
+     }
+ }
+
++/*
++ * \internal
++ * \brief Check whether the local host is allowed to execute a fencing action
++ *
++ * \param[in] device        Fence device to check
++ * \param[in] action        Fence action to check
++ * \param[in] target        Hostname of fence target
++ * \param[in] allow_suicide Whether self-fencing is allowed for this operation
++ *
++ * \return TRUE if local host is allowed to execute action, FALSE otherwise
++ */
++static gboolean
++localhost_is_eligible(const stonith_device_t *device, const char *action,
++                      const char *target, gboolean allow_suicide)
++{
++    gboolean localhost_is_target = safe_str_eq(target, stonith_our_uname);
++
++    if (device && action && device->on_target_actions
++        && strstr(device->on_target_actions, action)) {
++        if (!localhost_is_target) {
++            crm_trace("%s operation with %s can only be executed for localhost not %s",
++                      action, device->id, target);
++            return FALSE;
++        }
++
++    } else if (localhost_is_target && !allow_suicide) {
++
crm_trace("%s operation does not support self-fencing", action); ++ return FALSE; ++ } ++ return TRUE; ++} ++ + static void + can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) + { +@@ -1258,19 +1310,20 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc + goto search_report_results; + } + +- if (dev->on_target_actions && +- search->action && +- strstr(dev->on_target_actions, search->action)) { +- /* this device can only execute this action on the target node */ +- +- if(safe_str_neq(host, stonith_our_uname)) { +- crm_trace("%s operation with %s can only be executed for localhost not %s", +- search->action, dev->id, host); ++ /* Short-circuit query if this host is not allowed to perform the action */ ++ if (safe_str_eq(search->action, "reboot")) { ++ /* A "reboot" *might* get remapped to "off" then "on", so short-circuit ++ * only if all three are disallowed. If only one or two are disallowed, ++ * we'll report that with the results. We never allow suicide for ++ * remapped "on" operations because the host is off at that point. ++ */ ++ if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide) ++ && !localhost_is_eligible(dev, "off", host, search->allow_suicide) ++ && !localhost_is_eligible(dev, "on", host, FALSE)) { + goto search_report_results; + } +- +- } else if(safe_str_eq(host, stonith_our_uname) && search->allow_suicide == FALSE) { +- crm_trace("%s operation does not support self-fencing", search->action); ++ } else if (!localhost_is_eligible(dev, search->action, host, ++ search->allow_suicide)) { + goto search_report_results; + } + +@@ -1423,6 +1476,85 @@ struct st_query_data { + int call_options; + }; + ++/* ++ * \internal ++ * \brief Add action-specific attributes to query reply XML ++ * ++ * \param[in,out] xml XML to add attributes to ++ * \param[in] action Fence action ++ * \param[in] device Fence device ++ */ ++static void ++add_action_specific_attributes(xmlNode *xml, const char *action, ++ stonith_device_t *device) ++{ ++ int action_specific_timeout; ++ int delay_max; ++ ++ CRM_CHECK(xml && action && device, return); ++ ++ if (is_action_required(action, device)) { ++ crm_trace("Action %s is required on %s", action, device->id); ++ crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); ++ } ++ ++ action_specific_timeout = get_action_timeout(device, action, 0); ++ if (action_specific_timeout) { ++ crm_trace("Action %s has timeout %dms on %s", ++ action, action_specific_timeout, device->id); ++ crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); ++ } ++ ++ delay_max = get_action_delay_max(device, action); ++ if (delay_max > 0) { ++ crm_trace("Action %s has maximum random delay %dms on %s", ++ action, delay_max, device->id); ++ crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000); ++ } ++} ++ ++/* ++ * \internal ++ * \brief Add "disallowed" attribute to query reply XML if appropriate ++ * ++ * \param[in,out] xml XML to add attribute to ++ * \param[in] action Fence action ++ * \param[in] device Fence device ++ * \param[in] target Fence target ++ * \param[in] allow_suicide Whether self-fencing is allowed ++ */ ++static void ++add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device, ++ const char *target, gboolean allow_suicide) ++{ ++ if (!localhost_is_eligible(device, action, target, allow_suicide)) { ++ crm_trace("Action %s on %s is disallowed for local host", ++ action, device->id); ++ crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE); ++ } ++} 
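/*
 * Editorial aside, not part of the patch: a self-contained toy version of
 * the remap-aware timeout lookup that get_action_timeout() performs above.
 * The struct and names below are invented for illustration; the real code
 * reads pcmk_<action>_timeout from device->params.
 */
#include <stdbool.h>
#include <string.h>

struct toy_device {
    bool supports_reboot;   /* analogue of st_device_supports_reboot */
    int reboot_timeout;     /* parsed pcmk_reboot_timeout, 0 if unset */
    int off_timeout;        /* parsed pcmk_off_timeout, 0 if unset */
};

static int toy_action_timeout(const struct toy_device *dev,
                              const char *action, int default_timeout)
{
    /* An unsupported "reboot" gets remapped to "off", so the "off"
     * timeout is the one that applies (as Ch-Stonith.txt now documents). */
    if (strcmp(action, "reboot") == 0 && !dev->supports_reboot) {
        action = "off";
    }
    if (strcmp(action, "off") == 0 && dev->off_timeout > 0) {
        return dev->off_timeout;
    }
    if (strcmp(action, "reboot") == 0 && dev->reboot_timeout > 0) {
        return dev->reboot_timeout;
    }
    return default_timeout;
}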
++ ++/* ++ * \internal ++ * \brief Add child element with action-specific values to query reply XML ++ * ++ * \param[in,out] xml XML to add attribute to ++ * \param[in] action Fence action ++ * \param[in] device Fence device ++ * \param[in] target Fence target ++ * \param[in] allow_suicide Whether self-fencing is allowed ++ */ ++static void ++add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device, ++ const char *target, gboolean allow_suicide) ++{ ++ xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); ++ ++ crm_xml_add(child, XML_ATTR_ID, action); ++ add_action_specific_attributes(child, action, device); ++ add_disallowed(child, action, device, target, allow_suicide); ++} ++ + static void + stonith_query_capable_device_cb(GList * devices, void *user_data) + { +@@ -1432,13 +1564,12 @@ stonith_query_capable_device_cb(GList * devices, void *user_data) + xmlNode *list = NULL; + GListPtr lpc = NULL; + +- /* Pack the results into data */ ++ /* Pack the results into XML */ + list = create_xml_node(NULL, __FUNCTION__); + crm_xml_add(list, F_STONITH_TARGET, query->target); + for (lpc = devices; lpc != NULL; lpc = lpc->next) { + stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); +- int action_specific_timeout; +- int delay_max; ++ const char *action = query->action; + + if (!device) { + /* It is possible the device got unregistered while +@@ -1448,24 +1579,44 @@ stonith_query_capable_device_cb(GList * devices, void *user_data) + + available_devices++; + +- action_specific_timeout = get_action_timeout(device, query->action, 0); + dev = create_xml_node(list, F_STONITH_DEVICE); + crm_xml_add(dev, XML_ATTR_ID, device->id); + crm_xml_add(dev, "namespace", device->namespace); + crm_xml_add(dev, "agent", device->agent); + crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); +- if (is_action_required(query->action, device)) { +- crm_xml_add_int(dev, F_STONITH_DEVICE_REQUIRED, 1); +- } +- if (action_specific_timeout) { +- crm_xml_add_int(dev, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); ++ ++ /* If the originating stonithd wants to reboot the node, and we have a ++ * capable device that doesn't support "reboot", remap to "off" instead. ++ */ ++ if (is_not_set(device->flags, st_device_supports_reboot) ++ && safe_str_eq(query->action, "reboot")) { ++ crm_trace("%s doesn't support reboot, using values for off instead", ++ device->id); ++ action = "off"; + } + +- delay_max = get_action_delay_max(device, query->action); +- if (delay_max > 0) { +- crm_xml_add_int(dev, F_STONITH_DELAY_MAX, delay_max / 1000); ++ /* Add action-specific values if available */ ++ add_action_specific_attributes(dev, action, device); ++ if (safe_str_eq(query->action, "reboot")) { ++ /* A "reboot" *might* get remapped to "off" then "on", so after ++ * sending the "reboot"-specific values in the main element, we add ++ * sub-elements for "off" and "on" values. ++ * ++ * We short-circuited earlier if "reboot", "off" and "on" are all ++ * disallowed for the local host. However if only one or two are ++ * disallowed, we send back the results and mark which ones are ++ * disallowed. If "reboot" is disallowed, this might cause problems ++ * with older stonithd versions, which won't check for it. Older ++ * versions will ignore "off" and "on", so they are not a problem. 
++         */
++        add_disallowed(dev, action, device, query->target,
++                       is_set(query->call_options, st_opt_allow_suicide));
++        add_action_reply(dev, "off", device, query->target,
++                         is_set(query->call_options, st_opt_allow_suicide));
++        add_action_reply(dev, "on", device, query->target, FALSE);
+     }
+
++    /* A query without a target wants device parameters */
+     if (query->target == NULL) {
+         xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
+
+@@ -1481,7 +1632,7 @@ stonith_query_capable_device_cb(GList * devices, void *user_data)
+     }
+
+     if (list != NULL) {
+-        crm_trace("Attaching query list output");
++        crm_log_xml_trace(list, "Add query results");
+         add_message_xml(query->reply, F_STONITH_CALLDATA, list);
+     }
+     stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id);
+@@ -1766,6 +1917,14 @@ st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
+             continue;
+         }
+
++        /* Duplicate merging will do the right thing for either type of remapped
++         * reboot. If the executing stonithd remapped an unsupported reboot to
++         * off, then cmd->action will be reboot and will be merged with any
++         * other reboot requests. If the originating stonithd remapped a
++         * topology reboot to off then on, we will get here once with
++         * cmd->action "off" and once with "on", and they will be merged
++         * separately with similar requests.
++         */
+         crm_notice
+             ("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s",
+              cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name);
+diff --git a/fencing/internal.h b/fencing/internal.h
+index 46bd3bf..5fb8f9c 100644
+--- a/fencing/internal.h
++++ b/fencing/internal.h
+@@ -51,6 +51,17 @@ typedef struct stonith_device_s {
+     gboolean api_registered;
+ } stonith_device_t;
+
++/* These values are used to index certain arrays by "phase". Usually an
++ * operation has only one "phase", so phase is always zero. However, some
++ * reboots are remapped to "off" then "on", in which case "reboot" will be
++ * phase 0, "off" will be phase 1 and "on" will be phase 2.
++ */
++enum st_remap_phase {
++    st_phase_requested = 0,
++    st_phase_off = 1,
++    st_phase_on = 2
++};
++
+ typedef struct remote_fencing_op_s {
+     /* The unique id associated with this operation */
+     char *id;
+@@ -97,7 +108,7 @@ typedef struct remote_fencing_op_s {
+     long long call_options;
+
+     /*! The current state of the remote operation. This indicates
+-     * what phase the op is in, query, exec, done, duplicate, failed. */
++     * what stage the op is in: query, exec, done, duplicate, failed. */
+     enum op_state state;
+     /*! The node that owns the remote operation */
+     char *originator;
+@@ -114,10 +125,17 @@ typedef struct remote_fencing_op_s {
+
+     /*! The current topology level being executed */
+     guint level;
+-
+-    /*! List of required devices the topology must execute regardless of what
+-     * topology level they exist at. */
+-    GListPtr required_list;
++    /*! The current operation phase being executed */
++    enum st_remap_phase phase;
++
++    /* For phase 0 or 1 (requested action or a remapped "off"), required devices
++     * will be executed regardless of what topology level is being executed
++     * currently. For phase 2 (remapped "on"), required devices will not be
++     * attempted, because the cluster will execute them automatically when the
++     * node next joins the cluster.
++     */
++    /*! Lists of devices marked as required for each phase */
++    GListPtr required_list[3];
+     /*!
The device list of all the devices at the current executing topology level. */ + GListPtr devices_list; + /*! Current entry in the topology device list */ +@@ -129,6 +147,20 @@ typedef struct remote_fencing_op_s { + + } remote_fencing_op_t; + ++/* ++ * Complex fencing requirements are specified via fencing topologies. ++ * A topology consists of levels; each level is a list of fencing devices. ++ * Topologies are stored in a hash table by node name. When a node needs to be ++ * fenced, if it has an entry in the topology table, the levels are tried ++ * sequentially, and the devices in each level are tried sequentially. ++ * Fencing is considered successful as soon as any level succeeds; ++ * a level is considered successful if all its devices succeed. ++ * Essentially, all devices at a given level are "and-ed" and the ++ * levels are "or-ed". ++ * ++ * This structure is used for the topology table entries. ++ * Topology levels start from 1, so levels[0] is unused and always NULL. ++ */ + typedef struct stonith_topology_s { + char *node; + GListPtr levels[ST_LEVEL_MAX]; +diff --git a/fencing/main.c b/fencing/main.c +index a499175..46d7352 100644 +--- a/fencing/main.c ++++ b/fencing/main.c +@@ -1234,7 +1234,7 @@ struct qb_ipcs_service_handlers ipc_callbacks = { + static void + st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) + { +- if (type == crm_status_uname) { ++ if (type != crm_status_processes) { + /* + * This is a hack until we can send to a nodeid and/or we fix node name lookups + * These messages are ignored in stonith_peer_callback() +diff --git a/fencing/regression.py.in b/fencing/regression.py.in +index fe6d418..b4e6f08 100644 +--- a/fencing/regression.py.in ++++ b/fencing/regression.py.in +@@ -23,861 +23,937 @@ import shlex + import time + + def output_from_command(command): +- test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) +- test.wait() ++ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) ++ test.wait() + +- return test.communicate()[0].split("\n") ++ return test.communicate()[0].split("\n") + + class Test: +- def __init__(self, name, description, verbose = 0, with_cpg = 0): +- self.name = name +- self.description = description +- self.cmds = [] +- self.verbose = verbose ++ def __init__(self, name, description, verbose = 0, with_cpg = 0): ++ self.name = name ++ self.description = description ++ self.cmds = [] ++ self.verbose = verbose + +- self.result_txt = "" +- self.cmd_tool_output = "" +- self.result_exitcode = 0; ++ self.result_txt = "" ++ self.cmd_tool_output = "" ++ self.result_exitcode = 0; + +- self.stonith_options = "-s" +- self.enable_corosync = 0 ++ self.stonith_options = "-s" ++ self.enable_corosync = 0 + +- if with_cpg: +- self.stonith_options = "-c" +- self.enable_corosync = 1 ++ if with_cpg: ++ self.stonith_options = "-c" ++ self.enable_corosync = 1 + +- self.stonith_process = None +- self.stonith_output = "" +- self.stonith_patterns = [] +- self.negative_stonith_patterns = [] ++ self.stonith_process = None ++ self.stonith_output = "" ++ self.stonith_patterns = [] ++ self.negative_stonith_patterns = [] + +- self.executed = 0 ++ self.executed = 0 + +- rsc_classes = output_from_command("crm_resource --list-standards") ++ rsc_classes = output_from_command("crm_resource --list-standards") + +- def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): +- self.cmds.append( +- { +- "cmd" 
: cmd, +- "kill" : kill, +- "args" : args, +- "expected_exitcode" : exitcode, +- "stdout_match" : stdout_match, +- "stdout_negative_match" : stdout_negative_match, +- "no_wait" : no_wait, +- } +- ) ++ def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): ++ self.cmds.append( ++ { ++ "cmd" : cmd, ++ "kill" : kill, ++ "args" : args, ++ "expected_exitcode" : exitcode, ++ "stdout_match" : stdout_match, ++ "stdout_negative_match" : stdout_negative_match, ++ "no_wait" : no_wait, ++ } ++ ) + +- def stop_pacemaker(self): +- cmd = shlex.split("killall -9 -q pacemakerd") +- test = subprocess.Popen(cmd, stdout=subprocess.PIPE) +- test.wait() ++ def stop_pacemaker(self): ++ cmd = shlex.split("killall -9 -q pacemakerd") ++ test = subprocess.Popen(cmd, stdout=subprocess.PIPE) ++ test.wait() + +- def start_environment(self): +- ### make sure we are in full control here ### +- self.stop_pacemaker() ++ def start_environment(self): ++ ### make sure we are in full control here ### ++ self.stop_pacemaker() + +- cmd = shlex.split("killall -9 -q stonithd") +- test = subprocess.Popen(cmd, stdout=subprocess.PIPE) +- test.wait() ++ cmd = shlex.split("killall -9 -q stonithd") ++ test = subprocess.Popen(cmd, stdout=subprocess.PIPE) ++ test.wait() + +- if self.verbose: +- self.stonith_options = self.stonith_options + " -V" +- print "Starting stonithd with %s" % self.stonith_options ++ if self.verbose: ++ self.stonith_options = self.stonith_options + " -V" ++ print "Starting stonithd with %s" % self.stonith_options + +- if os.path.exists("/tmp/stonith-regression.log"): +- os.remove('/tmp/stonith-regression.log') ++ if os.path.exists("/tmp/stonith-regression.log"): ++ os.remove('/tmp/stonith-regression.log') + +- self.stonith_process = subprocess.Popen( +- shlex.split("@CRM_DAEMON_DIR@/stonithd %s -l /tmp/stonith-regression.log" % self.stonith_options)) ++ self.stonith_process = subprocess.Popen( ++ shlex.split("@CRM_DAEMON_DIR@/stonithd %s -l /tmp/stonith-regression.log" % self.stonith_options)) + +- time.sleep(1) +- +- def clean_environment(self): +- if self.stonith_process: +- self.stonith_process.terminate() +- self.stonith_process.wait() +- +- self.stonith_output = "" +- self.stonith_process = None +- +- f = open('/tmp/stonith-regression.log', 'r') +- for line in f.readlines(): +- self.stonith_output = self.stonith_output + line +- +- if self.verbose: +- print "Daemon Output Start" +- print self.stonith_output +- print "Daemon Output End" +- os.remove('/tmp/stonith-regression.log') +- +- def add_stonith_log_pattern(self, pattern): +- self.stonith_patterns.append(pattern) +- +- def add_stonith_negative_log_pattern(self, pattern): +- self.negative_stonith_patterns.append(pattern) +- +- def add_cmd(self, cmd, args): +- self.__new_cmd(cmd, args, 0, "") +- +- def add_cmd_no_wait(self, cmd, args): +- self.__new_cmd(cmd, args, 0, "", 1) +- +- def add_cmd_check_stdout(self, cmd, args, match, no_match = ""): +- self.__new_cmd(cmd, args, 0, match, 0, no_match) +- +- def add_expected_fail_cmd(self, cmd, args, exitcode = 255): +- self.__new_cmd(cmd, args, exitcode, "") +- +- def get_exitcode(self): +- return self.result_exitcode +- +- def print_result(self, filler): +- print "%s%s" % (filler, self.result_txt) +- +- def run_cmd(self, args): +- cmd = shlex.split(args['args']) +- cmd.insert(0, args['cmd']) +- +- if self.verbose: +- print "\n\nRunning: "+" ".join(cmd) +- test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +- +- if args['kill']: +- 
if self.verbose: +- print "Also running: "+args['kill'] +- subprocess.Popen(shlex.split(args['kill'])) +- +- if args['no_wait'] == 0: +- test.wait() +- else: +- return 0 +- +- output_res = test.communicate() +- output = output_res[0] + output_res[1] +- +- if self.verbose: +- print output +- +- if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: +- test.returncode = -2 +- print "STDOUT string '%s' was not found in cmd output: %s" % (args['stdout_match'], output) +- +- if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: +- test.returncode = -2 +- print "STDOUT string '%s' was found in cmd output: %s" % (args['stdout_negative_match'], output) +- +- return test.returncode; +- +- +- def count_negative_matches(self, outline): +- count = 0 +- for line in self.negative_stonith_patterns: +- if outline.count(line): +- count = 1 +- if self.verbose: +- print "This pattern should not have matched = '%s" % (line) +- return count +- +- def match_stonith_patterns(self): +- negative_matches = 0 +- cur = 0 +- pats = self.stonith_patterns +- total_patterns = len(self.stonith_patterns) +- +- if len(self.stonith_patterns) == 0: +- return +- +- for line in self.stonith_output.split("\n"): +- negative_matches = negative_matches + self.count_negative_matches(line) +- if len(pats) == 0: +- continue +- cur = -1 +- for p in pats: +- cur = cur + 1 +- if line.count(pats[cur]): +- del pats[cur] +- break +- +- if len(pats) > 0 or negative_matches: +- if self.verbose: +- for p in pats: +- print "Pattern Not Matched = '%s'" % p +- +- self.result_txt = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." % (self.name, len(pats), total_patterns, negative_matches) +- self.result_exitcode = -1 +- +- def run(self): +- res = 0 +- i = 1 +- self.start_environment() +- +- if self.verbose: +- print "\n--- START TEST - %s" % self.name +- +- self.result_txt = "SUCCESS - '%s'" % (self.name) +- self.result_exitcode = 0 +- for cmd in self.cmds: +- res = self.run_cmd(cmd) +- if res != cmd['expected_exitcode']: +- print "Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']) +- self.result_txt = "FAILURE - '%s' failed at step %d. 
Command: %s %s" % (self.name, i, cmd['cmd'], cmd['args']) +- self.result_exitcode = -1 +- break +- else: +- if self.verbose: +- print "Step %d SUCCESS" % (i) +- i = i + 1 +- self.clean_environment() +- +- if self.result_exitcode == 0: +- self.match_stonith_patterns() +- +- print self.result_txt +- if self.verbose: +- print "--- END TEST - %s\n" % self.name +- +- self.executed = 1 +- return res ++ time.sleep(1) ++ ++ def clean_environment(self): ++ if self.stonith_process: ++ self.stonith_process.terminate() ++ self.stonith_process.wait() ++ ++ self.stonith_output = "" ++ self.stonith_process = None ++ ++ f = open('/tmp/stonith-regression.log', 'r') ++ for line in f.readlines(): ++ self.stonith_output = self.stonith_output + line ++ ++ if self.verbose: ++ print "Daemon Output Start" ++ print self.stonith_output ++ print "Daemon Output End" ++ os.remove('/tmp/stonith-regression.log') ++ ++ def add_stonith_log_pattern(self, pattern): ++ self.stonith_patterns.append(pattern) ++ ++ def add_stonith_negative_log_pattern(self, pattern): ++ self.negative_stonith_patterns.append(pattern) ++ ++ def add_cmd(self, cmd, args): ++ self.__new_cmd(cmd, args, 0, "") ++ ++ def add_cmd_no_wait(self, cmd, args): ++ self.__new_cmd(cmd, args, 0, "", 1) ++ ++ def add_cmd_check_stdout(self, cmd, args, match, no_match = ""): ++ self.__new_cmd(cmd, args, 0, match, 0, no_match) ++ ++ def add_expected_fail_cmd(self, cmd, args, exitcode = 255): ++ self.__new_cmd(cmd, args, exitcode, "") ++ ++ def get_exitcode(self): ++ return self.result_exitcode ++ ++ def print_result(self, filler): ++ print "%s%s" % (filler, self.result_txt) ++ ++ def run_cmd(self, args): ++ cmd = shlex.split(args['args']) ++ cmd.insert(0, args['cmd']) ++ ++ if self.verbose: ++ print "\n\nRunning: "+" ".join(cmd) ++ test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ++ ++ if args['kill']: ++ if self.verbose: ++ print "Also running: "+args['kill'] ++ subprocess.Popen(shlex.split(args['kill'])) ++ ++ if args['no_wait'] == 0: ++ test.wait() ++ else: ++ return 0 ++ ++ output_res = test.communicate() ++ output = output_res[0] + output_res[1] ++ ++ if self.verbose: ++ print output ++ ++ if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: ++ test.returncode = -2 ++ print "STDOUT string '%s' was not found in cmd output: %s" % (args['stdout_match'], output) ++ ++ if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: ++ test.returncode = -2 ++ print "STDOUT string '%s' was found in cmd output: %s" % (args['stdout_negative_match'], output) ++ ++ return test.returncode; ++ ++ ++ def count_negative_matches(self, outline): ++ count = 0 ++ for line in self.negative_stonith_patterns: ++ if outline.count(line): ++ count = 1 ++ if self.verbose: ++ print "This pattern should not have matched = '%s" % (line) ++ return count ++ ++ def match_stonith_patterns(self): ++ negative_matches = 0 ++ cur = 0 ++ pats = self.stonith_patterns ++ total_patterns = len(self.stonith_patterns) ++ ++ if len(self.stonith_patterns) == 0: ++ return ++ ++ for line in self.stonith_output.split("\n"): ++ negative_matches = negative_matches + self.count_negative_matches(line) ++ if len(pats) == 0: ++ continue ++ cur = -1 ++ for p in pats: ++ cur = cur + 1 ++ if line.count(pats[cur]): ++ del pats[cur] ++ break ++ ++ if len(pats) > 0 or negative_matches: ++ if self.verbose: ++ for p in pats: ++ print "Pattern Not Matched = '%s'" % p ++ ++ self.result_txt = "FAILURE - '%s' failed. 
%d patterns out of %d not matched. %d negative matches." % (self.name, len(pats), total_patterns, negative_matches) ++ self.result_exitcode = -1 ++ ++ def run(self): ++ res = 0 ++ i = 1 ++ self.start_environment() ++ ++ if self.verbose: ++ print "\n--- START TEST - %s" % self.name ++ ++ self.result_txt = "SUCCESS - '%s'" % (self.name) ++ self.result_exitcode = 0 ++ for cmd in self.cmds: ++ res = self.run_cmd(cmd) ++ if res != cmd['expected_exitcode']: ++ print "Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']) ++ self.result_txt = "FAILURE - '%s' failed at step %d. Command: %s %s" % (self.name, i, cmd['cmd'], cmd['args']) ++ self.result_exitcode = -1 ++ break ++ else: ++ if self.verbose: ++ print "Step %d SUCCESS" % (i) ++ i = i + 1 ++ self.clean_environment() ++ ++ if self.result_exitcode == 0: ++ self.match_stonith_patterns() ++ ++ print self.result_txt ++ if self.verbose: ++ print "--- END TEST - %s\n" % self.name ++ ++ self.executed = 1 ++ return res + + class Tests: +- def __init__(self, verbose = 0): +- self.tests = [] +- self.verbose = verbose +- self.autogen_corosync_cfg = 0 +- if not os.path.exists("/etc/corosync/corosync.conf"): +- self.autogen_corosync_cfg = 1 +- +- def new_test(self, name, description, with_cpg = 0): +- test = Test(name, description, self.verbose, with_cpg) +- self.tests.append(test) +- return test +- +- def print_list(self): +- print "\n==== %d TESTS FOUND ====" % (len(self.tests)) +- print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION") +- print "%35s - %s" % ("--------------------", "--------------------") +- for test in self.tests: +- print "%35s - %s" % (test.name, test.description) +- print "==== END OF LIST ====\n" +- +- +- def start_corosync(self): +- if self.verbose: +- print "Starting corosync" +- +- test = subprocess.Popen("corosync", stdout=subprocess.PIPE) +- test.wait() +- time.sleep(10) +- +- def stop_corosync(self): +- cmd = shlex.split("killall -9 -q corosync") +- test = subprocess.Popen(cmd, stdout=subprocess.PIPE) +- test.wait() +- +- def run_single(self, name): +- for test in self.tests: +- if test.name == name: +- test.run() +- break; +- +- def run_tests_matching(self, pattern): +- for test in self.tests: +- if test.name.count(pattern) != 0: +- test.run() +- +- def run_cpg_only(self): +- for test in self.tests: +- if test.enable_corosync: +- test.run() +- +- def run_no_cpg(self): +- for test in self.tests: +- if not test.enable_corosync: +- test.run() +- +- def run_tests(self): +- for test in self.tests: +- test.run() +- +- def exit(self): +- for test in self.tests: +- if test.executed == 0: +- continue +- +- if test.get_exitcode() != 0: +- sys.exit(-1) +- +- sys.exit(0) +- +- def print_results(self): +- failures = 0; +- success = 0; +- print "\n\n======= FINAL RESULTS ==========" +- print "\n--- FAILURE RESULTS:" +- for test in self.tests: +- if test.executed == 0: +- continue +- +- if test.get_exitcode() != 0: +- failures = failures + 1 +- test.print_result(" ") +- else: +- success = success + 1 +- +- if failures == 0: +- print " None" +- +- print "\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures) +- def build_api_sanity_tests(self): +- verbose_arg = "" +- if self.verbose: +- verbose_arg = "-V" +- +- test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") +- test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg)) +- +- test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) +- 
test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg)) +- +- def build_custom_timeout_tests(self): +- # custom timeout without topology +- test = self.new_test("cpg_custom_timeout_1", +- "Verify per device timeouts work as expected without using topology.", 1) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4\"") +- test.add_cmd("stonith_admin", "-F node3 -t 2") +- # timeout is 2+1+4 = 7 +- test.add_stonith_log_pattern("remote op timeout set to 7") +- +- # custom timeout _WITH_ topology +- test = self.new_test("cpg_custom_timeout_2", +- "Verify per device timeouts work as expected _WITH_ topology.", 1) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4000\"") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") +- test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") +- test.add_cmd("stonith_admin", "-F node3 -t 2") +- # timeout is 2+1+4000 = 4003 +- test.add_stonith_log_pattern("remote op timeout set to 4003") +- +- def build_fence_merge_tests(self): +- +- ### Simple test that overlapping fencing operations get merged +- test = self.new_test("cpg_custom_merge_single", +- "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd("stonith_admin", "-F node3 -t 10") +- ### one merger will happen +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- ### the pattern below signifies that both the original and duplicate operation completed +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- +- ### Test that multiple mergers occur +- test = self.new_test("cpg_custom_merge_multiple", +- "Verify multiple overlapping identical fencing operations are merged", 1) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd("stonith_admin", "-F node3 -t 10") +- ### 4 mergers should occur +- test.add_stonith_log_pattern("Merging stonith action 
off for node node3 originating from client") +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- ### the pattern below signifies that both the original and duplicate operation completed +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- +- ### Test that multiple mergers occur with topologies used +- test = self.new_test("cpg_custom_merge_with_topology", +- "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") +- test.add_cmd("stonith_admin", "-F node3 -t 10") +- ### 4 mergers should occur +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") +- ### the pattern below signifies that both the original and duplicate operation completed +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- test.add_stonith_log_pattern("Operation off of node3 by") +- +- +- test = self.new_test("cpg_custom_no_merge", +- "Verify differing fencing operations are not merged", 1) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") +- test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") +- test.add_cmd("stonith_admin", "-F node3 -t 10") +- test.add_stonith_negative_log_pattern("Merging stonith action off for node node3 originating from client") +- +- def build_standalone_tests(self): +- test_types = [ +- { +- "prefix" : "standalone" , +- "use_cpg" : 0, +- }, +- { +- 
"prefix" : "cpg" , +- "use_cpg" : 1, +- }, +- ] +- +- # test what happens when all devices timeout +- for test_type in test_types: +- test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], +- "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- if test_type["use_cpg"] == 1: +- test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 194) +- test.add_stonith_log_pattern("remote op timeout set to 6") +- else: +- test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 55) +- +- test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ") +- test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ") +- test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ") +- +- # test what happens when multiple devices can fence a node, but the first device fails. +- for test_type in test_types: +- test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], +- "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-F node3 -t 2") +- +- if test_type["use_cpg"] == 1: +- test.add_stonith_log_pattern("remote op timeout set to 6") +- +- # simple topology test for one device +- for test_type in test_types: +- if test_type["use_cpg"] == 0: +- continue +- +- test = self.new_test("%s_topology_simple" % test_type["prefix"], +- "Verify all fencing devices at a level are used.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") +- test.add_cmd("stonith_admin", "-F node3 -t 2") +- +- test.add_stonith_log_pattern("remote op timeout set to 2") +- test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") +- +- +- # add topology, delete topology, verify fencing still works +- for test_type in test_types: +- if test_type["use_cpg"] == 0: +- continue +- +- test = self.new_test("%s_topology_add_remove" % test_type["prefix"], +- "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") +- test.add_cmd("stonith_admin", "-d node3 -i 1") +- test.add_cmd("stonith_admin", "-F node3 -t 2") +- +- test.add_stonith_log_pattern("remote op timeout set to 2") +- test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") +- +- # test what happens when the first fencing level has multiple devices. 
+- for test_type in test_types: +- if test_type["use_cpg"] == 0: +- continue +- +- test = self.new_test("%s_topology_device_fails" % test_type["prefix"], +- "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") +- test.add_cmd("stonith_admin", "-F node3 -t 20") +- +- test.add_stonith_log_pattern("remote op timeout set to 40") +- test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201") +- test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") +- +- # test what happens when the first fencing level fails. +- for test_type in test_types: +- if test_type["use_cpg"] == 0: +- continue +- +- test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], +- "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") +- test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") +- test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") +- +- test.add_cmd("stonith_admin", "-F node3 -t 2") +- +- test.add_stonith_log_pattern("remote op timeout set to 12") +- test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") +- test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201") +- test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") +- test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") +- +- +- # test what happens when the first fencing level had devices that no one has registered +- for test_type in test_types: +- if test_type["use_cpg"] == 0: +- continue +- +- test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], +- "Verify topology can continue with missing devices.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- +- test.add_cmd("stonith_admin", "-r node3 
-i 1 -v false1") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") +- test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") +- test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") +- +- test.add_cmd("stonith_admin", "-F node3 -t 2") +- +- # Test what happens if multiple fencing levels are defined, and then the first one is removed. +- for test_type in test_types: +- if test_type["use_cpg"] == 0: +- continue +- +- test = self.new_test("%s_topology_level_removal" % test_type["prefix"], +- "Verify level removal works.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") +- +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") +- test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") +- +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") +- test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") +- +- test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") +- test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") +- +- # Now remove level 2, verify none of the devices in level two are hit. +- test.add_cmd("stonith_admin", "-d node3 -i 2") +- +- test.add_cmd("stonith_admin", "-F node3 -t 20") +- +- test.add_stonith_log_pattern("remote op timeout set to 8") +- test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") +- test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: ") +- test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") +- test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") +- +- # test the stonith builds the correct list of devices that can fence a node. 
+- for test_type in test_types: +- test = self.new_test("%s_list_devices" % test_type["prefix"], +- "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- +- test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") +- test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") +- +- # simple test of device monitor +- for test_type in test_types: +- test = self.new_test("%s_monitor" % test_type["prefix"], +- "Verify device is reachable", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") +- +- test.add_cmd("stonith_admin", "-Q true1") +- test.add_cmd("stonith_admin", "-Q false1") +- test.add_expected_fail_cmd("stonith_admin", "-Q true2", 237) +- +- # Verify monitor occurs for duration of timeout period on failure +- for test_type in test_types: +- test = self.new_test("%s_monitor_timeout" % test_type["prefix"], +- "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") +- test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 195) +- test.add_stonith_log_pattern("Attempt 2 to execute") +- +- # Verify monitor occurs for duration of timeout period on failure, but stops at max retries +- for test_type in test_types: +- test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], +- "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") +- test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15",195) +- test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times") +- +- # simple register test +- for test_type in test_types: +- test = self.new_test("%s_register" % test_type["prefix"], +- "Verify devices can be registered and un-registered", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") +- +- test.add_cmd("stonith_admin", "-Q true1") +- +- test.add_cmd("stonith_admin", "-D true1") +- +- test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) +- +- +- # simple reboot test +- for test_type in test_types: +- test = self.new_test("%s_reboot" % test_type["prefix"], +- "Verify devices can be rebooted", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") +- +- test.add_cmd("stonith_admin", "-B node3 -t 2") +- +- test.add_cmd("stonith_admin", "-D true1") +- +- test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) +- +- # test fencing history. 
+- for test_type in test_types: +- if test_type["use_cpg"] == 0: +- continue +- test = self.new_test("%s_fence_history" % test_type["prefix"], +- "Verify last fencing operation is returned.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") +- +- test.add_cmd("stonith_admin", "-F node3 -t 2 -V") +- +- test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") +- +- # simple test of dynamic list query +- for test_type in test_types: +- test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], +- "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list") +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list") +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list") +- +- test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") +- +- +- # fence using dynamic list query +- for test_type in test_types: +- test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], +- "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list") +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list") +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list") +- +- test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V"); +- +- # simple test of query using status action +- for test_type in test_types: +- test = self.new_test("%s_status_query" % test_type["prefix"], +- "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") +- +- test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") +- +- # test what happens when no reboot action is advertised +- for test_type in test_types: +- test = self.new_test("%s_no_reboot_support" % test_type["prefix"], +- "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); +- test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'") +- test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); +- +- # make sure reboot is used when reboot action is advertised +- for test_type in test_types: +- test = self.new_test("%s_with_reboot_support" % test_type["prefix"], +- "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") +- test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); +- test.add_stonith_negative_log_pattern("does not advertise support for 'reboot', performing 'off'") +- test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); +- +- def build_nodeid_tests(self): +- our_uname = output_from_command("uname -n") +- if our_uname: +- our_uname = our_uname[0] +- +- ### verify nodeid is supplied when nodeid is in the metadata parameters +- 
test = self.new_test("cpg_supply_nodeid", +- "Verify nodeid is given when fence agent has nodeid as parameter", 1) +- +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) +- test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) +- +- ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters +- test = self.new_test("cpg_do_not_supply_nodeid", +- "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) +- +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) +- test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) +- +- ### verify nodeid use doesn't explode standalone mode +- test = self.new_test("standalone_do_not_supply_nodeid", +- "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) +- +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) +- test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) +- +- +- def build_unfence_tests(self): +- our_uname = output_from_command("uname -n") +- if our_uname: +- our_uname = our_uname[0] +- +- ### verify unfencing using automatic unfencing +- test = self.new_test("cpg_unfence_required_1", +- "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) +- # both devices should be executed +- test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); +- test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); +- +- +- ### verify unfencing using automatic unfencing fails if any of the required agents fail +- test = self.new_test("cpg_unfence_required_2", +- "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=fail\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), 143) +- +- ### verify unfencing using automatic devices with topology +- test = self.new_test("cpg_unfence_required_3", +- "Verify require unfencing on all devices even when required devices are at different topology levels", 1) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) +- test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) +- 
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); +- test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); +- +- +- ### verify unfencing using automatic devices with topology +- test = self.new_test("cpg_unfence_required_4", +- "Verify all required devices are executed even with topology levels fail.", 1) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true4 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R false4 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname)) +- test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) +- test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); +- test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); +- test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)"); +- test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)"); +- +- ### verify unfencing using on_target device +- test = self.new_test("cpg_unfence_on_target_1", +- "Verify unfencing with on_target = true", 1) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) +- test.add_stonith_log_pattern("(on) to be executed on the target node") +- +- +- ### verify failure of unfencing using on_target device +- test = self.new_test("cpg_unfence_on_target_2", +- "Verify failure unfencing with on_target = true", 1) +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) +- test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 237) +- test.add_stonith_log_pattern("(on) to be executed on the target node") +- +- +- ### verify unfencing using on_target device with topology +- test = self.new_test("cpg_unfence_on_target_3", +- "Verify unfencing with on_target = true using topology", 1) +- +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o 
\"pcmk_host_list=%s node3\"" % (our_uname)) +- +- test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) +- test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) +- +- test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) +- test.add_stonith_log_pattern("(on) to be executed on the target node") +- +- ### verify unfencing using on_target device with topology fails when victim node doesn't exist +- test = self.new_test("cpg_unfence_on_target_4", +- "Verify unfencing failure with on_target = true using topology", 1) +- +- test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) +- test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) +- +- test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1") +- test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2") +- +- test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", 237) +- test.add_stonith_log_pattern("(on) to be executed on the target node") +- +- +- def setup_environment(self, use_corosync): +- if self.autogen_corosync_cfg and use_corosync: +- corosync_conf = (""" ++ def __init__(self, verbose = 0): ++ self.tests = [] ++ self.verbose = verbose ++ self.autogen_corosync_cfg = 0 ++ if not os.path.exists("/etc/corosync/corosync.conf"): ++ self.autogen_corosync_cfg = 1 ++ ++ def new_test(self, name, description, with_cpg = 0): ++ test = Test(name, description, self.verbose, with_cpg) ++ self.tests.append(test) ++ return test ++ ++ def print_list(self): ++ print "\n==== %d TESTS FOUND ====" % (len(self.tests)) ++ print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION") ++ print "%35s - %s" % ("--------------------", "--------------------") ++ for test in self.tests: ++ print "%35s - %s" % (test.name, test.description) ++ print "==== END OF LIST ====\n" ++ ++ ++ def start_corosync(self): ++ if self.verbose: ++ print "Starting corosync" ++ ++ test = subprocess.Popen("corosync", stdout=subprocess.PIPE) ++ test.wait() ++ time.sleep(10) ++ ++ def stop_corosync(self): ++ cmd = shlex.split("killall -9 -q corosync") ++ test = subprocess.Popen(cmd, stdout=subprocess.PIPE) ++ test.wait() ++ ++ def run_single(self, name): ++ for test in self.tests: ++ if test.name == name: ++ test.run() ++ break; ++ ++ def run_tests_matching(self, pattern): ++ for test in self.tests: ++ if test.name.count(pattern) != 0: ++ test.run() ++ ++ def run_cpg_only(self): ++ for test in self.tests: ++ if test.enable_corosync: ++ test.run() ++ ++ def run_no_cpg(self): ++ for test in self.tests: ++ if not test.enable_corosync: ++ test.run() ++ ++ def run_tests(self): ++ for test in self.tests: ++ test.run() ++ ++ def exit(self): ++ for test in self.tests: ++ if test.executed == 0: ++ continue ++ ++ if test.get_exitcode() != 0: ++ sys.exit(-1) ++ ++ sys.exit(0) ++ ++ def print_results(self): ++ failures = 0; ++ success = 0; ++ print "\n\n======= FINAL RESULTS ==========" ++ print "\n--- FAILURE RESULTS:" ++ for test in self.tests: ++ if test.executed == 0: ++ continue ++ ++ if test.get_exitcode() != 0: ++ failures = failures + 1 ++ test.print_result(" ") ++ else: ++ success = success + 1 ++ ++ if failures == 0: ++ print " None" ++ ++ print "\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures) ++ def build_api_sanity_tests(self): ++ verbose_arg = "" ++ if self.verbose: ++ verbose_arg = "-V" ++ ++ test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") ++ 
test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg)) ++ ++ test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) ++ test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg)) ++ ++ def build_custom_timeout_tests(self): ++ # custom timeout without topology ++ test = self.new_test("cpg_custom_timeout_1", ++ "Verify per device timeouts work as expected without using topology.", 1) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4\"") ++ test.add_cmd("stonith_admin", "-F node3 -t 2") ++ # timeout is 2+1+4 = 7 ++ test.add_stonith_log_pattern("remote op timeout set to 7") ++ ++ # custom timeout _WITH_ topology ++ test = self.new_test("cpg_custom_timeout_2", ++ "Verify per device timeouts work as expected _WITH_ topology.", 1) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4000\"") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") ++ test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") ++ test.add_cmd("stonith_admin", "-F node3 -t 2") ++ # timeout is 2+1+4000 = 4003 ++ test.add_stonith_log_pattern("remote op timeout set to 4003") ++ ++ def build_fence_merge_tests(self): ++ ++ ### Simple test that overlapping fencing operations get merged ++ test = self.new_test("cpg_custom_merge_single", ++ "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd("stonith_admin", "-F node3 -t 10") ++ ### one merger will happen ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ ### the pattern below signifies that both the original and duplicate operation completed ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ ++ ### Test that multiple mergers occur ++ test = self.new_test("cpg_custom_merge_multiple", ++ "Verify multiple overlapping identical fencing operations are merged", 1) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") 
++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd("stonith_admin", "-F node3 -t 10") ++ ### 4 mergers should occur ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ ### the pattern below signifies that both the original and duplicate operation completed ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ ++ ### Test that multiple mergers occur with topologies used ++ test = self.new_test("cpg_custom_merge_with_topology", ++ "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") ++ test.add_cmd("stonith_admin", "-F node3 -t 10") ++ ### 4 mergers should occur ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ++ ### the pattern below signifies that both the original and duplicate operation completed ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ test.add_stonith_log_pattern("Operation off of node3 by") ++ ++ ++ test = self.new_test("cpg_custom_no_merge", ++ "Verify differing fencing operations are not merged", 1) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") ++ test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") ++ test.add_cmd("stonith_admin", "-F node3 -t 10") ++ 
test.add_stonith_negative_log_pattern("Merging stonith action off for node node3 originating from client") ++ ++ def build_standalone_tests(self): ++ test_types = [ ++ { ++ "prefix" : "standalone" , ++ "use_cpg" : 0, ++ }, ++ { ++ "prefix" : "cpg" , ++ "use_cpg" : 1, ++ }, ++ ] ++ ++ # test what happens when all devices time out ++ for test_type in test_types: ++ test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], ++ "Verify that when all devices time out, a fencing failure is returned.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ if test_type["use_cpg"] == 1: ++ test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 194) ++ test.add_stonith_log_pattern("remote op timeout set to 6") ++ else: ++ test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 55) ++ ++ test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ") ++ test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ") ++ test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ") ++ ++ # test what happens when multiple devices can fence a node, but the first device fails. ++ for test_type in test_types: ++ test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], ++ "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-F node3 -t 2") ++ ++ if test_type["use_cpg"] == 1: ++ test.add_stonith_log_pattern("remote op timeout set to 6") ++ ++ # simple topology test for one device ++ for test_type in test_types: ++ if test_type["use_cpg"] == 0: ++ continue ++ ++ test = self.new_test("%s_topology_simple" % test_type["prefix"], ++ "Verify all fencing devices at a level are used.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") ++ test.add_cmd("stonith_admin", "-F node3 -t 2") ++ ++ test.add_stonith_log_pattern("remote op timeout set to 2") ++ test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") ++ ++ ++ # add topology, delete topology, verify fencing still works ++ for test_type in test_types: ++ if test_type["use_cpg"] == 0: ++ continue ++ ++ test = self.new_test("%s_topology_add_remove" % test_type["prefix"], ++ "Verify fencing occurs after all topology levels are removed", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") ++ test.add_cmd("stonith_admin", "-d node3 -i 1") ++ test.add_cmd("stonith_admin", "-F node3 -t 2") ++ ++ test.add_stonith_log_pattern("remote op timeout set to 2") ++ test.add_stonith_log_pattern("for host 'node3' with device 
'true' returned: 0") ++ ++ # test what happens when the first fencing level has multiple devices. ++ for test_type in test_types: ++ if test_type["use_cpg"] == 0: ++ continue ++ ++ test = self.new_test("%s_topology_device_fails" % test_type["prefix"], ++ "Verify that if one device in a level fails, the other is tried.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") ++ test.add_cmd("stonith_admin", "-F node3 -t 20") ++ ++ test.add_stonith_log_pattern("remote op timeout set to 40") ++ test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201") ++ test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") ++ ++ # test what happens when the first fencing level fails. ++ for test_type in test_types: ++ if test_type["use_cpg"] == 0: ++ continue ++ ++ test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], ++ "Verify that if one level fails, the next level is tried.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") ++ test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") ++ test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") ++ ++ test.add_cmd("stonith_admin", "-F node3 -t 3") ++ ++ test.add_stonith_log_pattern("remote op timeout set to 18") ++ test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") ++ test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201") ++ test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") ++ test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") ++ ++ ++ # test what happens when the first fencing level has devices that no one has registered ++ for test_type in test_types: ++ if test_type["use_cpg"] == 0: ++ continue ++ ++ test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], ++ "Verify topology can continue with missing devices.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o 
\"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") ++ test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") ++ test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") ++ ++ test.add_cmd("stonith_admin", "-F node3 -t 2") ++ ++ # Test what happens if multiple fencing levels are defined, and then the first one is removed. ++ for test_type in test_types: ++ if test_type["use_cpg"] == 0: ++ continue ++ ++ test = self.new_test("%s_topology_level_removal" % test_type["prefix"], ++ "Verify level removal works.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") ++ test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") ++ ++ test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") ++ test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") ++ ++ # Now remove level 2, verify none of the devices in level two are hit. ++ test.add_cmd("stonith_admin", "-d node3 -i 2") ++ ++ test.add_cmd("stonith_admin", "-F node3 -t 20") ++ ++ test.add_stonith_log_pattern("remote op timeout set to 8") ++ test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") ++ test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: ") ++ test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") ++ test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") ++ ++ # test the stonith builds the correct list of devices that can fence a node. 
++ for test_type in test_types: ++ test = self.new_test("%s_list_devices" % test_type["prefix"], ++ "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ ++ test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") ++ test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") ++ ++ # simple test of device monitor ++ for test_type in test_types: ++ test = self.new_test("%s_monitor" % test_type["prefix"], ++ "Verify device is reachable", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") ++ ++ test.add_cmd("stonith_admin", "-Q true1") ++ test.add_cmd("stonith_admin", "-Q false1") ++ test.add_expected_fail_cmd("stonith_admin", "-Q true2", 237) ++ ++ # Verify monitor occurs for duration of timeout period on failure ++ for test_type in test_types: ++ test = self.new_test("%s_monitor_timeout" % test_type["prefix"], ++ "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") ++ test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 195) ++ test.add_stonith_log_pattern("Attempt 2 to execute") ++ ++ # Verify monitor occurs for duration of timeout period on failure, but stops at max retries ++ for test_type in test_types: ++ test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], ++ "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") ++ test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15",195) ++ test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times") ++ ++ # simple register test ++ for test_type in test_types: ++ test = self.new_test("%s_register" % test_type["prefix"], ++ "Verify devices can be registered and un-registered", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") ++ ++ test.add_cmd("stonith_admin", "-Q true1") ++ ++ test.add_cmd("stonith_admin", "-D true1") ++ ++ test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) ++ ++ ++ # simple reboot test ++ for test_type in test_types: ++ test = self.new_test("%s_reboot" % test_type["prefix"], ++ "Verify devices can be rebooted", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") ++ ++ test.add_cmd("stonith_admin", "-B node3 -t 2") ++ ++ test.add_cmd("stonith_admin", "-D true1") ++ ++ test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) ++ ++ # test fencing history. 
++ for test_type in test_types: ++ if test_type["use_cpg"] == 0: ++ continue ++ test = self.new_test("%s_fence_history" % test_type["prefix"], ++ "Verify last fencing operation is returned.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") ++ ++ test.add_cmd("stonith_admin", "-F node3 -t 2 -V") ++ ++ test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") ++ ++ # simple test of dynamic list query ++ for test_type in test_types: ++ test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], ++ "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list") ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list") ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list") ++ ++ test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") ++ ++ ++ # fence using dynamic list query ++ for test_type in test_types: ++ test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], ++ "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list") ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list") ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list") ++ ++ test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V"); ++ ++ # simple test of query using status action ++ for test_type in test_types: ++ test = self.new_test("%s_status_query" % test_type["prefix"], ++ "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") ++ ++ test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") ++ ++ # test what happens when no reboot action is advertised ++ for test_type in test_types: ++ test = self.new_test("%s_no_reboot_support" % test_type["prefix"], ++ "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); ++ test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'") ++ test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); ++ ++ # make sure reboot is used when reboot action is advertised ++ for test_type in test_types: ++ test = self.new_test("%s_with_reboot_support" % test_type["prefix"], ++ "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") ++ test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); ++ test.add_stonith_negative_log_pattern("does not advertise support for 'reboot', performing 'off'") ++ test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); ++ ++ def build_nodeid_tests(self): ++ our_uname = output_from_command("uname -n") ++ if our_uname: ++ our_uname = our_uname[0] ++ ++ ### verify nodeid is supplied when nodeid is in the metadata parameters ++ 
test = self.new_test("cpg_supply_nodeid", ++ "Verify nodeid is given when fence agent has nodeid as parameter", 1) ++ ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) ++ test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ++ ++ ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters ++ test = self.new_test("cpg_do_not_supply_nodeid", ++ "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) ++ ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) ++ test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ++ ++ ### verify nodeid use doesn't explode standalone mode ++ test = self.new_test("standalone_do_not_supply_nodeid", ++ "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) ++ ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) ++ test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ++ ++ ++ def build_unfence_tests(self): ++ our_uname = output_from_command("uname -n") ++ if our_uname: ++ our_uname = our_uname[0] ++ ++ ### verify unfencing using automatic unfencing ++ test = self.new_test("cpg_unfence_required_1", ++ "Verify unfencing is required on all devices when automatic=true in agent's metadata", 1) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) ++ # both devices should be executed ++ test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); ++ test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); ++ ++ ++ ### verify unfencing using automatic unfencing fails if any of the required agents fail ++ test = self.new_test("cpg_unfence_required_2", ++ "Verify unfencing is required on all devices when automatic=true in agent's metadata", 1) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=fail\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), 143) ++ ++ ### verify unfencing using automatic devices with topology ++ test = self.new_test("cpg_unfence_required_3", ++ "Verify unfencing is required on all devices even when required devices are at different topology levels", 1) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) ++ test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) ++ 
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); ++ test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); ++ ++ ++ ### verify unfencing using automatic devices with topology ++ test = self.new_test("cpg_unfence_required_4", ++ "Verify all required devices are executed even when topology levels fail.", 1) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true4 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R false4 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname)) ++ test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) ++ test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); ++ test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); ++ test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)"); ++ test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)"); ++ ++ ### verify unfencing using on_target device ++ test = self.new_test("cpg_unfence_on_target_1", ++ "Verify unfencing with on_target = true", 1) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) ++ test.add_stonith_log_pattern("(on) to be executed on the target node") ++ ++ ++ ### verify failure of unfencing using on_target device ++ test = self.new_test("cpg_unfence_on_target_2", ++ "Verify unfencing failure with on_target = true", 1) ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) ++ test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 237) ++ test.add_stonith_log_pattern("(on) to be executed on the target node") ++ ++ ++ ### verify unfencing using on_target device with topology ++ test = self.new_test("cpg_unfence_on_target_3", ++ "Verify unfencing with on_target = true using topology", 1) ++ ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o 
\"pcmk_host_list=%s node3\"" % (our_uname)) ++ ++ test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) ++ test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) ++ ++ test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) ++ test.add_stonith_log_pattern("(on) to be executed on the target node") ++ ++ ### verify unfencing using on_target device with topology fails when victim node doesn't exist ++ test = self.new_test("cpg_unfence_on_target_4", ++ "Verify unfencing failure with on_target = true using topology", 1) ++ ++ test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) ++ test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) ++ ++ test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1") ++ test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2") ++ ++ test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", 237) ++ test.add_stonith_log_pattern("(on) to be executed on the target node") ++ ++ def build_remap_tests(self): ++ test = self.new_test("cpg_remap_simple", ++ "Verify sequential topology reboot is remapped to all-off-then-all-on", 1) ++ test.add_cmd("stonith_admin", ++ """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ ++ """-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" """) ++ test.add_cmd("stonith_admin", ++ """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ ++ """-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" """) ++ test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") ++ test.add_cmd("stonith_admin", "-B node_fake -t 5") ++ test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") ++ # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) ++ test.add_stonith_log_pattern("remote op timeout set to 3 for fencing of node node_fake") ++ test.add_stonith_log_pattern("perform op off node_fake with true1") ++ test.add_stonith_log_pattern("perform op off node_fake with true2") ++ test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") ++ # fence_dummy sets "on" as an on_target action ++ test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") ++ test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") ++ test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") ++ ++ test = self.new_test("cpg_remap_automatic", ++ "Verify remapped topology reboot skips automatic 'on'", 1) ++ test.add_cmd("stonith_admin", ++ """-R true1 -a fence_dummy_automatic_unfence """ ++ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", ++ """-R true2 -a fence_dummy_automatic_unfence """ ++ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") ++ test.add_cmd("stonith_admin", "-B node_fake -t 5") ++ test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") ++ test.add_stonith_log_pattern("perform op off node_fake with true1") ++ test.add_stonith_log_pattern("perform op off node_fake with true2") ++ test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") ++ test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") ++ test.add_stonith_negative_log_pattern("perform op on node_fake with") ++ test.add_stonith_negative_log_pattern("'on' failure") ++ ++ 
test = self.new_test("cpg_remap_complex_1", ++ "Verify remapped topology reboot in second level works if non-remapped first level fails", 1) ++ test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v true2") ++ test.add_cmd("stonith_admin", "-B node_fake -t 5") ++ test.add_stonith_log_pattern("perform op reboot node_fake with false1") ++ test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") ++ test.add_stonith_log_pattern("perform op off node_fake with true1") ++ test.add_stonith_log_pattern("perform op off node_fake with true2") ++ test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") ++ test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") ++ test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") ++ test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") ++ ++ test = self.new_test("cpg_remap_complex_2", ++ "Verify remapped topology reboot failure in second level proceeds to third level", 1) ++ test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", """-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", """-R true3 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) ++ test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") ++ test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v false2 -v true3") ++ test.add_cmd("stonith_admin", "-r node_fake -i 3 -v true2") ++ test.add_cmd("stonith_admin", "-B node_fake -t 5") ++ test.add_stonith_log_pattern("perform op reboot node_fake with false1") ++ test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") ++ test.add_stonith_log_pattern("perform op off node_fake with true1") ++ test.add_stonith_log_pattern("perform op off node_fake with false2") ++ test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") ++ test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") ++ test.add_stonith_log_pattern("perform op reboot node_fake with true2") ++ test.add_stonith_negative_log_pattern("node_fake with true3") ++ ++ def setup_environment(self, use_corosync): ++ if self.autogen_corosync_cfg and use_corosync: ++ corosync_conf = (""" + totem { + version: 2 + crypto_cipher: none +@@ -908,15 +984,15 @@ logging { + } + """) + +- os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf)) ++ os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf)) + + +- if use_corosync: +- ### make sure we are in control ### +- self.stop_corosync() +- self.start_corosync() ++ if use_corosync: ++ ### make sure we are in control ### ++ self.stop_corosync() ++ self.start_corosync() + +- monitor_fail_agent = ("""#!/usr/bin/python ++ monitor_fail_agent 
= ("""#!/usr/bin/python + import sys + def main(): + for line in sys.stdin.readlines(): +@@ -927,7 +1003,7 @@ if __name__ == "__main__": + main() + """) + +- dynamic_list_agent = ("""#!/usr/bin/python ++ dynamic_list_agent = ("""#!/usr/bin/python + import sys + def main(): + for line in sys.stdin.readlines(): +@@ -942,140 +1018,141 @@ if __name__ == "__main__": + """) + + +- os.system("cat <<-END >>/usr/sbin/fence_dummy_list\n%s\nEND" % (dynamic_list_agent)) +- os.system("chmod 711 /usr/sbin/fence_dummy_list") ++ os.system("cat <<-END >>/usr/sbin/fence_dummy_list\n%s\nEND" % (dynamic_list_agent)) ++ os.system("chmod 711 /usr/sbin/fence_dummy_list") + +- os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor_fail\n%s\nEND" % (monitor_fail_agent)) +- os.system("chmod 711 /usr/sbin/fence_dummy_monitor_fail") ++ os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor_fail\n%s\nEND" % (monitor_fail_agent)) ++ os.system("chmod 711 /usr/sbin/fence_dummy_monitor_fail") + +- os.system("cp /usr/share/pacemaker/tests/cts/fence_dummy /usr/sbin/fence_dummy") ++ os.system("cp /usr/share/pacemaker/tests/cts/fence_dummy /usr/sbin/fence_dummy") + +- # modifies dummy agent to do require unfencing +- os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/on_target=/automatic=/g' > /usr/sbin/fence_dummy_automatic_unfence"); +- os.system("chmod 711 /usr/sbin/fence_dummy_automatic_unfence") ++ # modifies dummy agent to do require unfencing ++ os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/on_target=/automatic=/g' > /usr/sbin/fence_dummy_automatic_unfence"); ++ os.system("chmod 711 /usr/sbin/fence_dummy_automatic_unfence") + +- # modifies dummy agent to not advertise reboot +- os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/^.*.*//g' > /usr/sbin/fence_dummy_no_reboot"); +- os.system("chmod 711 /usr/sbin/fence_dummy_no_reboot") ++ # modifies dummy agent to not advertise reboot ++ os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/^.*.*//g' > /usr/sbin/fence_dummy_no_reboot"); ++ os.system("chmod 711 /usr/sbin/fence_dummy_no_reboot") + +- def cleanup_environment(self, use_corosync): +- if use_corosync: +- self.stop_corosync() ++ def cleanup_environment(self, use_corosync): ++ if use_corosync: ++ self.stop_corosync() + +- if self.verbose and os.path.exists('/var/log/corosync.log'): +- print "Corosync output" +- f = open('/var/log/corosync.log', 'r') +- for line in f.readlines(): +- print line.strip() +- os.remove('/var/log/corosync.log') ++ if self.verbose and os.path.exists('/var/log/corosync.log'): ++ print "Corosync output" ++ f = open('/var/log/corosync.log', 'r') ++ for line in f.readlines(): ++ print line.strip() ++ os.remove('/var/log/corosync.log') + +- if self.autogen_corosync_cfg: +- os.system("rm -f /etc/corosync/corosync.conf") ++ if self.autogen_corosync_cfg: ++ os.system("rm -f /etc/corosync/corosync.conf") + +- os.system("rm -f /usr/sbin/fence_dummy_monitor_fail") +- os.system("rm -f /usr/sbin/fence_dummy_list") +- os.system("rm -f /usr/sbin/fence_dummy") +- os.system("rm -f /usr/sbin/fence_dummy_automatic_unfence") +- os.system("rm -f /usr/sbin/fence_dummy_no_reboot") ++ os.system("rm -f /usr/sbin/fence_dummy_monitor_fail") ++ os.system("rm -f /usr/sbin/fence_dummy_list") ++ os.system("rm -f /usr/sbin/fence_dummy") ++ os.system("rm -f /usr/sbin/fence_dummy_automatic_unfence") ++ os.system("rm -f /usr/sbin/fence_dummy_no_reboot") + + class TestOptions: +- def __init__(self): +- self.options = {} +- self.options['list-tests'] = 0 +- 
self.options['run-all'] = 1 +- self.options['run-only'] = "" +- self.options['run-only-pattern'] = "" +- self.options['verbose'] = 0 +- self.options['invalid-arg'] = "" +- self.options['cpg-only'] = 0 +- self.options['no-cpg'] = 0 +- self.options['show-usage'] = 0 +- +- def build_options(self, argv): +- args = argv[1:] +- skip = 0 +- for i in range(0, len(args)): +- if skip: +- skip = 0 +- continue +- elif args[i] == "-h" or args[i] == "--help": +- self.options['show-usage'] = 1 +- elif args[i] == "-l" or args[i] == "--list-tests": +- self.options['list-tests'] = 1 +- elif args[i] == "-V" or args[i] == "--verbose": +- self.options['verbose'] = 1 +- elif args[i] == "-n" or args[i] == "--no-cpg": +- self.options['no-cpg'] = 1 +- elif args[i] == "-c" or args[i] == "--cpg-only": +- self.options['cpg-only'] = 1 +- elif args[i] == "-r" or args[i] == "--run-only": +- self.options['run-only'] = args[i+1] +- skip = 1 +- elif args[i] == "-p" or args[i] == "--run-only-pattern": +- self.options['run-only-pattern'] = args[i+1] +- skip = 1 +- +- def show_usage(self): +- print "usage: " + sys.argv[0] + " [options]" +- print "If no options are provided, all tests will run" +- print "Options:" +- print "\t [--help | -h] Show usage" +- print "\t [--list-tests | -l] Print out all registered tests." +- print "\t [--cpg-only | -c] Only run tests that require corosync." +- print "\t [--no-cpg | -n] Only run tests that do not require corosync" +- print "\t [--run-only | -r 'testname'] Run a specific test" +- print "\t [--verbose | -V] Verbose output" +- print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" +- print "\n\tExample: Run only the test 'start_top'" +- print "\t\t python ./regression.py --run-only start_stop" +- print "\n\tExample: Run only the tests with the string 'systemd' present in them" +- print "\t\t python ./regression.py --run-only-pattern systemd" ++ def __init__(self): ++ self.options = {} ++ self.options['list-tests'] = 0 ++ self.options['run-all'] = 1 ++ self.options['run-only'] = "" ++ self.options['run-only-pattern'] = "" ++ self.options['verbose'] = 0 ++ self.options['invalid-arg'] = "" ++ self.options['cpg-only'] = 0 ++ self.options['no-cpg'] = 0 ++ self.options['show-usage'] = 0 ++ ++ def build_options(self, argv): ++ args = argv[1:] ++ skip = 0 ++ for i in range(0, len(args)): ++ if skip: ++ skip = 0 ++ continue ++ elif args[i] == "-h" or args[i] == "--help": ++ self.options['show-usage'] = 1 ++ elif args[i] == "-l" or args[i] == "--list-tests": ++ self.options['list-tests'] = 1 ++ elif args[i] == "-V" or args[i] == "--verbose": ++ self.options['verbose'] = 1 ++ elif args[i] == "-n" or args[i] == "--no-cpg": ++ self.options['no-cpg'] = 1 ++ elif args[i] == "-c" or args[i] == "--cpg-only": ++ self.options['cpg-only'] = 1 ++ elif args[i] == "-r" or args[i] == "--run-only": ++ self.options['run-only'] = args[i+1] ++ skip = 1 ++ elif args[i] == "-p" or args[i] == "--run-only-pattern": ++ self.options['run-only-pattern'] = args[i+1] ++ skip = 1 ++ ++ def show_usage(self): ++ print "usage: " + sys.argv[0] + " [options]" ++ print "If no options are provided, all tests will run" ++ print "Options:" ++ print "\t [--help | -h] Show usage" ++ print "\t [--list-tests | -l] Print out all registered tests." ++ print "\t [--cpg-only | -c] Only run tests that require corosync." 
++ print "\t [--no-cpg | -n] Only run tests that do not require corosync" ++ print "\t [--run-only | -r 'testname'] Run a specific test" ++ print "\t [--verbose | -V] Verbose output" ++ print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" ++ print "\n\tExample: Run only the test 'start_stop'" ++ print "\t\t python ./regression.py --run-only start_stop" ++ print "\n\tExample: Run only the tests with the string 'systemd' present in them" ++ print "\t\t python ./regression.py --run-only-pattern systemd" + + def main(argv): +- o = TestOptions() +- o.build_options(argv) +- +- use_corosync = 1 +- +- tests = Tests(o.options['verbose']) +- tests.build_standalone_tests() +- tests.build_custom_timeout_tests() +- tests.build_api_sanity_tests() +- tests.build_fence_merge_tests() +- tests.build_unfence_tests() +- tests.build_nodeid_tests() +- +- if o.options['list-tests']: +- tests.print_list() +- sys.exit(0) +- elif o.options['show-usage']: +- o.show_usage() +- sys.exit(0) +- +- print "Starting ..." +- +- if o.options['no-cpg']: +- use_corosync = 0 +- +- tests.setup_environment(use_corosync) +- +- if o.options['run-only-pattern'] != "": +- tests.run_tests_matching(o.options['run-only-pattern']) +- tests.print_results() +- elif o.options['run-only'] != "": +- tests.run_single(o.options['run-only']) +- tests.print_results() +- elif o.options['no-cpg']: +- tests.run_no_cpg() +- tests.print_results() +- elif o.options['cpg-only']: +- tests.run_cpg_only() +- tests.print_results() +- else: +- tests.run_tests() +- tests.print_results() +- +- tests.cleanup_environment(use_corosync) +- tests.exit() ++ o = TestOptions() ++ o.build_options(argv) ++ ++ use_corosync = 1 ++ ++ tests = Tests(o.options['verbose']) ++ tests.build_standalone_tests() ++ tests.build_custom_timeout_tests() ++ tests.build_api_sanity_tests() ++ tests.build_fence_merge_tests() ++ tests.build_unfence_tests() ++ tests.build_nodeid_tests() ++ tests.build_remap_tests() ++ ++ if o.options['list-tests']: ++ tests.print_list() ++ sys.exit(0) ++ elif o.options['show-usage']: ++ o.show_usage() ++ sys.exit(0) ++ ++ print "Starting ..." ++ ++ if o.options['no-cpg']: ++ use_corosync = 0 ++ ++ tests.setup_environment(use_corosync) ++ ++ if o.options['run-only-pattern'] != "": ++ tests.run_tests_matching(o.options['run-only-pattern']) ++ tests.print_results() ++ elif o.options['run-only'] != "": ++ tests.run_single(o.options['run-only']) ++ tests.print_results() ++ elif o.options['no-cpg']: ++ tests.run_no_cpg() ++ tests.print_results() ++ elif o.options['cpg-only']: ++ tests.run_cpg_only() ++ tests.print_results() ++ else: ++ tests.run_tests() ++ tests.print_results() ++ ++ tests.cleanup_environment(use_corosync) ++ tests.exit() + if __name__=="__main__": +- main(sys.argv) ++ main(sys.argv) +diff --git a/fencing/remote.c b/fencing/remote.c +index a568035..2c00b5f 100644 +--- a/fencing/remote.c ++++ b/fencing/remote.c +@@ -47,17 +47,37 @@ + + #define TIMEOUT_MULTIPLY_FACTOR 1.2 + ++/* When one stonithd queries its peers for devices able to handle a fencing ++ * request, each peer will reply with a list of such devices available to it. ++ * Each reply will be parsed into a st_query_result_t, with each device's ++ * information kept in a device_properties_t. 
++ */ ++ ++typedef struct device_properties_s { ++ /* Whether access to this device has been verified */ ++ gboolean verified; ++ ++ /* The remaining members are indexed by the operation's "phase" */ ++ ++ /* Whether this device has been executed in each phase */ ++ gboolean executed[3]; ++ /* Whether this device is disallowed from executing in each phase */ ++ gboolean disallowed[3]; ++ /* Action-specific timeout for each phase */ ++ int custom_action_timeout[3]; ++ /* Action-specific maximum random delay for each phase */ ++ int delay_max[3]; ++} device_properties_t; ++ + typedef struct st_query_result_s { ++ /* Name of peer that sent this result */ + char *host; +- int devices; +- /* only try peers for non-topology based operations once */ ++ /* Only try peers for non-topology based operations once */ + gboolean tried; +- GListPtr device_list; +- GHashTable *custom_action_timeouts; +- GHashTable *delay_maxes; +- /* Subset of devices that peer has verified connectivity on */ +- GHashTable *verified_devices; +- ++ /* Number of entries in the devices table */ ++ int ndevices; ++ /* Devices available to this host that are capable of fencing the target */ ++ GHashTable *devices; + } st_query_result_t; + + GHashTable *remote_op_list = NULL; +@@ -67,8 +87,8 @@ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op + int call_options); + + static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); +-static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, +- int default_timeout); ++static int get_op_total_timeout(const remote_fencing_op_t *op, ++ const st_query_result_t *chosen_peer); + + static gint + sort_strings(gconstpointer a, gconstpointer b) +@@ -83,15 +103,126 @@ free_remote_query(gpointer data) + st_query_result_t *query = data; + + crm_trace("Free'ing query result from %s", query->host); ++ g_hash_table_destroy(query->devices); + free(query->host); +- g_list_free_full(query->device_list, free); +- g_hash_table_destroy(query->custom_action_timeouts); +- g_hash_table_destroy(query->delay_maxes); +- g_hash_table_destroy(query->verified_devices); + free(query); + } + } + ++struct peer_count_data { ++ const remote_fencing_op_t *op; ++ gboolean verified_only; ++ int count; ++}; ++ ++/*! ++ * \internal ++ * \brief Increment a counter if a device has not been executed yet ++ * ++ * \param[in] key Device ID (ignored) ++ * \param[in] value Device properties ++ * \param[in] user_data Peer count data ++ */ ++static void ++count_peer_device(gpointer key, gpointer value, gpointer user_data) ++{ ++ device_properties_t *props = (device_properties_t*)value; ++ struct peer_count_data *data = user_data; ++ ++ if (!props->executed[data->op->phase] ++ && (!data->verified_only || props->verified)) { ++ ++(data->count); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Check the number of available devices in a peer's query results ++ * ++ * \param[in] op Operation that results are for ++ * \param[in] peer Peer to count ++ * \param[in] verified_only Whether to count only verified devices ++ * ++ * \return Number of devices available to peer that were not already executed ++ */ ++static int ++count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer, ++ gboolean verified_only) ++{ ++ struct peer_count_data data; ++ ++ data.op = op; ++ data.verified_only = verified_only; ++ data.count = 0; ++ if (peer) { ++ g_hash_table_foreach(peer->devices, count_peer_device, &data); ++ } ++ return data.count; ++} ++ ++/*! 
++ * \internal ++ * \brief Search for a device in a query result ++ * ++ * \param[in] op Operation that result is for ++ * \param[in] peer Query result for a peer ++ * \param[in] device Device ID to search for ++ * ++ * \return Device properties if found, NULL otherwise ++ */ ++static device_properties_t * ++find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer, ++ const char *device) ++{ ++ device_properties_t *props = g_hash_table_lookup(peer->devices, device); ++ ++ return (props && !props->executed[op->phase] ++ && !props->disallowed[op->phase])? props : NULL; ++} ++ ++/*! ++ * \internal ++ * \brief Find a device in a peer's device list and mark it as executed ++ * ++ * \param[in] op Operation that peer result is for ++ * \param[in,out] peer Peer with results to search ++ * \param[in] device ID of device to mark as done ++ * \param[in] verified_devices_only Only consider verified devices ++ * ++ * \return TRUE if device was found and marked, FALSE otherwise ++ */ ++static gboolean ++grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer, ++ const char *device, gboolean verified_devices_only) ++{ ++ device_properties_t *props = find_peer_device(op, peer, device); ++ ++ if ((props == NULL) || (verified_devices_only && !props->verified)) { ++ return FALSE; ++ } ++ ++ crm_trace("Removing %s from %s (%d remaining)", ++ device, peer->host, count_peer_devices(op, peer, FALSE)); ++ props->executed[op->phase] = TRUE; ++ return TRUE; ++} ++ ++/* ++ * \internal ++ * \brief Free the list of required devices for a particular phase ++ * ++ * \param[in,out] op Operation to modify ++ * \param[in] phase Phase to modify ++ */ ++static void ++free_required_list(remote_fencing_op_t *op, enum st_remap_phase phase) ++{ ++ if (op->required_list[phase]) { ++ g_list_free_full(op->required_list[phase], free); ++ op->required_list[phase] = NULL; ++ } ++} ++ + static void + clear_remote_op_timers(remote_fencing_op_t * op) + { +@@ -137,13 +268,100 @@ free_remote_op(gpointer data) + g_list_free_full(op->devices_list, free); + op->devices_list = NULL; + } +- if (op->required_list) { +- g_list_free_full(op->required_list, free); +- op->required_list = NULL; +- } ++ free_required_list(op, st_phase_requested); ++ free_required_list(op, st_phase_off); ++ free_required_list(op, st_phase_on); + free(op); + } + ++/* ++ * \internal ++ * \brief Return an operation's originally requested action (before any remap) ++ * ++ * \param[in] op Operation to check ++ * ++ * \return Operation's original action ++ */ ++static const char * ++op_requested_action(const remote_fencing_op_t *op) ++{ ++ return ((op->phase > st_phase_requested)? "reboot" : op->action); ++} ++ ++/* ++ * \internal ++ * \brief Remap a "reboot" operation to the "off" phase ++ * ++ * \param[in,out] op Operation to remap ++ */ ++static void ++op_phase_off(remote_fencing_op_t *op) ++{ ++ crm_info("Remapping multiple-device reboot of %s (%s) to off", ++ op->target, op->id); ++ op->phase = st_phase_off; ++ ++ /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the ++ * memory allocation at each phase. ++ */ ++ strcpy(op->action, "off"); ++} ++ ++/*! 
++ * \internal ++ * \brief Advance a remapped reboot operation to the "on" phase ++ * ++ * \param[in,out] op Operation to remap ++ */ ++static void ++op_phase_on(remote_fencing_op_t *op) ++{ ++ GListPtr iter = NULL; ++ ++ crm_info("Remapped off of %s complete, remapping to on for %s.%.8s", ++ op->target, op->client_name, op->id); ++ op->phase = st_phase_on; ++ strcpy(op->action, "on"); ++ ++ /* Any devices that are required for "on" will be automatically executed by ++ * the cluster when the node next joins, so we skip them here. ++ */ ++ for (iter = op->required_list[op->phase]; iter != NULL; iter = iter->next) { ++ GListPtr match = g_list_find_custom(op->devices_list, iter->data, ++ sort_strings); ++ ++ if (match) { ++ op->devices_list = g_list_remove(op->devices_list, match->data); ++ } ++ } ++ ++ /* We know this level will succeed, because phase 1 completed successfully ++ * and we ignore any errors from phase 2. So we can free the required list, ++ * which will keep them from being executed after the device list is done. ++ */ ++ free_required_list(op, op->phase); ++ ++ /* Rewind device list pointer */ ++ op->devices = op->devices_list; ++} ++ ++/*! ++ * \internal ++ * \brief Reset a remapped reboot operation ++ * ++ * \param[in,out] op Operation to reset ++ */ ++static void ++undo_op_remap(remote_fencing_op_t *op) ++{ ++ if (op->phase > 0) { ++ crm_info("Undoing remap of reboot of %s for %s.%.8s", ++ op->target, op->client_name, op->id); ++ op->phase = st_phase_requested; ++ strcpy(op->action, "reboot"); ++ } ++} ++ + static xmlNode * + create_op_done_notify(remote_fencing_op_t * op, int rc) + { +@@ -271,6 +489,7 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup) + + op->completed = time(NULL); + clear_remote_op_timers(op); ++ undo_op_remap(op); + + if (op->notify_sent == TRUE) { + crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s", +@@ -279,10 +498,12 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup) + goto remote_op_done_cleanup; + } + +- if (!op->delegate && data) { ++ if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) { + xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE); + if(ndata) { + op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE); ++ } else { ++ op->delegate = crm_element_value_copy(data, F_ORIG); + } + } + +@@ -377,6 +598,16 @@ remote_op_timeout(gpointer userdata) + + crm_debug("Action %s (%s) for %s (%s) timed out", + op->action, op->id, op->target, op->client_name); ++ ++ if (op->phase == st_phase_on) { ++ /* A remapped reboot operation timed out in the "on" phase, but the ++ * "off" phase completed successfully, so quit trying any further ++ * devices, and return success. 
++ */ ++ remote_op_done(op, NULL, pcmk_ok, FALSE); ++ return FALSE; ++ } ++ + op->state = st_failed; + + remote_op_done(op, NULL, -ETIME, FALSE); +@@ -426,22 +657,43 @@ topology_is_empty(stonith_topology_t *tp) + return TRUE; + } + ++/* ++ * \internal ++ * \brief Add a device to the required list for a particular phase ++ * ++ * \param[in,out] op Operation to modify ++ * \param[in] phase Phase to modify ++ * \param[in] device Device ID to add ++ */ + static void +-add_required_device(remote_fencing_op_t * op, const char *device) ++add_required_device(remote_fencing_op_t *op, enum st_remap_phase phase, ++ const char *device) + { +- GListPtr match = g_list_find_custom(op->required_list, device, sort_strings); +- if (match) { +- /* device already marked required */ +- return; ++ GListPtr match = g_list_find_custom(op->required_list[phase], device, ++ sort_strings); ++ ++ if (!match) { ++ op->required_list[phase] = g_list_prepend(op->required_list[phase], ++ strdup(device)); + } +- op->required_list = g_list_prepend(op->required_list, strdup(device)); ++} + +- /* make sure the required devices is in the current list of devices to be executed */ +- if (op->devices_list) { +- GListPtr match = g_list_find_custom(op->devices_list, device, sort_strings); +- if (match == NULL) { +- op->devices_list = g_list_append(op->devices_list, strdup(device)); +- } ++/* ++ * \internal ++ * \brief Remove a device from the required list for the current phase ++ * ++ * \param[in,out] op Operation to modify ++ * \param[in] device Device ID to remove ++ */ ++static void ++remove_required_device(remote_fencing_op_t *op, const char *device) ++{ ++ GListPtr match = g_list_find_custom(op->required_list[op->phase], device, ++ sort_strings); ++ ++ if (match) { ++ op->required_list[op->phase] = g_list_remove(op->required_list[op->phase], ++ match->data); + } + } + +@@ -458,18 +710,6 @@ set_op_device_list(remote_fencing_op_t * op, GListPtr devices) + for (lpc = devices; lpc != NULL; lpc = lpc->next) { + op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); + } +- +- /* tack on whatever required devices have not been executed +- * to the end of the current devices list. This ensures that +- * the required devices will get executed regardless of what topology +- * level they exist at. */ +- for (lpc = op->required_list; lpc != NULL; lpc = lpc->next) { +- GListPtr match = g_list_find_custom(op->devices_list, lpc->data, sort_strings); +- if (match == NULL) { +- op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); +- } +- } +- + op->devices = op->devices_list; + } + +@@ -491,6 +731,7 @@ find_topology_for_host(const char *host) + crm_info("Bad regex '%s' for fencing level", tp->node); + } else { + status = regexec(&r_patt, host, 0, NULL, 0); ++ regfree(&r_patt); + } + + if (status == 0) { +@@ -529,6 +770,9 @@ stonith_topology_next(remote_fencing_op_t * op) + + set_bit(op->call_options, st_opt_topology); + ++ /* This is a new level, so undo any remapping left over from previous */ ++ undo_op_remap(op); ++ + do { + op->level++; + +@@ -539,6 +783,15 @@ stonith_topology_next(remote_fencing_op_t * op) + op->level, op->target, g_list_length(tp->levels[op->level]), + op->client_name, op->originator, op->id); + set_op_device_list(op, tp->levels[op->level]); ++ ++ if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) { ++ /* A reboot has been requested for a topology level with multiple ++ * devices. 
Instead of rebooting the devices sequentially, we will ++ * turn them all off, then turn them all on again. (Think about ++ * switched power outlets for redundant power supplies.) ++ */ ++ op_phase_off(op); ++ } + return pcmk_ok; + } + +@@ -563,6 +816,7 @@ merge_duplicates(remote_fencing_op_t * op) + g_hash_table_iter_init(&iter, remote_op_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { + crm_node_t *peer = NULL; ++ const char *other_action = op_requested_action(other); + + if (other->state > st_exec) { + /* Must be in-progress */ +@@ -570,8 +824,9 @@ merge_duplicates(remote_fencing_op_t * op) + } else if (safe_str_neq(op->target, other->target)) { + /* Must be for the same node */ + continue; +- } else if (safe_str_neq(op->action, other->action)) { +- crm_trace("Must be for the same action: %s vs. ", op->action, other->action); ++ } else if (safe_str_neq(op->action, other_action)) { ++ crm_trace("Must be for the same action: %s vs. %s", ++ op->action, other_action); + continue; + } else if (safe_str_eq(op->client_name, other->client_name)) { + crm_trace("Must be for different clients: %s", op->client_name); +@@ -602,7 +857,7 @@ merge_duplicates(remote_fencing_op_t * op) + if (other->total_timeout == 0) { + crm_trace("Making a best-guess as to the timeout used"); + other->total_timeout = op->total_timeout = +- TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL, op->base_timeout); ++ TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); + } + crm_notice + ("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)", +@@ -792,16 +1047,16 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma + op->id, op->state); + } + +- query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0); ++ query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, ++ NULL, op->call_options); + + crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); + crm_xml_add(query, F_STONITH_TARGET, op->target); +- crm_xml_add(query, F_STONITH_ACTION, op->action); ++ crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op)); + crm_xml_add(query, F_STONITH_ORIGIN, op->originator); + crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); + crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); + crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); +- crm_xml_add_int(query, F_STONITH_CALLOPTS, op->call_options); + + send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); + free_xml(query); +@@ -835,7 +1090,7 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer + st_query_result_t *peer = iter->data; + + crm_trace("Testing result from %s for %s with %d devices: %d %x", +- peer->host, op->target, peer->devices, peer->tried, options); ++ peer->host, op->target, peer->ndevices, peer->tried, options); + if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) { + continue; + } +@@ -844,25 +1099,13 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer + } + + if (is_set(op->call_options, st_opt_topology)) { +- /* Do they have the next device of the current fencing level? 
*/ +- GListPtr match = NULL; +- +- if (verified_devices_only && !g_hash_table_lookup(peer->verified_devices, device)) { +- continue; +- } + +- match = g_list_find_custom(peer->device_list, device, sort_strings); +- if (match) { +- crm_trace("Removing %s from %s (%d remaining)", (char *)match->data, peer->host, +- g_list_length(peer->device_list)); +- peer->device_list = g_list_remove(peer->device_list, match->data); ++ if (grab_peer_device(op, peer, device, verified_devices_only)) { + return peer; + } + +- } else if (peer->devices > 0 && peer->tried == FALSE) { +- if (verified_devices_only && !g_hash_table_size(peer->verified_devices)) { +- continue; +- } ++ } else if ((peer->tried == FALSE) ++ && count_peer_devices(op, peer, verified_devices_only)) { + + /* No topology: Use the current best peer */ + crm_trace("Simple fencing"); +@@ -883,11 +1126,14 @@ stonith_choose_peer(remote_fencing_op_t * op) + do { + if (op->devices) { + device = op->devices->data; +- crm_trace("Checking for someone to fence %s with %s", op->target, device); ++ crm_trace("Checking for someone to fence (%s) %s with %s", ++ op->action, op->target, device); + } else { +- crm_trace("Checking for someone to fence %s", op->target); ++ crm_trace("Checking for someone to fence (%s) %s", ++ op->action, op->target); + } + ++ /* Best choice is a peer other than the target with verified access */ + peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); + if (peer) { + crm_trace("Found verified peer %s for %s", peer->host, device?device:""); +@@ -899,62 +1145,101 @@ stonith_choose_peer(remote_fencing_op_t * op) + return NULL; + } + ++ /* If no other peer has verified access, next best is unverified access */ + peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); + if (peer) { + crm_trace("Found best unverified peer %s", peer->host); + return peer; + } + +- peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); +- if(peer) { +- crm_trace("%s will fence itself", peer->host); +- return peer; ++ /* If no other peer can do it, last option is self-fencing ++ * (which is never allowed for the "on" phase of a remapped reboot) ++ */ ++ if (op->phase != st_phase_on) { ++ peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); ++ if (peer) { ++ crm_trace("%s will fence itself", peer->host); ++ return peer; ++ } + } + +- /* Try the next fencing level if there is one */ +- } while (is_set(op->call_options, st_opt_topology) ++ /* Try the next fencing level if there is one (unless we're in the "on" ++ * phase of a remapped "reboot", because we ignore errors in that case) ++ */ ++ } while ((op->phase != st_phase_on) ++ && is_set(op->call_options, st_opt_topology) + && stonith_topology_next(op) == pcmk_ok); + +- crm_notice("Couldn't find anyone to fence %s with %s", op->target, device?device:""); ++ crm_notice("Couldn't find anyone to fence (%s) %s with %s", ++ op->action, op->target, (device? 
device : "any device")); + return NULL; + } + + static int +-get_device_timeout(st_query_result_t * peer, const char *device, int default_timeout) ++get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer, ++ const char *device) + { +- gpointer res; +- int delay_max = 0; ++ device_properties_t *props; + + if (!peer || !device) { +- return default_timeout; ++ return op->base_timeout; + } + +- res = g_hash_table_lookup(peer->delay_maxes, device); +- if (res && GPOINTER_TO_INT(res) > 0) { +- delay_max = GPOINTER_TO_INT(res); ++ props = g_hash_table_lookup(peer->devices, device); ++ if (!props) { ++ return op->base_timeout; + } + +- res = g_hash_table_lookup(peer->custom_action_timeouts, device); ++ return (props->custom_action_timeout[op->phase]? ++ props->custom_action_timeout[op->phase] : op->base_timeout) ++ + props->delay_max[op->phase]; ++} + +- return res ? GPOINTER_TO_INT(res) + delay_max : default_timeout + delay_max; ++struct timeout_data { ++ const remote_fencing_op_t *op; ++ const st_query_result_t *peer; ++ int total_timeout; ++}; ++ ++/*! ++ * \internal ++ * \brief Add timeout to a total if device has not been executed yet ++ * ++ * \param[in] key GHashTable key (device ID) ++ * \param[in] value GHashTable value (device properties) ++ * \param[in] user_data Timeout data ++ */ ++static void ++add_device_timeout(gpointer key, gpointer value, gpointer user_data) ++{ ++ const char *device_id = key; ++ device_properties_t *props = value; ++ struct timeout_data *timeout = user_data; ++ ++ if (!props->executed[timeout->op->phase] ++ && !props->disallowed[timeout->op->phase]) { ++ timeout->total_timeout += get_device_timeout(timeout->op, ++ timeout->peer, device_id); ++ } + } + + static int +-get_peer_timeout(st_query_result_t * peer, int default_timeout) ++get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer) + { +- int total_timeout = 0; ++ struct timeout_data timeout; + +- GListPtr cur = NULL; ++ timeout.op = op; ++ timeout.peer = peer; ++ timeout.total_timeout = 0; + +- for (cur = peer->device_list; cur; cur = cur->next) { +- total_timeout += get_device_timeout(peer, cur->data, default_timeout); +- } ++ g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); + +- return total_timeout ? total_timeout : default_timeout; ++ return (timeout.total_timeout? 
timeout.total_timeout : op->base_timeout); + } + + static int +-get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, int default_timeout) ++get_op_total_timeout(const remote_fencing_op_t *op, ++ const st_query_result_t *chosen_peer) + { + int total_timeout = 0; + stonith_topology_t *tp = find_topology_for_host(op->target); +@@ -977,11 +1262,11 @@ get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, + } + for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { + for (iter = op->query_results; iter != NULL; iter = iter->next) { +- st_query_result_t *peer = iter->data; ++ const st_query_result_t *peer = iter->data; + +- if (g_list_find_custom(peer->device_list, device_list->data, sort_strings)) { +- total_timeout += +- get_device_timeout(peer, device_list->data, default_timeout); ++ if (find_peer_device(op, peer, device_list->data)) { ++ total_timeout += get_device_timeout(op, peer, ++ device_list->data); + break; + } + } /* End Loop3: match device with peer that owns device, find device's timeout period */ +@@ -989,12 +1274,12 @@ get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, + } /*End Loop1: iterate through fencing levels */ + + } else if (chosen_peer) { +- total_timeout = get_peer_timeout(chosen_peer, default_timeout); ++ total_timeout = get_peer_timeout(op, chosen_peer); + } else { +- total_timeout = default_timeout; ++ total_timeout = op->base_timeout; + } + +- return total_timeout ? total_timeout : default_timeout; ++ return total_timeout ? total_timeout : op->base_timeout; + } + + static void +@@ -1049,6 +1334,55 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + } + } + ++/* ++ * \internal ++ * \brief Advance an operation to the next device in its topology ++ * ++ * \param[in,out] op Operation to advance ++ * \param[in] device ID of device just completed ++ * \param[in] msg XML reply that contained device result (if available) ++ * \param[in] rc Return code of device's execution ++ */ ++static void ++advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg, ++ int rc) ++{ ++ /* Advance to the next device at this topology level, if any */ ++ if (op->devices) { ++ op->devices = op->devices->next; ++ } ++ ++ /* If this device was required, it's not anymore */ ++ remove_required_device(op, device); ++ ++ /* If there are no more devices at this topology level, ++ * run through any required devices not already executed ++ */ ++ if (op->devices == NULL) { ++ op->devices = op->required_list[op->phase]; ++ } ++ ++ if ((op->devices == NULL) && (op->phase == st_phase_off)) { ++ /* We're done with this level and with required devices, but we had ++ * remapped "reboot" to "off", so start over with "on". If any devices ++ * need to be turned back on, op->devices will be non-NULL after this. 
++ */ ++ op_phase_on(op); ++ } ++ ++ if (op->devices) { ++ /* Necessary devices remain, so execute the next one */ ++ crm_trace("Next for %s on behalf of %s@%s (rc was %d)", ++ op->target, op->originator, op->client_name, rc); ++ call_remote_stonith(op, NULL); ++ } else { ++ /* We're done with all devices and phases, so finalize operation */ ++ crm_trace("Marking complex fencing op for %s as complete", op->target); ++ op->state = st_done; ++ remote_op_done(op, msg, rc, FALSE); ++ } ++} ++ + void + call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) + { +@@ -1061,7 +1395,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) + } + + if (!op->op_timer_total) { +- int total_timeout = get_op_total_timeout(op, peer, op->base_timeout); ++ int total_timeout = get_op_total_timeout(op, peer); + + op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout; + op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); +@@ -1071,13 +1405,13 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) + } + + if (is_set(op->call_options, st_opt_topology) && op->devices) { +- /* Ignore any preference, they might not have the device we need */ +- /* When using topology, the stonith_choose_peer function pops off +- * the peer from the op's query results. Make sure to calculate +- * the op_timeout before calling this function when topology is in use */ ++ /* Ignore any peer preference, they might not have the device we need */ ++ /* When using topology, stonith_choose_peer() removes the device from ++ * further consideration, so be sure to calculate timeout beforehand */ + peer = stonith_choose_peer(op); ++ + device = op->devices->data; +- timeout = get_device_timeout(peer, device, op->base_timeout); ++ timeout = get_device_timeout(op, peer, device); + } + + if (peer) { +@@ -1094,15 +1428,15 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) + crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); + + if (device) { +- timeout_one = +- TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout); ++ timeout_one = TIMEOUT_MULTIPLY_FACTOR * ++ get_device_timeout(op, peer, device); + crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host, + op->action, op->target, device, op->client_name, timeout_one); + crm_xml_add(remote_op, F_STONITH_DEVICE, device); + crm_xml_add(remote_op, F_STONITH_MODE, "slave"); + + } else { +- timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout); ++ timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); + crm_info("Requesting that %s perform op %s %s for %s (%ds, %ds)", + peer->host, op->action, op->target, op->client_name, timeout_one, stonith_watchdog_timeout_ms); + crm_xml_add(remote_op, F_STONITH_MODE, "smart"); +@@ -1115,16 +1449,18 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) + } + + if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) { +- crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)", +- stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device); ++ crm_notice("Waiting %ds for %s to self-fence (%s) for %s.%.8s (%p)", ++ stonith_watchdog_timeout_ms/1000, op->target, ++ op->action, op->client_name, op->id, device); + op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); + +- /* TODO: We should probably look into peer->device_list to verify watchdog 
is going to be in use */ ++ /* TODO check devices to verify watchdog will be in use */ + } else if(stonith_watchdog_timeout_ms > 0 + && safe_str_eq(peer->host, op->target) + && safe_str_neq(op->action, "on")) { +- crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)", +- stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device); ++ crm_notice("Waiting %ds for %s to self-fence (%s) for %s.%.8s (%p)", ++ stonith_watchdog_timeout_ms/1000, op->target, ++ op->action, op->client_name, op->id, device); + op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); + + } else { +@@ -1137,13 +1473,23 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) + free_xml(remote_op); + return; + ++ } else if (op->phase == st_phase_on) { ++ /* A remapped "on" cannot be executed, but the node was already ++ * turned off successfully, so ignore the error and continue. ++ */ ++ crm_warn("Ignoring %s 'on' failure (no capable peers) for %s after successful 'off'", ++ device, op->target); ++ advance_op_topology(op, device, NULL, pcmk_ok); ++ return; ++ + } else if (op->owner == FALSE) { +- crm_err("The termination of %s for %s is not ours to control", op->target, op->client_name); ++ crm_err("Fencing (%s) of %s for %s is not ours to control", ++ op->action, op->target, op->client_name); + + } else if (op->query_timer == 0) { + /* We've exhausted all available peers */ +- crm_info("No remaining peers capable of terminating %s for %s (%d)", op->target, +- op->client_name, op->state); ++ crm_info("No remaining peers capable of fencing (%s) %s for %s (%d)", ++ op->action, op->target, op->client_name, op->state); + CRM_LOG_ASSERT(op->state < st_done); + remote_op_timeout(op); + +@@ -1153,33 +1499,37 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) + /* if the operation never left the query state, + * but we have all the expected replies, then no devices + * are available to execute the fencing operation. 
*/ ++ + if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) { +- crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)", +- stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device); ++ crm_notice("Waiting %ds for %s to self-fence (%s) for %s.%.8s (%p)", ++ stonith_watchdog_timeout_ms/1000, op->target, ++ op->action, op->client_name, op->id, device); + + op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); + return; + } + + if (op->state == st_query) { +- crm_info("None of the %d peers have devices capable of terminating %s for %s (%d)", +- op->replies, op->target, op->client_name, op->state); ++ crm_info("None of the %d peers have devices capable of fencing (%s) %s for %s (%d)", ++ op->replies, op->action, op->target, op->client_name, ++ op->state); + + rc = -ENODEV; + } else { +- crm_info("None of the %d peers are capable of terminating %s for %s (%d)", +- op->replies, op->target, op->client_name, op->state); ++ crm_info("None of the %d peers are capable of fencing (%s) %s for %s (%d)", ++ op->replies, op->action, op->target, op->client_name, ++ op->state); + } + + op->state = st_failed; + remote_op_done(op, NULL, rc, FALSE); + + } else if (device) { +- crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s", +- op->target, device, op->client_name, op->id); ++ crm_info("Waiting for additional peers capable of fencing (%s) %s with %s for %s.%.8s", ++ op->action, op->target, device, op->client_name, op->id); + } else { +- crm_info("Waiting for additional peers capable of terminating %s for %s%.8s", +- op->target, op->client_name, op->id); ++ crm_info("Waiting for additional peers capable of fencing (%s) %s for %s%.8s", ++ op->action, op->target, op->client_name, op->id); + } + } + +@@ -1200,7 +1550,7 @@ sort_peers(gconstpointer a, gconstpointer b) + const st_query_result_t *peer_a = a; + const st_query_result_t *peer_b = b; + +- return (peer_b->devices - peer_a->devices); ++ return (peer_b->ndevices - peer_a->ndevices); + } + + /*! 
+@@ -1212,7 +1562,7 @@ all_topology_devices_found(remote_fencing_op_t * op) + { + GListPtr device = NULL; + GListPtr iter = NULL; +- GListPtr match = NULL; ++ device_properties_t *match = NULL; + stonith_topology_t *tp = NULL; + gboolean skip_target = FALSE; + int i; +@@ -1236,7 +1586,7 @@ all_topology_devices_found(remote_fencing_op_t * op) + if (skip_target && safe_str_eq(peer->host, op->target)) { + continue; + } +- match = g_list_find_custom(peer->device_list, device->data, sort_strings); ++ match = find_peer_device(op, peer, device->data); + } + if (!match) { + return FALSE; +@@ -1247,10 +1597,169 @@ all_topology_devices_found(remote_fencing_op_t * op) + return TRUE; + } + ++/* ++ * \internal ++ * \brief Parse action-specific device properties from XML ++ * ++ * \param[in] msg XML element containing the properties ++ * \param[in] peer Name of peer that sent XML (for logs) ++ * \param[in] device Device ID (for logs) ++ * \param[in] action Action the properties relate to (for logs) ++ * \param[in] phase Phase the properties relate to ++ * \param[in,out] props Device properties to update ++ */ ++static void ++parse_action_specific(xmlNode *xml, const char *peer, const char *device, ++ const char *action, remote_fencing_op_t *op, ++ enum st_remap_phase phase, device_properties_t *props) ++{ ++ int required; ++ ++ props->custom_action_timeout[phase] = 0; ++ crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, ++ &props->custom_action_timeout[phase]); ++ if (props->custom_action_timeout[phase]) { ++ crm_trace("Peer %s with device %s returned %s action timeout %d", ++ peer, device, action, props->custom_action_timeout[phase]); ++ } ++ ++ props->delay_max[phase] = 0; ++ crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]); ++ if (props->delay_max[phase]) { ++ crm_trace("Peer %s with device %s returned maximum of random delay %d for %s", ++ peer, device, props->delay_max[phase], action); ++ } ++ ++ required = 0; ++ crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); ++ if (required) { ++ /* If the action is marked as required, add the device to the ++ * operation's list of required devices for this phase. We use this ++ * for unfencing when executing a topology. In phase 0 (requested ++ * action) or phase 1 (remapped "off"), required devices get executed ++ * regardless of their topology level; in phase 2 (remapped "on"), ++ * required devices are not attempted, because the cluster will ++ * execute them automatically later. ++ */ ++ crm_trace("Peer %s requires device %s to execute for action %s", ++ peer, device, action); ++ add_required_device(op, phase, device); ++ } ++ ++ /* If a reboot is remapped to off+on, it's possible that a node is allowed ++ * to perform one action but not another. 
++ */ ++ if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) { ++ props->disallowed[phase] = TRUE; ++ crm_trace("Peer %s is disallowed from executing %s for device %s", ++ peer, action, device); ++ } ++} ++ ++/* ++ * \internal ++ * \brief Parse one device's properties from peer's XML query reply ++ * ++ * \param[in] xml XML node containing device properties ++ * \param[in,out] op Operation that query and reply relate to ++ * \param[in,out] result Peer's results ++ * \param[in] device ID of device being parsed ++ */ ++static void ++add_device_properties(xmlNode *xml, remote_fencing_op_t *op, ++ st_query_result_t *result, const char *device) ++{ ++ xmlNode *child; ++ int verified = 0; ++ device_properties_t *props = calloc(1, sizeof(device_properties_t)); ++ ++ /* Add a new entry to this result's devices list */ ++ CRM_ASSERT(props != NULL); ++ g_hash_table_insert(result->devices, strdup(device), props); ++ ++ /* Peers with verified (monitored) access will be preferred */ ++ crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); ++ if (verified) { ++ crm_trace("Peer %s has confirmed a verified device %s", ++ result->host, device); ++ props->verified = TRUE; ++ } ++ ++ /* Parse action-specific device properties */ ++ parse_action_specific(xml, result->host, device, op_requested_action(op), ++ op, st_phase_requested, props); ++ for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { ++ /* Replies for "reboot" operations will include the action-specific ++ * values for "off" and "on" in child elements, just in case the reboot ++ * winds up getting remapped. ++ */ ++ if (safe_str_eq(ID(child), "off")) { ++ parse_action_specific(child, result->host, device, "off", ++ op, st_phase_off, props); ++ } else if (safe_str_eq(ID(child), "on")) { ++ parse_action_specific(child, result->host, device, "on", ++ op, st_phase_on, props); ++ } ++ } ++} ++ ++/* ++ * \internal ++ * \brief Parse a peer's XML query reply and add it to operation's results ++ * ++ * \param[in,out] op Operation that query and reply relate to ++ * \param[in] host Name of peer that sent this reply ++ * \param[in] ndevices Number of devices expected in reply ++ * \param[in] xml XML node containing device list ++ * ++ * \return Newly allocated result structure with parsed reply ++ */ ++static st_query_result_t * ++add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml) ++{ ++ st_query_result_t *result = calloc(1, sizeof(st_query_result_t)); ++ xmlNode *child; ++ ++ CRM_CHECK(result != NULL, return NULL); ++ result->host = strdup(host); ++ result->devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); ++ ++ /* Each child element describes one capable device available to the peer */ ++ for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { ++ const char *device = ID(child); ++ ++ if (device) { ++ add_device_properties(child, op, result, device); ++ } ++ } ++ ++ result->ndevices = g_hash_table_size(result->devices); ++ CRM_CHECK(ndevices == result->ndevices, ++ crm_err("Query claimed to have %d devices but %d found", ++ ndevices, result->ndevices)); ++ ++ op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); ++ return result; ++} ++ ++/* ++ * \internal ++ * \brief Handle a peer's reply to our fencing query ++ * ++ * Parse a query result from XML and store it in the remote operation ++ * table, and when enough replies have been received, issue a fencing request. 
++ * ++ * \param[in] msg XML reply received ++ * ++ * \return pcmk_ok on success, -errno on error ++ * ++ * \note See initiate_remote_stonith_op() for how the XML query was initially ++ * formed, and stonith_query() for how the peer formed its XML reply. ++ */ + int + process_remote_stonith_query(xmlNode * msg) + { +- int devices = 0; ++ int ndevices = 0; + gboolean host_is_target = FALSE; + gboolean have_all_replies = FALSE; + const char *id = NULL; +@@ -1259,7 +1768,6 @@ process_remote_stonith_query(xmlNode * msg) + st_query_result_t *result = NULL; + uint32_t replies_expected; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); +- xmlNode *child = NULL; + + CRM_CHECK(dev != NULL, return -EPROTO); + +@@ -1268,7 +1776,7 @@ process_remote_stonith_query(xmlNode * msg) + + dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); + CRM_CHECK(dev != NULL, return -EPROTO); +- crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &devices); ++ crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices); + + op = g_hash_table_lookup(remote_op_list, id); + if (op == NULL) { +@@ -1283,75 +1791,13 @@ process_remote_stonith_query(xmlNode * msg) + host = crm_element_value(msg, F_ORIG); + host_is_target = safe_str_eq(host, op->target); + +- if (devices <= 0) { +- /* If we're doing 'known' then we might need to fire anyway */ +- crm_trace("Query result %d of %d from %s for %s/%s (%d devices) %s", +- op->replies, replies_expected, host, +- op->target, op->action, devices, id); +- if (have_all_replies) { +- crm_info("All query replies have arrived, continuing (%d expected/%d received for id %s)", +- replies_expected, op->replies, id); +- call_remote_stonith(op, NULL); +- } +- return pcmk_ok; +- } +- + crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s", + op->replies, replies_expected, host, +- op->target, op->action, devices, id); +- result = calloc(1, sizeof(st_query_result_t)); +- result->host = strdup(host); +- result->devices = devices; +- result->custom_action_timeouts = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); +- result->delay_maxes = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); +- result->verified_devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); +- +- for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) { +- const char *device = ID(child); +- int action_timeout = 0; +- int delay_max = 0; +- int verified = 0; +- int required = 0; +- +- if (device) { +- result->device_list = g_list_prepend(result->device_list, strdup(device)); +- crm_element_value_int(child, F_STONITH_ACTION_TIMEOUT, &action_timeout); +- crm_element_value_int(child, F_STONITH_DELAY_MAX, &delay_max); +- crm_element_value_int(child, F_STONITH_DEVICE_VERIFIED, &verified); +- crm_element_value_int(child, F_STONITH_DEVICE_REQUIRED, &required); +- if (action_timeout) { +- crm_trace("Peer %s with device %s returned action timeout %d", +- result->host, device, action_timeout); +- g_hash_table_insert(result->custom_action_timeouts, +- strdup(device), GINT_TO_POINTER(action_timeout)); +- } +- if (delay_max > 0) { +- crm_trace("Peer %s with device %s returned maximum of random delay %d", +- result->host, device, delay_max); +- g_hash_table_insert(result->delay_maxes, +- strdup(device), GINT_TO_POINTER(delay_max)); +- } +- if (verified) { +- crm_trace("Peer %s has confirmed a verified device %s", result->host, device); +- g_hash_table_insert(result->verified_devices, +- strdup(device), 
GINT_TO_POINTER(verified)); +- } +- if (required) { +- crm_trace("Peer %s requires device %s to execute for action %s", +- result->host, device, op->action); +- /* This matters when executing a topology. Required devices will get +- * executed regardless of their topology level. We use this for unfencing. */ +- add_required_device(op, device); +- } +- } ++ op->target, op->action, ndevices, id); ++ if (ndevices > 0) { ++ result = add_result(op, host, ndevices, dev); + } + +- CRM_CHECK(devices == g_list_length(result->device_list), +- crm_err("Mis-match: Query claimed to have %d devices but %d found", devices, +- g_list_length(result->device_list))); +- +- op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); +- + if (is_set(op->call_options, st_opt_topology)) { + /* If we start the fencing before all the topology results are in, + * it is possible fencing levels will be skipped because of the missing +@@ -1368,11 +1814,13 @@ process_remote_stonith_query(xmlNode * msg) + } + + } else if (op->state == st_query) { ++ int nverified = count_peer_devices(op, result, TRUE); ++ + /* We have a result for a non-topology fencing op that looks promising, + * go ahead and start fencing before query timeout */ +- if (host_is_target == FALSE && g_hash_table_size(result->verified_devices)) { ++ if (result && (host_is_target == FALSE) && nverified) { + /* we have a verified device living on a peer that is not the target */ +- crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices)); ++ crm_trace("Found %d verified devices", nverified); + call_remote_stonith(op, result); + + } else if (have_all_replies) { +@@ -1384,14 +1832,25 @@ process_remote_stonith_query(xmlNode * msg) + crm_trace("Waiting for more peer results before launching fencing operation"); + } + +- } else if (op->state == st_done) { ++ } else if (result && (op->state == st_done)) { + crm_info("Discarding query result from %s (%d devices): Operation is in state %d", +- result->host, result->devices, op->state); ++ result->host, result->ndevices, op->state); + } + + return pcmk_ok; + } + ++/* ++ * \internal ++ * \brief Handle a peer's reply to a fencing request ++ * ++ * Parse a fencing reply from XML, and either finalize the operation ++ * or attempt another device as appropriate. ++ * ++ * \param[in] msg XML reply received ++ * ++ * \return pcmk_ok on success, -errno on error ++ */ + int + process_remote_stonith_exec(xmlNode * msg) + { +@@ -1472,26 +1931,20 @@ process_remote_stonith_exec(xmlNode * msg) + return rc; + } + +- /* An operation completed succesfully but has not yet been marked as done. +- * Continue the topology if more devices exist at the current level, otherwise +- * mark as done. */ ++ if ((op->phase == 2) && (rc != pcmk_ok)) { ++ /* A remapped "on" failed, but the node was already turned off ++ * successfully, so ignore the error and continue. ++ */ ++ crm_warn("Ignoring %s 'on' failure (exit code %d) for %s after successful 'off'", ++ device, rc, op->target); ++ rc = pcmk_ok; ++ } ++ + if (rc == pcmk_ok) { +- GListPtr required_match = g_list_find_custom(op->required_list, device, sort_strings); +- if (op->devices) { +- /* Success, are there any more? 
*/ +- op->devices = op->devices->next; +- } +- if (required_match) { +- op->required_list = g_list_remove(op->required_list, required_match->data); +- } +- /* if no more devices at this fencing level, we are done, +- * else we need to contine with executing the next device in the list */ +- if (op->devices == NULL) { +- crm_trace("Marking complex fencing op for %s as complete", op->target); +- op->state = st_done; +- remote_op_done(op, msg, rc, FALSE); +- return rc; +- } ++ /* An operation completed successfully. Try another device if ++ * necessary, otherwise mark the operation as done. */ ++ advance_op_topology(op, device, msg, rc); ++ return rc; + } else { + /* This device failed, time to try another topology level. If no other + * levels are available, mark this operation as failed and report results. */ +@@ -1516,7 +1969,7 @@ process_remote_stonith_exec(xmlNode * msg) + /* fall-through and attempt other fencing action using another peer */ + } + +- /* Retry on failure or execute the rest of the topology */ ++ /* Retry on failure */ + crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, + op->client_name, rc); + call_remote_stonith(op, NULL); +@@ -1595,6 +2048,9 @@ stonith_check_fence_tolerance(int tolerance, const char *target, const char *act + continue; + } else if (rop->state != st_done) { + continue; ++ /* We don't have to worry about remapped reboots here ++ * because if state is done, any remapping has been undone ++ */ + } else if (strcmp(rop->action, action) != 0) { + continue; + } else if ((rop->completed + tolerance) < now) { +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index a6f58b1..a59151b 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -63,6 +63,8 @@ xmlNode *create_device_registration_xml(const char *id, const char *namespace, c + # define F_STONITH_TOLERANCE "st_tolerance" + /*! Action specific timeout period returned in query of fencing devices. */ + # define F_STONITH_ACTION_TIMEOUT "st_action_timeout" ++/*! Host in query result is not allowed to run this action */ ++# define F_STONITH_ACTION_DISALLOWED "st_action_disallowed" + /*! Maximum of random fencing delay for a device */ + # define F_STONITH_DELAY_MAX "st_delay_max" + /*! Has this device been verified using a monitor type +diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h +index e3a0d63..730cad3 100644 +--- a/include/crm/lrmd.h ++++ b/include/crm/lrmd.h +@@ -200,8 +200,6 @@ typedef struct lrmd_event_data_s { + enum ocf_exitcode rc; + /*! The lrmd status returned for exec_complete events */ + int op_status; +- /*! exit failure reason string from resource agent operation */ +- const char *exit_reason; + /*! stdout from resource agent operation */ + const char *output; + /*! Timestamp of when op ran */ +@@ -226,6 +224,9 @@ typedef struct lrmd_event_data_s { + * to the proper client. */ + const char *remote_nodename; + ++ /*! 
exit failure reason string from resource agent operation */ ++ const char *exit_reason; ++ + } lrmd_event_data_t; + + lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event); +diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h +index 4bfa3fe..4214959 100644 +--- a/include/crm/pengine/status.h ++++ b/include/crm/pengine/status.h +@@ -137,10 +137,6 @@ struct node_shared_s { + gboolean shutdown; + gboolean expected_up; + gboolean is_dc; +- gboolean rsc_discovery_enabled; +- +- gboolean remote_requires_reset; +- gboolean remote_was_fenced; + + int num_resources; + GListPtr running_rsc; /* resource_t* */ +@@ -157,14 +153,17 @@ struct node_shared_s { + GHashTable *digest_cache; + + gboolean maintenance; ++ gboolean rsc_discovery_enabled; ++ gboolean remote_requires_reset; ++ gboolean remote_was_fenced; + }; + + struct node_s { + int weight; + gboolean fixed; +- int rsc_discover_mode; + int count; + struct node_shared_s *details; ++ int rsc_discover_mode; + }; + + # include +@@ -262,7 +261,6 @@ struct resource_s { + int migration_threshold; + + gboolean is_remote_node; +- gboolean exclusive_discover; + + unsigned long long flags; + +@@ -296,6 +294,7 @@ struct resource_s { + char *pending_task; + + const char *isolation_wrapper; ++ gboolean exclusive_discover; + }; + + struct pe_action_s { +diff --git a/lib/cib/cib_ops.c b/lib/cib/cib_ops.c +index 5f73559..8966ae2 100644 +--- a/lib/cib/cib_ops.c ++++ b/lib/cib/cib_ops.c +@@ -373,7 +373,10 @@ cib_process_modify(const char *op, int options, const char *section, xmlNode * r + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); +- crm_debug("Destroying %s", (char *)xmlGetNodePath(match)); ++ xmlChar *match_path = xmlGetNodePath(match); ++ ++ crm_debug("Destroying %s", match_path); ++ free(match_path); + free_xml(match); + } + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 28b8e81..d321517 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -533,7 +533,7 @@ cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_quer + int current_schema = get_schema_version(schema); + + if (minimum_schema == 0) { +- minimum_schema = get_schema_version("pacemaker-1.1"); ++ minimum_schema = get_schema_version("pacemaker-1.2"); + } + + /* Does the CIB support the "update-*" attributes... */ +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 28f41cb..b7958eb 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -734,6 +734,14 @@ crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const + if (crm_status_callback) { + crm_status_callback(crm_status_processes, node, &last); + } ++ ++ /* The client callback shouldn't touch the peer caches, ++ * but as a safety net, bail if the peer cache was destroyed. ++ */ ++ if (crm_peer_cache == NULL) { ++ return NULL; ++ } ++ + if (crm_autoreap) { + node = crm_update_peer_state(__FUNCTION__, node, + is_set(node->processes, crm_get_cluster_proc())? 
+diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am +index f5c0766..a593f40 100644 +--- a/lib/common/Makefile.am ++++ b/lib/common/Makefile.am +@@ -37,7 +37,7 @@ if BUILD_CIBSECRETS + libcrmcommon_la_SOURCES += cib_secrets.c + endif + +-libcrmcommon_la_LDFLAGS = -version-info 8:0:5 ++libcrmcommon_la_LDFLAGS = -version-info 7:0:4 + libcrmcommon_la_LIBADD = @LIBADD_DL@ $(GNUTLSLIBS) + libcrmcommon_la_SOURCES += $(top_builddir)/lib/gnu/md5.c + +diff --git a/lib/common/xml.c b/lib/common/xml.c +index e272049..8eed245 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -3430,12 +3430,18 @@ dump_xml_attr(xmlAttrPtr attr, int options, char **buffer, int *offset, int *max + { + char *p_value = NULL; + const char *p_name = NULL; ++ xml_private_t *p = NULL; + + CRM_ASSERT(buffer != NULL); + if (attr == NULL || attr->children == NULL) { + return; + } + ++ p = attr->_private; ++ if (p && is_set(p->flags, xpf_deleted)) { ++ return; ++ } ++ + p_name = (const char *)attr->name; + p_value = crm_xml_escape((const char *)attr->children->content); + buffer_print(*buffer, *max, *offset, " %s=\"%s\"", p_name, p_value); +@@ -3812,6 +3818,10 @@ dump_xml_comment(xmlNode * data, int options, char **buffer, int *offset, int *m + void + crm_xml_dump(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) + { ++ if(data == NULL) { ++ *offset = 0; ++ *max = 0; ++ } + #if 0 + if (is_not_set(options, xml_log_option_filtered)) { + /* Turning this code on also changes the PE tests for some reason +@@ -4564,6 +4574,8 @@ subtract_xml_object(xmlNode * parent, xmlNode * left, xmlNode * right, + /* changes to name/value pairs */ + for (xIter = crm_first_attr(left); xIter != NULL; xIter = xIter->next) { + const char *prop_name = (const char *)xIter->name; ++ xmlAttrPtr right_attr = NULL; ++ xml_private_t *p = NULL; + + if (strcmp(prop_name, XML_ATTR_ID) == 0) { + continue; +@@ -4582,8 +4594,13 @@ subtract_xml_object(xmlNode * parent, xmlNode * left, xmlNode * right, + continue; + } + ++ right_attr = xmlHasProp(right, (const xmlChar *)prop_name); ++ if (right_attr) { ++ p = right_attr->_private; ++ } ++ + right_val = crm_element_value(right, prop_name); +- if (right_val == NULL) { ++ if (right_val == NULL || (p && is_set(p->flags, xpf_deleted))) { + /* new */ + *changed = TRUE; + if (full) { +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 80f0064..67114c2 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -1100,57 +1100,62 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a + if (safe_str_eq(provider, "redhat")) { + stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); + int exec_rc = stonith_action_execute(action, &rc, &buffer); ++ xmlNode *xml = NULL; ++ xmlNode *actions = NULL; ++ xmlXPathObject *xpathObj = NULL; + + if (exec_rc < 0 || rc != 0 || buffer == NULL) { ++ crm_warn("Could not obtain metadata for %s", agent); + crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); + free(buffer); /* Just in case */ + return -EINVAL; ++ } + +- } else { +- +- xmlNode *xml = string2xml(buffer); +- xmlNode *actions = NULL; +- xmlXPathObject *xpathObj = NULL; ++ xml = string2xml(buffer); ++ if(xml == NULL) { ++ crm_warn("Metadata for %s is invalid", agent); ++ free(buffer); ++ return -EINVAL; ++ } + +- xpathObj = xpath_search(xml, "//actions"); +- if (numXpathResults(xpathObj) > 0) { +- actions = getXpathResult(xpathObj, 0); +- } ++ xpathObj = xpath_search(xml, 
"//actions"); ++ if (numXpathResults(xpathObj) > 0) { ++ actions = getXpathResult(xpathObj, 0); ++ } + +- freeXpathObject(xpathObj); ++ freeXpathObject(xpathObj); + +- /* Now fudge the metadata so that the start/stop actions appear */ +- xpathObj = xpath_search(xml, "//action[@name='stop']"); +- if (numXpathResults(xpathObj) <= 0) { +- xmlNode *tmp = NULL; ++ /* Now fudge the metadata so that the start/stop actions appear */ ++ xpathObj = xpath_search(xml, "//action[@name='stop']"); ++ if (numXpathResults(xpathObj) <= 0) { ++ xmlNode *tmp = NULL; + +- tmp = create_xml_node(actions, "action"); +- crm_xml_add(tmp, "name", "stop"); +- crm_xml_add(tmp, "timeout", "20s"); ++ tmp = create_xml_node(actions, "action"); ++ crm_xml_add(tmp, "name", "stop"); ++ crm_xml_add(tmp, "timeout", "20s"); + +- tmp = create_xml_node(actions, "action"); +- crm_xml_add(tmp, "name", "start"); +- crm_xml_add(tmp, "timeout", "20s"); +- } ++ tmp = create_xml_node(actions, "action"); ++ crm_xml_add(tmp, "name", "start"); ++ crm_xml_add(tmp, "timeout", "20s"); ++ } + +- freeXpathObject(xpathObj); ++ freeXpathObject(xpathObj); + +- /* Now fudge the metadata so that the port isn't required in the configuration */ +- xpathObj = xpath_search(xml, "//parameter[@name='port']"); +- if (numXpathResults(xpathObj) > 0) { +- /* We'll fill this in */ +- xmlNode *tmp = getXpathResult(xpathObj, 0); ++ /* Now fudge the metadata so that the port isn't required in the configuration */ ++ xpathObj = xpath_search(xml, "//parameter[@name='port']"); ++ if (numXpathResults(xpathObj) > 0) { ++ /* We'll fill this in */ ++ xmlNode *tmp = getXpathResult(xpathObj, 0); + +- crm_xml_add(tmp, "required", "0"); +- } ++ crm_xml_add(tmp, "required", "0"); ++ } + +- freeXpathObject(xpathObj); +- free(buffer); +- buffer = dump_xml_formatted(xml); +- free_xml(xml); +- if (!buffer) { +- return -EINVAL; +- } ++ freeXpathObject(xpathObj); ++ free(buffer); ++ buffer = dump_xml_formatted(xml); ++ free_xml(xml); ++ if (!buffer) { ++ return -EINVAL; + } + + } else { +@@ -1280,7 +1285,10 @@ stonith_api_query(stonith_t * stonith, int call_options, const char *target, + + CRM_LOG_ASSERT(match != NULL); + if(match != NULL) { +- crm_info("%s[%d] = %s", "//@agent", lpc, xmlGetNodePath(match)); ++ xmlChar *match_path = xmlGetNodePath(match); ++ ++ crm_info("%s[%d] = %s", "//@agent", lpc, match_path); ++ free(match_path); + *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); + } + } +diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am +index e98d1e5..f961ae1 100644 +--- a/lib/lrmd/Makefile.am ++++ b/lib/lrmd/Makefile.am +@@ -25,7 +25,7 @@ AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ + lib_LTLIBRARIES = liblrmd.la + + liblrmd_la_SOURCES = lrmd_client.c proxy_common.c +-liblrmd_la_LDFLAGS = -version-info 3:0:0 ++liblrmd_la_LDFLAGS = -version-info 3:0:2 + liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/fencing/libstonithd.la +diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am +index 29b7206..78da075 100644 +--- a/lib/pengine/Makefile.am ++++ b/lib/pengine/Makefile.am +@@ -30,7 +30,7 @@ libpe_rules_la_LDFLAGS = -version-info 2:4:0 + libpe_rules_la_SOURCES = rules.c common.c + libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la + +-libpe_status_la_LDFLAGS = -version-info 8:0:0 ++libpe_status_la_LDFLAGS = -version-info 8:0:4 + libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c 
group.c clone.c rules.c common.c
+ libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index 73c44a8..106c674 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -2834,8 +2834,9 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod
+
+ node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
+ if (remote_node && remote_node->details->remote_was_fenced == 0) {
+-
+- crm_info("Waiting to clear monitor failure for remote node %s until fencing has occured", rsc->id);
++ if (strstr(ID(xml_op), "last_failure")) {
++ crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
++ }
+ /* disabling failure timeout for this operation because we believe
+ * fencing of the remote node should occur first. */
+ failure_timeout = 0;
+@@ -2866,6 +2867,9 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod
+ } else {
+ expired = FALSE;
+ }
++ } else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) {
++ /* always clear last failure when reconnect interval is set */
++ clear_failcount = 1;
+ }
+ }
+
+diff --git a/lib/services/pcmk-dbus.h b/lib/services/pcmk-dbus.h
+index afb8a2a..b9a713b 100644
+--- a/lib/services/pcmk-dbus.h
++++ b/lib/services/pcmk-dbus.h
+@@ -1,3 +1,7 @@
++#ifndef DBUS_TIMEOUT_USE_DEFAULT
++# define DBUS_TIMEOUT_USE_DEFAULT -1
++#endif
++
+ DBusConnection *pcmk_dbus_connect(void);
+ void pcmk_dbus_connection_setup_with_select(DBusConnection *c);
+ void pcmk_dbus_disconnect(DBusConnection *connection);
+diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
+index bd4d33e..0cf98cc 100644
+--- a/lrmd/lrmd.c
++++ b/lrmd/lrmd.c
+@@ -219,6 +219,7 @@ free_lrmd_cmd(lrmd_cmd_t * cmd)
+ }
+ free(cmd->origin);
+ free(cmd->action);
++ free(cmd->real_action);
+ free(cmd->userdata_str);
+ free(cmd->rsc_id);
+ free(cmd->output);
+diff --git a/pacemaker.spec.in b/pacemaker.spec.in
+index 0e3200f..2dfb4a6 100644
+--- a/pacemaker.spec.in
++++ b/pacemaker.spec.in
+@@ -54,7 +54,7 @@
+
+ Name: pacemaker
+ Summary: Scalable High-Availability cluster resource manager
+-Version: 1.1.11
++Version: 1.1.13
+ Release: %{pcmk_release}%{?dist}
+ License: GPLv2+ and LGPLv2+
+ Url: http://www.clusterlabs.org
+diff --git a/pengine/Makefile.am b/pengine/Makefile.am
+index d14d911..31532cf 100644
+--- a/pengine/Makefile.am
++++ b/pengine/Makefile.am
+@@ -61,7 +61,7 @@ endif
+ noinst_HEADERS = allocate.h utils.h pengine.h
+ #utils.h pengine.h
+
+-libpengine_la_LDFLAGS = -version-info 8:0:0
++libpengine_la_LDFLAGS = -version-info 8:0:4
+ # -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version
+ libpengine_la_SOURCES = pengine.c allocate.c utils.c constraints.c
+ libpengine_la_SOURCES += native.c group.c clone.c master.c graph.c utilization.c
+diff --git a/pengine/allocate.c b/pengine/allocate.c
+index 4b6fca1..68cafd4 100644
+--- a/pengine/allocate.c
++++ b/pengine/allocate.c
+@@ -1681,10 +1681,38 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
+ resource_t *remote_rsc = NULL;
+ resource_t *container = NULL;
+
++ if (action->rsc == NULL) {
++ continue;
++ }
++
++ /* Special case. 
*/ ++ if (action->rsc && ++ action->rsc->is_remote_node && ++ safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) { ++ ++ /* if we are clearing the failcount of an actual remote node connect ++ * resource, then make sure this happens before allowing the connection ++ * to start if we are planning on starting the connection during this ++ * transition */ ++ custom_action_order(action->rsc, ++ NULL, ++ action, ++ action->rsc, ++ generate_op_key(action->rsc->id, RSC_START, 0), ++ NULL, ++ pe_order_optional, ++ data_set); ++ ++ continue; ++ } ++ ++ /* detect if the action occurs on a remote node. if so create ++ * ordering constraints that guarantee the action occurs while ++ * the remote node is active (after start, before stop...) things ++ * like that */ + if (action->node == NULL || + is_remote_node(action->node) == FALSE || + action->node->details->remote_rsc == NULL || +- action->rsc == NULL || + is_set(action->flags, pe_action_pseudo)) { + continue; + } +diff --git a/pengine/regression.sh b/pengine/regression.sh +index d57da17..d184798 100755 +--- a/pengine/regression.sh ++++ b/pengine/regression.sh +@@ -566,6 +566,8 @@ do_test colocated-utilization-primitive-2 "Colocated Utilization - Choose the mo + do_test colocated-utilization-group "Colocated Utilization - Group" + do_test colocated-utilization-clone "Colocated Utilization - Clone" + ++do_test utilization-check-allowed-nodes "Only check the capacities of the nodes that can run the resource" ++ + echo "" + do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources" + do_test node-maintenance-1 "cl#5128 - Node maintenance" +diff --git a/pengine/test10/utilization-check-allowed-nodes.dot b/pengine/test10/utilization-check-allowed-nodes.dot +new file mode 100644 +index 0000000..d09efbc +--- /dev/null ++++ b/pengine/test10/utilization-check-allowed-nodes.dot +@@ -0,0 +1,19 @@ ++digraph "g" { ++"load_stopped_node1 node1" [ style=bold color="green" fontcolor="orange"] ++"load_stopped_node2 node2" [ style=bold color="green" fontcolor="orange"] ++"probe_complete node1" -> "probe_complete" [ style = bold] ++"probe_complete node1" [ style=bold color="green" fontcolor="black"] ++"probe_complete node2" -> "probe_complete" [ style = bold] ++"probe_complete node2" [ style=bold color="green" fontcolor="black"] ++"probe_complete" -> "rsc1_start_0 node2" [ style = bold] ++"probe_complete" [ style=bold color="green" fontcolor="orange"] ++"rsc1_monitor_0 node1" -> "probe_complete node1" [ style = bold] ++"rsc1_monitor_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_0 node2" -> "probe_complete node2" [ style = bold] ++"rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc1_start_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc2_monitor_0 node1" -> "probe_complete node1" [ style = bold] ++"rsc2_monitor_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2_monitor_0 node2" -> "probe_complete node2" [ style = bold] ++"rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/pengine/test10/utilization-check-allowed-nodes.exp b/pengine/test10/utilization-check-allowed-nodes.exp +new file mode 100644 +index 0000000..134ccb3 +--- /dev/null ++++ b/pengine/test10/utilization-check-allowed-nodes.exp +@@ -0,0 +1,112 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/utilization-check-allowed-nodes.scores b/pengine/test10/utilization-check-allowed-nodes.scores +new file mode 100644 +index 0000000..26887e2 +--- /dev/null ++++ b/pengine/test10/utilization-check-allowed-nodes.scores +@@ -0,0 +1,5 @@ ++Allocation scores: ++native_color: rsc1 allocation score on node1: -INFINITY ++native_color: rsc1 allocation score on node2: 0 ++native_color: rsc2 allocation score on node1: -INFINITY ++native_color: rsc2 allocation score on node2: 0 +diff --git a/pengine/test10/utilization-check-allowed-nodes.summary b/pengine/test10/utilization-check-allowed-nodes.summary +new file mode 100644 +index 0000000..12bf19a +--- /dev/null ++++ b/pengine/test10/utilization-check-allowed-nodes.summary +@@ -0,0 +1,26 @@ ++ ++Current cluster status: ++Online: [ node1 node2 ] ++ ++ rsc1 (ocf::pacemaker:Dummy): Stopped ++ rsc2 (ocf::pacemaker:Dummy): Stopped ++ ++Transition Summary: ++ * Start rsc1 (node2) ++ ++Executing cluster transition: ++ * Resource action: rsc1 monitor on node2 ++ * Resource action: rsc1 monitor on node1 ++ * Resource action: rsc2 monitor on node2 ++ * Resource action: rsc2 monitor on node1 ++ * Pseudo action: probe_complete ++ * Pseudo action: load_stopped_node1 ++ * Pseudo action: load_stopped_node2 ++ * Resource action: rsc1 start on node2 ++ ++Revised cluster status: ++Online: [ node1 node2 ] ++ ++ rsc1 (ocf::pacemaker:Dummy): Started node2 ++ rsc2 (ocf::pacemaker:Dummy): Stopped ++ +diff --git a/pengine/test10/utilization-check-allowed-nodes.xml b/pengine/test10/utilization-check-allowed-nodes.xml +new file mode 100644 +index 0000000..39cf51f +--- /dev/null ++++ b/pengine/test10/utilization-check-allowed-nodes.xml +@@ -0,0 +1,39 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/utilization.c b/pengine/utilization.c +index 982fcc9..db41b21 100644 +--- a/pengine/utilization.c ++++ b/pengine/utilization.c +@@ -344,9 +344,10 @@ process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_ + int alloc_details = scores_log_level + 1; + + if (safe_str_neq(data_set->placement_strategy, "default")) { +- GListPtr gIter = NULL; ++ GHashTableIter iter; + GListPtr colocated_rscs = NULL; + gboolean any_capable = FALSE; ++ node_t *node = NULL; + + colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, rsc); + if (colocated_rscs) { +@@ -356,8 +357,11 @@ process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_ + + unallocated_utilization = sum_unallocated_utilization(rsc, colocated_rscs); + +- for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { +- node_t *node = (node_t *) gIter->data; ++ g_hash_table_iter_init(&iter, rsc->allowed_nodes); ++ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { ++ if (can_run_resources(node) == FALSE || node->weight < 0) { ++ continue; ++ } + + if (have_enough_capacity(node, rscs_id, unallocated_utilization)) { + any_capable = TRUE; +@@ -371,8 +375,11 @@ process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_ + } + + if (any_capable) { +- for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { +- node_t *node = (node_t *) gIter->data; ++ g_hash_table_iter_init(&iter, rsc->allowed_nodes); ++ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { ++ if (can_run_resources(node) == FALSE || node->weight < 0) 
{ ++ continue; ++ } + + if (have_enough_capacity(node, rscs_id, unallocated_utilization) == FALSE) { + pe_rsc_debug(rsc, "Resource %s and its colocated resources cannot be allocated to node %s: no enough capacity", +@@ -394,8 +401,11 @@ process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_ + } + + if (any_capable == FALSE) { +- for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { +- node_t *node = (node_t *) gIter->data; ++ g_hash_table_iter_init(&iter, rsc->allowed_nodes); ++ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { ++ if (can_run_resources(node) == FALSE || node->weight < 0) { ++ continue; ++ } + + if (have_enough_capacity(node, rsc->id, rsc->utilization) == FALSE) { + pe_rsc_debug(rsc, "Resource %s cannot be allocated to node %s: no enough capacity", +diff --git a/tools/fake_transition.c b/tools/fake_transition.c +index e8c37f7..fe5de95 100644 +--- a/tools/fake_transition.c ++++ b/tools/fake_transition.c +@@ -65,11 +65,14 @@ inject_transient_attr(xmlNode * cib_node, const char *name, const char *value) + xmlNode *attrs = NULL; + xmlNode *container = NULL; + xmlNode *nvp = NULL; ++ xmlChar *node_path; + const char *node_uuid = ID(cib_node); + char *nvp_id = crm_concat(name, node_uuid, '-'); + +- quiet_log("Injecting attribute %s=%s into %s '%s'", name, value, xmlGetNodePath(cib_node), ++ node_path = xmlGetNodePath(cib_node); ++ quiet_log("Injecting attribute %s=%s into %s '%s'", name, value, node_path, + ID(cib_node)); ++ free(node_path); + + attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); + if (attrs == NULL) { +diff --git a/valgrind-pcmk.suppressions b/valgrind-pcmk.suppressions +index e7caa55..2e382df 100644 +--- a/valgrind-pcmk.suppressions ++++ b/valgrind-pcmk.suppressions +@@ -20,6 +20,15 @@ + } + + { ++ Another bash leak ++ Memcheck:Leak ++ fun:malloc ++ fun:xmalloc ++ fun:set_default_locale ++ fun:main ++} ++ ++{ + Ignore option parsing + Memcheck:Leak + fun:realloc +@@ -294,4 +303,4 @@ + obj:*/libgobject-* + fun:call_init.part.0 + fun:_dl_init +-} +\ No newline at end of file ++} +diff --git a/version.m4 b/version.m4 +index 22faf65..3d5e96b 100644 +--- a/version.m4 ++++ b/version.m4 +@@ -1 +1 @@ +-m4_define([VERSION_NUMBER], [1.1.12]) ++m4_define([VERSION_NUMBER], [1.1.13]) diff --git a/pacemaker-781a388.patch b/pacemaker-781a388.patch deleted file mode 100644 index 2fe05ce..0000000 --- a/pacemaker-781a388.patch +++ /dev/null @@ -1,39349 +0,0 @@ -diff --git a/ChangeLog b/ChangeLog -index fa9cea4..180c363 100644 ---- a/ChangeLog -+++ b/ChangeLog -@@ -1,3 +1,102 @@ -+* Fri Mar 08 2013 Andrew Beekhof Pacemaker-1.1.9-1 -+- Update source tarball to revision: 7e42d77 -+- Statistics: -+ Changesets: 731 -+ Diff: 1301 files changed, 92909 insertions(+), 57455 deletions(-) -+ -+- Features added in Pacemaker-1.1.9 -+ + corosync: Allow cman and corosync 2.0 nodes to use a name other than uname() -+ + corosync: Use queues to avoid blocking when sending CPG messages -+ + ipc: Compress messages that exceed the configured IPC message limit -+ + ipc: Use queues to prevent slow clients from blocking the server -+ + ipc: Use shared memory by default -+ + lrmd: Support nagios remote monitoring -+ + lrmd: Pacemaker Remote Daemon for extending pacemaker functionality outside corosync cluster. 
-+ + pengine: Check for master/slave resources that are not OCF agents
-+ + pengine: Support a 'requires' resource meta-attribute for controlling whether it needs quorum, fencing or nothing
-+ + pengine: Support for resource container
-+ + pengine: Support resources that require unfencing before start
-+
-+- Changes since Pacemaker-1.1.8
-+ + attrd: Correctly handle deletion of non-existent attributes
-+ + Bug cl#5135 - Improved detection of the active cluster type
-+ + Bug rhbz#913093 - Use crm_node instead of uname
-+ + cib: Avoid use-after-free by correctly supporting cib_no_children for non-xpath queries
-+ + cib: Correctly process XML diffs involving element removal
-+ + cib: Performance improvements for non-DC nodes
-+ + cib: Prevent error message by correctly handling peer replies
-+ + cib: Prevent ordering changes when applying xml diffs
-+ + cib: Remove text nodes from cib replace operations
-+ + cluster: Detect node name collisions in corosync
-+ + cluster: Preserve corosync membership state when matching node name/id entries
-+ + cman: Force fenced to terminate on shutdown
-+ + cman: Ignore qdisk 'nodes'
-+ + core: Drop per-user core directories
-+ + corosync: Avoid errors when closing failed connections
-+ + corosync: Ensure peer state is preserved when matching names to nodeids
-+ + corosync: Clean up CMAP connections after querying node name
-+ + corosync: Correctly detect corosync 2.0 clusters even if we don't have permission to access it
-+ + crmd: Bug cl#5144 - Do not update the expected status of failed nodes
-+ + crmd: Correctly determine if cluster disconnection was abnormal
-+ + crmd: Correctly relay messages for remote clients (bnc#805626, bnc#804704)
-+ + crmd: Correctly stall the FSA when waiting for additional inputs
-+ + crmd: Detect and recover when we are evicted from CPG
-+ + crmd: Differentiate between a node that is up and coming up in peer_update_callback()
-+ + crmd: Have cib operation timeouts scale with node count
-+ + crmd: Improved continue/wait logic in do_dc_join_finalize()
-+ + crmd: Prevent election storms caused by getrusage() values being too close
-+ + crmd: Prevent timeouts when performing pacemaker level membership negotiation
-+ + crmd: Prevent use-after-free of fsa_message_queue during exit
-+ + crmd: Store all current actions when stalling the FSA
-+ + crm_mon: Do not try to render a blank cib and indicate the previous output is now stale
-+ + crm_mon: Fixes crm_mon crash when using snmp traps.
-+ + crm_mon: Look for the correct error codes when applying configuration updates
-+ + crm_report: Ensure policy engine logs are found
-+ + crm_report: Fix node list detection
-+ + crm_resource: Have crm_resource generate a valid transition key when sending resource commands to the crmd
-+ + date/time: Bug cl#5118 - Correctly convert seconds-since-epoch to the current time
-+ + fencing: Attempt to provide more information than just 'generic error' for failed actions
-+ + fencing: Correctly record completed but previously unknown fencing operations
-+ + fencing: Correctly terminate when all device options have been exhausted
-+ + fencing: cov#739453 - String not null terminated
-+ + fencing: Do not merge new fencing requests with stale ones from dead nodes
-+ + fencing: Do not start fencing until entire device topology is found or query results timeout.
-+ + fencing: Do not wait for the query timeout if all replies have arrived
-+ + fencing: Fix passing of parameters from CMAN containing '='
-+ + fencing: Fix non-comparison when sorting devices by priority
-+ + fencing: On failure, only try a topology device once from the remote level.
-+ + fencing: Only try peers for non-topology based operations once
-+ + fencing: Retry stonith device for duration of action's timeout period.
-+ + heartbeat: Remove incorrect assert during cluster connect
-+ + ipc: Bug cl#5110 - Prevent 100% CPU usage when looking for synchronous replies
-+ + ipc: Use 50k as the default compression threshold
-+ + legacy: Prevent assertion failure on routing ais messages (bnc#805626)
-+ + legacy: Re-enable logging from the pacemaker plugin
-+ + legacy: Relax the 'active' check for plugin based clusters to avoid false negatives
-+ + legacy: Skip peer process check if the process list is empty in crm_is_corosync_peer_active()
-+ + mcp: Only define HA_DEBUGLOG to avoid agent calls to ocf_log printing everything twice
-+ + mcp: Re-attach to existing pacemaker components when mcp fails
-+ + pengine: Any location constraint for the slave role applies to all roles
-+ + pengine: Avoid leaking memory when cleaning up failcounts and using containers
-+ + pengine: Bug cl#5101 - Ensure stop order is preserved for partially active groups
-+ + pengine: Bug cl#5140 - Allow set members to be stopped when the subsequent set has require-all=false
-+ + pengine: Bug cl#5143 - Prevent shuffling of anonymous master/slave instances
-+ + pengine: Bug rhbz#880249 - Ensure orphan masters are demoted before being stopped
-+ + pengine: Bug rhbz#880249 - Teach the PE how to recover masters into primitives
-+ + pengine: cl#5025 - Automatically clear failcount for start/monitor failures after resource parameters change
-+ + pengine: cl#5099 - Probe operation uses the timeout value from the minimum interval monitor by default (#bnc776386)
-+ + pengine: cl#5111 - When clone/master child rsc has on-fail=stop, ensure all children stop on failure.
-+ + pengine: cl#5142 - Do not delete orphaned children of an anonymous clone -+ + pengine: Correctly unpack active anonymous clones -+ + pengine: Ensure previous migrations are closed out before attempting another one -+ + pengine: Introducing the whitebox container resources feature -+ + pengine: Prevent double-free for cloned primitive from template -+ + pengine: Process rsc_ticket dependencies earlier for correctly allocating resources (bnc#802307) -+ + pengine: Remove special cases for fencing resources -+ + pengine: rhbz#902459 - Remove rsc node status for orphan resources -+ + systemd: Gracefully handle unexpected DBus return types -+ + Replace the use of the insecure mktemp(3) with mkstemp(3) -+ - * Thu Sep 20 2012 Andrew Beekhof Pacemaker-1.1.8-1 - - - Update source tarball to revision: 1a5341f -@@ -650,7 +749,7 @@ - - No longer remove RPATH data, it prevents us finding libperl.so and no other - libraries were being hardcoded - - Compile in support for heartbeat --- Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements -+- Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements - depending on which stacks are supported - - * Mon Aug 17 2009 Andrew Beekhof - 1.0.5-1 -@@ -682,7 +781,7 @@ - * Fri Jul 24 2009 Andrew Beekhof - 1.0.4-3 - - Initial Fedora checkin - - Include an AUTHORS and license file in each package --- Change the library package name to pacemaker-libs to be more -+- Change the library package name to pacemaker-libs to be more - Fedora compliant - - Remove execute permissions from xml related files - - Reference the new cluster-glue devel package name -diff --git a/Makefile.am b/Makefile.am -index 4f742e4..8cd9342 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -34,6 +34,7 @@ ACLOCAL_AMFLAGS = -I m4 - - testdir = $(datadir)/$(PACKAGE)/tests/ - test_SCRIPTS = coverage.sh BasicSanity.sh -+test_DATA = valgrind-pcmk.suppressions - - # Scratch file for ad-hoc testing - scratch_SOURCES = scratch.c -diff --git a/cib/Makefile.am b/cib/Makefile.am -index 12493ee..220451d 100644 ---- a/cib/Makefile.am -+++ b/cib/Makefile.am -@@ -5,12 +5,12 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-@@ -33,10 +33,11 @@ COMMONLIBS = $(top_builddir)/lib/common/libcrmcommon.la \ - halib_PROGRAMS = cib cibmon - - if BUILD_HELP --man8_MANS = -+man8_MANS = - %.8: % - echo Creating $@ -- chmod a+x $< -+ chmod a+x $(top_builddir)/cib/$< -+ $(top_builddir)/cib/$< --help - $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/cib/$< - endif - -@@ -50,7 +51,7 @@ cib_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(COMMONLIBS) $(CRYPTOLIB) $(CLUSTERLIBS) - - cibmon_SOURCES = cibmon.c --cibmon_LDADD = $(COMMONLIBS) -+cibmon_LDADD = $(COMMONLIBS) - - clean-generic: - rm -f *.log *.debug *.xml *~ -diff --git a/cib/callbacks.c b/cib/callbacks.c -index 07b0d45..754e218 100644 ---- a/cib/callbacks.c -+++ b/cib/callbacks.c -@@ -235,9 +235,14 @@ cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean - } - - crm_trace("Inbound: %.200s", data); -- if (op_request == NULL || cib_client == NULL) { -+ if (op_request == NULL) { -+ crm_trace("Invalid message from %p", c); - crm_ipcs_send_ack(cib_client, id, "nack", __FUNCTION__, __LINE__); - return 0; -+ -+ } else if(cib_client == NULL) { -+ crm_trace("Invalid client %p", c); -+ return 0; - } - - if (is_set(call_options, cib_sync_call)) { -@@ -692,12 +697,28 @@ cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean priv - } - - if (cib_status != pcmk_ok) { -+ const char *call = crm_element_value(request, F_CIB_CALLID); -+ - rc = cib_status; - crm_err("Operation ignored, cluster configuration is invalid." - " Please repair and restart: %s", pcmk_strerror(cib_status)); -- op_reply = cib_construct_reply(request, the_cib, cib_status); -+ -+ op_reply = create_xml_node(NULL, "cib-reply"); -+ crm_xml_add(op_reply, F_TYPE, T_CIB); -+ crm_xml_add(op_reply, F_CIB_OPERATION, op); -+ crm_xml_add(op_reply, F_CIB_CALLID, call); -+ crm_xml_add(op_reply, F_CIB_CLIENTID, client_id); -+ crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options); -+ crm_xml_add_int(op_reply, F_CIB_RC, rc); -+ -+ crm_trace("Attaching reply output"); -+ add_message_xml(op_reply, F_CIB_CALLDATA, the_cib); -+ -+ crm_log_xml_explicit(op_reply, "cib:reply"); - - } else if (process) { -+ time_t finished = 0; -+ - int now = time(NULL); - int level = LOG_INFO; - const char *section = crm_element_value(request, F_CIB_SECTION); -@@ -744,7 +765,9 @@ cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean priv - the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", - the_cib ? 
crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0"); - -- if ((now + 1) < time(NULL)) { -+ finished = time(NULL); -+ if (finished - now > 3) { -+ crm_trace("%s operation took %ds to complete", op, finished - now); - crm_write_blackbox(0, NULL); - } - -@@ -817,41 +840,6 @@ cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean priv - return; - } - --xmlNode * --cib_construct_reply(xmlNode * request, xmlNode * output, int rc) --{ -- int lpc = 0; -- xmlNode *reply = NULL; -- const char *name = NULL; -- const char *value = NULL; -- -- const char *names[] = { -- F_CIB_OPERATION, -- F_CIB_CALLID, -- F_CIB_CLIENTID, -- F_CIB_CALLOPTS -- }; -- static int max = DIMOF(names); -- -- crm_trace("Creating a basic reply"); -- reply = create_xml_node(NULL, "cib-reply"); -- crm_xml_add(reply, F_TYPE, T_CIB); -- -- for (lpc = 0; lpc < max; lpc++) { -- name = names[lpc]; -- value = crm_element_value(request, name); -- crm_xml_add(reply, name, value); -- } -- -- crm_xml_add_int(reply, F_CIB_RC, rc); -- -- if (output != NULL) { -- crm_trace("Attaching reply output"); -- add_message_xml(reply, F_CIB_CALLDATA, output); -- } -- return reply; --} -- - int - cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) - { -@@ -870,6 +858,7 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - - const char *op = NULL; - const char *section = NULL; -+ const char *call_id = crm_element_value(request, F_CIB_CALLID); - - int rc = pcmk_ok; - int rc2 = pcmk_ok; -@@ -1034,9 +1023,9 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - } - - if ((call_options & cib_inhibit_notify) == 0) { -- const char *call_id = crm_element_value(request, F_CIB_CALLID); - const char *client = crm_element_value(request, F_CIB_CLIENTNAME); - -+ crm_trace("Sending notifications"); - #ifdef SUPPORT_POSTNOTIFY - cib_post_notify(call_options, op, input, rc, the_cib); - #endif -@@ -1070,9 +1059,25 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - - done: - if ((call_options & cib_discard_reply) == 0) { -- *reply = cib_construct_reply(request, output, rc); -+ const char *caller = crm_element_value(request, F_CIB_CLIENTID); -+ -+ *reply = create_xml_node(NULL, "cib-reply"); -+ crm_xml_add(*reply, F_TYPE, T_CIB); -+ crm_xml_add(*reply, F_CIB_OPERATION, op); -+ crm_xml_add(*reply, F_CIB_CALLID, call_id); -+ crm_xml_add(*reply, F_CIB_CLIENTID, caller); -+ crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options); -+ crm_xml_add_int(*reply, F_CIB_RC, rc); -+ -+ if (output != NULL) { -+ crm_trace("Attaching reply output"); -+ add_message_xml(*reply, F_CIB_CALLDATA, output); -+ } -+ - crm_log_xml_explicit(*reply, "cib:reply"); - } -+ -+ crm_trace("cleanup"); - #if ENABLE_ACL - if (filtered_current_cib != NULL) { - free_xml(filtered_current_cib); -@@ -1082,6 +1087,7 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - if (call_type >= 0) { - cib_op_cleanup(call_type, call_options, &input, &output); - } -+ crm_trace("done"); - return rc; - } - -@@ -1206,9 +1212,7 @@ cib_ccm_dispatch(gpointer user_data) - - /* eventually it might be nice to recover and reconnect... but until then... 
*/ - crm_err("Exiting to recover from CCM connection failure"); -- crm_exit(2); -- -- return -1; -+ return crm_exit(ENOTCONN); - } - - int current_instance = 0; -@@ -1419,9 +1423,9 @@ terminate_cib(const char *caller, gboolean fast) - qb_ipcs_destroy(ipcs_shm); - - if (fast) { -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - } else { -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - } - } -diff --git a/cib/cibmon.c b/cib/cibmon.c -index f0e173b..fa12d26 100644 ---- a/cib/cibmon.c -+++ b/cib/cibmon.c -@@ -251,5 +251,5 @@ cibmon_diff(const char *event, xmlNode * msg) - void - cibmon_shutdown(int nsig) - { -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } -diff --git a/cib/common.c b/cib/common.c -index 3fd1b73..0d66857 100644 ---- a/cib/common.c -+++ b/cib/common.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2008 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -219,11 +219,12 @@ cib_get_operation_id(const char *op, int *operation) - - operation_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); - for (lpc = 1; lpc < max_msg_types; lpc++) { -- /* coverity[returned_null] Ignore */ - int *value = malloc(sizeof(int)); - -- *value = lpc; -- g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value); -+ if(value) { -+ *value = lpc; -+ g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value); -+ } - } - } - -@@ -336,11 +337,13 @@ cib_op_can_run(int call_type, int call_options, gboolean privileged, gboolean gl - int - cib_op_prepare(int call_type, xmlNode * request, xmlNode ** input, const char **section) - { -+ crm_trace("Prepare %d", call_type); - return cib_server_ops[call_type].prepare(request, input, section); - } - - int - cib_op_cleanup(int call_type, int options, xmlNode ** input, xmlNode ** output) - { -+ crm_trace("Cleanup %d", call_type); - return cib_server_ops[call_type].cleanup(options, input, output); - } -diff --git a/cib/io.c b/cib/io.c -index 1fd020f..b94030f 100644 ---- a/cib/io.c -+++ b/cib/io.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -533,19 +533,13 @@ activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op) - } - - static void --cib_diskwrite_complete(GPid pid, gint status, gpointer user_data) -+cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- int exitcode = -1; -- -- if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); -- int core = WCOREDUMP(status); -- -+ if (signo) { - crm_notice("Disk write process terminated with signal %d (pid=%d, core=%d)", signo, pid, - core); - -- } else if (WIFEXITED(status)) { -- exitcode = WEXITSTATUS(status); -+ } else { - do_crm_log(exitcode == 0 ? LOG_TRACE : LOG_ERR, "Disk write process exited (pid=%d, rc=%d)", - pid, exitcode); - } -@@ -562,7 +556,7 @@ int - write_cib_contents(gpointer p) - { - int fd = -1; -- int exit_rc = EX_OK; -+ int exit_rc = pcmk_ok; - char *digest = NULL; - xmlNode *cib_status_root = NULL; - -@@ -608,7 +602,7 @@ write_cib_contents(gpointer p) - - if (pid) { - /* Parent */ -- g_child_watch_add(pid, cib_diskwrite_complete, NULL); -+ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete); - if (bb_state == QB_LOG_STATE_ENABLED) { - /* Re-enable now that it it safe */ - qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); -@@ -619,9 +613,6 @@ write_cib_contents(gpointer p) - - /* A-synchronous write out after a fork() */ - -- /* Don't log anything unless strictly necessary */ -- set_crm_log_level(LOG_ERR); -- - /* In theory we can scribble on "the_cib" here and not affect the parent - * But lets be safe anyway - */ -@@ -645,7 +636,7 @@ write_cib_contents(gpointer p) - /* check the admin didnt modify it underneath us */ - if (validate_on_disk_cib(primary_file, NULL) == FALSE) { - crm_err("%s was manually modified while the cluster was active!", primary_file); -- exit_rc = 1; -+ exit_rc = pcmk_err_cib_modified; - goto cleanup; - } - -@@ -657,14 +648,14 @@ write_cib_contents(gpointer p) - - rc = link(primary_file, backup_file); - if (rc < 0) { -- exit_rc = 4; -+ exit_rc = pcmk_err_cib_backup; - crm_perror(LOG_ERR, "Cannot link %s to %s", primary_file, backup_file); - goto cleanup; - } - - rc = link(digest_file, backup_digest); - if (rc < 0 && errno != ENOENT) { -- exit_rc = 5; -+ exit_rc = pcmk_err_cib_backup; - crm_perror(LOG_ERR, "Cannot link %s to %s", digest_file, backup_digest); - goto cleanup; - } -@@ -696,9 +687,9 @@ write_cib_contents(gpointer p) - umask(S_IWGRP | S_IWOTH | S_IROTH); - - tmp_cib_fd = mkstemp(tmp_cib); -- if (write_xml_fd(cib_local, tmp_cib, tmp_cib_fd, FALSE) <= 0) { -+ if (tmp_cib_fd < 0 || write_xml_fd(cib_local, tmp_cib, tmp_cib_fd, FALSE) <= 0) { - crm_err("Changes couldn't be written to %s", tmp_cib); -- exit_rc = 2; -+ exit_rc = pcmk_err_cib_save; - goto cleanup; - } - -@@ -708,9 +699,9 @@ write_cib_contents(gpointer p) - admin_epoch ? admin_epoch : "0", epoch ? 
epoch : "0", digest); - - tmp_digest_fd = mkstemp(tmp_digest); -- if (write_cib_digest(cib_local, tmp_digest, tmp_digest_fd, digest) <= 0) { -+ if (tmp_digest_fd < 0 || write_cib_digest(cib_local, tmp_digest, tmp_digest_fd, digest) <= 0) { - crm_err("Digest couldn't be written to %s", tmp_digest); -- exit_rc = 3; -+ exit_rc = pcmk_err_cib_save; - goto cleanup; - } - crm_debug("Wrote digest %s to disk", digest); -diff --git a/cib/main.c b/cib/main.c -index 878aad6..6b56274 100644 ---- a/cib/main.c -+++ b/cib/main.c -@@ -478,13 +478,13 @@ cib_init(void) - - if (startCib("cib.xml") == FALSE) { - crm_crit("Cannot start CIB... terminating"); -- crm_exit(1); -+ crm_exit(ENODATA); - } - - if (stand_alone == FALSE) { - if (crm_cluster_connect(&crm_cluster) == FALSE) { - crm_crit("Cannot sign in to the cluster... terminating"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - cib_our_uname = crm_cluster.uname; - if (is_openais_cluster()) { -@@ -522,31 +522,24 @@ cib_init(void) - cib_our_uname = strdup("localhost"); - } - -- ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, &ipc_ro_callbacks); -- ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, &ipc_rw_callbacks); -- ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, &ipc_rw_callbacks); -+ cib_ipc_servers_init(&ipcs_ro, -+ &ipcs_rw, -+ &ipcs_shm, -+ &ipc_ro_callbacks, -+ &ipc_rw_callbacks); - - if (stand_alone) { - cib_is_master = TRUE; - } - -- if (ipcs_ro != NULL && ipcs_rw != NULL && ipcs_shm != NULL) { -- /* Create the mainloop and run it... */ -- mainloop = g_main_new(FALSE); -- crm_info("Starting %s mainloop", crm_system_name); -+ /* Create the mainloop and run it... */ -+ mainloop = g_main_new(FALSE); -+ crm_info("Starting %s mainloop", crm_system_name); - -- g_main_run(mainloop); -+ g_main_run(mainloop); -+ cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); - -- } else { -- crm_err("Failed to create IPC servers: shutting down and inhibiting respawn"); -- crm_exit(100); -- } -- -- qb_ipcs_destroy(ipcs_ro); -- qb_ipcs_destroy(ipcs_rw); -- qb_ipcs_destroy(ipcs_shm); -- -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - gboolean -diff --git a/cib/messages.c b/cib/messages.c -index e2892f2..8ea57c4 100644 ---- a/cib/messages.c -+++ b/cib/messages.c -@@ -132,7 +132,7 @@ cib_process_quit(const char *op, int options, const char *section, xmlNode * req - crm_trace("Processing \"%s\" event", op); - - crm_warn("The CRMd has asked us to exit... complying"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - return result; - } - -diff --git a/cib/remote.c b/cib/remote.c -index caa7057..d260e35 100644 ---- a/cib/remote.c -+++ b/cib/remote.c -@@ -133,6 +133,11 @@ init_remote_listener(int port, gboolean encrypted) - - /* create server socket */ - ssock = malloc(sizeof(int)); -+ if(ssock == NULL) { -+ crm_perror(LOG_ERR, "Can not create server socket." ERROR_SUFFIX); -+ return -1; -+ } -+ - *ssock = socket(AF_INET, SOCK_STREAM, 0); - if (*ssock == -1) { - crm_perror(LOG_ERR, "Can not create server socket." 
ERROR_SUFFIX); -@@ -314,20 +319,6 @@ cib_remote_listen(gpointer data) - return TRUE; - } - -- if (ssock == remote_tls_fd) { --#ifdef HAVE_GNUTLS_GNUTLS_H -- /* create gnutls session for the server socket */ -- new_client->remote->tls_session = -- crm_create_anon_tls_session(csock, GNUTLS_SERVER, anon_cred_s); -- -- if (new_client->remote->tls_session == NULL) { -- crm_err("TLS session creation failed"); -- close(csock); -- return TRUE; -- } --#endif -- } -- - num_clients++; - - crm_client_init(); -@@ -338,19 +329,29 @@ cib_remote_listen(gpointer data) - - g_hash_table_insert(client_connections, new_client->id /* Should work */ , new_client); - -- /* clients have a few seconds to perform handshake. */ -- new_client->remote->auth_timeout = -- g_timeout_add(REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, new_client); -- - if (ssock == remote_tls_fd) { - #ifdef HAVE_GNUTLS_GNUTLS_H - new_client->kind = CRM_CLIENT_TLS; -+ -+ /* create gnutls session for the server socket */ -+ new_client->remote->tls_session = -+ crm_create_anon_tls_session(csock, GNUTLS_SERVER, anon_cred_s); -+ -+ if (new_client->remote->tls_session == NULL) { -+ crm_err("TLS session creation failed"); -+ close(csock); -+ return TRUE; -+ } - #endif - } else { - new_client->kind = CRM_CLIENT_TCP; - new_client->remote->tcp_socket = csock; - } - -+ /* clients have a few seconds to perform handshake. */ -+ new_client->remote->auth_timeout = -+ g_timeout_add(REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, new_client); -+ - new_client->remote->source = - mainloop_add_fd("cib-remote-client", G_PRIORITY_DEFAULT, csock, new_client, - &remote_client_fd_callbacks); -diff --git a/configure.ac b/configure.ac -index 454677a..be8261a 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -4,7 +4,7 @@ dnl - dnl License: GNU General Public License (GPL) - - dnl =============================================== --dnl Bootstrap -+dnl Bootstrap - dnl =============================================== - AC_PREREQ(2.59) - -@@ -19,7 +19,7 @@ dnl checks for compiler characteristics - dnl checks for library functions - dnl checks for system services - --AC_INIT(pacemaker, 1.1.8, pacemaker@oss.clusterlabs.org,,http://www.clusterlabs.org) -+AC_INIT(pacemaker, 1.1.9, pacemaker@oss.clusterlabs.org,,http://www.clusterlabs.org) - CRM_DTD_VERSION="1.2" - - PCMK_FEATURES="" -@@ -61,7 +61,7 @@ AC_SUBST(PACKAGE_SERIES) - AC_SUBST(PACKAGE_VERSION) - - dnl automake >= 1.11 offers --enable-silent-rules for suppressing the output from --dnl normal compilation. When a failure occurs, it will then display the full -+dnl normal compilation. When a failure occurs, it will then display the full - dnl command line - dnl Wrap in m4_ifdef to avoid breaking on older platforms - m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) -@@ -102,7 +102,7 @@ AC_CHECK_SIZEOF(long long) - AC_STRUCT_TIMEZONE - - dnl =============================================== --dnl Helpers -+dnl Helpers - dnl =============================================== - cc_supports_flag() { - local CFLAGS="$@" -@@ -112,7 +112,7 @@ cc_supports_flag() { - } - - try_extract_header_define() { -- AC_MSG_CHECKING(if $2 in $1 exists. 
If not defaulting to $3) -+ AC_MSG_CHECKING(if $2 in $1 exists) - Cfile=$srcdir/extract_define.$2.${$} - printf "#include \n" > ${Cfile}.c - printf "#include <%s>\n" $1 >> ${Cfile}.c -@@ -120,14 +120,19 @@ try_extract_header_define() { - printf "#ifdef %s\n" $2 >> ${Cfile}.c - printf "printf(\"%%s\", %s);\n" $2 >> ${Cfile}.c - printf "#endif \n return 0; }\n" >> ${Cfile}.c -- $CC $CFLAGS ${Cfile}.c -o ${Cfile} -- value=`${Cfile}` -+ $CC $CFLAGS ${Cfile}.c -o ${Cfile} 2>/dev/null -+ value= -+ if test -x ${Cfile}; then -+ value=`${Cfile} 2>/dev/null` -+ fi - if test x"${value}" == x""; then - value=$3 -+ AC_MSG_RESULT(default: $value) -+ else -+ AC_MSG_RESULT($value) - fi -- AC_MSG_RESULT($value) - printf $value -- rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno -+ rm -rf ${Cfile}.cc ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno - } - - extract_header_define() { -@@ -164,7 +169,7 @@ AC_ARG_ENABLE([fatal-warnings], - [default=yes]]) - - AC_ARG_ENABLE([quiet], --[ --enable-quiet -+[ --enable-quiet - Supress make output unless there is an error - [default=no]]) - -@@ -173,7 +178,7 @@ AC_ARG_ENABLE([thread-safe], - [default=no]]) - - AC_ARG_ENABLE([bundled-ltdl], --[ --enable-bundled-ltdl Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]]) -+[ --enable-bundled-ltdl Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]]) - LTDL_LIBS="" - - AC_ARG_ENABLE([no-stack], -@@ -189,14 +194,14 @@ AC_ARG_ENABLE([systemd], - Do not build support for the Systemd init system [default=yes]]) - - AC_ARG_WITH(ais, -- [ --with-ais -+ [ --with-ais - Support the Corosync messaging and membership layer ], - [ SUPPORT_CS=$withval ], - [ SUPPORT_CS=try ], - ) - - AC_ARG_WITH(corosync, -- [ --with-corosync -+ [ --with-corosync - Support the Corosync messaging and membership layer ], - [ SUPPORT_CS=$withval ] - dnl initialized in AC_ARG_WITH(ais...) 
already, -@@ -204,21 +209,21 @@ dnl don't reset to try if it was given as --without-ais - ) - - AC_ARG_WITH(heartbeat, -- [ --with-heartbeat -+ [ --with-heartbeat - Support the Heartbeat messaging and membership layer ], - [ SUPPORT_HEARTBEAT=$withval ], - [ SUPPORT_HEARTBEAT=try ], - ) - - AC_ARG_WITH(cman, -- [ --with-cman -+ [ --with-cman - Support the consumption of membership and quorum from cman ], - [ SUPPORT_CMAN=$withval ], - [ SUPPORT_CMAN=try ], - ) - - AC_ARG_WITH(cpg, -- [ --with-cs-quorum -+ [ --with-cs-quorum - Support the consumption of membership and quorum from corosync ], - [ SUPPORT_CS_QUORUM=$withval ], - [ SUPPORT_CS_QUORUM=try ], -@@ -230,7 +235,7 @@ AC_ARG_WITH(nagios, - [ SUPPORT_NAGIOS=$withval ], - [ SUPPORT_NAGIOS=try ], - ) -- -+ - AC_ARG_WITH(nagios-plugin-dir, - [ --with-nagios-plugin-dir=DIR - Directory for nagios plugins [${NAGIOS_PLUGIN_DIR}]], -@@ -244,14 +249,14 @@ AC_ARG_WITH(nagios-metadata-dir, - ) - - AC_ARG_WITH(snmp, -- [ --with-snmp -+ [ --with-snmp - Support the SNMP protocol ], - [ SUPPORT_SNMP=$withval ], - [ SUPPORT_SNMP=try ], - ) - - AC_ARG_WITH(esmtp, -- [ --with-esmtp -+ [ --with-esmtp - Support the sending mail notifications with the esmtp library ], - [ SUPPORT_ESMTP=$withval ], - [ SUPPORT_ESMTP=try ], -@@ -264,14 +269,21 @@ AC_ARG_WITH(acl, - [ SUPPORT_ACL=no ], - ) - -+AC_ARG_WITH(cibsecrets, -+ [ --with-cibsecrets -+ Support CIB secrets ], -+ [ SUPPORT_CIBSECRETS=$withval ], -+ [ SUPPORT_CIBSECRETS=no ], -+) -+ - CSPREFIX="" - AC_ARG_WITH(ais-prefix, - [ --with-ais-prefix=DIR Prefix used when Corosync was installed [$prefix]], -- [ CSPREFIX=$withval ], -+ [ CSPREFIX=$withval ], - [ CSPREFIX=$prefix ]) - - LCRSODIR="" --AC_ARG_WITH(lcrso-dir, -+AC_ARG_WITH(lcrso-dir, - [ --with-lcrso-dir=DIR Corosync lcrso files. ], - [ LCRSODIR="$withval" ]) - -@@ -282,7 +294,7 @@ AC_ARG_WITH(initdir, - - SUPPORT_PROFILING=0 - AC_ARG_WITH(profiling, -- [ --with-profiling -+ [ --with-profiling - Support gprof profiling ], - [ SUPPORT_PROFILING=$withval ]) - -@@ -381,7 +393,7 @@ case $libdir in - ;; - esac - --dnl Expand autoconf variables so that we dont end up with '${prefix}' -+dnl Expand autoconf variables so that we dont end up with '${prefix}' - dnl in #defines and python scripts - dnl NOTE: Autoconf deliberately leaves them unexpanded to allow - dnl make exec_prefix=/foo install -@@ -414,7 +426,7 @@ AC_SUBST(docdir) - for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \ - sharedstatedir localstatedir libdir includedir oldincludedir infodir \ - mandir INITDIR docdir --do -+do - dirname=`eval echo '${'${j}'}'` - if - test ! -d "$dirname" -@@ -426,7 +438,7 @@ done - dnl This OS-based decision-making is poor autotools practice; - dnl feature-based mechanisms are strongly preferred. - dnl --dnl So keep this section to a bare minimum; regard as a "necessary evil". -+dnl So keep this section to a bare minimum; regard as a "necessary evil". 
- - case "$host_os" in - *bsd*) LIBS="-L/usr/local/lib" -@@ -435,18 +447,18 @@ case "$host_os" in - ;; - *solaris*) - ;; --*linux*) -+*linux*) - AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform) - CFLAGS="$CFLAGS -I${prefix}/include" - ;; --darwin*) -+darwin*) - AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform) - LIBS="$LIBS -L${prefix}/lib" - CFLAGS="$CFLAGS -I${prefix}/include" - ;; - esac - --dnl Eventually remove this -+dnl Eventually remove this - CFLAGS="$CFLAGS -I${prefix}/include/heartbeat" - - AC_SUBST(INIT_EXT) -@@ -475,14 +487,14 @@ AC_COMPILE_IFELSE( - #include - ], - [ --int max = 512; -+int max = 512; - uint64_t bignum = 42; - char *buffer = malloc(max); - const char *random = "random"; - snprintf(buffer, max-1, "", bignum, random); - fprintf(stderr, "Result: %s\n", buffer); - ] -- )], -+ )], - [U64T="%lu"], - [U64T="%llu"] - ) -@@ -641,7 +653,7 @@ else - GPKGNAME="glib-2.0" - fi - --if -+if - $PKGCONFIG --exists $GPKGNAME - then - GLIBCONFIG="$PKGCONFIG $GPKGNAME" -@@ -652,22 +664,12 @@ else - $PKGCONFIG --cflags $GPKGNAME; echo $? - $PKGCONFIG $GPKGNAME; echo $? - set +x -- -+ - AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE}) - fi - AC_MSG_RESULT(using $GLIBCONFIG) - --AC_CHECK_LIB(glib-2.0, g_hash_table_get_values) --if test "x$ac_cv_lib_glib_2_0_g_hash_table_get_values" != x""yes; then -- AC_DEFINE_UNQUOTED(NEED_G_HASH_ITER, 1, glib-2.0 has no hashtable iterators) --fi -- --AC_CHECK_LIB(glib-2.0, g_list_free_full) --if test "x$ac_cv_lib_glib_2_0_g_list_free_full" != x""yes; then -- AC_DEFINE_UNQUOTED(NEED_G_LIST_FREE_FULL, 1, glib-2.0 has no g_list_free_full) --fi -- --if -+if - $PKGCONFIG --exists systemd - then - systemdunitdir=`$PKGCONFIG --variable=systemdsystemunitdir systemd` -@@ -754,6 +756,7 @@ AC_CHECK_HEADERS(sys/dir.h) - AC_CHECK_HEADERS(sys/ioctl.h) - AC_CHECK_HEADERS(sys/param.h) - AC_CHECK_HEADERS(sys/poll.h) -+AC_CHECK_HEADERS(sys/reboot.h) - AC_CHECK_HEADERS(sys/resource.h) - AC_CHECK_HEADERS(sys/select.h) - AC_CHECK_HEADERS(sys/socket.h) -@@ -770,7 +773,7 @@ AC_CHECK_HEADERS(time.h) - AC_CHECK_HEADERS(unistd.h) - AC_CHECK_HEADERS(winsock.h) - --dnl These headers need prerequisits before the tests will pass -+dnl These headers need prerequisits before the tests will pass - dnl AC_CHECK_HEADERS(net/if.h) - dnl AC_CHECK_HEADERS(netinet/icmp6.h) - dnl AC_CHECK_HEADERS(netinet/ip6.h) -@@ -920,7 +923,7 @@ if test "x$CURSESLIBS" != "x"; then - fi - - dnl Check for printw() prototype compatibility --if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then -+if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then - AC_MSG_CHECKING(whether printw() requires argument of "const char *") - ac_save_LIBS=$LIBS - LIBS="$CURSESLIBS $LIBS" -@@ -939,7 +942,7 @@ if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_fla - #endif - ], - [printw((const char *)"Test");] -- )], -+ )], - [ac_cv_compatible_printw=yes], - [ac_cv_compatible_printw=no] - ) -@@ -963,7 +966,7 @@ dnl Profiling and GProf - dnl ======================================================================== - - case $SUPPORT_PROFILING in -- 1|yes|true) -+ 1|yes|true) - SUPPORT_PROFILING=1 - - dnl Enable gprof -@@ -983,7 +986,7 @@ esac - AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for gprof profiling) - - case $SUPPORT_GCOV in -- 1|yes|true) -+ 1|yes|true) - SUPPORT_GCOV=1 - - dnl Enable gprof -@@ -995,10 +998,10 @@ case $SUPPORT_GCOV in - - 
dnl Turn off optimization so code coverage tool - dnl can get accurate line numbers -- AC_MSG_NOTICE(Old CFLAGS: $CFLAGS) -+ AC_MSG_NOTICE(Old CFLAGS: $CFLAGS) - CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g'` - CFLAGS="$CFLAGS -O0" -- AC_MSG_NOTICE(New CFLAGS: $CFLAGS) -+ AC_MSG_NOTICE(New CFLAGS: $CFLAGS) - - dnl Update features - PCMK_FEATURES="$PCMK_FEATURES gcov" -@@ -1021,25 +1024,24 @@ fi - - PKG_CHECK_MODULES(libqb, libqb, HAVE_libqb=1, HAVE_libqb=0) - AC_CHECK_HEADERS(qb/qbipc_common.h) --AC_CHECK_LIB(qb, qb_ipcc_is_connected) --AC_CHECK_FUNCS(qb_ipcc_is_connected) -+AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set) - - LIBQB_LOG=1 - PCMK_FEATURES="$PCMK_FEATURES libqb-logging libqb-ipc" - --if test $ac_cv_lib_qb_qb_ipcc_is_connected != yes; then -- AC_MSG_FAILURE(Version of IPC in libqb is not new enough) -+if -+ !pkg-config --atleast-version 0.13 libqb -+then -+ AC_MSG_FAILURE(Version of libqb is too old: v0.13 or greater requried) - fi - --AC_DEFINE_UNQUOTED(LIBQB_LOGGING, $LIBQB_LOG, Use libqb for logging) --AC_DEFINE_UNQUOTED(LIBQB_IPC, 0, Use libqb for IPC) -- - LIBS="$LIBS $libqb_LIBS" - - AC_CHECK_HEADERS(heartbeat/hb_config.h) - AC_CHECK_HEADERS(heartbeat/glue_config.h) - AC_CHECK_HEADERS(stonith/stonith.h) - AC_CHECK_HEADERS(agent_config.h) -+ - GLUE_HEADER=none - HAVE_GLUE=0 - if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then -@@ -1111,7 +1113,7 @@ AC_DEFINE_UNQUOTED(CRM_DAEMON_DIR,"$CRM_DAEMON_DIR", Location for Pacemaker daem - AC_SUBST(CRM_DAEMON_DIR) - - HB_DAEMON_DIR=`try_extract_header_define $GLUE_HEADER HA_LIBHBDIR $libdir/heartbeat` --AC_DEFINE_UNQUOTED(HB_DAEMON_DIR,"$HB_DAEMON_DIR", Location for Heartbeat expects Pacemaker daemons to be in) -+AC_DEFINE_UNQUOTED(HB_DAEMON_DIR,"$HB_DAEMON_DIR", Location Heartbeat expects Pacemaker daemons to be in) - AC_SUBST(HB_DAEMON_DIR) - - dnl Needed so that the Corosync plugin can clear out the directory as Heartbeat does -@@ -1119,7 +1121,7 @@ HA_STATE_DIR=`try_extract_header_define $GLUE_HEADER HA_VARRUNDIR ${localstatedi - AC_DEFINE_UNQUOTED(HA_STATE_DIR,"$HA_STATE_DIR", Where Heartbeat keeps state files and sockets) - AC_SUBST(HA_STATE_DIR) - --CRM_RSCTMP_DIR=`try_extract_header_define agent_config.h HA_RSCTMPDIR $HA_STATE_DIR/heartbeat/rsctmp` -+CRM_RSCTMP_DIR=`try_extract_header_define agent_config.h HA_RSCTMPDIR $HA_STATE_DIR/resource-agents` - AC_MSG_CHECKING(Scratch dir for resource agents) - AC_MSG_RESULT($CRM_RSCTMP_DIR) - AC_DEFINE_UNQUOTED(CRM_RSCTMP_DIR,"$CRM_RSCTMP_DIR", Where resource agents should keep state files) -@@ -1160,7 +1162,7 @@ elif test -x $GIT -a -d .git; then - - else - # The current directory name make a reasonable default -- # Most generated archives will include the hash or tag -+ # Most generated archives will include the hash or tag - BASE=`basename $PWD` - BUILD_VERSION=`echo $BASE | sed s:.*[[Pp]]acemaker-::` - AC_MSG_RESULT(directory based hash: $BUILD_VERSION) -@@ -1201,7 +1203,7 @@ AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1) - case $SUPPORT_NAGIOS in - 1|yes|true|try) - SUPPORT_NAGIOS=1;; -- *) -+ *) - SUPPORT_NAGIOS=0;; - esac - -@@ -1295,13 +1297,13 @@ else - SUPPORT_CS=1 - CFLAGS="$CFLAGS $oldipc_FLAGS $cpg_FLAGS $cfg_FLAGS" - COROSYNC_LIBS="$COROSYNC_LIBS $oldipc_LIBS $cpg_LIBS $cfg_LIBS" -- -+ - elif test $HAVE_libqb = 1; then - SUPPORT_CS=1 - CS_USES_LIBQB=1 - CFLAGS="$CFLAGS $libqb_FLAGS $cpg_FLAGS $cfg_FLAGS" - COROSYNC_LIBS="$COROSYNC_LIBS $libqb_LIBS $cpg_LIBS $cfg_LIBS" -- AC_CHECK_LIB(corosync_common, cs_strerror) -+ 
AC_CHECK_LIB(corosync_common, cs_strerror) - - else - aisreason="corosync/libqb IPC libraries not found by pkg_config" -@@ -1318,7 +1320,7 @@ if test $SUPPORT_CS = 1 -a x$HAVE_oldipc = x0 ; then - dnl The only option now is the built-in quorum API - CFLAGS="$CFLAGS $cmap_CFLAGS $quorum_CFLAGS" - COROSYNC_LIBS="$COROSYNC_LIBS $cmap_LIBS $quorum_LIBS" -- -+ - STACKS="$STACKS corosync-native" - AC_DEFINE_UNQUOTED(SUPPORT_CS_QUORUM, 1, Support the consumption of membership and quorum from corosync) - fi -@@ -1352,9 +1354,9 @@ if test $SUPPORT_CS = 1; then - elif test $SUPPORT_CS != 0; then - SUPPORT_CS=0 - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support Corosync: $aisreason) -+ AC_MSG_WARN(Unable to support Corosync: $aisreason) - else -- AC_MSG_FAILURE(Unable to support Corosync: $aisreason) -+ AC_MSG_FAILURE(Unable to support Corosync: $aisreason) - fi - fi - -@@ -1453,9 +1455,9 @@ else - SNMPLIBS="" - SUPPORT_SNMP=0 - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support SNMP) -+ AC_MSG_WARN(Unable to support SNMP) - else -- AC_MSG_FAILURE(Unable to support SNMP) -+ AC_MSG_FAILURE(Unable to support SNMP) - fi - else - SUPPORT_SNMP=1 -@@ -1512,9 +1514,9 @@ else - if test $SUPPORT_ESMTP = no; then - SUPPORT_ESMTP=0 - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support ESMTP) -+ AC_MSG_WARN(Unable to support ESMTP) - else -- AC_MSG_FAILURE(Unable to support ESMTP) -+ AC_MSG_FAILURE(Unable to support ESMTP) - fi - else - SUPPORT_ESMTP=1 -@@ -1527,7 +1529,7 @@ AM_CONDITIONAL(ENABLE_ESMTP, test "$SUPPORT_ESMTP" = "1") - AC_DEFINE_UNQUOTED(ENABLE_ESMTP, $SUPPORT_ESMTP, Build in support for sending mail notifications with ESMTP) - - dnl ======================================================================== --dnl ACL -+dnl ACL - dnl ======================================================================== - - case $SUPPORT_ACL in -@@ -1551,9 +1553,9 @@ else - - if test $SUPPORT_ACL = 0; then - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0) -+ AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0) - else -- AC_MSG_FAILURE(Unable to support ACL. You need to use libqb > 0.13.0) -+ AC_MSG_FAILURE(Unable to support ACL. 
You need to use libqb > 0.13.0) - fi - fi - fi -@@ -1566,6 +1568,32 @@ AM_CONDITIONAL(ENABLE_ACL, test "$SUPPORT_ACL" = "1") - AC_DEFINE_UNQUOTED(ENABLE_ACL, $SUPPORT_ACL, Build in support for CIB ACL) - - dnl ======================================================================== -+dnl CIB secrets -+dnl ======================================================================== -+ -+case $SUPPORT_CIBSECRETS in -+ 1|yes|true|try) -+ SUPPORT_CIBSECRETS=1;; -+ *) -+ SUPPORT_CIBSECRETS=0;; -+esac -+ -+AC_DEFINE_UNQUOTED(SUPPORT_CIBSECRETS, $SUPPORT_CIBSECRETS, Support CIB secrets) -+AM_CONDITIONAL(BUILD_CIBSECRETS, test $SUPPORT_CIBSECRETS = 1) -+ -+if test $SUPPORT_CIBSECRETS = 1; then -+ PCMK_FEATURES="$PCMK_FEATURES cibsecrets" -+ -+ LRM_CIBSECRETS_DIR="${localstatedir}/lib/pacemaker/lrm/secrets" -+ AC_DEFINE_UNQUOTED(LRM_CIBSECRETS_DIR,"$LRM_CIBSECRETS_DIR", Location for CIB secrets) -+ AC_SUBST(LRM_CIBSECRETS_DIR) -+ -+ LRM_LEGACY_CIBSECRETS_DIR="${localstatedir}/lib/heartbeat/lrm/secrets" -+ AC_DEFINE_UNQUOTED(LRM_LEGACY_CIBSECRETS_DIR,"$LRM_LEGACY_CIBSECRETS_DIR", Legacy location for CIB secrets) -+ AC_SUBST(LRM_LEGACY_CIBSECRETS_DIR) -+fi -+ -+dnl ======================================================================== - dnl GnuTLS - dnl ======================================================================== - -@@ -1652,21 +1680,21 @@ else - -fstack-protector-all - -Wall - -Waggregate-return -- -Wbad-function-cast -- -Wcast-align -+ -Wbad-function-cast -+ -Wcast-align - -Wdeclaration-after-statement - -Wendif-labels - -Wfloat-equal - -Wformat=2 - -Wformat-security - -Wformat-nonliteral -- -Wmissing-prototypes -- -Wmissing-declarations -+ -Wmissing-prototypes -+ -Wmissing-declarations - -Wnested-externs - -Wno-long-long - -Wno-strict-aliasing - -Wunused-but-set-variable -- -Wpointer-arith -+ -Wpointer-arith - -Wstrict-prototypes - -Wunsigned-char - -Wwrite-strings" -@@ -1740,7 +1768,7 @@ AC_SUBST(LIBADD_INTL) dnl extra flags for GNU gettext stuff... 
- - AC_SUBST(LOCALE) - --dnl Options for cleaning up the compiler output -+dnl Options for cleaning up the compiler output - QUIET_LIBTOOL_OPTS="" - QUIET_MAKE_OPTS="" - if test "x${enable_quiet}" = "xyes"; then -@@ -1778,6 +1806,7 @@ pengine/Makefile \ - doc/Makefile \ - doc/Pacemaker_Explained/publican.cfg \ - doc/Clusters_from_Scratch/publican.cfg \ -+ doc/Pacemaker_Remote/publican.cfg \ - include/Makefile \ - include/crm/Makefile \ - include/crm/cib/Makefile \ -@@ -1806,20 +1835,26 @@ lib/Makefile \ - mcp/Makefile \ - mcp/pacemaker \ - mcp/pacemaker.service \ -+ mcp/pacemaker.upstart \ -+ mcp/pacemaker.combined.upstart \ - fencing/Makefile \ - fencing/regression.py \ - lrmd/Makefile \ - lrmd/regression.py \ -+ lrmd/pacemaker_remote.service \ -+ lrmd/pacemaker_remote \ - extra/Makefile \ - extra/resources/Makefile \ - extra/rgmanager/Makefile \ - tools/Makefile \ - tools/crm_report \ -+ tools/report.common \ -+ tools/cibsecret \ - xml/Makefile \ - lib/gnu/Makefile \ - ) - --dnl Now process the entire list of files added by previous -+dnl Now process the entire list of files added by previous - dnl calls to AC_CONFIG_FILES() - AC_OUTPUT() - -@@ -1850,4 +1885,3 @@ AC_MSG_RESULT([]) - AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) - AC_MSG_RESULT([ Libraries = ${LIBS}]) - AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) -- -diff --git a/crmd/callbacks.c b/crmd/callbacks.c -index 954473f..f88fc93 100644 ---- a/crmd/callbacks.c -+++ b/crmd/callbacks.c -@@ -194,7 +194,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d - /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */ - stop_te_timer(down->timer); - -- erase_node_from_join(node->uname); -+ crm_update_peer_join(__FUNCTION__, node, crm_join_none); - crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN); - check_join_state(fsa_state, __FUNCTION__); - -@@ -208,8 +208,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d - } else if (appeared == FALSE) { - crm_notice("Stonith/shutdown of %s not matched", node->uname); - -- erase_node_from_join(node->uname); -- crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN); -+ crm_update_peer_join(__FUNCTION__, node, crm_join_none); - check_join_state(fsa_state, __FUNCTION__); - - abort_transition(INFINITY, tg_restart, "Node failure", NULL); -diff --git a/crmd/control.c b/crmd/control.c -index 50db30b..7f423db 100644 ---- a/crmd/control.c -+++ b/crmd/control.c -@@ -118,6 +118,10 @@ do_ha_control(long long action, - } - fsa_our_uname = cluster->uname; - fsa_our_uuid = cluster->uuid; -+ if(cluster->uuid == NULL) { -+ crm_err("Could not obtain local uuid"); -+ registered = FALSE; -+ } - - if (registered == FALSE) { - set_bit(fsa_input_register, R_HA_DISCONNECTED); -@@ -193,6 +197,7 @@ extern xmlNode *max_generation_xml; - extern GHashTable *resource_history; - extern GHashTable *voted; - extern GHashTable *reload_hash; -+extern char *te_client_id; - - void log_connected_client(gpointer key, gpointer value, gpointer user_data); - -@@ -205,20 +210,92 @@ log_connected_client(gpointer key, gpointer value, gpointer user_data) - } - - int -+crmd_fast_exit(int rc) -+{ -+ if (is_set(fsa_input_register, R_STAYDOWN)) { -+ crm_warn("Inhibiting respawn: %d -> %d", rc, 100); -+ rc = 100; -+ } -+ -+ if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) { -+ crm_err("Could not recover from internal error"); -+ rc = pcmk_err_generic; -+ } -+ return crm_exit(rc); -+} -+ -+int - crmd_exit(int rc) - { - 
GListPtr gIter = NULL; -+ GMainLoop *mloop = crmd_mainloop; -+ -+ static bool in_progress = FALSE; -+ -+ if(in_progress && rc == 0) { -+ crm_debug("Exit is already in progress"); -+ return rc; -+ -+ } else if(in_progress) { -+ crm_notice("Error during shutdown process, terminating now: %s (%d)", pcmk_strerror(rc), rc); -+ crm_write_blackbox(SIGTRAP, NULL); -+ crmd_fast_exit(rc); -+ } -+ -+ in_progress = TRUE; -+ crm_trace("Preparing to exit: %d", rc); -+ -+ /* Suppress secondary errors resulting from us disconnecting everything */ -+ set_bit(fsa_input_register, R_HA_DISCONNECTED); -+ -+/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ -+ -+ if(ipcs) { -+ crm_trace("Closing IPC server"); -+ mainloop_del_ipc_server(ipcs); -+ ipcs = NULL; -+ } - - if (attrd_ipc) { -+ crm_trace("Closing attrd connection"); - crm_ipc_close(attrd_ipc); - crm_ipc_destroy(attrd_ipc); -+ attrd_ipc = NULL; - } -- if (crmd_mainloop) { -- g_main_loop_quit(crmd_mainloop); -- g_main_loop_unref(crmd_mainloop); -+ -+ if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) { -+ crm_trace("Disconnecting Policy Engine"); -+ qb_ipcs_disconnect(pe_subsystem->client->ipcs); -+ } -+ -+ if(stonith_api) { -+ crm_trace("Disconnecting fencing API"); -+ clear_bit(fsa_input_register, R_ST_REQUIRED); -+ stonith_api->cmds->free(stonith_api); stonith_api = NULL; -+ } -+ -+ if (rc == pcmk_ok && crmd_mainloop == NULL) { -+ crm_debug("No mainloop detected"); -+ rc = EPROTO; - } -+ -+ /* On an error, just get out. -+ * -+ * Otherwise, make the effort to have mainloop exit gracefully so -+ * that it (mostly) cleans up after itself and valgrind has less -+ * to report on - allowing real errors stand out -+ */ -+ if(rc != pcmk_ok) { -+ crm_notice("Forcing immediate exit: %s (%d)", pcmk_strerror(rc), rc); -+ crm_write_blackbox(SIGTRAP, NULL); -+ return crmd_fast_exit(rc); -+ } -+ -+/* Clean up as much memory as possible for valgrind */ -+ - #if SUPPORT_HEARTBEAT - if (fsa_cluster_conn) { -+ crm_trace("Disconnecting heartbeat"); - fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); - fsa_cluster_conn = NULL; - } -@@ -233,58 +310,104 @@ crmd_exit(int rc) - fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); - delete_fsa_input(fsa_data); - } -- g_list_free(fsa_message_queue); -- fsa_message_queue = NULL; - -- crm_client_cleanup(); -- empty_uuid_cache(); -- crm_peer_destroy(); - clear_bit(fsa_input_register, R_MEMBERSHIP); -+ g_list_free(fsa_message_queue); fsa_message_queue = NULL; - -- if (te_subsystem->client && te_subsystem->client->ipcs) { -- crm_debug("Full destroy: TE"); -- qb_ipcs_disconnect(te_subsystem->client->ipcs); -- } -- free(te_subsystem); -- -- if (pe_subsystem->client && pe_subsystem->client->ipcs) { -- crm_debug("Full destroy: PE"); -- qb_ipcs_disconnect(pe_subsystem->client->ipcs); -- } -- free(pe_subsystem); -- -- free(cib_subsystem); -+ free(pe_subsystem); pe_subsystem = NULL; -+ free(te_subsystem); te_subsystem = NULL; -+ free(cib_subsystem); cib_subsystem = NULL; - - if (reload_hash) { -- g_hash_table_destroy(reload_hash); -+ crm_trace("Destroying reload cache with %d members", g_hash_table_size(reload_hash)); -+ g_hash_table_destroy(reload_hash); reload_hash = NULL; - } -+ - if (voted) { -- g_hash_table_destroy(voted); -+ crm_trace("Destroying voted cache with %d members", g_hash_table_size(voted)); -+ g_hash_table_destroy(voted); voted = NULL; - } - - cib_delete(fsa_cib_conn); - fsa_cib_conn = NULL; - -+ verify_stopped(fsa_state, LOG_WARNING); -+ 
clear_bit(fsa_input_register, R_LRM_CONNECTED); - lrm_state_destroy_all(); - -- free(transition_timer); -- free(integration_timer); -- free(finalization_timer); -- free(election_trigger); -- free(election_timeout); -- free(shutdown_escalation_timer); -- free(wait_timer); -- free(recheck_timer); -+ /* This basically will not work, since mainloop has a reference to it */ -+ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; -+ -+ mainloop_destroy_trigger(config_read); config_read = NULL; -+ mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; -+ mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; - -- free(fsa_our_dc_version); -- free(fsa_our_uname); -- free(fsa_our_uuid); -- free(fsa_our_dc); -+ crm_client_cleanup(); -+ crm_peer_destroy(); - -- free(max_generation_from); -- free_xml(max_generation_xml); -+ crm_timer_stop(transition_timer); -+ crm_timer_stop(integration_timer); -+ crm_timer_stop(finalization_timer); -+ crm_timer_stop(election_trigger); -+ crm_timer_stop(election_timeout); -+ crm_timer_stop(shutdown_escalation_timer); -+ crm_timer_stop(wait_timer); -+ crm_timer_stop(recheck_timer); -+ -+ free(transition_timer); transition_timer = NULL; -+ free(integration_timer); integration_timer = NULL; -+ free(finalization_timer); finalization_timer = NULL; -+ free(election_trigger); election_trigger = NULL; -+ free(election_timeout); election_timeout = NULL; -+ free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; -+ free(wait_timer); wait_timer = NULL; -+ free(recheck_timer); recheck_timer = NULL; -+ -+ free(fsa_our_dc_version); fsa_our_dc_version = NULL; -+ free(fsa_our_uname); fsa_our_uname = NULL; -+ free(fsa_our_uuid); fsa_our_uuid = NULL; -+ free(fsa_our_dc); fsa_our_dc = NULL; -+ -+ free(te_uuid); te_uuid = NULL; -+ free(te_client_id); te_client_id = NULL; -+ free(fsa_pe_ref); fsa_pe_ref = NULL; -+ free(failed_stop_offset); failed_stop_offset = NULL; -+ free(failed_start_offset); failed_start_offset = NULL; -+ -+ free(max_generation_from); max_generation_from = NULL; -+ free_xml(max_generation_xml); max_generation_xml = NULL; -+ -+ mainloop_destroy_signal(SIGUSR1); -+ mainloop_destroy_signal(SIGTERM); -+ mainloop_destroy_signal(SIGTRAP); -+ mainloop_destroy_signal(SIGCHLD); -+ -+ if (mloop) { -+ int lpc = 0; -+ GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); -+ -+ /* Don't re-enter this block */ -+ crmd_mainloop = NULL; -+ -+ crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); -+ -+ while(g_main_context_pending(ctx) && lpc < 10) { -+ lpc++; -+ crm_trace("Iteration %d", lpc); -+ g_main_context_dispatch(ctx); -+ } - -- return crm_exit(rc); -+ crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); -+ g_main_loop_quit(mloop); -+ -+ /* Won't do anything yet, since we're inside it now */ -+ g_main_loop_unref(mloop); -+ -+ crm_trace("Done %d", rc); -+ } -+ -+ /* Graceful */ -+ return rc; - } - - /* A_EXIT_0, A_EXIT_1 */ -@@ -293,31 +416,22 @@ do_exit(long long action, - enum crmd_fsa_cause cause, - enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) - { -- int exit_code = 0; -+ int exit_code = pcmk_ok; - int log_level = LOG_INFO; - const char *exit_type = "gracefully"; - - if (action & A_EXIT_1) { -- exit_code = 1; -+ /* exit_code = pcmk_err_generic; */ - log_level = LOG_ERR; - exit_type = "forcefully"; -+ exit_code = pcmk_err_generic; - } - - verify_stopped(cur_state, LOG_ERR); - 
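
The crmd_exit() rework in the hunks above follows a recognizable teardown pattern: a static guard makes the function safe to re-enter, each global is freed and NULLed at a single site, and the GLib mainloop is drained for a bounded number of iterations before quitting so sources can clean up (keeping valgrind output focused on real leaks). A standalone sketch of that pattern under those assumptions — names here are illustrative, not the actual crmd symbols:

    #include <glib.h>

    /* Re-entrancy-safe, bounded mainloop teardown, mirroring the
     * structure of the patched crmd_exit(): guard flag, bounded
     * drain of pending events, then quit/unref. */
    static gboolean exiting = FALSE;

    static int
    graceful_exit(GMainLoop *mloop, int rc)
    {
        if (exiting) {
            return rc;              /* shutdown already in progress */
        }
        exiting = TRUE;

        if (mloop) {
            GMainContext *ctx = g_main_loop_get_context(mloop);
            int lpc = 0;

            /* Dispatch whatever is pending, but give up after ten
             * passes rather than spinning forever. */
            while (g_main_context_pending(ctx) && lpc < 10) {
                lpc++;
                g_main_context_dispatch(ctx);
            }
            g_main_loop_quit(mloop);
            g_main_loop_unref(mloop);
        }
        return rc;
    }
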
do_crm_log(log_level, "Performing %s - %s exiting the CRMd", - fsa_action2string(action), exit_type); - -- if (is_set(fsa_input_register, R_IN_RECOVERY)) { -- crm_err("Could not recover from internal error"); -- exit_code = 2; -- } -- if (is_set(fsa_input_register, R_STAYDOWN)) { -- crm_warn("Inhibiting respawn by Heartbeat"); -- exit_code = 100; -- } -- - crm_info("[%s] stopped (%d)", crm_system_name, exit_code); -- delete_fsa_input(msg_data); - crmd_exit(exit_code); - } - -@@ -335,6 +449,7 @@ do_startup(long long action, - - fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); - config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); -+ transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); - - crm_debug("Creating CIB and LRM objects"); - fsa_cib_conn = cib_new(); -@@ -539,7 +654,7 @@ crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) - crm_log_xml_trace(msg, "CRMd[inbound]"); - - crm_xml_add(msg, F_CRM_SYS_FROM, client->id); -- if (crmd_authorize_message(msg, client)) { -+ if (crmd_authorize_message(msg, client, NULL)) { - route_message(C_IPC_MESSAGE, msg); - } - -@@ -603,7 +718,8 @@ do_stop(long long action, - stop_subsystem(pe_subsystem, FALSE); - } - -- mainloop_del_ipc_server(ipcs); -+ crm_trace("Closing IPC server"); -+ mainloop_del_ipc_server(ipcs); ipcs = NULL; - register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); - } - -@@ -671,7 +787,7 @@ do_started(long long action, - } - - crm_debug("Init server comms"); -- ipcs = mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, &crmd_callbacks); -+ ipcs = crmd_ipc_server_init(&crmd_callbacks); - if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); -@@ -697,7 +813,7 @@ do_recover(long long action, - enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) - { - set_bit(fsa_input_register, R_IN_RECOVERY); -- crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); -+ crm_warn("Fast-tracking shutdown in response to errors"); - - register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); - } -@@ -861,6 +977,6 @@ crm_shutdown(int nsig) - - } else { - crm_info("exit from shutdown"); -- crmd_exit(EX_OK); -+ crmd_exit(pcmk_ok); - } - } -diff --git a/crmd/corosync.c b/crmd/corosync.c -index 989d25f..6385780 100644 ---- a/crmd/corosync.c -+++ b/crmd/corosync.c -@@ -140,7 +140,7 @@ crmd_quorum_destroy(gpointer user_data) - { - if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { - crm_err("connection terminated"); -- crmd_exit(1); -+ crmd_exit(ENOLINK); - - } else { - crm_info("connection closed"); -@@ -152,7 +152,7 @@ crmd_ais_destroy(gpointer user_data) - { - if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { - crm_err("connection terminated"); -- crmd_exit(1); -+ crmd_exit(ENOLINK); - - } else { - crm_info("connection closed"); -@@ -165,7 +165,7 @@ crmd_cman_destroy(gpointer user_data) - { - if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { - crm_err("connection terminated"); -- crmd_exit(1); -+ crmd_exit(ENOLINK); - - } else { - crm_info("connection closed"); -diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h -index 0fddb21..d0ca58c 100644 ---- a/crmd/crmd_lrm.h -+++ b/crmd/crmd_lrm.h -@@ -1,27 +1,28 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * 
License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - - extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level); --extern void lrm_clear_last_failure(const char *rsc_id); -+extern void lrm_clear_last_failure(const char *rsc_id, const char *node_name); - void lrm_op_callback(lrmd_event_data_t * op); - - typedef struct resource_history_s { - char *id; -+ uint32_t last_callid; - lrmd_rsc_info_t rsc; - lrmd_event_data_t *last; - lrmd_event_data_t *failed; -@@ -46,7 +47,9 @@ struct recurring_op_s { - - typedef struct lrm_state_s { - const char *node_name; -+ /* reserved for lrm_state.c usage only */ - void *conn; -+ /* reserved for remote_lrmd_ra.c usage only */ - void *remote_ra_data; - - GHashTable *resource_history; -@@ -64,7 +67,7 @@ struct pending_deletion_op_s { - xmlNode *do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace); - - /*! -- * \brief Clear all state information from a single state entry. -+ * \brief Clear all state information from a single state entry. - * \note This does not close the lrmd connection - */ - void lrm_state_reset_tables(lrm_state_t * lrm_state); -diff --git a/crmd/crmd_messages.h b/crmd/crmd_messages.h -index 50a56cd..6688e92 100644 ---- a/crmd/crmd_messages.h -+++ b/crmd/crmd_messages.h -@@ -100,7 +100,10 @@ extern gboolean add_pending_outgoing_reply(const char *originating_node_name, - const char *crm_msg_reference, - const char *sys_to, const char *sys_from); - --extern gboolean crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client); -+gboolean crmd_is_proxy_session(const char *session); -+void crmd_proxy_send(const char *session, xmlNode *msg); -+ -+extern gboolean crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session); - - extern gboolean send_request(xmlNode * msg, char **msg_reference); - -diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h -index a26b114..92d14b1 100644 ---- a/crmd/crmd_utils.h -+++ b/crmd/crmd_utils.h -@@ -78,6 +78,7 @@ xmlNode *create_node_state(const char *uname, const char *in_cluster, - const char *exp_state, gboolean clear_shutdown, const char *src); - - int crmd_exit(int rc); -+int crmd_fast_exit(int rc); - gboolean stop_subsystem(struct crm_subsystem_s *centry, gboolean force_quit); - gboolean start_subsystem(struct crm_subsystem_s *centry); - -@@ -85,18 +86,19 @@ void fsa_dump_actions(long long action, const char *text); - void fsa_dump_inputs(int log_level, const char *text, long long input_register); - - gboolean update_dc(xmlNode * msg); --void erase_node_from_join(const char *node); -+void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); - xmlNode *do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *source); - void populate_cib_nodes(enum node_update_flags flags, const char *source); - void crm_update_quorum(gboolean quorum, gboolean force_update); - void erase_status_tag(const char *uname, const char *tag, 
int options); --void update_attrd(const char *host, const char *name, const char *value, const char *user_name); -+void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); - - int crmd_join_phase_count(enum crm_join_phase phase); - void crmd_join_phase_log(int level); - - const char *get_timer_desc(fsa_timer_t * timer); - gboolean too_many_st_failures(void); -+void st_fail_count_reset(const char * target); - - # define fsa_register_cib_callback(id, flag, data, fn) do { \ - fsa_cib_conn->cmds->register_callback( \ -diff --git a/crmd/election.c b/crmd/election.c -index daa0f66..1946858 100644 ---- a/crmd/election.c -+++ b/crmd/election.c -@@ -180,22 +180,6 @@ struct election_data_s { - unsigned int winning_bornon; - }; - --static void --log_member_name(gpointer key, gpointer value, gpointer user_data) --{ -- const crm_node_t *node = value; -- -- if (crm_is_peer_active(node)) { -- crm_err("%s: %s proc=%.32x", (char *)user_data, (char *)key, node->processes); -- } --} -- --static void --log_node(gpointer key, gpointer value, gpointer user_data) --{ -- crm_err("%s: %s", (char *)user_data, (char *)key); --} -- - void - do_election_check(long long action, - enum crmd_fsa_cause cause, -@@ -220,15 +204,21 @@ do_election_check(long long action, - crm_timer_stop(election_timeout); - register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); - if (voted_size > num_members) { -- char *data = NULL; -- -- data = strdup("member"); -- g_hash_table_foreach(crm_peer_cache, log_member_name, data); -- free(data); -+ GHashTableIter gIter; -+ const crm_node_t *node; -+ char *key = NULL; -+ -+ g_hash_table_iter_init(&gIter, crm_peer_cache); -+ while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { -+ if (crm_is_peer_active(node)) { -+ crm_err("member: %s proc=%.32x", node->uname, node->processes); -+ } -+ } - -- data = strdup("voted"); -- g_hash_table_foreach(voted, log_node, data); -- free(data); -+ g_hash_table_iter_init(&gIter, voted); -+ while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) { -+ crm_err("voted: %s", key); -+ } - - } - crm_debug("Destroying voted hash"); -@@ -280,7 +270,9 @@ do_election_count_vote(long long action, - CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return); - - if(crm_peer_cache == NULL) { -- CRM_LOG_ASSERT(is_set(fsa_input_register, R_SHUTDOWN)); -+ if(is_not_set(fsa_input_register, R_SHUTDOWN)) { -+ crm_err("Internal error, no peer cache"); -+ } - return; - } - -@@ -492,6 +484,7 @@ feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, vo - if (rc != pcmk_ok) { - fsa_data_t *msg_data = NULL; - -+ crm_notice("Update failed: %s (%d)", pcmk_strerror(rc), rc); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } - } -@@ -513,7 +506,8 @@ do_dc_takeover(long long action, - - for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { - char *target = gIter->data; -- const char *uuid = get_uuid(target); -+ crm_node_t *target_node = crm_get_peer(0, target); -+ const char *uuid = crm_peer_uuid(target_node); - - crm_notice("Marking %s, target of a previous stonith action, as clean", target); - send_stonith_update(NULL, target, uuid); -diff --git a/crmd/fsa.c b/crmd/fsa.c -index 92490b6..b5c442b 100644 ---- a/crmd/fsa.c -+++ b/crmd/fsa.c -@@ -48,14 +48,14 @@ char *fsa_our_uname = NULL; - ll_cluster_t *fsa_cluster_conn; - #endif - --fsa_timer_t *wait_timer = NULL; --fsa_timer_t *recheck_timer = NULL; --fsa_timer_t *election_trigger = 
NULL; --fsa_timer_t *election_timeout = NULL; --fsa_timer_t *transition_timer = NULL; -+fsa_timer_t *wait_timer = NULL; /* How long to wait before retrying to connect to the cib/lrmd/ccm */ -+fsa_timer_t *recheck_timer = NULL; /* Periodically re-run the PE to account for time based rules/preferences */ -+fsa_timer_t *election_trigger = NULL; /* How long to wait at startup, or after an election, for the DC to make contact */ -+fsa_timer_t *election_timeout = NULL; /* How long to declare an election over - even if not everyone voted */ -+fsa_timer_t *transition_timer = NULL; /* How long to delay the start of a new transition with the expectation something else might happen too */ - fsa_timer_t *integration_timer = NULL; - fsa_timer_t *finalization_timer = NULL; --fsa_timer_t *shutdown_escalation_timer = NULL; -+fsa_timer_t *shutdown_escalation_timer = NULL; /* How long to wait for the DC to stop all resources and give us the all-clear to shut down */ - - volatile gboolean do_fsa_stall = FALSE; - volatile long long fsa_input_register = 0; -@@ -362,6 +362,7 @@ s_crmd_fsa_actions(fsa_data_t * fsa_data) - - /* - * Medium priority actions -+ * - Membership - */ - } else if (fsa_actions & A_DC_TAKEOVER) { - do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); -@@ -373,10 +374,6 @@ s_crmd_fsa_actions(fsa_data_t * fsa_data) - do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); - } else if (fsa_actions & A_ELECTION_START) { - do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); -- } else if (fsa_actions & A_TE_HALT) { -- do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); -- } else if (fsa_actions & A_TE_CANCEL) { -- do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); - } else if (fsa_actions & A_DC_JOIN_OFFER_ALL) { - do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); - } else if (fsa_actions & A_DC_JOIN_OFFER_ONE) { -@@ -385,24 +382,28 @@ s_crmd_fsa_actions(fsa_data_t * fsa_data) - do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); - } else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) { - do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); -+ } else if (fsa_actions & A_DC_JOIN_FINALIZE) { -+ do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); -+ } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { -+ do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); - - /* - * Low(er) priority actions - * Make sure the CIB is always updated before invoking the - * PE, and the PE before the TE - */ -- } else if (fsa_actions & A_DC_JOIN_FINALIZE) { -- do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); -+ } else if (fsa_actions & A_TE_HALT) { -+ do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); -+ } else if (fsa_actions & A_TE_CANCEL) { -+ do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); - } else if (fsa_actions & A_LRM_INVOKE) { - do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); - } else if (fsa_actions & A_PE_INVOKE) { - do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); - } else if (fsa_actions & A_TE_INVOKE) { - do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); -- } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { -- do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); - -- /* sub-system stop */ -+ /* Shutdown actions */ - } else if (fsa_actions & A_DC_RELEASED) { - do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); - } else if (fsa_actions & A_PE_STOP) { -@@ -489,6 +490,8 @@ do_state_transition(long long actions, - level = LOG_NOTICE; - } else if (cur_state == S_ELECTION) { - level = 
LOG_NOTICE; -+ } else if (cur_state == S_STARTING) { -+ level = LOG_NOTICE; - } else if (next_state == S_RECOVERY) { - level = LOG_WARNING; - } -diff --git a/crmd/heartbeat.c b/crmd/heartbeat.c -index 568e529..1d63190 100644 ---- a/crmd/heartbeat.c -+++ b/crmd/heartbeat.c -@@ -424,13 +424,15 @@ crmd_client_status_callback(const char *node, const char *client, const char *st - crm_notice("Status update: Client %s/%s now has status [%s] (DC=%s)", - node, client, status, AM_I_DC ? "true" : "false"); - -+ peer = crm_get_peer(0, node); -+ - if (safe_str_eq(status, ONLINESTATUS)) { - /* remove the cached value in case it changed */ - crm_trace("Uncaching UUID for %s", node); -- unget_uuid(node); -+ free(peer->uuid); -+ peer->uuid = NULL; - } - -- peer = crm_get_peer(0, node); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_crmd, status); - - if (AM_I_DC) { -diff --git a/crmd/join_client.c b/crmd/join_client.c -index aba2d56..70b3246 100644 ---- a/crmd/join_client.c -+++ b/crmd/join_client.c -@@ -263,8 +263,8 @@ do_cl_join_finalize_respond(long long action, - - /* Just in case attrd was still around too */ - if (is_not_set(fsa_input_register, R_SHUTDOWN)) { -- update_attrd(fsa_our_uname, "terminate", NULL, NULL); -- update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, NULL, NULL); -+ update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE); -+ update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, NULL, NULL, FALSE); - } - } - -@@ -273,7 +273,7 @@ do_cl_join_finalize_respond(long long action, - - if (AM_I_DC == FALSE) { - register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__); -- update_attrd(NULL, NULL, NULL, NULL); -+ update_attrd(NULL, NULL, NULL, NULL, FALSE); - } - - free_xml(tmp1); -diff --git a/crmd/join_dc.c b/crmd/join_dc.c -index 473e323..b45fff2 100644 ---- a/crmd/join_dc.c -+++ b/crmd/join_dc.c -@@ -40,18 +40,49 @@ static int current_join_id = 0; - unsigned long long saved_ccm_membership_id = 0; - - void -+crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) -+{ -+ enum crm_join_phase last = 0; -+ -+ if(node == NULL) { -+ crm_err("%s: Could not set join-%u to %d for NULL", source, current_join_id, phase); -+ return; -+ } -+ -+ last = node->join; -+ -+ if(phase == last) { -+ crm_trace("%s: Node %s[%u] - join-%u phase still %u", -+ source, node->uname, node->id, current_join_id, last); -+ -+ } else if (phase <= crm_join_none) { -+ node->join = phase; -+ crm_info("%s: Node %s[%u] - join-%u phase %u -> %u", -+ source, node->uname, node->id, current_join_id, last, phase); -+ -+ } else if(phase == last + 1) { -+ node->join = phase; -+ crm_info("%s: Node %s[%u] - join-%u phase %u -> %u", -+ source, node->uname, node->id, current_join_id, last, phase); -+ } else { -+ crm_err("%s: Node %s[%u] - join-%u phase cannot transition from %u to %u", -+ source, node->uname, node->id, current_join_id, last, phase); -+ -+ } -+} -+ -+void - initialize_join(gboolean before) - { - GHashTableIter iter; - crm_node_t *peer = NULL; -- char *key = NULL; - - /* clear out/reset a bunch of stuff */ - crm_debug("join-%d: Initializing join data (flag=%s)", - current_join_id, before ? 
"true" : "false"); - - g_hash_table_iter_init(&iter, crm_peer_cache); -- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) &peer)) { -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { - crm_update_peer_join(__FUNCTION__, peer, crm_join_none); - } - -@@ -69,16 +100,6 @@ initialize_join(gboolean before) - } - } - --void --erase_node_from_join(const char *uname) --{ -- -- if (uname != NULL) { -- crm_node_t *peer = crm_get_peer(0, uname); -- crm_update_peer_join(__FUNCTION__, peer, crm_join_none); -- } --} -- - static void - join_make_offer(gpointer key, gpointer value, gpointer user_data) - { -@@ -107,7 +128,7 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) - return; - } - -- erase_node_from_join(join_to); -+ crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none); - - if (crm_is_peer_active(member)) { - crm_node_t *peer = crm_get_peer(0, join_to); -@@ -206,7 +227,7 @@ do_dc_join_offer_one(long long action, - crm_info("join-%d: Processing %s request from %s in state %s", - current_join_id, op, join_to, fsa_state2string(cur_state)); - -- erase_node_from_join(join_to); -+ crm_update_peer_join(__FUNCTION__, member, crm_join_none); - join_make_offer(NULL, member, NULL); - - /* always offer to the DC (ourselves) -@@ -516,7 +537,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) - crm_trace("Creating node entry for %s", join_to); - - tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); -- set_uuid(tmp1, XML_ATTR_UUID, join_to); -+ set_uuid(tmp1, XML_ATTR_UUID, join_node); - crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); - - fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1, -@@ -617,19 +638,18 @@ do_dc_join_final(long long action, - enum crmd_fsa_input current_input, fsa_data_t * msg_data) - { - crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); -- update_attrd(NULL, NULL, NULL, NULL); -+ update_attrd(NULL, NULL, NULL, NULL, FALSE); - crm_update_quorum(crm_have_quorum, TRUE); - } - - int crmd_join_phase_count(enum crm_join_phase phase) - { - int count = 0; -- const char *key; - crm_node_t *peer; - GHashTableIter iter; - - g_hash_table_iter_init(&iter, crm_peer_cache); -- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) &peer)) { -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { - if(peer->join == phase) { - count++; - } -@@ -639,12 +659,11 @@ int crmd_join_phase_count(enum crm_join_phase phase) - - void crmd_join_phase_log(int level) - { -- const char *key; - crm_node_t *peer; - GHashTableIter iter; - - g_hash_table_iter_init(&iter, crm_peer_cache); -- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) &peer)) { -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { - const char *state = "unknown"; - switch(peer->join) { - case crm_join_nack: -diff --git a/crmd/lrm.c b/crmd/lrm.c -index b2e1a6b..31f00d7 100644 ---- a/crmd/lrm.c -+++ b/crmd/lrm.c -@@ -141,6 +141,7 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ - return; - } - -+ entry->last_callid = op->call_id; - target_rc = rsc_op_expected_rc(op); - if (op->op_status == PCMK_LRM_OP_CANCELLED) { - if (op->interval > 0) { -@@ -255,6 +256,9 @@ do_lrm_control(long long action, - - lrm_state_t *lrm_state = NULL; - -+ if(fsa_our_uname == NULL) { -+ return; /* Nothing to do */ -+ } - lrm_state = lrm_state_find_or_create(fsa_our_uname); - if (lrm_state == NULL) { - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); -@@ -270,6 +274,7 @@ 
do_lrm_control(long long action, - } - - clear_bit(fsa_input_register, R_LRM_CONNECTED); -+ crm_info("Disconnecting from the LRM"); - lrm_state_disconnect(lrm_state); - lrm_state_reset_tables(lrm_state); - crm_notice("Disconnected from the LRM"); -@@ -300,7 +305,7 @@ do_lrm_control(long long action, - } - - set_bit(fsa_input_register, R_LRM_CONNECTED); -- crm_debug("LRM connection established"); -+ crm_info("LRM connection established"); - } - - if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { -@@ -417,6 +422,7 @@ lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, - static char * - get_rsc_metadata(const char *type, const char *class, const char *provider) - { -+ int rc = 0; - char *metadata = NULL; - - /* Always use a local connection for this operation */ -@@ -431,7 +437,7 @@ get_rsc_metadata(const char *type, const char *class, const char *provider) - } - - crm_trace("Retreiving metadata for %s::%s:%s", type, class, provider); -- lrm_state_get_metadata(lrm_state, class, provider, type, &metadata, 0); -+ rc = lrm_state_get_metadata(lrm_state, class, provider, type, &metadata, 0); - - if (metadata) { - /* copy the metadata because the LRM likes using -@@ -443,7 +449,7 @@ get_rsc_metadata(const char *type, const char *class, const char *provider) - metadata = m_copy; - - } else { -- crm_warn("No metadata found for %s::%s:%s", type, class, provider); -+ crm_warn("No metadata found for %s::%s:%s: %s (%d)", type, class, provider, pcmk_strerror(rc), rc); - } - - return metadata; -@@ -496,11 +502,11 @@ get_rsc_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) - } - - len = strlen(rsc->type) + strlen(rsc->class) + strlen(provider) + 4; -- /* coverity[returned_null] Ignore */ - key = malloc(len); -- snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider); -- -- reload = g_hash_table_lookup(reload_hash, key); -+ if(key) { -+ snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider); -+ reload = g_hash_table_lookup(reload_hash, key); -+ } - - if (reload && ((now - 9) > reload->last_query) - && safe_str_eq(op->op_type, RSC_START)) { -@@ -518,6 +524,10 @@ get_rsc_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) - key = NULL; - reload->metadata = get_rsc_metadata(rsc->type, rsc->class, provider); - -+ if(reload->metadata == NULL) { -+ goto cleanup; -+ } -+ - metadata = string2xml(reload->metadata); - if (metadata == NULL) { - crm_err("Metadata for %s::%s:%s is not valid XML", -@@ -972,7 +982,7 @@ delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc - } - - void --lrm_clear_last_failure(const char *rsc_id) -+lrm_clear_last_failure(const char *rsc_id, const char *node_name) - { - char *attr = NULL; - GHashTableIter iter; -@@ -982,10 +992,17 @@ lrm_clear_last_failure(const char *rsc_id) - - attr = generate_op_key(rsc_id, "last_failure", 0); - -- /* This clears last failure for every lrm state that has this rsc. 
*/ -+ /* This clears last failure for every lrm state that has this rsc.*/ - for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { - lrm_state_t *lrm_state = state_entry->data; - -+ if (node_name != NULL) { -+ if (strcmp(node_name, lrm_state->node_name) != 0) { -+ /* filter by node_name if node_name is present */ -+ continue; -+ } -+ } -+ - delete_op_entry(lrm_state, NULL, rsc_id, attr, 0); - - if (!lrm_state->resource_history) { -@@ -1001,7 +1018,7 @@ lrm_clear_last_failure(const char *rsc_id) - } - } - free(attr); -- -+ g_list_free(lrm_state_list); - } - - static gboolean -@@ -1192,17 +1209,24 @@ do_lrm_invoke(long long action, - const char *operation = NULL; - ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); - const char *user_name = NULL; -- const char *remote_node = NULL; -+ const char *target_node = NULL; -+ gboolean is_remote_node = FALSE; - - if (input->xml != NULL) { - /* Remote node operations are routed here to their remote connections */ -- remote_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); -+ target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); -+ } -+ if (target_node == NULL) { -+ target_node = fsa_our_uname; -+ } else if (safe_str_neq(target_node, fsa_our_uname)) { -+ is_remote_node = TRUE; - } -- lrm_state = lrm_state_find(remote_node ? remote_node : fsa_our_uname); - -- if (lrm_state == NULL && remote_node) { -+ lrm_state = lrm_state_find(target_node); -+ -+ if (lrm_state == NULL && is_remote_node) { - crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.", -- remote_node, fsa_our_uname); -+ target_node, fsa_our_uname); - return; - } - -@@ -1245,14 +1269,16 @@ do_lrm_invoke(long long action, - * we want to fail. We then pass that event to the lrmd client callback - * so it will be processed as if it actually came from the lrmd. 
*/ - op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); -+ CRM_ASSERT(op != NULL); -+ - free((char *)op->user_data); - op->user_data = NULL; - entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); - /* Make sure the call id is greater than the last successful operation, - * otherwise the failure will not result in a possible recovery of the resource - * as it could appear the failure occurred before the successful start */ -- if (entry && entry->last) { -- op->call_id = entry->last->call_id + 1; -+ if (entry) { -+ op->call_id = entry->last_callid + 1; - if (op->call_id < 0) { - op->call_id = 1; - } -@@ -1260,7 +1286,8 @@ do_lrm_invoke(long long action, - op->interval = 0; - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_EXECRA_UNKNOWN_ERROR; -- CRM_ASSERT(op != NULL); -+ op->t_run = time(NULL); -+ op->t_rcchange = op->t_run; - - #if ENABLE_ACL - if (user_name && is_privileged(user_name) == FALSE) { -@@ -1297,6 +1324,20 @@ do_lrm_invoke(long long action, - - fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); - crm_info("Forced a local LRM refresh: call=%d", rc); -+ -+ if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { -+ xmlNode *reply = create_request( -+ CRM_OP_INVOKE_LRM, fragment, -+ from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); -+ -+ crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); -+ -+ if (relay_message(reply, TRUE) == FALSE) { -+ crm_log_xml_err(reply, "Unable to route reply"); -+ } -+ free_xml(reply); -+ } -+ - free_xml(fragment); - - } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { -@@ -1311,7 +1352,7 @@ do_lrm_invoke(long long action, - free_xml(data); - - } else if (safe_str_eq(operation, CRM_OP_PROBED)) { -- update_attrd(NULL, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name); -+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); - - } else if (safe_str_eq(crm_op, CRM_OP_REPROBE)) { - GHashTableIter gIter; -@@ -1331,7 +1372,20 @@ do_lrm_invoke(long long action, - /* And finally, _delete_ the value in attrd - * Setting it to FALSE results in the PE sending us back here again - */ -- update_attrd(NULL, CRM_OP_PROBED, NULL, user_name); -+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); -+ -+ if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { -+ xmlNode *reply = create_request( -+ CRM_OP_INVOKE_LRM, NULL, -+ from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); -+ -+ crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); -+ -+ if (relay_message(reply, TRUE) == FALSE) { -+ crm_log_xml_err(reply, "Unable to route reply"); -+ } -+ free_xml(reply); -+ } - - } else if (operation != NULL) { - lrmd_rsc_info_t *rsc = NULL; -@@ -1428,17 +1482,15 @@ do_lrm_invoke(long long action, - free(op_key); - lrmd_free_event(op); - -- } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { -- int cib_rc = pcmk_ok; -- -- CRM_ASSERT(rsc != NULL); -+ } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) { - -- cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); -+#if ENABLE_ACL -+ int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); - if (cib_rc != pcmk_ok) { - lrmd_event_data_t *op = NULL; - - crm_err -- ("Attempt of deleting resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", -+ ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", - rsc->id, from_sys, user_name ? 
user_name : "unknown", from_host, cib_rc, - pcmk_strerror(cib_rc)); - -@@ -1454,7 +1506,7 @@ do_lrm_invoke(long long action, - lrmd_free_event(op); - return; - } -- -+#endif - delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input); - - } else if (rsc != NULL) { -@@ -1637,6 +1689,7 @@ verify_stopped(enum crmd_fsa_state cur_state, int log_level) - } - - set_bit(fsa_input_register, R_SENT_RSC_STOP); -+ g_list_free(lrm_state_list); lrm_state_list = NULL; - return res; - } - -@@ -1710,7 +1763,7 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat - } - - /* now do the op */ -- crm_debug("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); -+ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); - - if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) { - if (safe_str_neq(operation, "fail") -@@ -1839,13 +1892,21 @@ do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_da - iter = create_xml_node(iter, XML_CIB_TAG_STATE); - - if (safe_str_eq(lrm_state->node_name, fsa_our_uname)) { -- set_uuid(iter, XML_ATTR_UUID, lrm_state->node_name); - uuid = fsa_our_uuid; -+ - } else { - /* remote nodes uuid and uname are equal */ -- crm_xml_add(iter, XML_ATTR_UUID, lrm_state->node_name); - uuid = lrm_state->node_name; -+ crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); - } -+ -+ CRM_LOG_ASSERT(uuid != NULL); -+ if(uuid == NULL) { -+ rc = -EINVAL; -+ goto done; -+ } -+ -+ crm_xml_add(iter, XML_ATTR_UUID, uuid); - crm_xml_add(iter, XML_ATTR_UNAME, lrm_state->node_name); - crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); - -@@ -1871,6 +1932,8 @@ do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_da - goto cleanup; - } - -+ crm_log_xml_trace(update, __FUNCTION__); -+ - /* make it an asyncronous call and be done with it - * - * Best case: -@@ -1893,7 +1956,7 @@ do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_da - if (rc > 0) { - last_resource_update = rc; - } -- -+ done: - /* the return code is a call number, not an error code */ - crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, - op->op_type, op->interval, op->rsc_id); -diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c -index 0c15926..2eec178 100644 ---- a/crmd/lrm_state.c -+++ b/crmd/lrm_state.c -@@ -18,6 +18,7 @@ - - #include - #include -+#include - - #include - #include -@@ -26,6 +27,20 @@ - #include - - GHashTable *lrm_state_table = NULL; -+GHashTable *proxy_table = NULL; -+int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); -+void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); -+ -+typedef struct remote_proxy_s { -+ char *node_name; -+ char *session_id; -+ -+ gboolean is_local; -+ -+ crm_ipc_t *ipc; -+ mainloop_io_t *source; -+ -+} remote_proxy_t; - - static void - history_cache_destroy(gpointer data) -@@ -70,14 +85,16 @@ free_recurring_op(gpointer value) - lrm_state_t * - lrm_state_create(const char *node_name) - { -+ lrm_state_t *state = NULL; - -- lrm_state_t *state = calloc(1, sizeof(lrm_state_t)); -+ if (!node_name) { -+ crm_err("No node name given for lrm state object"); -+ return NULL; -+ } - -+ state = calloc(1, sizeof(lrm_state_t)); - if (!state) { - return NULL; -- } else if (!node_name) { -- crm_err("No node name given for lrm state object"); -- return NULL; - } - - state->node_name = 
strdup(node_name); -@@ -102,6 +119,19 @@ lrm_state_destroy(const char *node_name) - g_hash_table_remove(lrm_state_table, node_name); - } - -+static gboolean -+remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) -+{ -+ remote_proxy_t *proxy = value; -+ const char *node_name = user_data; -+ -+ if (safe_str_eq(node_name, proxy->node_name)) { -+ return TRUE; -+ } -+ -+ return FALSE; -+} -+ - static void - internal_lrm_state_destroy(gpointer data) - { -@@ -111,16 +141,21 @@ internal_lrm_state_destroy(gpointer data) - return; - } - -+ crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); -+ g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); - remote_ra_cleanup(lrm_state); - lrmd_api_delete(lrm_state->conn); - - if (lrm_state->resource_history) { -+ crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history)); - g_hash_table_destroy(lrm_state->resource_history); - } - if (lrm_state->deletion_ops) { -+ crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops)); - g_hash_table_destroy(lrm_state->deletion_ops); - } - if (lrm_state->pending_ops) { -+ crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops)); - g_hash_table_destroy(lrm_state->pending_ops); - } - -@@ -143,6 +178,20 @@ lrm_state_reset_tables(lrm_state_t * lrm_state) - } - } - -+static void -+remote_proxy_free(gpointer data) -+{ -+ remote_proxy_t *proxy = data; -+ crm_debug("Signing out of the IPC Service"); -+ -+ if (proxy->source != NULL) { -+ mainloop_del_ipc_client(proxy->source); -+ } -+ -+ free(proxy->node_name); -+ free(proxy->session_id); -+} -+ - gboolean - lrm_state_init_local(void) - { -@@ -156,6 +205,13 @@ lrm_state_init_local(void) - return FALSE; - } - -+ proxy_table = -+ g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, remote_proxy_free); -+ if (!proxy_table) { -+ g_hash_table_destroy(lrm_state_table); -+ return FALSE; -+ } -+ - return TRUE; - } - -@@ -163,7 +219,12 @@ void - lrm_state_destroy_all(void) - { - if (lrm_state_table) { -- g_hash_table_destroy(lrm_state_table); -+ crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table)); -+ g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; -+ } -+ if(proxy_table) { -+ crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); -+ g_hash_table_destroy(proxy_table); proxy_table = NULL; - } - } - -@@ -246,6 +307,221 @@ lrm_state_ipc_connect(lrm_state_t * lrm_state) - return ret; - } - -+static void -+remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) -+{ -+ /* sending to the remote node that an ipc connection has been destroyed */ -+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); -+ lrmd_internal_proxy_send(lrmd, msg); -+ free_xml(msg); -+} -+ -+static void -+remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg) -+{ -+ /* sending to the remote node an event msg. 
*/ -+ xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(event, F_LRMD_IPC_OP, "event"); -+ crm_xml_add(event, F_LRMD_IPC_SESSION, session_id); -+ add_message_xml(event, F_LRMD_IPC_MSG, msg); -+ lrmd_internal_proxy_send(lrmd, event); -+ free_xml(event); -+} -+ -+static void -+remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id) -+{ -+ /* sending to the remote node a response msg. */ -+ xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(response, F_LRMD_IPC_OP, "response"); -+ crm_xml_add(response, F_LRMD_IPC_SESSION, session_id); -+ crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); -+ add_message_xml(response, F_LRMD_IPC_MSG, msg); -+ lrmd_internal_proxy_send(lrmd, response); -+ free_xml(response); -+} -+ -+static int -+remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) -+{ -+ xmlNode *xml = NULL; -+ remote_proxy_t *proxy = userdata; -+ lrm_state_t *lrm_state = lrm_state_find(proxy->node_name); -+ -+ if (lrm_state == NULL) { -+ return 0; -+ } -+ -+ xml = string2xml(buffer); -+ if (xml == NULL) { -+ crm_warn("Received a NULL msg from IPC service."); -+ return 1; -+ } -+ -+ remote_proxy_relay_event(lrm_state->conn, proxy->session_id, xml); -+ free_xml(xml); -+ return 1; -+} -+ -+static void -+remote_proxy_disconnected(void *userdata) -+{ -+ remote_proxy_t *proxy = userdata; -+ lrm_state_t *lrm_state = lrm_state_find(proxy->node_name); -+ -+ crm_trace("destroying %p", userdata); -+ -+ proxy->source = NULL; -+ proxy->ipc = NULL; -+ -+ if (lrm_state && lrm_state->conn) { -+ remote_proxy_notify_destroy(lrm_state->conn, proxy->session_id); -+ } -+ g_hash_table_remove(proxy_table, proxy->session_id); -+} -+ -+static remote_proxy_t * -+remote_proxy_new(const char *node_name, const char *session_id, const char *channel) -+{ -+ static struct ipc_client_callbacks proxy_callbacks = { -+ .dispatch = remote_proxy_dispatch_internal, -+ .destroy = remote_proxy_disconnected -+ }; -+ remote_proxy_t *proxy = calloc(1, sizeof(remote_proxy_t)); -+ -+ proxy->node_name = strdup(node_name); -+ proxy->session_id = strdup(session_id); -+ -+ if (safe_str_eq(channel, CRM_SYSTEM_CRMD)) { -+ proxy->is_local = TRUE; -+ } else { -+ proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 512 * 1024 /* 512k */ , proxy, &proxy_callbacks); -+ proxy->ipc = mainloop_get_ipc_client(proxy->source); -+ -+ if (proxy->source == NULL) { -+ remote_proxy_free(proxy); -+ return NULL; -+ } -+ } -+ -+ g_hash_table_insert(proxy_table, proxy->session_id, proxy); -+ -+ return proxy; -+} -+ -+gboolean -+crmd_is_proxy_session(const char *session) -+{ -+ return g_hash_table_lookup(proxy_table, session) ? 
TRUE : FALSE; -+} -+ -+void -+crmd_proxy_send(const char *session, xmlNode *msg) -+{ -+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); -+ lrm_state_t *lrm_state = NULL; -+ -+ if (!proxy) { -+ return; -+ } -+ lrm_state = lrm_state_find(proxy->node_name); -+ if (lrm_state) { -+ remote_proxy_relay_event(lrm_state->conn, session, msg); -+ } -+} -+ -+static void -+crmd_proxy_dispatch(const char *user, -+ const char *session, -+ xmlNode *msg) -+{ -+ -+#if ENABLE_ACL -+ determine_request_user(user, msg, F_CRM_USER); -+#endif -+ crm_log_xml_trace(msg, "CRMd-PROXY[inbound]"); -+ -+ crm_xml_add(msg, F_CRM_SYS_FROM, session); -+ if (crmd_authorize_message(msg, NULL, session)) { -+ route_message(C_IPC_MESSAGE, msg); -+ } -+ -+ trigger_fsa(fsa_source); -+} -+ -+static void -+remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) -+{ -+ lrm_state_t *lrm_state = userdata; -+ xmlNode *op_reply = NULL; -+ const char *op = crm_element_value(msg, F_LRMD_IPC_OP); -+ const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); -+ const char *user = crm_element_value(msg, F_LRMD_IPC_USER); -+ int msg_id = 0; -+ -+ /* sessions are raw ipc connections to IPC, -+ * all we do is proxy requests/responses exactly -+ * like they are given to us at the ipc level. */ -+ -+ CRM_CHECK(op != NULL, return); -+ CRM_CHECK(session != NULL, return); -+ -+ crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); -+ -+ /* This is msg from remote ipc client going to real ipc server */ -+ if (safe_str_eq(op, "new")) { -+ const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); -+ -+ CRM_CHECK(channel != NULL, return); -+ -+ if (remote_proxy_new(lrm_state->node_name, session, channel) == NULL) { -+ remote_proxy_notify_destroy(lrmd, session); -+ } -+ crm_info("new remote proxy client established, session id %s", session); -+ } else if (safe_str_eq(op, "destroy")) { -+ g_hash_table_remove(proxy_table, session); -+ -+ } else if (safe_str_eq(op, "request")) { -+ int flags = 0; -+ xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); -+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); -+ -+ CRM_CHECK(request != NULL, return); -+ -+ if (proxy == NULL) { -+ /* proxy connection no longer exists */ -+ remote_proxy_notify_destroy(lrmd, session); -+ return; -+ } else if ((proxy->is_local == FALSE) && (crm_ipc_connected(proxy->ipc) == FALSE)) { -+ g_hash_table_remove(proxy_table, session); -+ return; -+ } -+ crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); -+ -+ if (proxy->is_local) { -+ /* this is for the crmd, which we are, so don't try -+ * and connect/send to ourselves over ipc. instead -+ * do it directly. */ -+ if (flags & crm_ipc_client_response) { -+ op_reply = create_xml_node(NULL, "ack"); -+ crm_xml_add(op_reply, "function", __FUNCTION__); -+ crm_xml_add_int(op_reply, "line", __LINE__); -+ } -+ crmd_proxy_dispatch(user, session, request); -+ } else { -+ /* TODO make this async. 
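-+ * (the crm_ipc_send() below waits up to 10 seconds for a reply, blocking the crmd mainloop in the meantime)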
*/ -+ crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply); -+ } -+ } -+ -+ if (op_reply) { -+ remote_proxy_relay_response(lrmd, session, op_reply, msg_id); -+ free_xml(op_reply); -+ } -+} -+ - int - lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port, - int timeout_ms) -@@ -258,6 +534,7 @@ lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int - return -1; - } - ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback); -+ lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, remote_proxy_cb); - } - - crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms); -diff --git a/crmd/main.c b/crmd/main.c -index 1ae4c7a..749c4b7 100644 ---- a/crmd/main.c -+++ b/crmd/main.c -@@ -62,6 +62,7 @@ main(int argc, char **argv) - int index = 0; - int argerr = 0; - -+ crmd_mainloop = g_main_new(FALSE); - crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_set_options(NULL, "[options]", long_options, - "Daemon for aggregating resource and node failures as well as co-ordinating the cluster's response"); -@@ -138,7 +139,6 @@ crmd_init(void) - - if (state == S_PENDING || state == S_STARTING) { - /* Create the mainloop and run it... */ -- crmd_mainloop = g_main_new(FALSE); - crm_trace("Starting %s's mainloop", crm_system_name); - - #ifdef REALTIME_SUPPORT -@@ -163,6 +163,6 @@ crmd_init(void) - exit_code = 1; - } - -- crm_info("[%s] stopped (%d)", crm_system_name, exit_code); -- return crmd_exit(exit_code); -+ crm_info("%u stopped: %s (%d)", getpid(), pcmk_strerror(exit_code), exit_code); -+ return crmd_fast_exit(exit_code); - } -diff --git a/crmd/membership.c b/crmd/membership.c -index 18cd6b9..370d1a2 100644 ---- a/crmd/membership.c -+++ b/crmd/membership.c -@@ -40,48 +40,34 @@ int last_peer_update = 0; - - extern GHashTable *voted; - --struct update_data_s { -- const char *caller; -- xmlNode *parent; -- int flags; --}; -- - extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); - - static void --check_dead_member(const char *uname, GHashTable * members) -+reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) - { -- CRM_CHECK(uname != NULL, return); -- if (members != NULL && g_hash_table_lookup(members, uname) != NULL) { -- crm_err("%s didnt really leave the membership!", uname); -- return; -- } -- -- erase_node_from_join(uname); -- if (voted != NULL) { -- g_hash_table_remove(voted, uname); -- } -+ crm_node_t *node = value; - -- if (safe_str_eq(fsa_our_uname, uname)) { -- crm_err("We're not part of the cluster anymore"); -- register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); -+ if (crm_is_peer_active(node) == FALSE) { -+ crm_update_peer_join(__FUNCTION__, node, crm_join_none); - -- } else if (AM_I_DC == FALSE && safe_str_eq(uname, fsa_our_dc)) { -- crm_warn("Our DC node (%s) left the cluster", uname); -- register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); -+ if(node->uname) { -+ if (voted != NULL) { -+ g_hash_table_remove(voted, node->uname); -+ } - -- } else if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) { -- check_join_state(fsa_state, __FUNCTION__); -- } --} -+ if (safe_str_eq(fsa_our_uname, node->uname)) { -+ crm_err("We're not part of the cluster anymore"); -+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); - --static void --reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -+ } else if (AM_I_DC == FALSE && safe_str_eq(node->uname, 
fsa_our_dc)) { -+ crm_warn("Our DC node (%s) left the cluster", node->uname); -+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); -+ } -+ } - -- if (crm_is_peer_active(node) == FALSE) { -- check_dead_member(node->uname, NULL); -+ if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) { -+ check_join_state(fsa_state, __FUNCTION__); -+ } - fail_incompletable_actions(transition_graph, node->uuid); - } - } -@@ -129,8 +115,13 @@ crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, - if (rc == pcmk_ok) { - crm_trace("Node update %d complete", call_id); - -+ } else if(call_id < pcmk_ok) { -+ crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); -+ crm_log_xml_debug(msg, "failed"); -+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); -+ - } else { -- crm_err("Node update %d failed", call_id); -+ crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); - crm_log_xml_debug(msg, "failed"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } -@@ -142,7 +133,7 @@ do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *s - const char *value = NULL; - xmlNode *node_state = create_xml_node(parent, XML_CIB_TAG_STATE); - -- set_uuid(node_state, XML_ATTR_UUID, node->uname); -+ set_uuid(node_state, XML_ATTR_UUID, node); - - if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) { - crm_info("Node update for %s cancelled: no id", node->uname); -@@ -189,37 +180,20 @@ do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *s - return node_state; - } - --static void --ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -- struct update_data_s *data = (struct update_data_s *)user_data; -- -- do_update_node_cib(node, data->flags, data->parent, data->caller); --} -- --static void --create_cib_node_definition(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -- xmlNode *cib_nodes = user_data; -- xmlNode *cib_new_node = NULL; -- -- crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); -- cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE); -- crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid); -- crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname); --} - - static void - node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) - { -- if (rc != pcmk_ok) { -- fsa_data_t *msg_data = NULL; -+ fsa_data_t *msg_data = NULL; - -- crm_err("CIB Update %d failed: %s", call_id, pcmk_strerror(rc)); -- crm_log_xml_warn(output, "update:failed"); -+ if(call_id < pcmk_ok) { -+ crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); -+ crm_log_xml_debug(msg, "update:failed"); -+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - -+ } else if(rc < pcmk_ok) { -+ crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); -+ crm_log_xml_debug(msg, "update:failed"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } - } -@@ -247,11 +221,21 @@ populate_cib_nodes(enum node_update_flags flags, const char *source) - #endif - - if (from_hashtable) { -- /* if(uname_is_uuid()) { */ -- /* g_hash_table_foreach(crm_peer_id_cache, create_cib_node_definition, node_list); */ -- /* } else { */ -- g_hash_table_foreach(crm_peer_cache, create_cib_node_definition, node_list); -- /* } */ -+ GHashTableIter iter; -+ crm_node_t *node = NULL; -+ -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, 
(gpointer *) &node)) { -+ xmlNode *new_node = NULL; -+ -+ crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); -+ if(node->uuid && node->uname) { -+ /* We need both to be valid */ -+ new_node = create_xml_node(node_list, XML_CIB_TAG_NODE); -+ crm_xml_add(new_node, XML_ATTR_ID, node->uuid); -+ crm_xml_add(new_node, XML_ATTR_UNAME, node->uname); -+ } -+ } - } - - crm_trace("Populating section from %s", from_hashtable ? "hashtable" : "cluster"); -@@ -261,20 +245,20 @@ populate_cib_nodes(enum node_update_flags flags, const char *source) - - free_xml(node_list); - -- if (crm_peer_cache != NULL && AM_I_DC) { -+ if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) { - /* - * There is no need to update the local CIB with our values if - * we've not seen valid membership data - */ -- struct update_data_s update_data; -+ GHashTableIter iter; -+ crm_node_t *node = NULL; - - node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS); - -- update_data.caller = source; -- update_data.parent = node_list; -- update_data.flags = flags; -- -- g_hash_table_foreach(crm_peer_cache, ghash_update_cib_node, &update_data); -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ do_update_node_cib(node, flags, node_list, source); -+ } - - fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL); - fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete); -@@ -293,7 +277,7 @@ cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, - crm_trace("Quorum update %d complete", call_id); - - } else { -- crm_err("Quorum update %d failed", call_id); -+ crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); - crm_log_xml_debug(msg, "failed"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } -@@ -310,7 +294,7 @@ crm_update_quorum(gboolean quorum, gboolean force_update) - - update = create_xml_node(NULL, XML_TAG_CIB); - crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum); -- set_uuid(update, XML_ATTR_DC_UUID, fsa_our_uname); -+ crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid); - - fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL); - crm_debug("Updating quorum status to %s (call=%d)", quorum ? 
"true" : "false", call_id); -diff --git a/crmd/messages.c b/crmd/messages.c -index 9780090..dec84f9 100644 ---- a/crmd/messages.c -+++ b/crmd/messages.c -@@ -159,7 +159,7 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, - case C_STARTUP: - crm_err("Copying %s data (from %s)" - " not yet implemented", fsa_cause2string(cause), raised_from); -- crmd_exit(1); -+ crmd_exit(pcmk_err_generic); - break; - } - crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); -@@ -256,7 +256,7 @@ delete_fsa_input(fsa_data_t * fsa_data) - if (fsa_data->data != NULL) { - crm_err("Dont know how to free %s data from %s", - fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); -- crmd_exit(1); -+ crmd_exit(pcmk_err_generic); - } - break; - } -@@ -466,6 +466,10 @@ relay_message(xmlNode * msg, gboolean originated_locally) - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { - dest = text2msg_type(sys_to); -+ -+ if (dest == crm_msg_none || dest > crm_msg_stonith_ng) { -+ dest = crm_msg_crmd; -+ } - } - #endif - ROUTER_RESULT("Message result: External relay"); -@@ -517,7 +521,7 @@ process_hello_message(xmlNode * hello, - } - - gboolean --crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) -+crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session) - { - char *client_name = NULL; - char *major_version = NULL; -@@ -526,8 +530,9 @@ crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) - - xmlNode *xml = NULL; - const char *op = crm_element_value(client_msg, F_CRM_TASK); -+ const char *uuid = curr_client ? curr_client->id : proxy_session; - -- if (curr_client == NULL) { -+ if (uuid == NULL) { - crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); - return FALSE; - -@@ -541,7 +546,7 @@ crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) - if (auth_result == TRUE) { - if (client_name == NULL) { - crm_err("Bad client details (client_name=%s, uuid=%s)", -- crm_str(client_name), curr_client->id); -+ crm_str(client_name), uuid); - auth_result = FALSE; - } - } -@@ -559,15 +564,19 @@ crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) - } - - if (auth_result == TRUE) { -- crm_trace("Accepted client %s", crm_client_name(curr_client)); -- curr_client->userdata = strdup(client_name); -+ crm_trace("Accepted client %s", client_name); -+ if (curr_client) { -+ curr_client->userdata = strdup(client_name); -+ } - - crm_trace("Triggering FSA: %s", __FUNCTION__); - mainloop_set_trigger(fsa_source); - - } else { - crm_warn("Rejected client logon request"); -- qb_ipcs_disconnect(curr_client->ipcs); -+ if (curr_client) { -+ qb_ipcs_disconnect(curr_client->ipcs); -+ } - } - - free(minor_version); -@@ -602,26 +611,33 @@ static enum crmd_fsa_input - handle_failcount_op(xmlNode * stored_msg) - { - const char *rsc = NULL; -+ const char *uname = NULL; -+ gboolean is_remote_node = FALSE; - xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); - - if (xml_rsc) { - rsc = ID(xml_rsc); - } - -+ uname = crm_element_value(stored_msg, XML_LRM_ATTR_TARGET); -+ if (crm_element_value(stored_msg, XML_LRM_ATTR_ROUTER_NODE)) { -+ is_remote_node = TRUE; -+ } -+ - if (rsc) { - char *attr = NULL; - - crm_info("Removing failcount for %s", rsc); - - attr = crm_concat("fail-count", rsc, '-'); -- update_attrd(NULL, attr, NULL, NULL); -+ update_attrd(uname, attr, NULL, NULL, is_remote_node); - free(attr); - - attr = 
crm_concat("last-failure", rsc, '-'); -- update_attrd(NULL, attr, NULL, NULL); -+ update_attrd(uname, attr, NULL, NULL, is_remote_node); - free(attr); - -- lrm_clear_last_failure(rsc); -+ lrm_clear_last_failure(rsc, uname); - } else { - crm_log_xml_warn(stored_msg, "invalid failcount op"); - } -@@ -766,7 +782,9 @@ handle_request(xmlNode * stored_msg) - crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); - - msg = create_reply(stored_msg, ping); -- relay_message(msg, TRUE); -+ if(msg) { -+ relay_message(msg, TRUE); -+ } - - free_xml(ping); - free_xml(msg); -@@ -851,7 +869,7 @@ handle_shutdown_request(xmlNode * stored_msg) - crm_log_xml_trace(stored_msg, "message"); - - now_s = crm_itoa(now); -- update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL); -+ update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE); - free(now_s); - - /* will be picked up by the TE as long as its running */ -@@ -900,6 +918,9 @@ send_msg_via_ipc(xmlNode * msg, const char *sys) - #endif - do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); - -+ } else if (sys != NULL && crmd_is_proxy_session(sys)) { -+ crmd_proxy_send(sys, msg); -+ - } else { - crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); - send_ok = FALSE; -diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c -index 5e51f5e..07cd67c 100644 ---- a/crmd/remote_lrmd_ra.c -+++ b/crmd/remote_lrmd_ra.c -@@ -396,7 +396,7 @@ handle_remote_ra_exec(gpointer user_data) - fsa_cib_delete(XML_CIB_TAG_STATUS, status, cib_quorum_override, rc, NULL); - crm_info("Forced a remote LRM refresh before connection start: call=%d", rc); - crm_log_xml_trace(status, "CLEAR LRM"); -- free(status); -+ free_xml(status); - - rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout); - if (rc == 0) { -diff --git a/crmd/subsystems.c b/crmd/subsystems.c -index ce12f42..a4d07b3 100644 ---- a/crmd/subsystems.c -+++ b/crmd/subsystems.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -45,26 +45,19 @@ - #include - - static void --crmdManagedChildDied(GPid pid, gint status, gpointer user_data) -+crmd_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- struct crm_subsystem_s *the_subsystem = user_data; -- -- if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); -- int core = WCOREDUMP(status); -+ /* struct crm_subsystem_s *the_subsystem = mainloop_child_userdata(p); */ -+ const char *name = mainloop_child_name(p); - -+ if (signo) { - crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)", -- the_subsystem->name, signo, the_subsystem->pid, core); -- -- } else if (WIFEXITED(status)) { -- int exitcode = WEXITSTATUS(status); -- -- do_crm_log(exitcode == 0 ? 
LOG_INFO : LOG_ERR, -- "Child process %s exited (pid=%d, rc=%d)", the_subsystem->name, -- the_subsystem->pid, exitcode); -+ name, signo, pid, core); - - } else { -- crm_err("Process %s:[%d] exited?", the_subsystem->name, the_subsystem->pid); -+ do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR, -+ "Child process %s exited (pid=%d, rc=%d)", name, -+ pid, exitcode); - } - } - -@@ -150,7 +143,7 @@ start_subsystem(struct crm_subsystem_s * the_subsystem) - return FALSE; - - default: /* Parent */ -- g_child_watch_add(pid, crmdManagedChildDied, the_subsystem); -+ mainloop_child_add(pid, 0, the_subsystem->name, the_subsystem, crmd_child_exit); - crm_trace("Client %s is has pid: %d", the_subsystem->name, pid); - the_subsystem->pid = pid; - return TRUE; -@@ -185,6 +178,5 @@ start_subsystem(struct crm_subsystem_s * the_subsystem) - /* Should not happen */ - crm_perror(LOG_ERR, "FATAL: Cannot exec %s", the_subsystem->command); - -- crmd_exit(100); /* Suppress respawning */ -- return TRUE; /* never reached */ -+ return crm_exit(DAEMON_RESPAWN_STOP); /* Suppress respawning */ - } -diff --git a/crmd/te_actions.c b/crmd/te_actions.c -index 895a809..b533f58 100644 ---- a/crmd/te_actions.c -+++ b/crmd/te_actions.c -@@ -69,10 +69,6 @@ send_stonith_update(crm_action_t * action, const char *target, const char *uuid) - CRM_CHECK(target != NULL, return); - CRM_CHECK(uuid != NULL, return); - -- if (get_node_uuid(0, target) == NULL) { -- set_node_uuid(target, uuid); -- } -- - /* Make sure the membership and join caches are accurate */ - peer = crm_get_peer(0, target); - if (peer->uuid == NULL) { -@@ -82,7 +78,7 @@ send_stonith_update(crm_action_t * action, const char *target, const char *uuid) - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL); - crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); - crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); -- erase_node_from_join(target); -+ crm_update_peer_join(__FUNCTION__, peer, crm_join_none); - - node_state = - do_update_node_cib(peer, -@@ -464,7 +460,9 @@ te_rsc_command(crm_graph_t * graph, crm_action_t * action) - } - - value = crm_meta_value(action->params, XML_OP_ATTR_PENDING); -- if (crm_is_true(value) && safe_str_neq(task, CRMD_ACTION_CANCEL)) { -+ if (crm_is_true(value) -+ && safe_str_neq(task, CRMD_ACTION_CANCEL) -+ && safe_str_neq(task, CRMD_ACTION_DELETE)) { - /* write a "pending" entry to the CIB, inhibit notification */ - crm_debug("Recording pending op %s in the CIB", task_uuid); - cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_EXECRA_STATUS_UNKNOWN); -@@ -504,11 +502,15 @@ notify_crmd(crm_graph_t * graph) - case tg_restart: - type = "restart"; - if (fsa_state == S_TRANSITION_ENGINE) { -- if (transition_timer->period_ms > 0) { -- crm_timer_stop(transition_timer); -- crm_timer_start(transition_timer); -- } else if (too_many_st_failures() == FALSE) { -- event = I_PE_CALC; -+ if (too_many_st_failures() == FALSE) { -+ if (transition_timer->period_ms > 0) { -+ crm_timer_stop(transition_timer); -+ crm_timer_start(transition_timer); -+ } else { -+ event = I_PE_CALC; -+ } -+ } else { -+ event = I_TE_SUCCESS; - } - - } else if (fsa_state == S_POLICY_ENGINE) { -diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c -index a91f192..4e09d71 100644 ---- a/crmd/te_callbacks.c -+++ b/crmd/te_callbacks.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the 
Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -63,7 +63,7 @@ process_resource_updates(xmlXPathObject * xpathObj) - - - */ -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ int lpc = 0, max = numXpathResults(xpathObj); - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *rsc_op = getXpathResult(xpathObj, lpc); -@@ -76,6 +76,7 @@ process_resource_updates(xmlXPathObject * xpathObj) - void - te_update_diff(const char *event, xmlNode * msg) - { -+ int lpc, max; - int rc = -1; - const char *op = NULL; - -@@ -119,7 +120,7 @@ te_update_diff(const char *event, xmlNode * msg) - crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, - diff_del_admin_epoch, diff_del_epoch, diff_del_updates, - diff_add_admin_epoch, diff_add_epoch, diff_add_updates, fsa_state2string(fsa_state)); -- log_cib_diff(LOG_DEBUG_2, diff, op); -+ log_cib_diff(LOG_DEBUG_2, diff, __FUNCTION__); - - if (cib_config_changed(NULL, NULL, &diff)) { - abort_transition(INFINITY, tg_restart, "Non-status change", diff); -@@ -130,79 +131,72 @@ te_update_diff(const char *event, xmlNode * msg) - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - xmlNode *aborted = getXpathResult(xpathObj, 0); - - abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); - goto bail; - -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* Tickets Attributes - Removed */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - xmlNode *aborted = getXpathResult(xpathObj, 0); - - abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); - goto bail; -- -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* Transient Attributes - Added/Updated */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" - XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -- int lpc; -+ max = numXpathResults(xpathObj); - -- for (lpc = 0; lpc < xpathObj->nodesetval->nodeNr; lpc++) { -- xmlNode *attr = getXpathResult(xpathObj, lpc); -- const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); -- const char *value = NULL; -- -- if (safe_str_eq(CRM_OP_PROBED, name)) { -- value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); -- } -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *attr = getXpathResult(xpathObj, lpc); -+ const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); -+ const char *value = NULL; - -- if (crm_is_true(value) == FALSE) { -- abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); -- crm_log_xml_trace(attr, "Abort"); -- goto bail; -- } -+ if 
(safe_str_eq(CRM_OP_PROBED, name)) { -+ value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); - } - -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ if (crm_is_true(value) == FALSE) { -+ abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); -+ crm_log_xml_trace(attr, "Abort"); -+ goto bail; -+ } - } - -+ freeXpathObject(xpathObj); -+ - /* Transient Attributes - Removed */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" - XML_TAG_TRANSIENT_NODEATTRS); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - xmlNode *aborted = getXpathResult(xpathObj, 0); - - abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); - goto bail; - -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* - * Check for and fast-track the processing of LRM refreshes - * In large clusters this can result in _huge_ speedups - * - * Unfortunately we can only do so when there are no pending actions -- * Otherwise we could miss updates we're waiting for and stall -+ * Otherwise we could miss updates we're waiting for and stall - * - */ - xpathObj = NULL; -@@ -213,84 +207,71 @@ te_update_diff(const char *event, xmlNode * msg) - XML_LRM_TAG_RESOURCE); - } - -- if (xpathObj) { -- int updates = xpathObj->nodesetval->nodeNr; -- -- if (updates > 1) { -- /* Updates by, or in response to, TE actions will never contain updates -- * for more than one resource at a time -- */ -- crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", -- updates); -- crm_log_xml_trace(diff, "lrm-refresh"); -- abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); -- goto bail; -- } -- xmlXPathFreeObject(xpathObj); -+ max = numXpathResults(xpathObj); -+ if (max > 1) { -+ /* Updates by, or in response to, TE actions will never contain updates -+ * for more than one resource at a time -+ */ -+ crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", max); -+ crm_log_xml_trace(diff, "lrm-refresh"); -+ abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); -+ goto bail; - } -+ freeXpathObject(xpathObj); - - /* Process operation updates */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj) { -+ if (numXpathResults(xpathObj)) { - process_resource_updates(xpathObj); -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* Detect deleted (as opposed to replaced or added) actions - eg. 
crm_resource -C */ - xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -- -- for (lpc = 0; lpc < max; lpc++) { -- int max = 0; -- const char *op_id = NULL; -- char *rsc_op_xpath = NULL; -- xmlXPathObject *op_match = NULL; -- xmlNode *match = getXpathResult(xpathObj, lpc); -- -- CRM_CHECK(match != NULL, continue); -- -- op_id = ID(match); -- -- max = strlen(rsc_op_template) + strlen(op_id) + 1; -- rsc_op_xpath = calloc(1, max); -- snprintf(rsc_op_xpath, max, rsc_op_template, op_id); -- -- op_match = xpath_search(diff, rsc_op_xpath); -- if (op_match == NULL || op_match->nodesetval->nodeNr == 0) { -- /* Prevent false positives by matching cancelations too */ -- const char *node = get_node_id(match); -- crm_action_t *cancelled = get_cancel_action(op_id, node); -- -- if (cancelled == NULL) { -- crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, -- node); -- abort_transition(INFINITY, tg_restart, "Resource op removal", match); -- if (op_match) { -- xmlXPathFreeObject(op_match); -- } -- free(rsc_op_xpath); -- goto bail; -- -- } else { -- crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", -- op_id, node, cancelled->id); -- } -- } -+ max = numXpathResults(xpathObj); -+ for (lpc = 0; lpc < max; lpc++) { -+ int path_max = 0; -+ const char *op_id = NULL; -+ char *rsc_op_xpath = NULL; -+ xmlXPathObject *op_match = NULL; -+ xmlNode *match = getXpathResult(xpathObj, lpc); -+ -+ CRM_CHECK(match != NULL, continue); -+ -+ op_id = ID(match); -+ -+ path_max = strlen(rsc_op_template) + strlen(op_id) + 1; -+ rsc_op_xpath = calloc(1, path_max); -+ snprintf(rsc_op_xpath, path_max, rsc_op_template, op_id); -+ -+ op_match = xpath_search(diff, rsc_op_xpath); -+ if (numXpathResults(op_match) == 0) { -+ /* Prevent false positives by matching cancelations too */ -+ const char *node = get_node_id(match); -+ crm_action_t *cancelled = get_cancel_action(op_id, node); -+ -+ if (cancelled == NULL) { -+ crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, -+ node); -+ abort_transition(INFINITY, tg_restart, "Resource op removal", match); -+ freeXpathObject(op_match); -+ free(rsc_op_xpath); -+ goto bail; - -- if (op_match) { -- xmlXPathFreeObject(op_match); -+ } else { -+ crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", -+ op_id, node, cancelled->id); - } -- free(rsc_op_xpath); - } -+ -+ freeXpathObject(op_match); -+ free(rsc_op_xpath); - } - - bail: -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - } - - gboolean -@@ -323,13 +304,13 @@ process_te_message(xmlNode * msg, xmlNode * xml_data) - crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from); - - xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj) { -+ if (numXpathResults(xpathObj)) { - process_resource_updates(xpathObj); -- xmlXPathFreeObject(xpathObj); -- xpathObj = NULL; -+ freeXpathObject(xpathObj); - - } else { - crm_log_xml_err(msg, "Invalid (N)ACK"); -+ freeXpathObject(xpathObj); - return FALSE; - } - -@@ -345,6 +326,7 @@ process_te_message(xmlNode * msg, xmlNode * xml_data) - GHashTable *stonith_failures = NULL; - struct st_fail_rec { - int count; -+ int last_rc; - }; - - gboolean -@@ -363,12 +345,52 @@ too_many_st_failures(void) - if (value->count > 10) { - crm_notice("Too many failures to fence %s (%d), giving up", key, value->count); - return TRUE; -+ } else if (value->last_rc == -ENODEV) { 
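-+ /* a previous attempt reported no device capable of fencing this target, so further retries cannot succeed */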
-+ crm_notice("No devices found in cluster to fence %s, giving up", key); -+ return TRUE; - } - } - return FALSE; - } - - void -+st_fail_count_reset(const char *target) -+{ -+ struct st_fail_rec *rec = NULL; -+ -+ if (stonith_failures) { -+ rec = g_hash_table_lookup(stonith_failures, target); -+ } -+ -+ if (rec) { -+ rec->count = 0; -+ rec->last_rc = 0; -+ } -+} -+ -+static void -+st_fail_count_increment(const char *target, int rc) -+{ -+ struct st_fail_rec *rec = NULL; -+ -+ if (stonith_failures == NULL) { -+ stonith_failures = -+ g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free); -+ } -+ -+ rec = g_hash_table_lookup(stonith_failures, target); -+ if (rec) { -+ rec->count++; -+ } else { -+ rec = malloc(sizeof(struct st_fail_rec)); -+ rec->count = 1; -+ g_hash_table_insert(stonith_failures, strdup(target), rec); -+ } -+ rec->last_rc = rc; -+ -+} -+ -+void - tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) - { - char *uuid = NULL; -@@ -376,7 +398,6 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) - int stonith_id = -1; - int transition_id = -1; - crm_action_t *action = NULL; -- struct st_fail_rec *rec = NULL; - int call_id = data->call_id; - int rc = data->rc; - char *userdata = data->userdata; -@@ -413,10 +434,6 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) - } - - stop_te_timer(action->timer); -- if (stonith_failures == NULL) { -- stonith_failures = -- g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free); -- } - - if (rc == pcmk_ok) { - const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); -@@ -429,10 +446,7 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) - send_stonith_update(action, target, uuid); - } - } -- rec = g_hash_table_lookup(stonith_failures, target); -- if (rec) { -- rec->count = 0; -- } -+ st_fail_count_reset(target); - - } else { - const char *target = crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET); -@@ -445,14 +459,7 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) - abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); - } - -- rec = g_hash_table_lookup(stonith_failures, target); -- if (rec) { -- rec->count++; -- } else { -- rec = malloc(sizeof(struct st_fail_rec)); -- rec->count = 1; -- g_hash_table_insert(stonith_failures, strdup(target), rec); -- } -+ st_fail_count_increment(target, rc); - } - - update_graph(transition_graph, action); -@@ -539,7 +546,7 @@ action_timer_callback(gpointer data) - - if (timer->action->type != action_type_rsc) { - send_update = FALSE; -- } else if (safe_str_eq(task, "cancel")) { -+ } else if (safe_str_eq(task, RSC_CANCEL)) { - /* we dont need to update the CIB with these */ - send_update = FALSE; - } -diff --git a/crmd/te_events.c b/crmd/te_events.c -index e289a8b..521cef6 100644 ---- a/crmd/te_events.c -+++ b/crmd/te_events.c -@@ -97,8 +97,34 @@ fail_incompletable_actions(crm_graph_t * graph, const char *down_node) - return FALSE; - } - -+static const char * -+get_uname_from_event(xmlNode * event) -+{ -+ xmlNode *node = event; -+ -+ while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) { -+ node = node->parent; -+ } -+ -+ CRM_CHECK(node != NULL, return NULL); -+ return crm_element_value(node, XML_ATTR_UNAME); -+} -+ -+static gboolean -+get_is_remote_from_event(xmlNode * event) -+{ -+ xmlNode *node = event; -+ -+ while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) { -+ 
node = node->parent; -+ } -+ -+ CRM_CHECK(node != NULL, return FALSE); -+ return crm_element_value(node, XML_NODE_IS_REMOTE) ? TRUE : FALSE; -+} -+ - static gboolean --update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, gboolean do_update) -+update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc, gboolean do_update) - { - int interval = 0; - -@@ -108,7 +134,8 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - - const char *value = NULL; - const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); -- const char *on_uname = get_uname(event_node); -+ const char *on_uname = get_uname_from_event(event); -+ const char *origin = crm_element_value(event, XML_ATTR_ORIGIN); - - if (rc == 99) { - /* this is an internal code for "we're busy, try again" */ -@@ -118,6 +145,12 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - return FALSE; - } - -+ if (safe_str_eq(origin, "build_active_RAs")) { -+ crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh", -+ id, rc, on_uname); -+ return FALSE; -+ } -+ - if (failed_stop_offset == NULL) { - failed_stop_offset = strdup(INFINITY_S); - } -@@ -126,7 +159,12 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - failed_start_offset = strdup(INFINITY_S); - } - -- CRM_CHECK(on_uname != NULL, return TRUE); -+ if (on_uname == NULL) { -+ /* uname not in event, check cache */ -+ on_uname = crm_peer_uname(event_node_uuid); -+ CRM_CHECK(on_uname != NULL, return TRUE); -+ } -+ - CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event)); - goto bail); - CRM_CHECK(task != NULL, goto bail); -@@ -160,16 +198,17 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - - if (do_update) { - char *now = crm_itoa(time(NULL)); -+ gboolean is_remote_node = get_is_remote_from_event(event); - - crm_warn("Updating failcount for %s on %s after failed %s:" - " rc=%d (update=%s, time=%s)", rsc_id, on_uname, task, rc, value, now); - - attr_name = crm_concat("fail-count", rsc_id, '-'); -- update_attrd(on_uname, attr_name, value, NULL); -+ update_attrd(on_uname, attr_name, value, NULL, is_remote_node); - free(attr_name); - - attr_name = crm_concat("last-failure", rsc_id, '-'); -- update_attrd(on_uname, attr_name, now, NULL); -+ update_attrd(on_uname, attr_name, now, NULL, is_remote_node); - free(attr_name); - - free(now); -diff --git a/crmd/te_utils.c b/crmd/te_utils.c -index 0bac2f7..1e37d9a 100644 ---- a/crmd/te_utils.c -+++ b/crmd/te_utils.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- *
-+ *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-@@ -90,7 +90,9 @@ tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
- }
-
- /* cbchan will be garbage at this point, arrange for it to be reset */
-- stonith_api->state = stonith_disconnected;
-+ if(stonith_api) {
-+ stonith_api->state = stonith_disconnected;
-+ }
-
- if (AM_I_DC) {
- fail_incompletable_stonith(transition_graph);
-@@ -102,24 +104,64 @@ tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
- # include
- #endif
-
-+char *te_client_id = NULL;
-+
-+#ifdef HAVE_SYS_REBOOT_H
-+# include <unistd.h>
-+# include <sys/reboot.h>
-+#endif
-+
- static void
- tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
- {
-+ if(te_client_id == NULL) {
-+ te_client_id = g_strdup_printf("%s.%d", crm_system_name, getpid());
-+ }
-+
- if (st_event == NULL) {
- crm_err("Notify data not found");
- return;
- }
-
- if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
-- crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner,
-- st_event->origin);
-- register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
-+ crm_crit("We were allegedly just fenced by %s for %s with %s!", st_event->executioner,
-+ st_event->origin, st_event->device); /* Dumps blackbox if enabled */
-+
-+ qb_log_fini(); /* Try to get the above log message to disk - somehow */
-+
-+ /* Get out ASAP and do not come back up.
-+ *
-+ * Triggering a reboot is not the worst idea either, since
-+ * the rest of the cluster thinks we're safely down
-+ */
-+
-+#ifdef RB_HALT_SYSTEM
-+ reboot(RB_HALT_SYSTEM);
-+#endif
-+
-+ /*
-+ * If reboot() fails or is not supported, coming back up will
-+ * probably lead to a situation where the other nodes set our
-+ * status to 'lost' because of the fencing callback and will
-+ * discard subsequent election votes with:
-+ *
-+ * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
-+ *
-+ * So just stay dead, something is seriously messed up anyway.
-+ *
-+ */
-+ exit(100); /* None of our wrappers since we already called qb_log_fini() */
- return;
- }
-
-+ if (st_event->result == pcmk_ok &&
-+ safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
-+ st_fail_count_reset(st_event->target);
-+ }
-+
- crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
- st_event->target, st_event->result == pcmk_ok ? "" : " not",
-- st_event->operation,
-+ st_event->action,
- st_event->executioner ? st_event->executioner : "",
- st_event->origin, pcmk_strerror(st_event->result), st_event->id,
- st_event->client_origin ?
st_event->client_origin : ""); -@@ -144,11 +186,27 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) - } - #endif - -- if (st_event->result == pcmk_ok) { -- gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); -+ if (st_event->result == pcmk_ok) { -+ crm_node_t *peer = crm_get_peer(0, st_event->target); -+ const char *uuid = crm_peer_uuid(peer); -+ gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); - - crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); -- if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { -+ if(AM_I_DC) { -+ /* The DC always sends updates */ -+ send_stonith_update(NULL, st_event->target, uuid); -+ -+ if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { -+ -+ /* Abort the current transition graph if it wasn't us -+ * that invoked stonith to fence someone -+ */ -+ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); -+ abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); -+ } -+ -+ /* Assume it was our leader if we dont currently have one */ -+ } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { - crm_notice("Target %s our leader %s (recorded: %s)", - fsa_our_dc ? "was" : "may have been", st_event->target, - fsa_our_dc ? fsa_our_dc : ""); -@@ -158,26 +216,18 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) - * have them do so too after the election - */ - if (we_are_executioner) { -- const char *uuid = get_uuid(st_event->target); -- - send_stonith_update(NULL, st_event->target, uuid); - } - stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); - -- } else if (AM_I_DC && -- st_event->client_origin && -- safe_str_neq(st_event->client_origin, crm_system_name)) { -- const char *uuid = get_uuid(st_event->target); -- -- /* If a remote process outside of pacemaker invoked stonith to -- * fence someone, report the fencing result to the cib -- * and abort the transition graph. 
*/ -- crm_info("External fencing operation from %s fenced %s", st_event->client_origin, -- st_event->target); -- send_stonith_update(NULL, st_event->target, uuid); -- abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); - } -- } -+ -+ /* Everyone records them as safely down */ -+ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL); -+ crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); -+ crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); -+ crm_update_peer_join(__FUNCTION__, peer, crm_join_none); -+ } - } - - gboolean -@@ -307,8 +357,8 @@ te_graph_trigger(gpointer user_data) - void - trigger_graph_processing(const char *fn, int line) - { -- mainloop_set_trigger(transition_trigger); - crm_trace("%s:%d - Triggered graph processing", fn, line); -+ mainloop_set_trigger(transition_trigger); - } - - void -@@ -327,32 +377,44 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action, - int diff_del_updates = 0; - int diff_del_epoch = 0; - int diff_del_admin_epoch = 0; -+ -+ const char *uname = ""; -+ xmlNode *search = reason; - xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2); - - magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); - -+ while(search) { -+ const char *kind = TYPE(search); -+ if(safe_str_eq(XML_CIB_TAG_STATE, kind) -+ || safe_str_eq(XML_CIB_TAG_NODE, kind)) { -+ uname = crm_peer_uname(ID(search)); -+ } -+ search = search->parent; -+ } -+ - if (diff) { - cib_diff_version_details(diff, - &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, - &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); - if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) { - crm_info -- ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s", -- fn, line, transition_graph->complete, TYPE(reason), ID(reason), NAME(reason), -+ ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s", -+ fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), NAME(reason), - VALUE(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, - diff_add_updates, abort_text); - } else { - crm_info -- ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", -- fn, line, transition_graph->complete, TYPE(reason), ID(reason), -+ ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", -+ fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), - magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates, - abort_text); - } - - } else { - crm_info -- ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s", -- fn, line, transition_graph->complete, TYPE(reason), ID(reason), -+ ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s) : %s", -+ fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), - magic ? 
magic : "NA", abort_text); - } - -@@ -388,7 +450,7 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action, - if (transition_timer->period_ms > 0) { - crm_timer_stop(transition_timer); - crm_timer_start(transition_timer); -- } else if (too_many_st_failures() == FALSE) { -+ } else { - register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); - } - return; -diff --git a/crmd/tengine.c b/crmd/tengine.c -index 9ff458c..8e236f1 100644 ---- a/crmd/tengine.c -+++ b/crmd/tengine.c -@@ -106,10 +106,6 @@ do_te_control(long long action, - te_uuid = crm_generate_uuid(); - crm_info("Registering TE UUID: %s", te_uuid); - -- if (transition_trigger == NULL) { -- transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); -- } -- - if (pcmk_ok != - fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { - crm_err("Could not set CIB notification callback"); -diff --git a/crmd/utils.c b/crmd/utils.c -index d06b785..07c71b2 100644 ---- a/crmd/utils.c -+++ b/crmd/utils.c -@@ -131,6 +131,7 @@ crm_timer_popped(gpointer data) - - if (timer == election_trigger && election_trigger->counter > 5) { - crm_notice("We appear to be in an election loop, something may be wrong"); -+ crm_write_blackbox(0, NULL); - election_trigger->counter = 0; - } - -@@ -983,7 +984,7 @@ update_dc(xmlNode * msg) - crm_info("Set DC to %s (%s)", crm_str(fsa_our_dc), crm_str(fsa_our_dc_version)); - - } else if (last_dc != NULL) { -- crm_debug("Unset DC. Was %s", crm_str(last_dc)); -+ crm_info("Unset DC. Was %s", crm_str(last_dc)); - } - - free(last_dc); -@@ -1018,12 +1019,36 @@ erase_status_tag(const char *uname, const char *tag, int options) - - crm_ipc_t *attrd_ipc = NULL; - -+static int -+update_without_attrd(const char *host_uuid, const char *name, const char *value, const char *user_name) -+{ -+ if (fsa_cib_conn == NULL) { -+ return -1; -+ } -+ -+ crm_trace("updating status for host_uuid %s, %s=%s", host_uuid, name ? name : "", value ? 
value : ""); -+ return update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_STATUS, host_uuid, NULL, NULL, -+ NULL, name, value, FALSE, user_name); -+} -+ - void --update_attrd(const char *host, const char *name, const char *value, const char *user_name) -+update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node) - { - gboolean rc; - int max = 5; - -+ /* TODO eventually we will want to update/replace the attrd with -+ * something that can handle remote nodes as well as cluster nodes */ -+ if (is_remote_node) { -+ /* host is required for updating a remote node */ -+ CRM_CHECK(host != NULL, return;); -+ /* remote node uname and uuid are equal */ -+ if (update_without_attrd(host, name, value, user_name) < pcmk_ok) { -+ crm_err("Could not update attribute %s for remote-node %s", name, host); -+ } -+ return; -+ } -+ - if (attrd_ipc == NULL) { - attrd_ipc = crm_ipc_new(T_ATTRD, 0); - } -@@ -1037,10 +1062,10 @@ update_attrd(const char *host, const char *name, const char *value, const char * - - rc = attrd_update_delegate(attrd_ipc, 'U', host, name, value, XML_CIB_TAG_STATUS, NULL, - NULL, user_name); -- if (rc > 0) { -+ if (rc == pcmk_ok) { - break; - -- } else if (rc != -EAGAIN && rc != -EREMOTEIO) { -+ } else if (rc != -EAGAIN && rc != -EALREADY) { - crm_info("Disconnecting from attrd: %s (%d)", pcmk_strerror(rc), rc); - crm_ipc_close(attrd_ipc); - } -@@ -1049,7 +1074,7 @@ update_attrd(const char *host, const char *name, const char *value, const char * - - } while (max--); - -- if (rc < 0) { -+ if (rc != pcmk_ok) { - if (name) { - crm_err("Could not send attrd %s update%s: %s (%d)", - name, is_set(fsa_input_register, R_SHUTDOWN) ? " at shutdown" : "", -diff --git a/doc/Clusters_from_Scratch/en-US/Revision_History.xml b/doc/Clusters_from_Scratch/en-US/Revision_History.xml -index 59e961f..19dd319 100644 ---- a/doc/Clusters_from_Scratch/en-US/Revision_History.xml -+++ b/doc/Clusters_from_Scratch/en-US/Revision_History.xml -@@ -8,43 +8,43 @@ - - - -- 1 -+ 1-0 - Mon May 17 2010 - AndrewBeekhofandrew@beekhof.net - Import from Pages.app - - -- 2 -+ 2-0 - Wed Sep 22 2010 - RaoulScarazzinirasca@miamammausalinux.org - Italian translation - - -- 3 -+ 3-0 - Wed Feb 9 2011 - AndrewBeekhofandrew@beekhof.net - Updated for Fedora 13 - - -- 4 -+ 4-0 - Wed Oct 5 2011 - AndrewBeekhofandrew@beekhof.net - Update the GFS2 section to use CMAN - - -- 5 -+ 5-0 - Fri Feb 10 2012 - AndrewBeekhofandrew@beekhof.net - Generate docbook content from asciidoc sources - - -- 6 -+ 6-0 - Tues July 3 2012 - AndrewBeekhofandrew@beekhof.net - Updated for Fedora 17 - - -- 7 -+ 7-0 - Fri Sept 14 2012 - DavidVosseldvossel@redhat.com - Updated for pcs -diff --git a/doc/Makefile.am b/doc/Makefile.am -index 1661df6..663315e 100644 ---- a/doc/Makefile.am -+++ b/doc/Makefile.am -@@ -7,12 +7,12 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-@@ -22,12 +22,12 @@ MAINTAINERCLEANFILES = Makefile.in - helpdir = $(datadir)/$(PACKAGE) - - ascii = crm_fencing.txt acls.txt --docbook = Pacemaker_Explained Clusters_from_Scratch -+docbook = Pacemaker_Explained Clusters_from_Scratch Pacemaker_Remote - doc_DATA = README.hb2openais $(ascii) $(generated_docs) - - publican_docs = --generated_docs = --generated_mans = -+generated_docs = -+generated_mans = - - DOCBOOK_FORMATS := html-desktop - DOCBOOK_LANGS := en-US -@@ -35,8 +35,10 @@ DOTs = $(wildcard */en-US/images/*.dot) - SVG = $(wildcard */en-US/images/pcmk-*.svg) $(DOTs:%.dot=%.svg) - - PNGS = $(SVG:%.svg=%-small.png) $(SVG:%.svg=%.png) $(SVG:%.svg=%-large.png) \ -- Pacemaker_Explained/en-US/images/Policy-Engine-big.png -- Pacemaker_Explained/en-US/images/Policy-Engine-small.png -+ Pacemaker_Explained/en-US/images/Policy-Engine-big.png \ -+ Pacemaker_Explained/en-US/images/Policy-Engine-small.png \ -+ Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png \ -+ Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png - - BRAND_PNGS = publican-clusterlabs/en-US/images/title_logo.png \ - publican-clusterlabs/en-US/images/image_left.png \ -@@ -62,7 +64,7 @@ publican_docs += $(docbook) - endif - endif - --EXTRA_DIST = $(docbook:%=%.xml) -+EXTRA_DIST = $(docbook:%=%.xml) - - %.html: %.txt - $(ASCIIDOC) --unsafe --backend=xhtml11 $< -@@ -80,11 +82,11 @@ EXTRA_DIST = $(docbook:%=%.xml) - sed -i.sed 's///' $@ - sed -i.sed 's/ //' $@ # Fix line endings - sed -i.sed 's/\ lang="en"//' $@ # Never specify a language in the chapters -- sed -i.sed 's/simpara/para/g' $@ # publican doesn't correctly render footnotes with simpara -+ sed -i.sed 's/simpara/para/g' $@ # publican doesn't correctly render footnotes with simpara - sed -i.sed 's/.*.*//g' $@ # Remove dangling tag - sed -i.sed 's/.*preface>//g' $@ # Remove preface elements - sed -i.sed 's:::g' $@ # Remove empty title -- sed -i.sed 's/chapter/section/g' $@ # Chapters become sections, so that books can become chapters -+ sed -i.sed 's/chapter/section/g' $@ # Chapters become sections, so that books can become chapters - sed -i.sed 's/<.*bookinfo.*>//g' $@ # Strip out bookinfo, we don't need it - -grep -qis "//' $@ # We just want the appendix tag - -grep -vqis "/chapter>/g' $@ # Rename to chapter -@@ -94,7 +96,7 @@ CFS_TXT=$(wildcard Clusters_from_Scratch/en-US/*.txt) - CFS_XML=$(CFS_TXT:%.txt=%.xml) - - # We have to hardcode the book name --# With '%' the test for 'newness' fails -+# With '%' the test for 'newness' fails - Clusters_from_Scratch.build: $(PNGS) $(wildcard Clusters_from_Scratch/en-US/*.xml) $(CFS_XML) - @echo Building $(@:%.build=%) because of $? - rm -rf $(@:%.build=%)/publish/* -@@ -106,7 +108,7 @@ PE_TXT=$(wildcard Pacemaker_Explained/en-US/*.txt) - PE_XML=$(PE_TXT:%.txt=%.xml) - - # We have to hardcode the book name --# With '%' the test for 'newness' fails -+# With '%' the test for 'newness' fails - Pacemaker_Explained.build: $(PNGS) $(wildcard Pacemaker_Explained/en-US/*.xml) $(PE_XML) - @echo Building $(@:%.build=%) because of $? - rm -rf $(@:%.build=%)/publish/* -@@ -114,6 +116,19 @@ Pacemaker_Explained.build: $(PNGS) $(wildcard Pacemaker_Explained/en-US/*.xml) $ - rm -rf $(@:%.build=%)/tmp - touch $@ - -+ -+PR_TXT=$(wildcard Pacemaker_Remote/en-US/*.txt) -+PR_XML=$(PR_TXT:%.txt=%.xml) -+ -+# We have to hardcode the book name -+# With '%' the test for 'newness' fails -+Pacemaker_Remote.build: $(PNGS) $(wildcard Pacemaker_Remote/en-US/*.xml) $(PR_XML) -+ @echo Building $(@:%.build=%) because of $? 
-+ rm -rf $(@:%.build=%)/publish/* -+ cd $(@:%.build=%) && RPM_BUILD_DIR="" $(PUBLICAN) build --publish --langs=$(DOCBOOK_LANGS) --formats=$(DOCBOOK_FORMATS) -+ rm -rf $(@:%.build=%)/tmp -+ touch $@ -+ - # Update the translation template - pot: - for book in $(docbook); do \ -@@ -161,7 +176,7 @@ pdf: - make DOCBOOK_FORMATS="pdf" ASCIIDOC_CLI_TYPE=$(ASCIIDOC_CLI_TYPE) all-local - - # Make sure www-(pcs|crmsh) happen in serial --www: -+www: - make www-pcs - make www-crmsh - make $(generated_docs) $(ascii) -@@ -183,7 +198,6 @@ www-cli: - if BUILD_DOCBOOK - for book in $(docbook); do \ - echo Uploading $$book...; \ -- echo "Requires Corosync 2.x and optimized for the $(ASCIIDOC_CLI_TYPE) CLI
" > $$book/publish/build-$(PACKAGE_SERIES)-$(ASCIIDOC_CLI_TYPE).txt; \ - echo "Generated on `date` from version: $(shell git log --pretty="format:%h %d" -n 1)" >> $$book/publish/build-$(PACKAGE_SERIES)-$(ASCIIDOC_CLI_TYPE).txt; \ - for lang in `ls -1 $$book/publish | grep [a-z][a-z]-[A-Z][A-Z]`; do \ - mv $$book/publish/$$lang/Pacemaker/$(PACKAGE_SERIES)-$(ASCIIDOC_CLI_TYPE)/epub/$$book/Pacemaker-1.1{-$(ASCIIDOC_CLI_TYPE),}-$$book-$$lang.epub; \ -@@ -195,7 +209,7 @@ if BUILD_DOCBOOK - endif - - clean-local: -- -rm -rf $(generated_docs) $(generated_mans) $(docbook_build) $(CFS_XML) $(PE_XML) -+ -rm -rf $(generated_docs) $(generated_mans) $(docbook_build) $(CFS_XML) $(PE_XML) $(PR_XML) - for book in $(docbook); do rm -rf $$book/tmp $$book/publish; done - - foo: -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt b/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt -index e199bf5..2acb9fe 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt -@@ -484,6 +484,49 @@ limit to the number of sets that can be specified. - .Visual representation of the start order for the three sets defined above - image::images/three-sets.png["Three ordered sets",width="16cm",height="7.5cm",align="center"] - -+ -+== Resource Set OR Logic == -+ -+The unordered set logic discussed so far has all been "AND" logic. -+To illustrate this take the 3 resource set figure in the previous section. -+Those sets can be expressed, +(A and B) then (C) then (D) then (E and F)+ -+ -+Say for example we want change the first set, (A and B), to use "OR" logic -+so the sets look like this, +(A or B) then (C) then (D) then (E and F)+. -+This functionality can be achieved through the use of the +require-all+ -+option. By default this option is 'require-all=true' which is why the -+"AND" logic is used by default. Changing +require-all=false+ means only one -+resource in the set needs to be started before continuing on to the next set. -+ -+Note that the 'require-all=false' option only makes sense to use in conjunction -+with unordered sets, 'sequential=false'. Think of it like this, 'sequential=false' -+modifies the set to be an unordered set that uses "AND" logic by default, by adding -+'require-all=false' the unordered set's "AND" logic is flipped to "OR" logic. -+ -+.Resource Set "OR" logic. Three ordered sets, where the first set is internally unordered with "OR" logic. -+====== -+[source,XML] -+------- -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+------- -+====== -+ -+ - [[s-resource-sets-collocation]] - == Collocating Sets of Resources == - -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt -index 0f46bbd..7b7d2db 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt -@@ -7,7 +7,7 @@ with the rest of cluster options is simply a matter of parsing. These - options are used by the configuration database which is, by design, - mostly ignorant of the content it holds. So the decision was made to - place them in an easy to find location. -- -+ - == Configuration Version == - - indexterm:[Configuration Version,Cluster] -@@ -34,7 +34,7 @@ _Never set this value to zero_, in such cases the cluster cannot tell - the difference between your configuration and the "empty" one used - when nothing is found on disk. 
- --| epoch | -+| epoch | - indexterm:[epoch,Cluster Option] - indexterm:[Cluster,Option,epoch] - Incremented every time the configuration is updated (usually by the admin) -@@ -43,7 +43,7 @@ Incremented every time the configuration is updated (usually by the admin) - indexterm:[num_updates,Cluster Option] - indexterm:[Cluster,Option,num_updates] - Incremented every time the configuration or status is updated (usually by the cluster) -- -+ - |========================================================= - - == Other Fields == -@@ -51,7 +51,7 @@ Incremented every time the configuration or status is updated (usually by the cl - [width="95%",cols="2m,5<",options="header",align="center"] - |========================================================= - |Field |Description -- -+ - | validate-with | - indexterm:[validate-with,Cluster Option] - indexterm:[Cluster,Option,validate-with] - Determines the type of validation being done on the configuration. If - set to "none", the cluster will not verify that updates conform to the - DTD (nor reject ones that don't). This option can be useful when - operating a mixed version cluster during an upgrade. -- -+ - |========================================================= - - == Fields Maintained by the Cluster == -@@ -69,7 +69,7 @@ operating a mixed version cluster during an upgrade. - |========================================================= - |Field |Description - --|cib-last-written | -+|cib-last-written | - indexterm:[cib-last-written,Cluster Property] - indexterm:[Cluster,Property,cib-last-written] - Indicates when the configuration was last written to disk. Informational purposes only. -@@ -87,7 +87,28 @@ indexterm:[Cluster,Property,have-quorum] - Indicates if the cluster has quorum. If false, this may mean that the - cluster cannot start resources or fence other nodes. See - +no-quorum-policy+ below. -- -+ -+| dc-version | -+indexterm:[dc-version,Cluster Property] -+indexterm:[Cluster,Property,dc-version] -+Version of Pacemaker on the cluster's DC. -+ -+Often includes the hash which identifies the exact Git changeset it -+was built from. Used for diagnostic purposes. -+ -+| cluster-infrastructure | -+indexterm:[cluster-infrastructure,Cluster Property] -+indexterm:[Cluster,Property,cluster-infrastructure] -+The messaging stack on which Pacemaker is currently running. -+Used for informational and diagnostic purposes. -+ -+| expected-quorum-votes | -+indexterm:[expected-quorum-votes,Cluster Property] -+indexterm:[Cluster,Property,expected-quorum-votes] -+The number of nodes expected to be in the cluster. -+ -+Used to calculate quorum in Corosync 1.x (not CMAN) based clusters. -+ - |========================================================= - - Note that although these fields can be written to by the admin, in -@@ -130,7 +151,7 @@ For now we will describe the simple case where each option is present at most on - [width="95%",cols="5m,2,11<",options="header",align="center"] - |========================================================= - |Option |Default |Description -- -+ - | batch-limit | 30 | - indexterm:[batch-limit,Cluster Option] - indexterm:[Cluster,Option,batch-limit] -@@ -157,7 +178,7 @@ What to do when the cluster does not have quorum. Allowed values: - - * suicide - fence all nodes in the affected cluster partition - --| symmetric-cluster | TRUE | -+| symmetric-cluster | TRUE | - indexterm:[symmetric-cluster,Cluster Option] - indexterm:[Cluster,Option,symmetric-cluster] - Can all resources run on any node by default?
-@@ -170,7 +191,7 @@ shot? If you value your data, set up a STONITH device and enable this. - - If true, or unset, the cluster will refuse to start resources unless - one or more STONITH resources have been configured also. -- -+ - | stonith-action | reboot | - indexterm:[stonith-action,Cluster Option] - indexterm:[Cluster,Option,stonith-action] -@@ -185,28 +206,33 @@ Round trip delay over the network (excluding action execution). The - "correct" value will depend on the speed and load of your network and - cluster nodes. - --| stop-orphan-resources | TRUE | -+| stop-all-resources | FALSE | -+indexterm:[stop-all-resources,Cluster Option] -+indexterm:[Cluster,Option,stop-all-resources] -+Should the cluster stop all resources? -+ -+| stop-orphan-resources | TRUE | - indexterm:[stop-orphan-resources,Cluster Option] - indexterm:[Cluster,Option,stop-orphan-resources] - Should deleted resources be stopped? - --| stop-orphan-actions | TRUE | -+| stop-orphan-actions | TRUE | - indexterm:[stop-orphan-actions,Cluster Option] - indexterm:[Cluster,Option,stop-orphan-actions] - Should deleted actions be cancelled? - --| start-failure-is-fatal | TRUE | -+| start-failure-is-fatal | TRUE | - indexterm:[start-failure-is-fatal,Cluster Option] - indexterm:[Cluster,Option,start-failure-is-fatal] - When set to FALSE, the cluster will instead use the resource's - +failcount+ and value for +resource-failure-stickiness+. - --| pe-error-series-max | -1 (all) | -+| pe-error-series-max | -1 (all) | - indexterm:[pe-error-series-max,Cluster Option] - indexterm:[Cluster,Option,pe-error-series-max] - The number of PE inputs resulting in ERRORs to save. Used when reporting problems. - --| pe-warn-series-max | -1 (all) | -+| pe-warn-series-max | -1 (all) | - indexterm:[pe-warn-series-max,Cluster Option] - indexterm:[Cluster,Option,pe-warn-series-max] - The number of PE inputs resulting in WARNINGs to save. Used when reporting problems. -@@ -215,12 +241,87 @@ The number of PE inputs resulting in WARNINGs to save. Used when reporting probl - indexterm:[pe-input-series-max,Cluster Option] - indexterm:[Cluster,Option,pe-input-series-max] - The number of "normal" PE inputs to save. Used when reporting problems. -- -+ -+|default-resource-stickiness | 0 | -+indexterm:[default-resource-stickiness,Cluster Option] -+indexterm:[Cluster,Option,default-resource-stickiness] -++Deprecated:+ See <> instead -+ -+| is-managed-default | TRUE | -+indexterm:[is-managed-default,Cluster Option] -+indexterm:[Cluster,Option,is-managed-default] -++Deprecated:+ See <> instead -+ -+| maintenance-mode | FALSE | -+indexterm:[maintenance-mode,Cluster Option] -+indexterm:[Cluster,Option,maintenance-mode] -+Should the cluster refrain from monitoring, starting and stopping resources? -+ -+| stonith-timeout | 60s | -+indexterm:[stonith-timeout,Cluster Option] -+indexterm:[Cluster,Option,stonith-timeout] -+How long to wait for the STONITH action to complete -+ -+| default-action-timeout | 20s | -+indexterm:[default-action-timeout,Cluster Option] -+indexterm:[Cluster,Option,default-action-timeout] -++Deprecated:+ See <> instead -+ -+| dc-deadtime | 20s | -+indexterm:[dc-deadtime,Cluster Option] -+indexterm:[Cluster,Option,dc-deadtime] -+How long to wait for a response from other nodes during startup. -+ -+The "correct" value will depend on the speed/load of your network and the type of switches used.
-+ -+| cluster-recheck-interval | 15min | -+indexterm:[cluster-recheck-interval,Cluster Option] -+indexterm:[Cluster,Option,cluster-recheck-interval] -+Polling interval for time-based changes to options, resource parameters and constraints. -+ -+The cluster is primarily event-driven; however, the configuration can have elements that change based on time. To ensure these changes take effect, we can optionally poll the cluster's status for changes. -+ -+Allowed values: Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified, e.g. 5min) -+ -+| election-timeout | 2min | -+indexterm:[election-timeout,Cluster Option] -+indexterm:[Cluster,Option,election-timeout] -++Advanced Use Only+ -+ -+If you need to adjust this value, it probably indicates the presence of a bug. -+ -+| shutdown-escalation | 20min | -+indexterm:[shutdown-escalation,Cluster Option] -+indexterm:[Cluster,Option,shutdown-escalation] -++Advanced Use Only+ -+ -+If you need to adjust this value, it probably indicates the presence of a bug. -+ -+| crmd-integration-timeout | 3min | -+indexterm:[crmd-integration-timeout,Cluster Option] -+indexterm:[Cluster,Option,crmd-integration-timeout] -++Advanced Use Only+ -+ -+If you need to adjust this value, it probably indicates the presence of a bug. -+ -+| crmd-finalization-timeout | 30min | -+indexterm:[crmd-finalization-timeout,Cluster Option] -+indexterm:[Cluster,Option,crmd-finalization-timeout] -++Advanced Use Only+ -+ -+If you need to adjust this value, it probably indicates the presence of a bug. -+ -+| crmd-transition-delay | | -+indexterm:[crmd-transition-delay,Cluster Option] -+indexterm:[Cluster,Option,crmd-transition-delay] -++Advanced Use Only+ Enabling this option will slow down cluster recovery under all conditions. -+ -+Delay cluster recovery for the configured interval to allow for additional/related events to occur. Useful if your configuration is sensitive to the order in which ping updates arrive. -+ - |========================================================= - - You can always obtain an up-to-date list of cluster options, including --their default values, by running the `pengine --metadata` command. -+their default values, by running the `man pengine` and `man crmd` commands. - - == Querying and Setting Cluster Options == - -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt -index 8eacb05..3436bf8 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt -@@ -12,7 +12,7 @@ The cluster doesn't need to understand how the resource works because - it relies on the resource agent to do the right thing when given a - +start+, +stop+ or +monitor+ command. - --For this reason it is crucial that resource agents are well tested. -+For this reason it is crucial that resource agents are well tested. - - Typically resource agents come in the form of shell scripts, however - they can be written using any technology (such as C, Python or Perl) -@@ -23,7 +23,7 @@ that the author is comfortable with. - - indexterm:[Resource,class] - --There are five classes of agents supported by Pacemaker: -+There are six classes of agents supported by Pacemaker: - - * OCF - * LSB -@@ -31,6 +31,7 @@ There are five classes of agents supported by Pacemaker: - * Systemd - * Fencing - * Service -+* Nagios - - indexterm:[Resource,Heartbeat] - indexterm:[Heartbeat,Resources] -@@ -83,7 +84,7 @@ of as ip it will be passed to the script as +OCF_RESKEY_ip+.
The - number and purpose of the parameters is completely arbitrary, however - your script should advertise any that it supports using the - +meta-data+ command. -- -+ - - The OCF class is the most preferred one as it is an industry standard, - highly flexible (allowing parameters to be passed to agents in a -@@ -183,6 +184,23 @@ There is also an additional class, STONITH, which is used exclusively - for fencing related resources. This is discussed later in - <>. - -+=== Nagios Plugins === -+indexterm:[Resource,Nagios Plugins] -+indexterm:[Nagios Plugins,Resources] -+ -+Nagios plugins allow us to monitor services on the remote hosts. -+http://nagiosplugins.org[Nagios Plugins]. -+ -+Pacemaker is able to do remote monitoring with the plugins _if they are -+present_. -+ -+An use case is to configure them as resources belonging to a resource -+container, which usually is a VM, and the container will be restarted -+if any of them has failed. While they can also be configured as ordinary -+resources to be just used for monitoring hosts or services via network. -+ -+The supported parameters are same as the long options of a nagios plugin. -+ - [[primitive-resource]] - == Resource Properties == - -@@ -235,7 +253,7 @@ might produce: - - ===== - --[NOTE] -+[NOTE] - ===== - One of the main drawbacks to system services (such as LSB, Systemd and - Upstart) resources is that they do not allow any parameters! -@@ -267,7 +285,7 @@ behave and can be easily set using the `--meta` option of the - |Field - |Default - |Description -- -+ - |priority - |+0+ - |If not all resources can be active, the cluster will stop lower -@@ -356,6 +374,22 @@ indexterm:[Resource,Option,target-role] - indexterm:[multiple-active,Resource Option] - indexterm:[Resource,Option,multiple-active] - -+|remote-node -+|++ (disabled) -+|The name of the remote-node this resource defines. This both enables the resource as a remote-node and defines the unique name used to identify the remote-node. If no other parameters are set, this value will also be assumed as the hostname to connect to at port 3121. +WARNING+ This value cannot overlap with any resource or node IDs. -+ -+|remote-port -+|+3121+ -+|Configure a custom port to use for the guest connection to pacemaker_remote. -+ -+|remote-addr -+|+remote-node+ value used as hostname -+|The ip address or hostname to connect to if remote-node's name is not the hostname of the guest. -+ -+|+remote-connect-timeout+ -+|+60s+ -+|How long before a pending guest connection will time out. -+ - |========================================================= - - If you performed the following commands on the previous LSB Email resource -@@ -428,7 +462,7 @@ The list of instance attributes supported by an OCF script can be - found by calling the resource script with the `meta-data` command. - The output contains an XML description of all the supported - attributes, their purpose and default values. -- -+ - .Displaying the metadata for the Dummy resource agent template - ===== - [source,C] -@@ -442,14 +476,14 @@ attributes, their purpose and default values. - - - 1.0 -- -+ - - This is a Dummy Resource Agent. It does absolutely nothing except - keep track of whether its running or not. - Its purpose in life is for testing and to serve as a template for RA writers. - - Dummy resource agent -- -+ - - - -@@ -458,7 +492,7 @@ attributes, their purpose and default values. - State file - - -- -+ - - - Dummy attribute that can be changed to cause a reload -@@ -467,7 +501,7 @@ attributes, their purpose and default values. 
- - - -- -+ - - - -@@ -491,7 +525,7 @@ indexterm:[Resource,Action] - By default, the cluster will not ensure your resources are still - healthy. To instruct the cluster to do this, you need to add a - +monitor+ operation to the resource's definition. -- -+ - .An OCF resource with a recurring health check - ===== - [source,XML] -@@ -575,7 +609,7 @@ To set a default value for a operation option, simply add it to the - would default each operation's +timeout+ to 20 seconds. If an - operation's definition also includes a value for +timeout+, then that - value would be used instead (for that operation only). -- -+ - ==== When Resources Take a Long Time to Start/Stop ==== - - There are a number of implicit operations that the cluster will always -@@ -584,7 +618,7 @@ perform - +start+, +stop+ and a non-recurring +monitor+ operation - of these is taking too long, then you can create an entry for them and - simply specify a new value. - --.An OCF resource with custom timeouts for its implicit actions -+.An OCF resource with custom timeouts for its implicit actions - ===== - [source,XML] - ------- -@@ -613,11 +647,11 @@ provide each monitor with a different value for a common parameter. - The OCF standard creates a special parameter called +OCF_CHECK_LEVEL+ - for this purpose and dictates that it is _"made available to the - resource agent without the normal +OCF_RESKEY+ prefix"_. -- -+ - Whatever name you choose, you can specify it by adding an - +instance_attributes+ block to the op tag. Note that it is up to each - resource agent to look for the parameter and decide how to use it. -- -+ - .An OCF resource with two recurring health checks, performing different levels of checks - specified via +OCF_CHECK_LEVEL+. - ===== - [source,XML] -@@ -649,7 +683,7 @@ However, there can be times when you only want to disable it - temporarily. In such cases, simply add +enabled="false"+ to the - operation's definition. - --.Example of an OCF resource with a disabled health check -+.Example of an OCF resource with a disabled health check - ===== - [source,XML] - ------- -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt -index 1df1b9f..f6108a1 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt -@@ -1,6 +1,6 @@ - = Configure STONITH = - --//// -+//// - We prefer [[ch-stonith]], but older versions of asciidoc dont deal well - with that construct for chapter headings - //// -@@ -9,7 +9,6 @@ indexterm:[STONITH, Configuration] - - == What Is STONITH == - -- - STONITH is an acronym for Shoot-The-Other-Node-In-The-Head and it - protects your data from being corrupted by rogue nodes or concurrent - access. -@@ -39,38 +38,241 @@ from a network fault. - Likewise, any device that relies on the machine being active (such as - SSH-based "devices" used during testing) are inappropriate. - --== Configuring STONITH == -+== Differences of STONITH Resources == -+ -+Stonith resources are somewhat special in Pacemaker. -+ -+In previous versions, only "running" resources could be used by -+Pacemaker for fencing. This requirement has been relaxed to allow -+other parts of the cluster (such as resources like DRBD) to reliably -+initiate fencing. 
footnote:[Fencing a node while Pacemaker was moving -+stonith resources around would otherwise fail] -+ -+Now all nodes have access to their definitions and instantiate them -+on-the-fly when needed, however preference is given to 'verified' -+instances which are the ones the cluster has explicitly started. -+ -+In the case of a cluster split, the partition with a verified instance -+will have a slight advantage as stonith-ng in the other partition will -+have to hear from all its current peers before choosing a node to -+perform the fencing. -+ -+[NOTE] -+=========== -+To disable a fencing device/resource, 'target-role' can be set as you would for a normal resource. -+=========== -+ -+[NOTE] -+=========== -+To prevent a specific node from using a fencing device, location constraints will work as expected. -+=========== -+ -+[IMPORTANT] -+=========== -+ -+Currently there is a limitation that fencing resources may only have a -+one set of meta-attributes and one set of instance-attributes. This -+can be revisited if it becomes a significant limitation for people. -+ -+=========== -+ -+.Properties of Fencing Devices -+[width="95%",cols="1m,1m,1m,5<",options="header",align="center"] -+|========================================================= -+ -+|Field -+|Type -+|Default -+|Description -+ -+|stonith-timeout -+|time -+|60s -+|How long to wait for the STONITH action to complete per a stonith device. -+ Overrides the stonith-timeout cluster property -+ indexterm:[stonith-timeout,Fencing] -+ indexterm:[Fencing,Property,stonith-timeout] -+ -+|priority -+|integer -+|0 -+|The priority of the stonith resource. Devices are tried in order of highest priority to lowest. -+ indexterm:[priority,Fencing] -+ indexterm:[Fencing,Property,priority] -+ -+|pcmk_host_argument -+|string -+|port -+|Advanced use only: An alternate parameter to supply instead of 'port' -+ Some devices do not support the standard 'port' parameter or may provide additional ones. Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced. A value of 'none' can be used to tell the cluster not to supply any additional parameters. -+ indexterm:[pcmk_host_argument,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_argument] -+ -+|pcmk_host_map -+|string -+| -+|A mapping of host names to ports numbers for devices that do not support host names. -+ Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2 -+ indexterm:[pcmk_host_map,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_map] -+ -+|pcmk_host_list -+|string -+| -+|A list of machines controlled by this device (Optional unless pcmk_host_check=static-list). -+ indexterm:[pcmk_host_list,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_list] -+ -+|pcmk_host_check -+|string -+|dynamic-list -+|How to determin which machines are controlled by the device. -+ Allowed values: dynamic-list (query the device), static-list (check the pcmk_host_list attribute), none (assume every device can fence every machine) -+ indexterm:[pcmk_host_check,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_check] -+ -+|pcmk_reboot_action -+|string -+|reboot -+|Advanced use only: An alternate command to run instead of 'reboot' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'reboot' action. 
-+ indexterm:[pcmk_reboot_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_reboot_action] -+ -+|pcmk_reboot_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'reboot' actions. -+ indexterm:[pcmk_reboot_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_reboot_timeout] -+ -+|pcmk_reboot_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'reboot' actions before giving up. -+ indexterm:[pcmk_reboot_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_reboot_retries] -+ -+|pcmk_off_action -+|string -+|off -+|Advanced use only: An alternate command to run instead of 'off' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'off' action. -+ indexterm:[pcmk_off_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_off_action] -+ -+|pcmk_off_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'off' actions. -+ indexterm:[pcmk_off_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_off_timeout] -+ -+|pcmk_off_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'off' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'off' actions before giving up. -+ indexterm:[pcmk_off_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_off_retries] -+ -+|pcmk_list_action -+|string -+|list -+|Advanced use only: An alternate command to run instead of 'list' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'list' action. -+ indexterm:[pcmk_list_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_list_action] -+ -+|pcmk_list_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'list' actions. -+ indexterm:[pcmk_list_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_list_timeout] -+ -+|pcmk_list_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'list' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'list' actions before giving up. 
-+ indexterm:[pcmk_list_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_list_retries] -+ -+|pcmk_monitor_action -+|string -+|monitor -+|Advanced use only: An alternate command to run instead of 'monitor' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'monitor' action. -+ indexterm:[pcmk_monitor_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_monitor_action] -+ -+|pcmk_monitor_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'monitor' actions. -+ indexterm:[pcmk_monitor_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_monitor_timeout] -+ -+|pcmk_monitor_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'monitor' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'monitor' actions before giving up. -+ indexterm:[pcmk_monitor_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_monitor_retries] -+ -+|pcmk_status_action -+|string -+|status -+|Advanced use only: An alternate command to run instead of 'status' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'status' action. -+ indexterm:[pcmk_status_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_status_action] -+ -+|pcmk_status_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'status' actions. -+ indexterm:[pcmk_status_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_status_timeout] -+ -+|pcmk_status_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'status' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'status' actions before giving up. -+ indexterm:[pcmk_status_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_status_retries] -+ -+|========================================================= - --ifdef::pcs[] --. Find the correct driver: +pcs stonith list+ -- --. Find the parameters associated with the device: +pcs stonith describe + -+== Configuring STONITH == - --. Create a local config to make changes to +pcs cluster cib stonith_cfg+ -+[NOTE] -+=========== - --. Create the fencing resource using +pcs -f stonith_cfg stonith create -- [stonith device options]+ -+Both configuration shells include functionality to simplify the -+process below, particularly the step for deciding which parameters are -+required. However since this document deals only with core -+components, you should refer to the Stonith chapter of +Clusters from -+Scratch+ for those details. - --. Set stonith-enable to true. 
+pcs -f stonith_cfg property set stonith-enabled=true+ --endif::pcs[] -+=========== - --ifdef::crmsh[] - . Find the correct driver: +stonith_admin --list-installed+ - --. Since every device is different, the parameters needed to configure -- it will vary. To find out the parameters associated with the device, -- run: +stonith_admin --metadata --agent type+ -+. Find the required parameters associated with the device: +stonith_admin --metadata --agent + - -- The output should be XML formatted text containing additional -- parameter descriptions. We will endevor to make the output more -- friendly in a later version. -- --. Enter the shell crm Create an editable copy of the existing -- configuration +cib new stonith+ Create a fencing resource containing a -- primitive resource with a class of stonith, a type of type and a -- parameter for each of the values returned in step 2: +configure -- primitive ...+ --endif::crmsh[] -+. Create a file called +stonith.xml+ containing a primitive resource -+ with a class of 'stonith', a type of and a parameter -+ for each of the values returned in step 2. - - . If the device does not know how to fence nodes based on their uname, - you may also need to set the special +pcmk_host_map+ parameter. See -@@ -84,19 +286,15 @@ endif::crmsh[] - port parameter, you may also need to set the special - +pcmk_host_argument+ parameter. See +man stonithd+ for details. - --ifdef::crmsh[] --. Upload it into the CIB from the shell: +cib commit stonith+ --endif::crmsh[] -+. Upload it into the CIB using cibadmin: +cibadmin -C -o resources --xml-file stonith.xml+ - --ifdef::pcs[] --. Commit the new configuration. +pcs cluster push cib stonith_cfg+ --endif::pcs[] -+. Set stonith-enabled to true. +crm_attribute -t crm_config -n stonith-enabled -v true+ - - . Once the stonith resource is running, you can test it by executing: - +stonith_admin --reboot nodename+. Although you might want to stop the - cluster on that machine first. - --== Example == -+=== Example === - - Assuming we have an chassis containing four nodes and an IPMI device - active on 10.0.0.1, then we would chose the fence_ipmilan driver in step -@@ -104,33 +302,11 @@ active on 10.0.0.1, then we would chose the fence_ipmilan driver in step - - .Obtaining a list of STONITH Parameters - --ifdef::pcs[] --[source,Bash] ------ --# pcs stonith describe fence_ipmilan --Stonith options for: fence_ipmilan -- auth: IPMI Lan Auth type (md5, password, or none) -- ipaddr: IPMI Lan IP to talk to -- passwd: Password (if required) to control power on IPMI device -- passwd_script: Script to retrieve password (if required) -- lanplus: Use Lanplus -- login: Username/Login (if required) to control power on IPMI device -- action: Operation to perform. 
Valid operations: on, off, reboot, status, list, diag, monitor or metadata -- timeout: Timeout (sec) for IPMI operation -- cipher: Ciphersuite to use (same as ipmitool -C parameter) -- method: Method to fence (onoff or cycle) -- power_wait: Wait X seconds after on/off operation -- delay: Wait X seconds before fencing is started -- privlvl: Privilege level on IPMI device -- verbose: Verbose mode ------ --endif::pcs[] -- --ifdef::crmsh[] - [source,C] - ---- - # stonith_admin --metadata -a fence_ipmilan - ---- -+ - [source,XML] - ---- - -@@ -218,97 +394,107 @@ To use fence_ipmilan with HP iLO 3 you have to enable lanplus option (lanplus / - - - ---- --endif::crmsh[] - - from which we would create a STONITH resource fragment that might look --like this -+like this: - - .Sample STONITH Resource --ifdef::pcs[] --[source,Bash] ------ --# pcs cluster cib stonith_cfg --# pcs -f stonith_cfg stonith create impi-fencing fence_ipmilan \ -- pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser \ -- passwd=acd123 op monitor interval=60s --# pcs -f stonith_cfg stonith -- impi-fencing (stonith:fence_ipmilan) Stopped ------ --endif::pcs[] -- --ifdef::crmsh[] --[source,Bash] -+[source,XML] - ---- --# crm crm(live)# cib new stonith --INFO: stonith shadow CIB created --crm(stonith)# configure primitive impi-fencing stonith::fence_ipmilan \ -- params pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser passwd=abc123 \ -- op monitor interval="60s" -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - ---- --endif::crmsh[] - - And finally, since we disabled it earlier, we need to re-enable STONITH. --At this point we should have the following configuration. - --ifdef::pcs[] - [source,Bash] - ---- --# pcs -f stonith_cfg property set stonith-enabled=true --# pcs -f stonith_cfg property --dc-version: 1.1.8-1.el7-60a19ed12fdb4d5c6a6b6767f52e5391e447fec0 --cluster-infrastructure: corosync --no-quorum-policy: ignore --stonith-enabled: true -+# crm_attribute -t crm_config -n stonith-enabled -v true - ---- --endif::pcs[] - --Now push the configuration into the cluster. -+== Advanced Fencing Configurations == - --ifdef::pcs[] --[source,C] ------ --# pcs cluster push cib stonith_cfg ------ --endif::pcs[] -+Some people consider that having one fencing device is a single point -+of failure footnote:[Not true, since a node or resource must fail -+before fencing even has a chance to], others prefer removing the node -+from the storage and network instead of turning it off. - --ifdef::crmsh[] --[source,Bash] -+Whatever the reason, Pacemaker supports fencing nodes with multiple -+devices through a feature called fencing topologies. -+ -+Simply create the individual devices as you normally would and then -+define one or more fencing levels in the fencing-topology section in -+the configuration. -+ -+* Each level is attempted in +ascending index+ order -+* If a device fails, +processing terminates+ for the current level. -+ No further devices in that level are exercised and the next level is attempted instead. -+* If the operation succeeds for all the listed devices in a level, the level is deemed to have passed -+* The operation is finished +when a level has passed+ (success), or all levels have been attempted (failed) -+* If the operation failed, the next step is determined by the Policy Engine and/or crmd. 
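To make the level-processing rules above concrete, here is a minimal sketch of a +fencing-topology+ fragment (the node names, device names and ids are illustrative, not taken from the original patch): pcmk-1 is fenced first via a poison-pill device with power fencing as the fallback level, while pcmk-2 requires both its disk and network devices to succeed before power is tried.

[source,XML]
----
<fencing-topology>
  <!-- Levels are attempted in ascending index order until one succeeds -->
  <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill"/>
  <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power"/>
  <!-- Every device in this level must succeed for the level to pass -->
  <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,network"/>
  <fencing-level id="f-p2.2" target="pcmk-2" index="2" devices="power"/>
</fencing-topology>
----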
-+ -+Some possible uses of topologies include: -+ -+* try poison-pill and fail back to power -+* try disk and network, and fall back to power if either fails -+* initiate a kdump and then poweroff the node -+ -+.Properties of Fencing Levels -+[width="95%",cols="1m,6<",options="header",align="center"] -+|========================================================= -+ -+|Field -+|Description -+ -+|id -+|Your name for the level -+ indexterm:[id,fencing-level] -+ indexterm:[Fencing,fencing-level,id] -+ -+|target -+|The node to which this level applies -+ indexterm:[target,fencing-level] -+ indexterm:[Fencing,fencing-level,target] -+ -+|index -+|The order in which to attempt the levels. -+ Levels are attempted in +ascending index+ order +until one succeeds+. -+ indexterm:[index,fencing-level] -+ indexterm:[Fencing,fencing-level,index] -+ -+|devices -+|A comma separated list of devices to attempt for this level; the level passes only if fencing succeeds for all of them. -+ indexterm:[devices,fencing-level] -+ indexterm:[Fencing,fencing-level,devices] -+ -+|========================================================= -+ -+.Example use of Fencing Topologies -+[source,XML] - ---- --crm(stonith)# configure property stonith-enabled="true" --crm(stonith)# configure shownode pcmk-1 --node pcmk-2 --primitive WebData ocf:linbit:drbd \ -- params drbd_resource="wwwdata" \ -- op monitor interval="60s" --primitive WebFS ocf:heartbeat:Filesystem \ -- params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2" --primitive WebSite ocf:heartbeat:apache \ -- params configfile="/etc/httpd/conf/httpd.conf" \ -- op monitor interval="1min" --primitive ClusterIP ocf:heartbeat:IPaddr2 \ -- params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \ -- op monitor interval="30s"primitive ipmi-fencing stonith::fence_ipmilan \ params pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser passwd=abc123 \ op monitor interval="60s"ms WebDataClone WebData \ -- meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" --clone WebFSClone WebFS --clone WebIP ClusterIP \ -- meta globally-unique="true" clone-max="2" clone-node-max="2" --clone WebSiteClone WebSite --colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone --colocation fs_on_drbd inf: WebFSClone WebDataClone:Master --colocation website-with-ip inf: WebSiteClone WebIP --order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start --order WebSite-after-WebFS inf: WebFSClone WebSiteClone --order apache-after-ip inf: WebIP WebSiteClone --property $id="cib-bootstrap-options" \ -- dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \ -- cluster-infrastructure="openais" \ -- expected-quorum-votes="2" \ -- stonith-enabled="true" \ -- no-quorum-policy="ignore" --rsc_defaults $id="rsc-options" \ -- resource-stickiness="100" --crm(stonith)# cib commit stonithINFO: commited 'stonith' shadow CIB to the cluster --crm(stonith)# quit --bye -+ -+ -+ ... -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ ...
-+ -+ -+ - ---- --endif::crmsh[] -diff --git a/doc/Pacemaker_Explained/en-US/Revision_History.xml b/doc/Pacemaker_Explained/en-US/Revision_History.xml -index a351d9c..0afc90b 100644 ---- a/doc/Pacemaker_Explained/en-US/Revision_History.xml -+++ b/doc/Pacemaker_Explained/en-US/Revision_History.xml -@@ -6,19 +6,19 @@ - - - -- 1 -+ 1-0 - 19 Oct 2009 - AndrewBeekhofandrew@beekhof.net - Import from Pages.app - - -- 2 -+ 2-0 - 26 Oct 2009 - AndrewBeekhofandrew@beekhof.net - Cleanup and reformatting of docbook xml complete - - -- 3 -+ 3-0 - Tue Nov 12 2009 - AndrewBeekhofandrew@beekhof.net - -@@ -29,7 +29,7 @@ - - - -- 4 -+ 4-0 - Mon Oct 8 2012 - AndrewBeekhofandrew@beekhof.net - -diff --git a/doc/Pacemaker_Remote/en-US/Author_Group.xml b/doc/Pacemaker_Remote/en-US/Author_Group.xml -new file mode 100644 -index 0000000..3d9056e ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Author_Group.xml -@@ -0,0 +1,11 @@ -+ -+ -+ -+ -+ DavidVossel -+ Red Hat -+ Primary author -+ dvossel@redhat.com -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Book_Info.xml b/doc/Pacemaker_Remote/en-US/Book_Info.xml -new file mode 100644 -index 0000000..426599e ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Book_Info.xml -@@ -0,0 +1,56 @@ -+ -+ -+%BOOK_ENTITIES; -+]> -+ -+ Pacemaker Remote -+ Extending High Availability into Virtual Nodes -+ 1 -+ 0 -+ -+ -+ The document exists as both a reference and deployment guide for the Pacemaker Remote service. -+ -+ -+ The KVM and Linux Container walk-through tutorials will use: -+ -+ -+ -+ &DISTRO; &DISTRO_VERSION; as the host operating system -+ -+ -+ -+ -+ Pacemaker Remote to perform resource management within virtual nodes -+ -+ -+ -+ -+ libvirt to manage KVM and LXC virtual nodes -+ -+ -+ -+ -+ Corosync to provide messaging and membership services on the host nodes -+ -+ -+ -+ -+ Pacemaker to perform resource management on host nodes -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Ch-Example.txt b/doc/Pacemaker_Remote/en-US/Ch-Example.txt -new file mode 100644 -index 0000000..ca94044 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-Example.txt -@@ -0,0 +1,107 @@ -+= Quick Example = -+ -+If you already know how to use pacemaker, you'll likely be able to grasp this new concept of remote-nodes by reading through this quick example without having to sort through all the detailed walk-through steps. Here are the key configuration ingredients that make this possible using libvirt and KVM virtual guests. These steps strip everything down to the very basics. -+ -+== Mile High View of Configuration Steps == -+ -+* +Put an authkey with this path, /etc/pacemaker/authkey, on every cluster-node and virtual machine+. This secures remote communication and authentication. -+ -+Run this command to generate a random authkey. -+ -+[source,C] -+---- -+dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 -+---- -+ -+* +Install the pacemaker_remote packages on every virtual machine, enable pacemaker_remote on startup, and poke a hole in the firewall for TCP port 3121.+ -+ -+[source,C] -+---- -+yum install pacemaker-remote resource-agents -+systemctl enable pacemaker_remote -+# If you just want to see this work, disable iptables and ip6tables on most distros. -+# You may have to put selinux in permissive mode as well for the time being.
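-+# Otherwise, the rule below permanently opens TCP port 3121, the default pacemaker_remote port, so the cluster can reach the guest.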
-+firewall-cmd --add-port 3121/tcp --permanent -+---- -+ -+* +Give each virtual machine a static network address and unique hostname+ -+ -+* +Tell pacemaker to launch the virtual machine, and that the virtual machine is a remote-node capable of running resources, by using the "remote-node" meta-attribute.+ -+ -+With pcs: -+ -+[source,C] -+---- -+# pcs resource create vm-guest1 VirtualDomain hypervisor="qemu:///system" config="vm-guest1.xml" meta +remote-node=guest1+ -+---- -+ -+Raw XML: -+[source,XML] -+---- -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+---- -+ -+In the example above the meta-attribute 'remote-node=guest1' tells pacemaker that this resource is a remote-node with the hostname 'guest1' that is capable of being integrated into the cluster. The cluster will attempt to contact the virtual machine's pacemaker_remote service at the hostname 'guest1' after it launches. -+ -+== What those steps just did == -+ -+Those steps just told pacemaker to launch a virtual machine called vm-guest1 and integrate that virtual machine as a remote-node called 'guest1'. -+ -+Example crm_mon output after guest1 is integrated into the cluster. -+ -+[source,C] -+---- -+Last updated: Wed Mar 13 13:52:39 2013 -+Last change: Wed Mar 13 13:25:17 2013 via crmd on node1 -+Stack: corosync -+Current DC: node1 (24815808) - partition with quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+2 Resources configured. -+ -+Online: [ node1 guest1] -+ -+vm-guest1 (ocf::heartbeat:VirtualDomain): Started node1 -+---- -+ -+Now you could place a resource, such as a webserver, on guest1. -+ -+[source,C] -+---- -+# pcs resource create webserver apache configfile=/etc/httpd/conf/httpd.conf op monitor interval=30s -+# pcs constraint location webserver prefers guest1 -+---- -+ -+Now the crm_mon output would show a webserver launched on the guest1 remote-node. -+ -+[source,C] -+---- -+Last updated: Wed Mar 13 13:52:39 2013 -+Last change: Wed Mar 13 13:25:17 2013 via crmd on node1 -+Stack: corosync -+Current DC: node1 (24815808) - partition with quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+2 Resources configured. -+ -+Online: [ node1 guest1] -+ -+vm-guest1 (ocf::heartbeat:VirtualDomain): Started node1 -+webserver (ocf::heartbeat:apache): Started guest1 -+---- -+ -+== Accessing Cluster from Remote-node == -+ -+It is worth noting that after 'guest1' is integrated into the cluster, all the pacemaker cli tools immediately become available to the remote node. This means things like crm_mon, crm_resource, and crm_attribute will work natively on the remote-node as long as the connection between the remote-node and cluster-node exists. This is particularly important for any master/slave resources executing on the remote-node that need access to crm_master to set the node's transient attributes. -+ -diff --git a/doc/Pacemaker_Remote/en-US/Ch-Future.txt b/doc/Pacemaker_Remote/en-US/Ch-Future.txt -new file mode 100644 -index 0000000..93c082f ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-Future.txt -@@ -0,0 +1,15 @@ -+= Future Features = -+ -+Basic KVM and Linux container integration was the first phase of development for pacemaker_remote and was completed for Pacemaker v1.1.10. Here are some planned features that expand upon this initial functionality. -+ -+== Libvirt Sandbox Support == -+ -+Once the libvirt-sandbox project is integrated with pacemaker_remote, we will gain the ability to perform per-resource Linux container isolation with very little performance impact.
This functionality will allow resources living on a single node to be isolated from one another. At that point CPU and memory limits could be set per-resource dynamically just using the cluster config. -+ -+== Bare-metal Support == -+ -+The pacemaker_remote daemon already has the ability to run on bare-metal hardware nodes, but the policy engine logic for integrating bare-metal nodes is not complete. There are some complications involved with understanding a bare-metal node's state that virtual nodes don't have. Once this logic is complete, pacemaker will be able to integrate bare-metal nodes in the same way virtual remote-nodes currently are. Some special considerations for fencing will need to be addressed. -+ -+== KVM Migration Support == -+ -+Pacemaker's policy engine is limited in its ability to perform live migrations of KVM resources when resource dependencies are involved. This limitation affects how resources living within a KVM remote-node are handled when a live migration takes place. Currently when a live migration is performed on a KVM remote-node, all the resources within that remote-node have to be stopped before the migration takes place and started once again after migration has finished. This policy engine limitation is fully explained in this bug report, http://bugs.clusterlabs.org/show_bug.cgi?id=5055#c3 -diff --git a/doc/Pacemaker_Remote/en-US/Ch-Intro.txt b/doc/Pacemaker_Remote/en-US/Ch-Intro.txt -new file mode 100644 -index 0000000..c7b3001 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-Intro.txt -@@ -0,0 +1,55 @@ -+= Extending High Availability Cluster into Virtual Nodes = -+ -+== Overview == -+The recent addition of the +pacemaker_remote+ service supported by +Pacemaker version 1.1.10 and greater+ allows nodes not running the cluster stack (pacemaker+corosync) to integrate into the cluster and have the cluster manage their resources just as if they were a real cluster node. This means that pacemaker clusters are now capable of both launching virtual environments (KVM/LXC) and managing the resources that live within those virtual environments, without requiring the virtual environments to run pacemaker or corosync. -+ -+== Terms == -++cluster-node+ - A baremetal hardware node running the High Availability stack (pacemaker + corosync) -+ -++remote-node+ - A virtual guest node running the pacemaker_remote service. -+ -++pacemaker_remote+ - A service daemon capable of performing remote application management within virtual guests (kvm and lxc) in both pacemaker cluster environments and standalone (non-cluster) environments. This service is an enhanced version of pacemaker's local resource management daemon (LRMD) that is capable of managing and monitoring LSB, OCF, upstart, and systemd resources on a guest remotely. It also allows for most of pacemaker's cli tools (crm_mon, crm_resource, crm_master, crm_attribute, etc.) to work natively on remote-nodes. -+ -++LXC+ - A Linux Container defined by the libvirt-lxc Linux container driver. http://libvirt.org/drvlxc.html -+ -+== Virtual Machine Use Case == -+The use of pacemaker_remote in virtual machines solves a deployment scenario that has traditionally been difficult to solve. -+ -++"I want a pacemaker cluster to manage virtual machine resources, but I also want pacemaker to be able to manage the resources that live within those virtual machines."+ -+ -+In the past, users desiring this deployment had to make a decision.
They would either have to sacrifice the ability to monitor resources residing within virtual guests by running the cluster stack on the baremetal nodes, or run another cluster instance on the virtual guests where they potentially run into corosync scalability issues. There is a third scenario where the virtual guests run the cluster stack and join the same network as the baremetal nodes, but that can quickly hit issues with scalability as well. -+ -+With the pacemaker_remote service we have a new option. -+ -+* The baremetal cluster-nodes run the cluster stack (pacemaker+corosync). -+* The virtual remote-nodes run the pacemaker_remote service (nearly zero configuration required on the virtual machine side) -+* The cluster stack on the cluster-nodes launches the virtual machines and immediately connects to the pacemaker_remote service, allowing the virtual machines to integrate into the cluster just as if they were a real cluster-node. -+ -+The key difference here between the virtual machine remote-nodes and the cluster-nodes is that the remote-nodes are not running the cluster stack. This means the remote nodes will never become the DC, and they do not take part in quorum. On the other hand, this also means that the remote-nodes are not bound to the scalability limits associated with the cluster stack either. +No 16 node corosync member limits+ to deal with. That isn't to say remote-nodes can scale indefinitely, but the expectation is that remote-nodes scale horizontally much further than cluster-nodes. Other than the quorum limitation, these remote-nodes behave just like cluster nodes with respect to resource management. The cluster is fully capable of managing and monitoring resources on each remote-node. You can build constraints against remote-nodes, put them in standby, or whatever else you'd expect to be able to do with normal cluster-nodes. They even show up in the crm_mon output as you would expect cluster-nodes to. -+ -+To solidify the concept, an example cluster deployment integrating remote-nodes could look like this. -+ -+* 16 cluster-nodes running corosync+pacemaker stack. -+* 64 pacemaker managed virtual machine resources running pacemaker_remote configured as remote-nodes. -+* 64 pacemaker managed webserver and database resources configured to run on the 64 remote-nodes. -+ -+With this deployment you would have 64 webservers and databases running on 64 virtual machines on 16 hardware nodes, all of which are managed and monitored by the same pacemaker deployment. -+ -+== Linux Container Use Case == -+ -++I want to isolate and limit the system resources (cpu, memory, filesystem) a cluster resource can consume without using virtual machines.+ -+ -+Using pacemaker_remote with Linux containers (libvirt-lxc) opens up some interesting possibilities for isolating resources in the cluster without the use of a hypervisor. We now have the ability to both define a contained environment with cpu and memory utilization limits and then assign resources to that contained environment, all managed from within pacemaker. The LXC Walk-through section of this document outlines how pacemaker_remote can be used to bring Linux containers into the cluster as remote-nodes capable of executing resources.
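Whether the contained environment is a KVM guest or a libvirt-lxc container, the key ingredient is the same: a resource that launches the environment, tagged with the +remote-node+ meta-attribute. As a rough sketch of what that looks like in the CIB (the resource name, config path and nvpair ids are illustrative, mirroring the quick example earlier in this document):

[source,XML]
----
<primitive class="ocf" provider="heartbeat" type="VirtualDomain" id="vm-guest1">
  <instance_attributes id="vm-guest1-instance_attributes">
    <nvpair id="vm-guest1-hypervisor" name="hypervisor" value="qemu:///system"/>
    <nvpair id="vm-guest1-config" name="config" value="vm-guest1.xml"/>
  </instance_attributes>
  <meta_attributes id="vm-guest1-meta_attributes">
    <!-- This one meta-attribute turns the guest into a remote-node named guest1 -->
    <nvpair id="vm-guest1-remote-node" name="remote-node" value="guest1"/>
  </meta_attributes>
  <operations>
    <op id="vm-guest1-monitor-30s" name="monitor" interval="30s"/>
  </operations>
</primitive>
----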
-+ -+== Expanding the Cluster Stack == -+ -+=== Traditional HA Stack === -+ -+image::images/pcmk-ha-cluster-stack.png["The Traditional Pacemaker Corosync HA Stack.",width="17cm",height="9cm",align="center"] -+ -+ -+=== Remote-Node Enabled HA Stack === -+ -+The stack grows one additional layer vertically so we can scale further horizontally. -+ -+image::images/pcmk-ha-remote-stack.png["Placing Pacemaker Remote into the Traditional HA Stack.",width="20cm",height="10cm",align="center"] -diff --git a/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt b/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt -new file mode 100644 -index 0000000..fe00775 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt -@@ -0,0 +1,483 @@ -+= KVM Walk-through = -+ -++What this tutorial is:+ This tutorial is an in-depth walk-through of how to get pacemaker to manage a KVM guest instance and integrate that guest into the cluster as a remote-node. -+ -++What this tutorial is not:+ This tutorial is not a realistic deployment scenario. The steps shown here are meant to get users familiar with the concept of remote-nodes as quickly as possible. -+ -+== Step 1: Setup the Host == -+ -+This tutorial was created using Fedora 18 on the host and guest nodes. Anything that is capable of running libvirt and pacemaker v1.1.10 or greater will do though. An installation guide for Fedora 18 can be found here: http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/. -+ -+Fedora 18 (or similar distro) host preparation steps. -+ -+=== SElinux and Firewall === -+To simplify this tutorial we will disable SELinux and the firewall on the host. -++WARNING:+ These actions pose a significant security threat to machines exposed to the outside world. -+[source,C] -+---- -+# setenforce 0 -+# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config -+# systemctl disable iptables.service -+# systemctl disable ip6tables.service -+# rm '/etc/systemd/system/basic.target.wants/iptables.service' -+# rm '/etc/systemd/system/basic.target.wants/ip6tables.service' -+# systemctl stop iptables.service -+# systemctl stop ip6tables.service -+---- -+ -+=== Install Cluster Software === -+ -+[source,C] -+---- -+# yum install -y pacemaker corosync pcs resource-agents -+---- -+ -+=== Setup Corosync === -+ -+Running the command below will attempt to detect the network address corosync should bind to. -+ -+[source,C] -+---- -+# export corosync_addr=`ip addr | grep "inet " | tail -n 1 | awk '{print $4}' | sed s/255/0/g` -+---- -+ -+Display and verify that the address is correct -+ -+[source,C] -+---- -+# echo $corosync_addr -+---- -+ -+In many cases the address will be 192.168.1.0 if you are behind a standard home router. -+ -+Now copy over the example corosync.conf. This code will inject your bindaddress and enable the votequorum API, which is required by pacemaker. -+ -+[source,C] -+---- -+# cp /etc/corosync/corosync.conf.example /etc/corosync/corosync.conf -+# sed -i.bak "s/.*\tbindnetaddr:.*/bindnetaddr:\ $corosync_addr/g" /etc/corosync/corosync.conf -+# cat << END >> /etc/corosync/corosync.conf -+quorum { -+ provider: corosync_votequorum -+ expected_votes: 2 -+} -+END -+---- -+ -+=== Verify Cluster Software === -+ -+Start the cluster -+ -+[source,C] -+---- -+# pcs cluster start -+---- -+ -+Verify corosync membership -+ -+[source,C] -+---- -+# pcs status corosync -+ -+Membership information -+ Nodeid Votes Name -+1795270848 1 example-host (local) -+---- -+ -+Verify pacemaker status.
At first the 'pcs cluster status' output will look like this. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:26:00 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: -+ Version: 1.1.10 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+---- -+ -+After about a minute you should see your host as a single node in the cluster. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:28:23 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: example-host (1795270848) - partition WITHOUT quorum -+ Version: 1.1.8-9b13ea1 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+ -+ Online: [ example-host ] -+---- -+ -+Go ahead and stop the cluster for now after verifying everything is in order. -+ -+[source,C] -+---- -+# pcs cluster stop -+---- -+ -+=== Install Virtualization Software === -+ -+[source,C] -+---- -+# yum install -y kvm libvirt qemu-system qemu-kvm bridge-utils virt-manager -+# systemctl enable libvirtd.service -+---- -+ -+Reboot the host. -+ -+== Step 2: Create the KVM Guest == -+ -+I am not going to outline the installation steps required to create a KVM guest. There are plenty of tutorials available elsewhere that do that. I recommend using a Fedora 18 or greater distro as your guest, as that is what this tutorial was tested with. -+ -+=== Setup Guest Network === -+ -+Run the commands below to set up a static IP address (192.168.122.10) and hostname (guest1). -+ -+[source,C] -+---- -+export remote_hostname=guest1 -+export remote_ip=192.168.122.10 -+export remote_gateway=192.168.122.1 -+ -+yum remove -y NetworkManager -+ -+rm -f /etc/hostname -+cat << END >> /etc/hostname -+$remote_hostname -+END -+ -+hostname $remote_hostname -+ -+cat << END >> /etc/sysconfig/network -+HOSTNAME=$remote_hostname -+GATEWAY=$remote_gateway -+END -+ -+sed -i.bak "s/.*BOOTPROTO=.*/BOOTPROTO=none/g" /etc/sysconfig/network-scripts/ifcfg-eth0 -+ -+cat << END >> /etc/sysconfig/network-scripts/ifcfg-eth0 -+IPADDR0=$remote_ip -+PREFIX0=24 -+GATEWAY0=$remote_gateway -+DNS1=$remote_gateway -+END -+ -+systemctl restart network -+systemctl enable network.service -+systemctl enable sshd -+systemctl start sshd -+ -+echo "checking connectivity" -+ping www.google.com -+---- -+ -+To simplify the tutorial we'll go ahead and disable SELinux on the guest. We'll also need to poke a hole through the firewall on port 3121 (the default port for pacemaker_remote) so the host can contact the guest. -+ -+[source,C] -+---- -+# setenforce 0 -+# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config -+ -+# firewall-cmd --add-port 3121/tcp --permanent -+---- -+ -+If you still encounter connection issues, just disable iptables and ip6tables on the guest like we did on the host to guarantee you'll be able to contact the guest from the host. -+ -+At this point you should be able to ssh into the guest from the host. -+ -+=== Setup Pacemaker Remote === -+ -+On the +HOST+ machine run these commands to generate an authkey and copy it to the /etc/pacemaker folder on both the host and guest. -+ -+[source,C] -+---- -+# mkdir /etc/pacemaker -+# dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 -+# scp -r /etc/pacemaker root@192.168.122.10:/etc/ -+---- -+ -+Now on the +GUEST+ install the pacemaker-remote package and enable the daemon to run at startup.
In the commands below you will notice the 'pacemaker' and 'pacemaker-remote' packages are being installed. The 'pacemaker' package is not required. The only reason it is being installed for this tutorial is that it contains the 'Dummy' resource agent we will be using later on to test the remote-node. -+ -+[source,C] -+---- -+# yum install -y pacemaker pacemaker-remote resource-agents -+# systemctl enable pacemaker_remote.service -+---- -+ -+Now start pacemaker_remote on the guest and verify the start was successful. -+ -+[source,C] -+---- -+# systemctl start pacemaker_remote.service -+ -+# systemctl status pacemaker_remote -+ -+ pacemaker_remote.service - Pacemaker Remote Service -+ Loaded: loaded (/usr/lib/systemd/system/pacemaker_remote.service; enabled) -+ Active: active (running) since Thu 2013-03-14 18:24:04 EDT; 2min 8s ago -+ Main PID: 1233 (pacemaker_remot) -+ CGroup: name=systemd:/system/pacemaker_remote.service -+ └─1233 /usr/sbin/pacemaker_remoted -+ -+ Mar 14 18:24:04 guest1 systemd[1]: Starting Pacemaker Remote Service... -+ Mar 14 18:24:04 guest1 systemd[1]: Started Pacemaker Remote Service. -+ Mar 14 18:24:04 guest1 pacemaker_remoted[1233]: notice: lrmd_init_remote_tls_server: Starting a tls listener on port 3121. -+---- -+ -+=== Verify Host Connection to Guest === -+ -+Before moving forward it's worth going ahead and verifying the host can contact the guest on port 3121. Here's a trick you can use. Connect using telnet from the host. The connection will get destroyed, but how it is destroyed tells you whether it worked or not. -+ -+First add guest1 to the host machine's /etc/hosts file if you haven't already. This is required unless you have DNS set up in a way where guest1's address can be discovered. -+ -+[source,C] -+---- -+# cat << END >> /etc/hosts -+192.168.122.10 guest1 -+END -+---- -+ -+If running the telnet command on the host results in this output before disconnecting, the connection works. -+[source,C] -+---- -+# telnet guest1 3121 -+ Trying 192.168.122.10... -+ Connected to guest1. -+ Escape character is '^]'. -+ Connection closed by foreign host. -+---- -+ -+If you see this, the connection is not working. -+[source,C] -+---- -+# telnet guest1 3121 -+Trying 192.168.122.10... -+telnet: connect to address 192.168.122.10: No route to host -+---- -+ -+Once you can successfully connect to the guest from the host, shut down the guest. Pacemaker will be managing the virtual machine from this point forward. -+ -+== Step 3: Integrate the KVM Guest into the Cluster == -+ -+Now for the fun part: integrating the virtual machine you've just created into the cluster. It is incredibly simple. -+ -+=== Start the Cluster === -+On the host, start pacemaker. -+ -+[source,C] -+---- -+# pcs cluster start -+---- -+ -+Wait for the host to become the DC. The output of 'pcs status' should look similar to this after about a minute. -+ -+[source,C] -+---- -+Last updated: Thu Mar 14 16:41:22 2013 -+Last change: Thu Mar 14 16:41:08 2013 via crmd on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+1 Nodes configured, unknown expected votes -+0 Resources configured. -+ -+ -+Online: [ example-host ] -+---- -+ -+Now enable the cluster to work without quorum or stonith. This is required just for the sake of getting this tutorial to work with a single cluster-node: a single node can never have quorum, and no fencing device is configured. Neither setting is appropriate for production use.
-+ -+[source,C] -+---- -+# pcs property set stonith-enabled=false -+# pcs property set no-quorum-policy=ignore -+---- -+ -+=== Integrate KVM Guest as remote-node === -+ -+If you didn't already do this earlier in the 'Verify Host Connection to Guest' section, add the KVM guest's IP to the host's /etc/hosts file so we can connect by hostname. The command below will do that if you used the same IP address I used earlier. -+ -+[source,C] -+---- -+# cat << END >> /etc/hosts -+192.168.122.10 guest1 -+END -+---- -+ -+We will use the +VirtualDomain+ resource agent for the management of the virtual machine. This agent requires the virtual machine's XML config to be dumped to a file on disk. To do this, pick out the name of the virtual machine you just created from the output of this list. -+ -+[source,C] -+---- -+# virsh list --all -+ Id Name State -+______________________________________________ -+ - guest1 shut off -+---- -+ -+In my case I named it guest1. Dump the XML to a file somewhere on the host using the following command. -+ -+[source,C] -+---- -+# virsh dumpxml guest1 > /root/guest1.xml -+---- -+ -+Now just register the resource with pacemaker and you're set! -+ -+[source,C] -+---- -+# pcs resource create vm-guest1 VirtualDomain hypervisor="qemu:///system" config="/root/guest1.xml" meta remote-node=guest1 -+---- -+ -+Once the 'vm-guest1' resource is started you will see 'guest1' appear in the 'pcs status' output as a node. The final 'pcs status' output should look something like this. -+ -+[source,C] -+---- -+Last updated: Fri Mar 15 09:30:30 2013 -+Last change: Thu Mar 14 17:21:35 2013 via cibadmin on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+2 Resources configured. -+ -+ -+Online: [ example-host guest1 ] -+ -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+---- -+ -+=== Starting Resources on KVM Guest === -+ -+The commands below demonstrate how resources can be executed on both the remote-node and the cluster-node. -+ -+Create a few Dummy resources. Dummy resources are real resource agents used just for testing purposes. They actually execute on the host they are assigned to just like an Apache server or database would, except their execution just means a file was created. When the resource is stopped, the file it created is removed. -+ -+[source,C] -+---- -+# pcs resource create FAKE1 ocf:pacemaker:Dummy -+# pcs resource create FAKE2 ocf:pacemaker:Dummy -+# pcs resource create FAKE3 ocf:pacemaker:Dummy -+# pcs resource create FAKE4 ocf:pacemaker:Dummy -+# pcs resource create FAKE5 ocf:pacemaker:Dummy -+---- -+ -+Now check your 'pcs status' output. In the resource section you should see something like the following, where some of the resources got started on the cluster-node, and some started on the remote-node. -+ -+[source,C] -+---- -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE2 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE3 (ocf::pacemaker:Dummy): Started example-host -+ FAKE4 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+---- -+ -+ -+The remote-node, 'guest1', reacts just like any other node in the cluster. For example, pick out a resource that is running on your cluster-node. For my purposes I am picking FAKE3 from the output above.
We can force FAKE3 to run on 'guest1' in the exact same way we would any other node. -+ -+[source,C] -+---- -+# pcs constraint location FAKE3 prefers guest1 -+---- -+ -+Now looking at the bottom of the 'pcs status' output you'll see FAKE3 is on 'guest1'. -+ -+[source,C] -+---- -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE2 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE3 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE4 (ocf::pacemaker:Dummy): Started example-host -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+---- -+ -+=== Testing Remote-node Recovery and Fencing === -+ -+Pacemaker's policy engine is smart enough to know that fencing a remote-node associated with a virtual machine means shutting off or rebooting that virtual machine. No special configuration is necessary to make this happen. If you are interested in testing this functionality out, try stopping the guest's pacemaker_remote daemon. This is equivalent to abruptly terminating a cluster-node's corosync membership without properly shutting it down. -+ -+ssh into the guest and run this command. -+ -+[source,C] -+---- -+# kill -9 `pidof pacemaker_remoted` -+---- -+ -+After a few seconds you'll see this in your 'pcs status' output. The 'guest1' node will be shown as offline as it is being recovered. -+ -+[source,C] -+---- -+Last updated: Fri Mar 15 11:00:31 2013 -+Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+7 Resources configured. -+ -+ -+Online: [ example-host ] -+OFFLINE: [ guest1 ] -+ -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Stopped -+ FAKE2 (ocf::pacemaker:Dummy): Stopped -+ FAKE3 (ocf::pacemaker:Dummy): Stopped -+ FAKE4 (ocf::pacemaker:Dummy): Started example-host -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+ -+Failed actions: -+ guest1_monitor_30000 (node=example-host, call=3, rc=7, status=complete): not running -+---- -+ -+Once recovery of the guest is complete, you'll see it automatically get re-integrated into the cluster. The final 'pcs status' output should look something like this. -+ -+[source,C] -+---- -+Last updated: Fri Mar 15 11:03:17 2013 -+Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+7 Resources configured. -+ -+ -+Online: [ example-host guest1 ] -+ -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE2 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE3 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE4 (ocf::pacemaker:Dummy): Started example-host -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+ -+Failed actions: -+ guest1_monitor_30000 (node=example-host, call=3, rc=7, status=complete): not running -+---- -+ -+=== Accessing Cluster Tools from Remote-node === -+ -+Besides just allowing the cluster to manage resources on a remote-node, pacemaker_remote has one other trick.
+The pacemaker_remote daemon allows nearly all the pacemaker tools (crm_resource, crm_mon, crm_attribute, crm_master) to work on remote nodes natively.+ -+ -+Try it: run +crm_mon+ or +pcs status+ on the guest after pacemaker has integrated the remote-node into the cluster. These tools just work. This means resource agents, such as master/slave resources, which need access to tools like crm_master work seamlessly on the remote-nodes. -+ -diff --git a/doc/Pacemaker_Remote/en-US/Ch-LXC-Tutorial.txt b/doc/Pacemaker_Remote/en-US/Ch-LXC-Tutorial.txt -new file mode 100644 -index 0000000..c3459c0 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-LXC-Tutorial.txt -@@ -0,0 +1,328 @@ -+= Linux Container (LXC) Walk-through = -+ -++What this tutorial is:+ This tutorial demonstrates how pacemaker_remote can be used with Linux containers (managed by libvirt-lxc) to run cluster resources in an isolated environment. -+ -++What this tutorial is not:+ This tutorial is not a realistic deployment scenario. The steps shown here are meant to introduce users to the concept of managing Linux container environments with Pacemaker. -+ -+== Step 1: Setup LXC Host == -+ -+This tutorial was tested with Fedora 18. Anything capable of running libvirt and pacemaker v1.1.10 or greater will do, though. An installation guide for Fedora 18 can be found here: http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/. -+ -+Fedora 18 (or similar distro) host preparation steps. -+ -+=== SElinux and Firewall Rules === -+In order to simplify this tutorial, we will disable SELinux and the firewall on the host. -+WARNING: These actions pose a significant security risk to machines exposed to the outside world. Basically, just don't do this on your production system. -+[source,C] -+---- -+# setenforce 0 -+# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config -+# firewall-cmd --add-port 3121/tcp --permanent -+ -+# systemctl disable iptables.service -+# systemctl disable ip6tables.service -+# rm '/etc/systemd/system/basic.target.wants/iptables.service' -+# rm '/etc/systemd/system/basic.target.wants/ip6tables.service' -+# systemctl stop iptables.service -+# systemctl stop ip6tables.service -+---- -+ -+=== Install Cluster Software on Host === -+ -+[source,C] -+---- -+# yum install -y pacemaker pacemaker-remote corosync pcs resource-agents -+---- -+ -+=== Configure Corosync === -+ -+Running the command below will attempt to detect the network address corosync should bind to. -+ -+[source,C] -+---- -+# export corosync_addr=`ip addr | grep "inet " | tail -n 1 | awk '{print $4}' | sed s/255/0/g` -+---- -+ -+Display and verify that the address is correct. -+ -+[source,C] -+---- -+# echo $corosync_addr -+---- -+ -+In most cases the address will be 192.168.1.0 if you are behind a standard home router. -+ -+Now copy over the example corosync.conf. This code will inject your bindaddress and enable the vote quorum API which is required by pacemaker.
-+ -+[source,C] -+---- -+# cp /etc/corosync/corosync.conf.example /etc/corosync/corosync.conf -+# sed -i.bak "s/.*\tbindnetaddr:.*/bindnetaddr:\ $corosync_addr/g" /etc/corosync/corosync.conf -+# cat << END >> /etc/corosync/corosync.conf -+quorum { -+ provider: corosync_votequorum -+ expected_votes: 2 -+} -+END -+---- -+ -+=== Verify Cluster === -+ -+Start the cluster -+ -+[source,C] -+---- -+# pcs cluster start -+---- -+ -+Verify corosync membership -+ -+[source,C] -+---- -+# pcs status corosync -+ -+Membership information -+ Nodeid Votes Name -+1795270848 1 example-host (local) -+---- -+ -+Verify pacemaker status. At first the 'pcs cluster status' output will look like this. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:26:00 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: -+ Version: 1.1.10 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+---- -+ -+After about a minute you should see your host as a single node in the cluster. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:28:23 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: example-host (1795270848) - partition WITHOUT quorum -+ Version: 1.1.8-9b13ea1 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+ -+ Online: [ example-host ] -+---- -+ -+Go ahead and stop the cluster for now after verifying everything is in order. -+ -+[source,C] -+---- -+# pcs cluster stop -+---- -+ -+== Step 2: Setup LXC Environment == -+ -+=== Install Libvirt LXC software === -+ -+[source,C] -+---- -+# yum install -y libvirt libvirt-daemon-lxc wget -+# systemctl enable libvirtd -+---- -+ -+At this point, restart the host. -+ -+=== Generate Libvirt LXC domains === -+ -+I've attempted to simplify this tutorial by creating a script to auto-generate the libvirt-lxc XML domain definitions. -+ -+Download the script to whatever directory you want the containers to live in. In this example I am using the /root/lxc/ directory. -+ -+[source,C] -+---- -+# mkdir /root/lxc/ -+# cd /root/lxc/ -+# wget https://raw.github.com/davidvossel/pcmk-lxc-autogen/master/lxc-autogen -+# chmod 755 lxc-autogen -+---- -+ -+Now execute the script. -+ -+[source,C] -+---- -+# ./lxc-autogen -+---- -+ -+After executing the script you will see a bunch of directories and XML files are generated. Those XML files are the libvirt-lxc domain definitions, and the directories are used as some special mount points for each container. If you open up one of the XML files you'll be able to see how the cpu, memory, and filesystem resources for the container are defined. You can use the libvirt-lxc driver's documentation found here, http://libvirt.org/drvlxc.html, as a reference to help understand all the parts of the XML file. The lxc-autogen script is not complicated and is worth exploring in order to grasp how the environment is generated. -+ -+It is worth noting that this environment depends on use of libvirt's default network interface. Verify that the output of the commands below matches your environment. The default network address 192.168.122.1 should have been generated automatically when you installed the virtualization software.
-+ -+[source,C] -+---- -+# virsh net-list -+Name State Autostart Persistent -+________________________________________________________ -+default active yes yes -+ -+# virsh net-dumpxml default | grep -e "ip address=" -+ <ip address='192.168.122.1' netmask='255.255.255.0'> -+---- -+ -+=== Generate the Authkey === -+ -+Generate the authkey used to secure connections between the host and the lxc guest pacemaker_remote instances. This is a somewhat unusual case because the lxc guests and the host will share the same key file in the /etc/pacemaker/ directory. In a different deployment, where the lxc guests do not share the host's /etc/pacemaker directory, this key will have to be copied into each lxc guest. -+ -+[source,C] -+---- -+# dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 -+---- -+ -+== Step 3: Integrate LXC Guests into the Cluster == -+ -+=== Start Cluster === -+On the host, start pacemaker. -+ -+[source,C] -+---- -+# pcs cluster start -+---- -+ -+Wait for the host to become the DC. The output of 'pcs status' should look similar to this after about a minute. -+ -+[source,C] -+---- -+Last updated: Thu Mar 14 16:41:22 2013 -+Last change: Thu Mar 14 16:41:08 2013 via crmd on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+1 Nodes configured, unknown expected votes -+0 Resources configured. -+ -+ -+Online: [ example-host ] -+---- -+ -+Now enable the cluster to work without quorum or stonith. This is required just for the sake of getting this tutorial to work with a single cluster-node. -+ -+[source,C] -+---- -+# pcs property set stonith-enabled=false -+# pcs property set no-quorum-policy=ignore -+---- -+ -+=== Integrate LXC Guests as remote-nodes === -+ -+If you ran the 'lxc-autogen' script with default parameters, 3 lxc domain definitions were created as .xml files. If you used the same directory I used for the lxc environment, the config files will be located in /root/lxc. Replace the 'config' parameters in the following pcs commands if yours are different. -+ -+The pcs commands below each configure an lxc guest as a remote-node in pacemaker. Behind the scenes, each lxc guest launches an instance of pacemaker_remote, allowing pacemaker to integrate the lxc guests as remote-nodes. The meta-attribute 'remote-node=' used in each command is what tells pacemaker that the lxc guest is both a resource and a remote-node capable of running resources. In this case, the 'remote-node' attribute also indicates to pacemaker that it can contact each lxc's pacemaker_remote service by using the remote-node name as the hostname. If you look in the /etc/hosts file you will see entries for each lxc guest. These entries were auto-generated earlier by the 'lxc-autogen' script. -+ -+[source,C] -+---- -+# pcs resource create container1 VirtualDomain force_stop="true" hypervisor="lxc:///" config="/root/lxc/lxc1.xml" meta remote-node=lxc1 -+# pcs resource create container2 VirtualDomain force_stop="true" hypervisor="lxc:///" config="/root/lxc/lxc2.xml" meta remote-node=lxc2 -+# pcs resource create container3 VirtualDomain force_stop="true" hypervisor="lxc:///" config="/root/lxc/lxc3.xml" meta remote-node=lxc3 -+---- -+ -+ -+After creating the container resources, your 'pcs status' should look like this.
-+ -+[source,C] -+---- -+Last updated: Mon Mar 18 17:15:46 2013 -+Last change: Mon Mar 18 17:15:26 2013 via cibadmin on guest1 -+Stack: corosync -+Current DC: example-host (175810752) - partition WITHOUT quorum -+Version: 1.1.10 -+4 Nodes configured, unknown expected votes -+6 Resources configured. -+ -+Online: [ example-host lxc1 lxc2 lxc3 ] -+ -+Full list of resources: -+ -+ container3 (ocf::heartbeat:VirtualDomain): Started example-host -+ container1 (ocf::heartbeat:VirtualDomain): Started example-host -+ container2 (ocf::heartbeat:VirtualDomain): Started example-host -+---- -+ -+ -+=== Starting Resources on LXC Guests === -+ -+Now that the lxc guests are integrated into the cluster, let's generate some Dummy resources to run on them. -+ -+Dummy resources are real resource agents used just for testing purposes. They actually execute on the node they are assigned to just like an Apache server or database would, except their execution just means a file was created. When the resource is stopped, the file it created is removed. -+ -+[source,C] -+---- -+# pcs resource create FAKE1 ocf:pacemaker:Dummy -+# pcs resource create FAKE2 ocf:pacemaker:Dummy -+# pcs resource create FAKE3 ocf:pacemaker:Dummy -+# pcs resource create FAKE4 ocf:pacemaker:Dummy -+# pcs resource create FAKE5 ocf:pacemaker:Dummy -+---- -+ -+ -+After creating the Dummy resources you will see that the resources got distributed among all the nodes. The 'pcs status' output should look similar to this. -+ -+[source,C] -+---- -+Last updated: Mon Mar 18 17:31:54 2013 -+Last change: Mon Mar 18 17:31:05 2013 via cibadmin on example-host -+Stack: corosync -+Current DC: example-host (175810752) - partition WITHOUT quorum -+Version: 1.1.10 -+4 Nodes configured, unknown expected votes -+11 Resources configured. -+ -+ -+Online: [ example-host lxc1 lxc2 lxc3 ] -+ -+Full list of resources: -+ -+ container3 (ocf::heartbeat:VirtualDomain): Started example-host -+ container1 (ocf::heartbeat:VirtualDomain): Started example-host -+ container2 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Started lxc1 -+ FAKE2 (ocf::pacemaker:Dummy): Started lxc2 -+ FAKE3 (ocf::pacemaker:Dummy): Started lxc3 -+ FAKE4 (ocf::pacemaker:Dummy): Started lxc1 -+ FAKE5 (ocf::pacemaker:Dummy): Started lxc2 -+---- -+ -+To witness that Dummy agents are running within the lxc guests, browse one of the lxc domain's filesystem folders. Each lxc guest has a custom mount point for the '/var/run/' directory, which is the location the Dummy resources write their state files to. -+ -+[source,C] -+---- -+# ls lxc1-filesystem/var/run/ -+Dummy-FAKE4.state Dummy-FAKE.state -+---- -+ -+If you are curious, take a look at lxc1.xml to see how the filesystem is mounted. -+ -+=== Testing LXC Guest Failure === -+ -+You will be able to see each pacemaker_remoted process running in each lxc guest from the host machine. -+ -+[source,C] -+---- -+# ps -A | grep -e pacemaker_remote* -+ 9142 pts/2 00:00:00 pacemaker_remot -+10148 pts/4 00:00:00 pacemaker_remot -+10942 pts/6 00:00:00 pacemaker_remot -+---- -+ -+In order to see how the cluster reacts to a failed lxc guest, try killing one of the pacemaker_remote instances. -+ -+[source,C] -+---- -+# kill -9 9142 -+---- -+ -+After a few moments the lxc guest that was running that instance of pacemaker_remote will be recovered along with all the resources running within that container.
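-+
-+If you want to verify the recovery beyond watching the status output converge, you can re-check the state files the Dummy resources write (assuming the same directory layout used above; which state files appear depends on where pacemaker placed the Dummy resources after recovery):
-+
-+[source,C]
-+----
-+# pcs status
-+# ls lxc1-filesystem/var/run/
-+----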
-diff --git a/doc/Pacemaker_Remote/en-US/Ch-Options.txt b/doc/Pacemaker_Remote/en-US/Ch-Options.txt -new file mode 100644 -index 0000000..9e14b31 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-Options.txt -@@ -0,0 +1,51 @@ -+= Configuration Explained = -+ -+The walk-through examples use some of these options, but don't explain exactly what they mean or do. This section is meant to be the go-to resource for all the options available for configuring remote-nodes. -+ -+== Resource Options == -+ -+When configuring a virtual machine or lxc resource to act as a remote-node, these are the metadata options available to both enable the resource as a remote-node and define the connection parameters. -+ -+.Metadata Options for configuring KVM/LXC resources as remote-nodes -+[width="95%",cols="1m,1,4<",options="header",align="center"] -+|========================================================= -+ -+|Option -+|Default -+|Description -+ -+|+remote-node+ -+| -+|The name of the remote-node this resource defines. This both enables the resource as a remote-node and defines the unique name used to identify the remote-node. If no other parameters are set, this value will also be used as the hostname to connect to at port 3121. +WARNING+ This value cannot overlap with any resource or node IDs. -+ -+|+remote-port+ -+|3121 -+|Configure a custom port to use for the guest connection to pacemaker_remote. -+ -+|+remote-addr+ -+|+remote-node+ value used as hostname -+|The IP address or hostname to connect to if the remote-node's name is not the hostname of the guest. -+ -+|+remote-connect-timeout+ -+|60s -+|How long to wait before a pending guest connection times out. -+ -+|========================================================= -+ -+== Host and Guest Authentication == -+ -+Authentication and encryption of the connection between cluster-nodes (pacemaker) and remote-nodes (pacemaker_remote) is achieved using TLS with PSK encryption/authentication on +tcp port 3121+. This means both the cluster-node and remote-node must share the same private key. By default this +key must be placed at "/etc/pacemaker/authkey" on both cluster-nodes and remote-nodes+. -+ -+== Pacemaker and pacemaker_remote Options == -+ -+If you need to change the default port or authkey location for either pacemaker or pacemaker_remote, there are environment variables you can set that affect both of those daemons. These environment variables can be set by placing them in the /etc/sysconfig/pacemaker file. -+[source,C] -+---- -+#==#==# Pacemaker Remote -+# Use a custom directory for finding the authkey.
-+PCMK_authkey_location=/etc/pacemaker/authkey -+# -+# Specify a custom port for Pacemaker Remote connections -+PCMK_remote_port=3121 -+---- -+ -diff --git a/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.ent b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.ent -new file mode 100644 -index 0000000..65d8bad ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.ent -@@ -0,0 +1,6 @@ -+ -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml -new file mode 100644 -index 0000000..9ee710c ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml -@@ -0,0 +1,17 @@ -+ -+ -+%BOOK_ENTITIES; -+]> -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Revision_History.xml b/doc/Pacemaker_Remote/en-US/Revision_History.xml -new file mode 100644 -index 0000000..26d8ab6 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Revision_History.xml -@@ -0,0 +1,25 @@ -+ -+ -+%BOOK_ENTITIES; -+]> -+ -+ Revision History -+ -+ -+ -+ 1 -+ Tue Mar 19 2013 -+ DavidVosseldvossel@redhat.com -+ Import from Pages.app -+ -+ -+ 2 -+ Tue May 13 2013 -+ DavidVosseldvossel@redhat.com -+ Added Future Features Section -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png -new file mode 100644 -index 0000000..163ba45 -Binary files /dev/null and b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png differ -diff --git a/doc/Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png -new file mode 100644 -index 0000000..11985a7 -Binary files /dev/null and b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png differ -diff --git a/doc/Pacemaker_Remote/publican.cfg.in b/doc/Pacemaker_Remote/publican.cfg.in -new file mode 100644 -index 0000000..314bb3d ---- /dev/null -+++ b/doc/Pacemaker_Remote/publican.cfg.in -@@ -0,0 +1,14 @@ -+# Config::Simple 4.59 -+# Fri Apr 23 15:33:52 2010 -+ -+docname: Pacemaker_Remote -+xml_lang: en-US -+#edition: 1 -+type: Book -+version: @PACKAGE_SERIES@ -+brand: @PUBLICAN_BRAND@ -+product: Pacemaker -+ -+chunk_first: 0 -+chunk_section_depth: 3 -+generate_section_toc_level: 4 -diff --git a/doc/openstack.md b/doc/openstack.md -index 6f994fb..7509a16 100644 ---- a/doc/openstack.md -+++ b/doc/openstack.md -@@ -15,15 +15,17 @@ Export your OpenStack credentials - export OS_REGION_NAME=... - export OS_TENANT_NAME=... - export OS_AUTH_URL=... -- export OS_USERNAME=... -+ export OS_USERNAME=... - export OS_PASSWORD=... - -+ export IMAGE_USER=fedora -+ - Allocate 5 floating IPs. For the purposes of the setup instructions --(and probably your sanity), they need to be consecutive and should --ideally start with a multiple of 10. Below we will assume -+(and probably your sanity), they need to be consecutive and to remain -+sane, should ideally start with a multiple of 10. Below we will assume - 10.16.16.60-64 - -- for n in 1 2 3 4 5; do nova floating-ip-create; done -+ for n in `seq 1 5`; do nova floating-ip-create; done - - Create some variables based on the IP addresses nova created for you: - -@@ -32,7 +34,11 @@ Create some variables based on the IP addresses nova created for you: - and a function for calculating offsets - - function nth_ipaddr() { -- echo $IP_BASE | awk -F. -v offset=$1 '{ printf "%s.%s.%s.%s\n", $1, $2, $3, $4 + offset }' -+ echo $IP_BASE | awk -F. 
-v offset=$1 '{ printf "%s.%s.%s.%s\n", $1, $2, $3, $4 + offset }' -+ } -+ -+ function ip_net() { -+ echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*\n", $1, $2, $3 }' - } - - Upload a public key that we can use to log into the images we create. -@@ -40,6 +46,15 @@ I created one especially for cluster testing and left it without a password. - - nova keypair-add --pub-key ~/.ssh/cluster Cluster - -+Make sure it gets used when connecting to the CTS master -+ -+ cat << EOF >> ~/.ssh/config -+ Host cts-master \`echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*", \$1, \$2, \$3 }'\` -+ User root -+ IdentityFile ~/.ssh/cluster -+ UserKnownHostsFile ~/.ssh/known.openstack -+ EOF -+ - Punch a hole in the firewall for SSH access and ping - - nova secgroup-add-rule default tcp 23 23 10.0.0.0/8 -@@ -54,93 +69,73 @@ Add the CTS master to /etc/hosts - Create helper scripts on a local host - - cat << END > ./master.sh -- -+ - echo export OS_REGION_NAME=$OS_REGION_NAME >> ~/.bashrc - echo export OS_TENANT_NAME=$OS_TENANT_NAME >> ~/.bashrc - echo export OS_AUTH_URL=$OS_AUTH_URL >> ~/.bashrc - echo export OS_USERNAME=$OS_USERNAME >> ~/.bashrc - echo export OS_PASSWORD=$OS_PASSWORD >> ~/.bashrc -- -+ - function nth_ipaddr() { -- echo $IP_BASE | awk -F. -v offset=\$1 '{ printf "%s.%s.%s.%s\n", \$1, \$2, \$3, \$4 + offset }' -+ echo $IP_BASE | awk -F. -v offset=\$1 '{ printf "%s.%s.%s.%s\n", \$1, \$2, \$3, \$4 + offset }' - } -- -+ - yum install -y python-novaclient git screen pdsh pdsh-mod-dshgroup -- -- git clone git://github.com/beekhof/fence_openstack.git -+ -+ git clone --depth 0 git://github.com/beekhof/fence_openstack.git - ln -s /root/fence_openstack/fence_openstack /sbin - - mkdir -p /root/.dsh/group/ - echo export cluster_name=openstack >> ~/.bashrc -- -+ - rm -f /root/.dsh/group/openstack -- for n in 1 2 3 4; do -+ for n in `seq 1 4`; do - echo "cluster-\$n" >> /root/.dsh/group/openstack - echo \`nth_ipaddr \$n\` cluster-\$n >> /etc/hosts - done -- -- cat << EOF >> /root/.ssh/config -- Host \`echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*", \$1, \$2, \$3 }'\` -- User root -- IdentityFile ~/.ssh/cluster - -- Host cts-master -+ cat << EOF >> /root/.ssh/config -+ Host cts-master \`echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*", \$1, \$2, \$3 }'\` - User root - IdentityFile ~/.ssh/cluster - EOF -- -+ - END - --Another script: -+Some images do not allow root to log in by default and insist on a -+'fedora' user. Create a script to disable this "feature": - - cat << EOF > fix-guest.sh - #!/bin/bash - # Re-allow root to log in - sudo sed -i s/.*ssh-/ssh-/ /root/.ssh/authorized_keys - EOF -- --## CTS master (Fedora-17) -+ -+## CTS master (Fedora-18) - - Create and update the master - -- nova boot --poll --image "Fedora 17" --key_name Cluster --flavor m1.tiny cts-master -+ nova boot --poll --image "Fedora 18" --key_name Cluster --flavor m1.tiny cts-master - nova add-floating-ip cts-master `nth_ipaddr 0` - --Some images do not allow root to log in by default and insist on a 'stack' user. --Disable this "feature". 
-+If your image does not allow root to log in by default, disable this -+"feature" with the script we created earlier: - -- scp fix-guest.sh stack@cts-master: -- ssh -l stack -t cts-master -- bash ./fix-guest.sh -+ scp fix-guest.sh $IMAGE_USER@cts-master: -+ ssh -l $IMAGE_USER -t cts-master -- bash ./fix-guest.sh - - Now we can set up the CTS master with the script we created earlier: - -- scp ~/.ssh/cluster root@cts-master:.ssh/id_rsa -- scp master.sh root@cts-master: -- ssh root@cts-master -- bash ./master.sh -- --## Create Guest Base Image -- --Create a guest that we can tweak -+ scp ~/.ssh/cluster cts-master:.ssh/id_rsa -+ scp master.sh cts-master: -+ ssh cts-master -- bash ./master.sh - -- nova boot --poll --image "Fedora 18 Alpha" --key_name Cluster --flavor m1.tiny TempGuest -- nova add-floating-ip TempGuest `nth_ipaddr 1` -- scp fix-guest.sh stack@`nth_ipaddr 1`: -- --Create snapshot with our changes called Fedora-18-base -- -- nova image-create --poll TempGuest Fedora-18-base -- --Release the IP and delete the temporary guest -- -- nova remove-floating-ip TempGuest `nth_ipaddr 1` -- nova delete TempGuest -- --### Create Guests -+### Create the Guests - - First create the guests - -- for n in 1 2 3 4; do -- nova boot --poll --image Fedora-18-base --key_name Cluster --flavor m1.tiny cluster-$n; -+ for n in `seq 1 4`; do -+ nova boot --poll --image "Fedora 18" --key_name Cluster --flavor m1.tiny cluster-$n; - nova add-floating-ip cluster-$n `nth_ipaddr $n` - done - -@@ -148,11 +143,14 @@ Then wait for everything to settle - - sleep 10 - --Now you can fix them -+### Fix the Guests -+ -+If your image does not allow root to log in by default, disable this -+"feature" with the script we created earlier: - -- for n in 1 2 3 4; do -- ssh -l stack -t `nth_ipaddr $n` -- bash ./fix-guest.sh; -- scp /etc/hosts root@`nth_ipaddr $n`:/etc/; -+ for n in `seq 1 4`; do -+ scp fix-guest.sh $IMAGE_USER@`nth_ipaddr $n`: -+ ssh -l $IMAGE_USER -t `nth_ipaddr $n` -- bash ./fix-guest.sh; - done - - ## Run CTS -@@ -161,20 +159,22 @@ Now you can fix them - - Switch to the CTS master - -- ssh -l root cts-master -+ ssh cts-master - - Clone Pacemaker for the latest version of CTS: - -- git clone git://github.com/ClusterLabs/pacemaker.git -- echo 'export PATH=\$PATH:/root/pacemaker/extra::/root/pacemaker/cts' >> ~/.bashrc -+ git clone --depth 0 git://github.com/ClusterLabs/pacemaker.git -+ echo 'export PATH=$PATH:/root/pacemaker/extra:/root/pacemaker/cts' >> ~/.bashrc -+ echo alias c=\'cluster-helper\' >> ~/.bashrc -+ . 
~/.bashrc - --Now set up CTS to run from the local source tree -+Now set up CTS to run from the local source tree - - cts local-init - - Configure a cluster (this will install all needed packages and configure corosync on the guests in the $cluster_name group) - -- cluster-init -g openstack --yes --unicast fedora-17 -+ cluster-init -g openstack --yes --unicast --hosts fedora-18 - - ### Run - -diff --git a/doc/pcs-crmsh-quick-ref.md b/doc/pcs-crmsh-quick-ref.md -new file mode 100644 -index 0000000..a28960f ---- /dev/null -+++ b/doc/pcs-crmsh-quick-ref.md -@@ -0,0 +1,159 @@ -+## Display the configuration -+ -+ crmsh # crm configure show -+ pcs # pcs cluster cib -+ -+## Display the current status -+ -+ crmsh # crm_mon -1 -+ pcs # pcs status -+ -+## Node standby -+ -+ crmsh # crm node standby -+ pcs # pcs cluster standby pcmk-1 -+ -+ crmsh # crm node online -+ pcs # pcs cluster unstandby pcmk-1 -+ -+## Setting configuration options -+ -+ crmsh # crm configure property stonith-enabled=false -+ pcs # pcs property set stonith-enabled=false -+ -+## Listing available resources -+ -+ crmsh # crm ra classes -+ pcs # pcs resource standards -+ -+ crmsh # crm ra list ocf pacemaker -+ pcs # pcs resource agents ocf:pacemaker -+ -+## Creating a resource -+ -+ crmsh # crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \ -+ params ip=192.168.122.120 cidr_netmask=32 \ -+ op monitor interval=30s -+ pcs # pcs resource create ClusterIP IPaddr2 ip=192.168.0.120 cidr_netmask=32 -+ -+The standard and provider (`ocf:heartbeat`) are determined automatically since `IPaddr2` is unique. -+The monitor operation is automatically created based on the agent's metadata. -+ -+## Start a resource -+ crmsh # crm resource start ClusterIP -+ pcs # pcs resource start ClusterIP -+ -+## Stop a resource -+ -+ crmsh # crm resource stop ClusterIP -+ pcs # pcs resource stop ClusterIP -+ -+## Remove a resource -+ -+ crmsh # crm configure delete ClusterIP -+ pcs # -+ -+## Update a resource -+ crmsh # crm configure edit ClusterIP -+ pcs # pcs resource update ClusterIP clusterip_hash=sourceip -+ -+## Display a resource -+ -+ crmsh # -+ pcs # pcs resource show WebFS -+ -+## Resource defaults -+ -+ crmsh # crm configure rsc_defaults resource-stickiness=100 -+ pcs # pcs rsc defaults resource-stickiness=100 -+ -+Listing the current defaults: -+ -+ pcs # pcs rsc defaults -+ -+## Operation defaults -+ -+ crmsh # crm configure op_defaults timeout=240s -+ pcs # pcs resource op defaults timeout=240s -+ -+Listing the current defaults: -+ pcs # pcs resource op defaults -+ -+## Colocation -+ -+ crmsh # crm configure colocation website-with-ip INFINITY: WebSite ClusterIP -+ pcs # pcs constraint colocation add WebSite ClusterIP INFINITY -+ -+With roles -+ -+ crmsh # -+ pcs # -+ -+## Start/stop ordering -+ -+ crmsh # crm configure order apache-after-ip mandatory: ClusterIP WebSite -+ pcs # pcs constraint order ClusterIP then WebSite -+ -+With roles: -+ -+ crmsh # -+ pcs # -+ -+## Preferred locations -+ -+ crmsh # crm configure location prefer-pcmk-1 WebSite 50: pcmk-1 -+ pcs # pcs constraint location WebSite prefers pcmk-1=50 -+ -+With roles: -+ -+ crmsh # -+ pcs # -+ -+## Moving resources -+ -+ crmsh # crm resource move WebSite pcmk-1 -+ pcs # pcs constraint location WebSite prefers pcmk-1=INFINITY -+ -+ crmsh # crm resource unmove WebSite -+ pcs # pcs constraint rm location-WebSite-pcmk-1-INFINITY -+ -+## Creating a clone -+ -+ crmsh # configure clone WebIP ClusterIP meta globally-unique="true" clone-max="2" clone-node-max="2" -+ pcs # pcs resource 
clone ClusterIP globally-unique=true clone-max=2 clone-node-max=2 -+ -+## Creating a master/slave clone -+ -+ crmsh # crm configure ms WebDataClone WebData \ -+ meta master-max=1 master-node-max=1 \ -+ clone-max=2 clone-node-max=1 notify=true -+ pcs # resource master WebDataClone WebData \ -+ master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 \ -+ notify=true -+ -+## ... -+ crmsh # -+ pcs # -+ -+ crmsh # -+ pcs # -+ -+ -+## Batch changes -+ -+ crmsh # crm -+ crmsh # cib new drbd_cfg -+ crmsh # configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \ -+ op monitor interval=60s -+ crmsh # configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \ -+ clone-max=2 clone-node-max=1 notify=true -+ crmsh # cib commit drbd_cfg -+ crmsh # quit -+ -+ -+ pcs # pcs cluster cib drbd_cfg -+ pcs # pcs -f drbd_cfg resource create WebData ocf:linbit:drbd drbd_resource=wwwdata \ -+ op monitor interval=60s -+ pcs # pcs -f drbd_cfg resource master WebDataClone WebData master-max=1 master-node-max=1 \ -+ clone-max=2 clone-node-max=1 notify=true -+ pcs # pcs cluster push cib drbd_cfg -diff --git a/extra/cluster-clean b/extra/cluster-clean -index cf6e84f..9727dc6 100755 ---- a/extra/cluster-clean -+++ b/extra/cluster-clean -@@ -36,11 +36,14 @@ cluster-helper --list bullet $target - - if [ $kill != 0 ]; then - echo "Cleaning processes" -- cluster-helper $target -- "killall -q -9 corosync aisexec heartbeat pacemakerd ccm stonithd ha_logd lrmd crmd pengine attrd pingd mgmtd cib fenced dlm_controld gfs_controld" &> /dev/null - - # Bah. Force systemd to actually look at the process and realize its dead" -- cluster-helper $target -- "service corosync stop" &> /dev/null -- cluster-helper $target -- "service pacemaker stop" &> /dev/null -+ cluster-helper $target -- "service corosync stop" &> /dev/null & -+ cluster-helper $target -- "service pacemaker stop" &> /dev/null & -+ -+ cluster-helper $target -- "killall -q -9 corosync aisexec heartbeat pacemakerd pacemaker-remoted ccm stonithd ha_logd lrmd crmd pengine attrd pingd mgmtd cib fenced dlm_controld gfs_controld" &> /dev/null -+ cluster-helper $target -- 'kill -9 `pidof valgrind`' &> /dev/null -+ - if [ $kill == 2 ]; then - exit 0 - fi -@@ -58,6 +61,7 @@ log_files="$log_files pacemaker.log" - - state_files="" - state_files="$state_files 'cib.xml*'" -+state_files="$state_files 'valgrind-*'" - state_files="$state_files 'cib-*'" - state_files="$state_files 'core.*'" - state_files="$state_files hostcache" -@@ -74,7 +78,7 @@ done - - cluster-helper $target -- "find /dev/shm -name 'qb-*' -exec rm -f \{\} \;" - cluster-helper $target -- "find /var/lib/pacemaker/blackbox -name '*.*' -exec rm -f \{\} \;" --cluster-helper $target -- "find /tmp -name 'cts-*.valgrind' -exec rm -f \{\} \;" -+cluster-helper $target -- "find /tmp -name '*.valgrind' -exec rm -f \{\} \;" - - cluster-helper $target -- service rsyslog restart 2>&1 > /dev/null - cluster-helper $target -- logger -i -p daemon.info __clean_logs__ -diff --git a/extra/cluster-init b/extra/cluster-init -index 57ec42b..5dc71c2 100755 ---- a/extra/cluster-init -+++ b/extra/cluster-init -@@ -2,6 +2,7 @@ - - accept_defaults=0 - do_raw=0 -+ETCHOSTS=0 - CMAN=0 - do_heartbeat=0 - plugin_ver=-1 -@@ -11,6 +12,7 @@ nodelist=0 - pkgs="corosync xinetd nmap abrt-cli fence-agents perl-TimeDate gdb" - - transport="multicast" -+inaddr_any="no" - - INSTALL= - cs_conf= -@@ -76,6 +78,7 @@ function helptext() { - echo "" - echo "-d, --debug Enable debug logging for the cluster" - echo "-10 install stable-1.0 
packages, implies: -p 0 -R rpm-test -I" -+ echo "--hosts Copy /etc/hosts from the test master to the nodes" - echo "-e, --extra package-list" - echo " Extra packages to install" - exit $1 -@@ -102,6 +105,7 @@ while true; do - - -R|--repo) rpm_repo=$2; shift; shift;; - -I|--install) INSTALL=Yes; shift;; -+ --hosts) ETCHOSTS=1; shift;; - - cman|--cman) CTYPE=cman; shift;; - -h|--heartbeat) CTYPE=heartbeat; shift;; -@@ -109,7 +113,7 @@ while true; do - -C|--nodelist) CTYPE=corosync; nodelist=1; shift;; - -o|--openais) CTYPE=openais; shift;; - --plugin|-p) CTYPE=plugin; plugin_ver=$2; shift; shift;; -- -u|--unicast) nodelist=1; transport=udpu; shift;; -+ -u|--unicast) nodelist=1; transport=udpu; inaddr_any="yes"; shift;; - -e|--extra) pkgs="$pkgs $2"; shift; shift;; - -t|--test) rpm_repo=rpm-test-next; pkgs="$pkgs valgrind"; shift;; - -@@ -139,9 +143,9 @@ while true; do - esac - done - --if [ x = "x$host_input" -a x = "x$CTS_GROUP" ]; then -+if [ x = "x$host_input" -a x = "x$cluster_name" ]; then - if [ -d $HOME/.dsh/group ]; then -- read -p "Please specify a dsh group you'd like to configure as a cluster? [] " -t 60 CTS_GROUP -+ read -p "Please specify a dsh group you'd like to configure as a cluster? [] " -t 60 cluster_name - else - read -p "Please specify a whitespace delimetered list of nodes you'd like to configure as a cluster? [] " -t 60 host_list - -@@ -151,9 +155,9 @@ if [ x = "x$host_input" -a x = "x$CTS_GROUP" ]; then - fi - fi - --if [ ! -z $CTS_GROUP ]; then -- host_input="-g $CTS_GROUP" -- dsh_group=`echo $CTS_GROUP | sed s/[a-zA-Z]*//` -+if [ ! -z $cluster_name ]; then -+ host_input="-g $cluster_name" -+ dsh_group=`echo $cluster_name | sed s/[a-zA-Z]*//` - fi - - if [ -z "$host_input" ]; then -@@ -541,6 +545,7 @@ totem { - secauth: off - - transport: $transport -+ inaddr_any: $inaddr_any - - # interface: define at least one interface to communicate - # over. If you define more than one interface stanza, you must -@@ -769,6 +774,10 @@ for host in $host_list; do - fi - fi - -+ if [ $ETCHOSTS = 1 ]; then -+ scp /etc/hosts root@${host}:/etc/hosts -+ fi -+ - if [ $pcmk_ver = 10 ]; then - scp /etc/hosts root@${host}:/etc/hosts - scp ~/.ssh/id_dsa.suse root@${host}:.ssh/id_dsa -diff --git a/extra/pcmk_snmp_helper.sh b/extra/pcmk_snmp_helper.sh -new file mode 100644 -index 0000000..7eca8e6 ---- /dev/null -+++ b/extra/pcmk_snmp_helper.sh -@@ -0,0 +1,54 @@ -+#!/bin/bash -+ -+# -+# Copyright (C) 2013 Florian CROUZAT -+# -+# This program is free software; you can redistribute it and/or -+# modify it under the terms of the GNU General Public -+# License as published by the Free Software Foundation; either -+# version 2 of the License, or (at your option) any later version. -+# -+# This software is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public -+# License along with this library; if not, write to the Free Software -+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ -+# Resources: -+# crm ra meta ocf:pacemaker:ClusterMon -+# man 8 crm_mon -+ -+# Sample configuration -+# ================================ -+# primitive ClusterMon ocf:pacemaker:ClusterMon \ -+# params user="root" update="30" extra_options="-E /path/to/pcmk_snmp_helper.sh -e 192.168.1.2" \ -+# op monitor on-fail="restart" interval="10" -+# -+# clone ClusterMon-clone ClusterMon \ -+# meta target-role="Started" -+# ================================ -+ -+# The external agent is fed with environment variables allowing us to know -+# what transition happened and to react accordingly: -+# http://clusterlabs.org/doc/en-US/Pacemaker/1.1-crmsh/html/Pacemaker_Explained/s-notification-external.html -+ -+# Generates SNMP alerts for any failing monitor operation -+# OR -+# for any operations (even successful) that are not a monitor -+if [[ ${CRM_notify_rc} != 0 && ${CRM_notify_task} == "monitor" ]] || [[ ${CRM_notify_task} != "monitor" ]] ; then -+ # This trap is compliant with PACEMAKER MIB -+ # https://github.com/ClusterLabs/pacemaker/blob/master/extra/PCMK-MIB.txt -+ /usr/bin/snmptrap -v 2c -c public ${CRM_notify_recipient} "" PACEMAKER-MIB::pacemakerNotification \ -+ PACEMAKER-MIB::pacemakerNotificationNode s "${CRM_notify_node}" \ -+ PACEMAKER-MIB::pacemakerNotificationResource s "${CRM_notify_rsc}" \ -+ PACEMAKER-MIB::pacemakerNotificationOperation s "${CRM_notify_task}" \ -+ PACEMAKER-MIB::pacemakerNotificationDescription s "${CRM_notify_desc}" \ -+ PACEMAKER-MIB::pacemakerNotificationStatus i "${CRM_notify_status}" \ -+ PACEMAKER-MIB::pacemakerNotificationReturnCode i ${CRM_notify_rc} \ -+ PACEMAKER-MIB::pacemakerNotificationTargetReturnCode i ${CRM_notify_target_rc} && exit 0 || exit 1 -+fi -+ -+exit 0 -diff --git a/extra/resources/ping b/extra/resources/ping -index dd1662e..b9a69b8 100755 ---- a/extra/resources/ping -+++ b/extra/resources/ping -@@ -112,6 +112,15 @@ A catch all for any other options that need to be passed to ping. - - - -+ -+ -+Resource is failed if the score is less than failure_score. -+Default never fails. -+ -+failure_score -+ -+ -+ - - - Enables to use default attrd_updater verbose logging on every call. -@@ -172,7 +181,10 @@ ping_stop() { - ping_monitor() { - if [ -f ${OCF_RESKEY_pidfile} ]; then - ping_update -- return $OCF_SUCCESS -+ if [ $? 
-eq 0 ]; then -+ return $OCF_SUCCESS -+ fi -+ return $OCF_ERR_GENERIC - fi - return $OCF_NOT_RUNNING - } -@@ -277,7 +289,15 @@ ping_update() { - 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; - *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; - esac -- return $rc -+ if [ $rc -ne 0 ]; then -+ return $rc -+ fi -+ -+ if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then -+ ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" -+ return 1 -+ fi -+ return 0 - } - - : ${OCF_RESKEY_name:="pingd"} -@@ -285,6 +305,7 @@ ping_update() { - : ${OCF_RESKEY_attempts:="3"} - : ${OCF_RESKEY_multiplier:="1"} - : ${OCF_RESKEY_debug:="false"} -+: ${OCF_RESKEY_failure_score:="0"} - - : ${OCF_RESKEY_CRM_meta_timeout:="20000"} - : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} -diff --git a/fencing/Makefile.am b/fencing/Makefile.am -index 03d1a32..1fcd706 100644 ---- a/fencing/Makefile.am -+++ b/fencing/Makefile.am -@@ -5,19 +5,19 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - # - MAINTAINERCLEANFILES = Makefile.in - --SUBDIRS = -+SUBDIRS = - - ## binary progs - testdir = $(datadir)/$(PACKAGE)/tests/fencing -@@ -31,8 +31,8 @@ sbin_SCRIPTS = fence_legacy fence_pcmk - - noinst_HEADERS = internal.h - --man7_MANS = --man8_MANS = -+man7_MANS = -+man8_MANS = - - if BUILD_XML_HELP - man7_MANS += stonithd.7 -@@ -44,10 +44,11 @@ stonithd.7: stonithd.xml - endif - - if BUILD_HELP --man8_MANS += $(sbin_PROGRAMS:%=%.8) $(sbin_SCRIPTS:%=%.8) -+man8_MANS += $(sbin_PROGRAMS:%=%.8) $(sbin_SCRIPTS:%=%.8) - %.8: % - echo Creating $@ -- chmod a+x $< -+ chmod a+x $(top_builddir)/fencing/$< -+ $(top_builddir)/fencing/$< --help - $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/fencing/$< - endif - -@@ -67,6 +68,7 @@ stonith_admin_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ - $(CRYPTOLIB) $(CLUSTERLIBS) - -+stonithd_CFLAGS = -I$(top_srcdir)/pengine - stonithd_SOURCES = main.c commands.c remote.c - if BUILD_STONITH_CONFIG - BUILT_SOURCES = standalone_config.h -@@ -74,7 +76,7 @@ BUILT_SOURCES = standalone_config.h - stonithd_SOURCES += standalone_config.c config.y config.l - stonithd_AM_LFLAGS = -o$(LEX_OUTPUT_ROOT).c - --# lex/yacc issues: -+# lex/yacc issues: - - endif - stonithd_YFLAGS = -d -@@ -82,6 +84,8 @@ stonithd_YFLAGS = -d - stonithd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ -+ $(top_builddir)/lib/pengine/libpe_status.la \ -+ $(top_builddir)/pengine/libpengine.la \ - $(CRYPTOLIB) $(CLUSTERLIBS) - - CFLAGS = $(CFLAGS_COPY:-Werror=) -diff --git a/fencing/admin.c b/fencing/admin.c -index 52780b4..c029861 100644 ---- a/fencing/admin.c -+++ b/fencing/admin.c -@@ -464,20 +464,21 @@ 
main(int argc, char **argv) - } - - if (hp->state == st_failed) { -- printf("%s failed to %s node %s on behalf of %s at %s\n", -- hp->delegate ? hp->delegate : "We", action_s, hp->target, hp->origin, -- ctime(&complete)); -+ printf("%s failed to %s node %s on behalf of %s from %s at %s\n", -+ hp->delegate ? hp->delegate : "We", action_s, hp->target, -+ hp->client, hp->origin, ctime(&complete)); - - } else if (hp->state == st_done && hp->delegate) { -- printf("%s was able to %s node %s on behalf of %s at %s\n", -- hp->delegate, action_s, hp->target, hp->origin, ctime(&complete)); -+ printf("%s was able to %s node %s on behalf of %s from %s at %s\n", -+ hp->delegate, action_s, hp->target, -+ hp->client, hp->origin, ctime(&complete)); - - } else if (hp->state == st_done) { -- printf("We were able to %s node %s on behalf of %s at %s\n", -- action_s, hp->target, hp->origin, ctime(&complete)); -+ printf("We were able to %s node %s on behalf of %s from %s at %s\n", -+ action_s, hp->target, hp->client, hp->origin, ctime(&complete)); - } else { -- printf("%s wishes to %s node %s - %d %d\n", -- hp->origin, action_s, hp->target, hp->state, hp->completed); -+ printf("%s at %s wishes to %s node %s - %d %d\n", -+ hp->client, hp->origin, action_s, hp->target, hp->state, hp->completed); - } - - free(action_s); -@@ -498,9 +499,9 @@ main(int argc, char **argv) - action_s = strdup(latest->action); - } - -- printf("%s was able to %s node %s on behalf of %s at %s\n", -+ printf("%s was able to %s node %s on behalf of %s from %s at %s\n", - latest->delegate ? latest->delegate : "We", action_s, latest->target, -- latest->origin, ctime(&complete)); -+ latest->client, latest->origin, ctime(&complete)); - - free(action_s); - } -diff --git a/fencing/commands.c b/fencing/commands.c -index 3ebbba3..698dd94 100644 ---- a/fencing/commands.c -+++ b/fencing/commands.c -@@ -42,6 +42,10 @@ - #include - #include - -+#if SUPPORT_CIBSECRETS -+# include -+#endif -+ - #include - - GHashTable *device_list = NULL; -@@ -144,6 +148,7 @@ free_async_command(async_command_t * cmd) - free(cmd->client); - free(cmd->client_name); - free(cmd->origin); -+ free(cmd->mode); - free(cmd->op); - free(cmd); - } -@@ -182,26 +187,6 @@ create_async_command(xmlNode * msg) - return cmd; - } - --static int --stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) --{ -- async_command_t *cmd = create_async_command(msg); -- xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); -- -- if (cmd == NULL) { -- return -EINVAL; -- } -- -- cmd->device = strdup("manual_ack"); -- cmd->remote_op_id = strdup(op->id); -- -- crm_notice("Injecting manual confirmation that %s is safely off/down", -- crm_element_value(dev, F_STONITH_TARGET)); -- -- cmd->done_cb(0, 0, NULL, cmd); -- return pcmk_ok; --} -- - static gboolean - stonith_device_execute(stonith_device_t * device) - { -@@ -229,6 +214,23 @@ stonith_device_execute(stonith_device_t * device) - return TRUE; - } - -+#if SUPPORT_CIBSECRETS -+ if (replace_secret_params(device->id, device->params) < 0) { -+ /* replacing secrets failed! */ -+ if (safe_str_eq(cmd->action,"stop")) { -+ /* don't fail on stop! 
*/ -+ crm_info("proceeding with the stop operation for %s", device->id); -+ -+ } else { -+ crm_err("failed to get secrets for %s, " -+ "considering resource not configured", device->id); -+ exec_rc = PCMK_EXECRA_NOT_CONFIGURED; -+ cmd->done_cb(0, exec_rc, NULL, cmd); -+ return TRUE; -+ } -+ } -+#endif -+ - action = stonith_action_create(device->agent, - cmd->action, - cmd->victim, -@@ -386,15 +388,12 @@ build_port_aliases(const char *hostmap, GListPtr * targets) - } - - static void --parse_host_line(const char *line, GListPtr * output) -+parse_host_line(const char *line, int max, GListPtr * output) - { - int lpc = 0; -- int max = 0; - int last = 0; - -- if (line) { -- max = strlen(line); -- } else { -+ if (max <= 0) { - return; - } - -@@ -404,7 +403,7 @@ parse_host_line(const char *line, GListPtr * output) - return; - } - -- crm_trace("Processing: %s", line); -+ crm_trace("Processing %d bytes: [%s]", max, line); - /* Skip initial whitespace */ - for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { - last = lpc + 1; -@@ -458,16 +457,24 @@ parse_host_list(const char *hosts) - for (lpc = 0; lpc <= max; lpc++) { - if (hosts[lpc] == '\n' || hosts[lpc] == 0) { - char *line = NULL; -+ int len = lpc - last; -+ -+ if(len > 1) { -+ line = malloc(1 + len); -+ } - -- line = calloc(1, 2 + lpc - last); -- snprintf(line, 1 + lpc - last, "%s", hosts + last); -- parse_host_line(line, &output); -- free(line); -+ if(line) { -+ snprintf(line, 1 + len, "%s", hosts + last); -+ line[len] = 0; /* Because it might be '\n' */ -+ parse_host_line(line, len, &output); -+ free(line); -+ } - - last = lpc + 1; - } - } - -+ crm_trace("Parsed %d entries from '%s'", g_list_length(output), hosts); - return output; - } - -@@ -503,11 +510,14 @@ is_nodeid_required(xmlNode * xml) - if (!xml) { - return FALSE; - } -+ - xpath = xpath_search(xml, "//parameter[@name='nodeid']"); -- if (!xpath || xpath->nodesetval->nodeNr <= 0) { -+ if (numXpathResults(xpath) <= 0) { -+ freeXpathObject(xpath); - return FALSE; - } - -+ freeXpathObject(xpath); - return TRUE; - } - -@@ -524,13 +534,13 @@ get_on_target_actions(xmlNode * xml) - } - - xpath = xpath_search(xml, "//action"); -+ max = numXpathResults(xpath); - -- if (!xpath || !xpath->nodesetval) { -+ if (max <= 0) { -+ freeXpathObject(xpath); - return NULL; - } - -- max = xpath->nodesetval->nodeNr; -- - actions = calloc(1, 512); - - for (lpc = 0; lpc < max; lpc++) { -@@ -551,6 +561,8 @@ get_on_target_actions(xmlNode * xml) - } - } - -+ freeXpathObject(xpath); -+ - if (!strlen(actions)) { - free(actions); - actions = NULL; -@@ -660,6 +672,8 @@ string_in_list(GListPtr list, const char *item) - - if (safe_str_eq(item, value)) { - return TRUE; -+ } else { -+ crm_trace("%d: '%s' != '%s'", lpc, item, value); - } - } - return FALSE; -@@ -673,6 +687,8 @@ status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; - gboolean can = FALSE; - -+ free_async_command(cmd); -+ - if (!dev) { - search_devices_record_result(search, NULL, FALSE); - return; -@@ -702,6 +718,8 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - stonith_device_t *dev = cmd->device ? 
g_hash_table_lookup(device_list, cmd->device) : NULL; - gboolean can_fence = FALSE; - -+ free_async_command(cmd); -+ - /* Host/alias must be in the list output to be eligable to be fenced - * - * Will cause problems if down'd nodes aren't listed or (for virtual nodes) -@@ -1488,7 +1506,7 @@ static gint - sort_device_priority(gconstpointer a, gconstpointer b) - { - const stonith_device_t *dev_a = a; -- const stonith_device_t *dev_b = a; -+ const stonith_device_t *dev_b = b; - - if (dev_a->priority > dev_b->priority) { - return -1; -@@ -1526,7 +1544,7 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data) - } - - /* no device found! */ -- stonith_send_async_reply(cmd, NULL, -EHOSTUNREACH, 0); -+ stonith_send_async_reply(cmd, NULL, -ENODEV, 0); - - free_async_command(cmd); - g_list_free_full(devices, free); -@@ -1536,7 +1554,6 @@ static int - stonith_fence(xmlNode * msg) - { - const char *device_id = NULL; -- int rc = -EHOSTUNREACH; - stonith_device_t *device = NULL; - async_command_t *cmd = create_async_command(msg); - xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); -@@ -1550,10 +1567,10 @@ stonith_fence(xmlNode * msg) - device = g_hash_table_lookup(device_list, device_id); - if (device == NULL) { - crm_err("Requested device '%s' is not available", device_id); -- } else { -- schedule_stonith_command(cmd, device); -- rc = -EINPROGRESS; -+ return -ENODEV; - } -+ schedule_stonith_command(cmd, device); -+ - } else { - const char *host = crm_element_value(dev, F_STONITH_TARGET); - -@@ -1567,10 +1584,9 @@ stonith_fence(xmlNode * msg) - } - get_capable_devices(host, cmd->action, cmd->default_timeout, cmd, - stonith_fence_get_devices_cb); -- rc = -EINPROGRESS; - } - -- return rc; -+ return -EINPROGRESS; - } - - xmlNode * -@@ -1703,7 +1719,7 @@ stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, - if (remote_peer) { - send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); - } else { -- do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote_peer != NULL); -+ do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL); - } - } - -@@ -1798,8 +1814,12 @@ handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * req - rc = stonith_fence(request); - - } else if (call_options & st_opt_manual_ack) { -- remote_fencing_op_t *rop = initiate_remote_stonith_op(client, request, TRUE); -+ remote_fencing_op_t *rop = NULL; -+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); -+ const char *target = crm_element_value(dev, F_STONITH_TARGET); - -+ crm_notice("Recieved manual confirmation that %s is fenced", target); -+ rop = initiate_remote_stonith_op(client, request, TRUE); - rc = stonith_manual_ack(request, rop); - - } else { -@@ -1927,6 +1947,13 @@ handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * req - * If in progress, a reply will happen async after the request - * processing is finished */ - if (rc != -EINPROGRESS) { -+ crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, -+ id, is_set(call_options, st_opt_sync_call), call_options, -+ crm_element_value(request, F_STONITH_CALLOPTS)); -+ -+ if (is_set(call_options, st_opt_sync_call)) { -+ CRM_ASSERT(client == NULL || client->request_id == id); -+ } - reply = stonith_construct_reply(request, output, data, rc); - stonith_send_reply(reply, call_options, remote_peer, client_id); - } -@@ -1979,8 +2006,8 @@ 
stonith_command(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * re - } - - crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); -- crm_debug("Processing %s%s from %s (%16x)", op, is_reply ? " reply" : "", -- client ? client->name : remote_peer, call_options); -+ crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "", -+ id, client ? client->name : remote_peer, call_options); - - if (is_set(call_options, st_opt_sync_call)) { - CRM_ASSERT(client == NULL || client->request_id == id); -diff --git a/fencing/fence_dummy b/fencing/fence_dummy -new file mode 100644 -index 0000000..b202977 ---- /dev/null -+++ b/fencing/fence_dummy -@@ -0,0 +1,345 @@ -+#!/usr/bin/python -+ -+# The Following Agent Has Been Tested On: -+# -+# Virsh 0.3.3 on RHEL 5.2 with xen-3.0.3-51 -+# -+ -+import sys, time, random -+ -+#BEGIN_VERSION_GENERATION -+RELEASE_VERSION="3.1.6" -+BUILD_DATE="(built Mon Oct 24 12:14:08 UTC 2011)" -+REDHAT_COPYRIGHT="Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved." -+#END_VERSION_GENERATION -+ -+all_opt = { -+ "help" : { -+ "getopt" : "h", -+ "longopt" : "help", -+ "help" : "-h, --help Display this help and exit", -+ "required" : "0", -+ "shortdesc" : "Display help and exit", -+ "order" : 54 }, -+ "version" : { -+ "getopt" : "V", -+ "longopt" : "version", -+ "help" : "-V, --version Output version information and exit", -+ "required" : "0", -+ "shortdesc" : "Display version information and exit", -+ "order" : 53 }, -+ "quiet" : { -+ "getopt" : "q", -+ "help" : "", -+ "order" : 50 }, -+ "verbose" : { -+ "getopt" : "v", -+ "longopt" : "verbose", -+ "help" : "-v, --verbose Verbose mode", -+ "required" : "0", -+ "shortdesc" : "Verbose mode", -+ "order" : 51 }, -+ "debug" : { -+ "getopt" : "D:", -+ "longopt" : "debug-file", -+ "help" : "-D, --debug-file= Debugging to output file", -+ "required" : "0", -+ "shortdesc" : "Write debug information to given file", -+ "order" : 52 }, -+ "delay" : { -+ "getopt" : "f:", -+ "longopt" : "delay", -+ "help" : "--delay Wait X seconds before fencing is started", -+ "required" : "0", -+ "shortdesc" : "Wait X seconds before fencing is started", -+ "default" : "0", -+ "order" : 200 }, -+ "action" : { -+ "getopt" : "o:", -+ "longopt" : "action", -+ "help" : "-o, --action= Action: status, reboot (default), off or on", -+ "required" : "1", -+ "shortdesc" : "Fencing Action", -+ "default" : "reboot", -+ "order" : 1 }, -+ "port" : { -+ "getopt" : "n:", -+ "longopt" : "plug", -+ "help" : "-n, --plug= Physical plug number on device or\n" + -+ " name of virtual machine", -+ "required" : "1", -+ "shortdesc" : "Physical plug number or name of virtual machine", -+ "order" : 1 }, -+ "switch" : { -+ "getopt" : "s:", -+ "longopt" : "switch", -+ "help" : "-s, --switch= Physical switch number on device", -+ "required" : "0", -+ "shortdesc" : "Physical switch number on device", -+ "order" : 1 }, -+ "uuid" : { -+ "getopt" : "U:", -+ "longopt" : "uuid", -+ "help" : "-U, --uuid UUID of the VM to fence.", -+ "required" : "0", -+ "shortdesc" : "The UUID of the virtual machine to fence.", -+ "order" : 1} -+} -+ -+common_opt = [ "retry_on", "delay" ] -+ -+def show_docs(options, docs = None): -+ device_opt = options["device_opt"] -+ -+ if docs == None: -+ docs = { } -+ docs["shortdesc"] = "Fence agent" -+ docs["longdesc"] = "" -+ -+ ## Process special options (and exit) -+ ##### -+ if options.has_key("-h"): -+ usage(device_opt) -+ sys.exit(0) -+ -+ if options.has_key("-o") and options["-o"].lower() == "metadata": -+ 
metadata(device_opt, options, docs) -+ sys.exit(0) -+ -+ if options.has_key("-V"): -+ print __main__.RELEASE_VERSION, __main__.BUILD_DATE -+ print __main__.REDHAT_COPYRIGHT -+ sys.exit(0) -+ -+def usage(avail_opt): -+ global all_opt -+ -+ print "Usage:" -+ print "\t" + os.path.basename(sys.argv[0]) + " [options]" -+ print "Options:" -+ -+ sorted_list = [ (key, all_opt[key]) for key in avail_opt ] -+ sorted_list.sort(lambda x, y: cmp(x[1]["order"], y[1]["order"])) -+ -+ for key, value in sorted_list: -+ if len(value["help"]) != 0: -+ print " " + value["help"] -+ -+def metadata(avail_opt, options, docs): -+ global all_opt -+ -+ sorted_list = [ (key, all_opt[key]) for key in avail_opt ] -+ sorted_list.sort(lambda x, y: cmp(x[1]["order"], y[1]["order"])) -+ -+ print "" -+ print "" -+ print "" + docs["longdesc"] + "" -+ if docs.has_key("vendorurl"): -+ print "" + docs["vendorurl"] + "" -+ print "" -+ for option, value in sorted_list: -+ if all_opt[option].has_key("shortdesc"): -+ print "\t" -+ -+ default = "" -+ if all_opt[option].has_key("default"): -+ default = "default=\""+str(all_opt[option]["default"])+"\"" -+ elif options.has_key("-" + all_opt[option]["getopt"][:-1]): -+ if options["-" + all_opt[option]["getopt"][:-1]]: -+ try: -+ default = "default=\"" + options["-" + all_opt[option]["getopt"][:-1]] + "\"" -+ except TypeError: -+ ## @todo/@note: Currently there is no clean way how to handle lists -+ ## we can create a string from it but we can't set it on command line -+ default = "default=\"" + str(options["-" + all_opt[option]["getopt"][:-1]]) +"\"" -+ elif options.has_key("-" + all_opt[option]["getopt"]): -+ default = "default=\"true\" " -+ -+ mixed = all_opt[option]["help"] -+ ## split it between option and help text -+ res = re.compile("^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) -+ if (None != res): -+ mixed = res.group(1) -+ mixed = mixed.replace("<", "<").replace(">", ">") -+ print "\t\t" -+ -+ if all_opt[option]["getopt"].count(":") > 0: -+ print "\t\t" -+ else: -+ print "\t\t" -+ -+ print "\t\t" + all_opt[option]["shortdesc"] + "" -+ print "\t" -+ print "" -+ print "" -+ if avail_opt.count("io_fencing") == 0: -+ print "\t" -+ print "\t" -+ print "\t" -+ else: -+ print "\t" -+ print "\t" -+ -+ print "\t" -+ print "\t" -+ print "\t" -+ print "\t" -+ print "" -+ print "" -+ -+def process_input(avail_opt): -+ global all_opt -+ global common_opt -+ -+ ## -+ ## Add options which are available for every fence agent -+ ##### -+ avail_opt.extend(common_opt) -+ -+ ## -+ ## Set standard environment -+ ##### -+ os.putenv("LANG", "C") -+ os.putenv("LC_ALL", "C") -+ -+ ## -+ ## Prepare list of options for getopt -+ ##### -+ getopt_string = "" -+ longopt_list = [ ] -+ for k in avail_opt: -+ if all_opt.has_key(k): -+ getopt_string += all_opt[k]["getopt"] -+ else: -+ fail_usage("Parse error: unknown option '"+k+"'") -+ -+ if all_opt.has_key(k) and all_opt[k].has_key("longopt"): -+ if all_opt[k]["getopt"].endswith(":"): -+ longopt_list.append(all_opt[k]["longopt"] + "=") -+ else: -+ longopt_list.append(all_opt[k]["longopt"]) -+ -+ ## Compatibility layer -+ if avail_opt.count("module_name") == 1: -+ getopt_string += "n:" -+ longopt_list.append("plug=") -+ -+ ## -+ ## Read options from command line or standard input -+ ##### -+ if len(sys.argv) > 1: -+ try: -+ opt, args = getopt.gnu_getopt(sys.argv[1:], getopt_string, longopt_list) -+ except getopt.GetoptError, error: -+ fail_usage("Parse error: " + error.msg) -+ -+ ## Transform longopt to short one which are used in fencing agents -+ 
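
The rewrite loop that follows mirrors what getopt_long() gives C callers for
free: the long and the short spelling of an option collapse into one short
key, which is all the rest of the agent ever looks up. A minimal C sketch of
that equivalence, with an illustrative option table rather than the agent's
real one:

#include <getopt.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    static struct option longopts[] = {
        { "action", required_argument, NULL, 'o' },
        { "plug",   required_argument, NULL, 'n' },
        { "mode",   required_argument, NULL, 'M' },
        { NULL, 0, NULL, 0 }
    };
    int c;

    /* "--plug=vm1" and "-n vm1" both land in the 'n' case, which is what
       the Python loop below emulates by rewriting dictionary keys */
    while ((c = getopt_long(argc, argv, "o:n:M:", longopts, NULL)) != -1) {
        printf("-%c = %s\n", c, optarg ? optarg : "");
    }
    return 0;
}
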
##### -+ old_opt = opt -+ opt = { } -+ for o in dict(old_opt).keys(): -+ if o.startswith("--"): -+ for x in all_opt.keys(): -+ if all_opt[x].has_key("longopt") and "--" + all_opt[x]["longopt"] == o: -+ opt["-" + all_opt[x]["getopt"].rstrip(":")] = dict(old_opt)[o] -+ else: -+ opt[o] = dict(old_opt)[o] -+ -+ ## Compatibility Layer -+ ##### -+ z = dict(opt) -+ if z.has_key("-T") == 1: -+ z["-o"] = "status" -+ if z.has_key("-n") == 1: -+ z["-m"] = z["-n"] -+ -+ opt = z -+ ## -+ ##### -+ else: -+ opt = { } -+ name = "" -+ for line in sys.stdin.readlines(): -+ line = line.strip() -+ if ((line.startswith("#")) or (len(line) == 0)): -+ continue -+ -+ (name, value) = (line + "=").split("=", 1) -+ value = value[:-1] -+ -+ ## Compatibility Layer -+ ###### -+ if name == "option": -+ name = "action" -+ -+ ## -+ ###### -+ if avail_opt.count(name) == 0: -+ sys.stderr.write("Parse error: Ignoring unknown option '"+line+"'\n") -+ continue -+ -+ if all_opt[name]["getopt"].endswith(":"): -+ opt["-"+all_opt[name]["getopt"].rstrip(":")] = value -+ elif ((value == "1") or (value.lower() == "yes") or (value.lower() == "on") or (value.lower() == "true")): -+ opt["-"+all_opt[name]["getopt"]] = "1" -+ return opt -+ -+def atexit_handler(): -+ try: -+ sys.stdout.close() -+ os.close(1) -+ except IOError: -+ sys.stderr.write("%s failed to close standard output\n"%(sys.argv[0])) -+ sys.exit(EC_GENERIC_ERROR) -+ -+def main(): -+ global all_opt -+ device_opt = [ "help", "version", "verbose", "debug", "action", "port", -+ "power_timeout", "random_sleep_range"] -+ -+ all_opt["random_sleep_range"] = { -+ "getopt" : "R:", -+ "longopt" : "random_sleep_range", -+ "help" : "--random_sleep-range=Issue a sleep between 1 and . Used for testing.", -+ "order" : 1 } -+ -+ all_opt["mode"] = { -+ "getopt" : "M:", -+ "longopt" : "mode", -+ "help" : "--mode=(pass|fail|random). Used for testing.", -+ "order" : 1 } -+ -+ ## Defaults for fence agent -+ docs = { } -+ docs["shortdesc"] = "Dummy fence agent" -+ docs["longdesc"] = "fence_dummy is a fake Fencing agent which reports success based on it's mode (pass|fail|random) without doing anything." -+ -+ atexit.register(atexit_handler) -+ options = process_input(device_opt) -+ show_docs(options, docs) -+ -+ # random sleep for testing -+ if options.has_key("-f"): -+ val = int(options["-f"]) -+ sys.stderr.write("delay sleep for %d seconds" % val) -+ time.sleep(val) -+ -+ if options.has_key("-R"): -+ val = int(options["-R"]) -+ ran = random.randint(1, val) -+ sys.stderr.write("random sleep for %d seconds" % ran) -+ time.sleep(ran) -+ -+ if options.has_key("-o") and (options["-o"] == "monitor"): -+ sys.exit(0) -+ -+ if options.has_key("-M"): -+ if options["-M"] == "pass": -+ sys.exit(0) -+ elif options["-M"] == "fail": -+ sys.exit(1) -+ -+ sys.exit(random.randint(0, 1)) -+ -+if __name__ == "__main__": -+ main() -diff --git a/fencing/fence_false b/fencing/fence_false -deleted file mode 100755 -index 3e41751..0000000 ---- a/fencing/fence_false -+++ /dev/null -@@ -1,97 +0,0 @@ --#!/usr/bin/python -- --# The Following Agent Has Been Tested On: --# --# Virsh 0.3.3 on RHEL 5.2 with xen-3.0.3-51 --# -- --import sys, time, random --sys.path.append("/usr/share/fence") --from fencing import * -- -- --#BEGIN_VERSION_GENERATION --RELEASE_VERSION="3.1.6" --BUILD_DATE="(built Mon Oct 24 12:14:08 UTC 2011)" --REDHAT_COPYRIGHT="Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved." 
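
fence_dummy's main() above boils down to a three-way contract keyed on the
--mode option: "pass" always exits 0, "fail" always exits 1, and anything
else is a coin flip. The same contract restated as a C sketch; the function
name is a stand-in:

#include <stdlib.h>
#include <string.h>
#include <time.h>

static int dummy_fence_result(const char *mode)
{
    if (mode != NULL && strcmp(mode, "pass") == 0) {
        return 0;                       /* always succeed */
    }
    if (mode != NULL && strcmp(mode, "fail") == 0) {
        return 1;                       /* always fail */
    }
    srand((unsigned int) time(NULL));
    return rand() % 2;                  /* no mode given: random outcome */
}

That randomness is what makes the agent useful in regression runs: both the
success and the failure paths of the fencing daemon get exercised without
touching real hardware.
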
--#END_VERSION_GENERATION --plug_status="on" -- --def get_outlets_status(conn, options): -- result={} -- global plug_status -- -- if options.has_key("-o") and options["-o"] == "on": -- plug_status = "off" -- -- # This fake agent has no port data to list, so we have to make -- # something up for the list action. -- if options.has_key("-o") and options["-o"] == "list": -- result["fake_port_1"]=[plug_status, "fake"] -- result["fake_port_2"]=[plug_status, "fake"] -- elif (options.has_key("-n") == 0): -- fail_usage("Failed: You have to enter existing machine!") -- else: -- port=options["-n"] -- result[port]=[plug_status, "fake"] -- -- return result -- --def get_power_status(conn, options): -- outlets=get_outlets_status(conn,options) -- -- if len(outlets) == 0 or options.has_key("-n") == 0: -- fail_usage("Failed: You have to enter existing machine!") -- else: -- return outlets[options["-n"]][0] -- --def set_power_status(conn, options): -- global plug_status -- plug_status = "unknown" -- if options.has_key("-o") and options["-o"] == "on": -- plug_status = "off" -- --def main(): -- global all_opt -- device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug", "action", "port", -- "no_password", "power_wait", "power_timeout", "random_sleep_range"] -- -- all_opt["random_sleep_range"] = { -- "getopt" : "R:", -- "longopt" : "random_sleep_range", -- "help" : "--random_sleep_range=Issue a sleep between 1 and . Used for testing.", -- "order" : 1 } -- -- atexit.register(atexit_handler) -- -- pinput = process_input(device_opt) -- -- # Fake options to keep the library happy -- #pinput["-p"] = "none" -- pinput["-a"] = "localhost" -- pinput["-C"] = "," -- -- options = check_input(device_opt, pinput) -- -- # random sleep for testing -- if options.has_key("-R"): -- val = int(options["-R"]) -- ran = random.randint(1, val) -- print "random sleep for %d seconds" % ran -- time.sleep(ran) -- -- if options.has_key("-o") and (options["-o"] == "monitor"): -- sys.exit(0) -- -- ## Defaults for fence agent -- docs = { } -- docs["shortdesc"] = "Fake fence agent" -- docs["longdesc"] = "fence_true is a fake Fencing agent which always reports failure without doing anything." -- show_docs(options, docs) -- -- ## Operate the fencing device -- result = fence_action(None, options, set_power_status, get_power_status, get_outlets_status) -- sys.exit(result) -- --if __name__ == "__main__": -- main() -diff --git a/fencing/fence_true b/fencing/fence_true -deleted file mode 100755 -index d94e335..0000000 ---- a/fencing/fence_true -+++ /dev/null -@@ -1,92 +0,0 @@ --#!/usr/bin/python -- --# The Following Agent Has Been Tested On: --# --# Virsh 0.3.3 on RHEL 5.2 with xen-3.0.3-51 --# -- --import sys, time, random --sys.path.append("/usr/share/fence") --from fencing import * -- --#BEGIN_VERSION_GENERATION --RELEASE_VERSION="3.1.6" --BUILD_DATE="(built Mon Oct 24 12:14:08 UTC 2011)" --REDHAT_COPYRIGHT="Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved." --#END_VERSION_GENERATION --plug_status="on" -- --def get_outlets_status(conn, options): -- result={} -- -- # This fake agent has no port data to list, so we have to make -- # something up for the list action. 
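
The difference between the two deleted agents is a single state transition.
fence_true records the requested state, so the framework's follow-up status
check sees the plug go "off" and reports success; fence_false (above) moves
the plug to "unknown" instead, so the same check can never pass. A C sketch
of the failing variant, with the global standing in for the Python module
state:

#include <string.h>

static const char *plug_status = "on";

/* deliberately wrong: an "off" request leaves the status "unknown" and an
   "on" request reports "off", so a status read-back never matches */
static void set_power_status_false(const char *action)
{
    plug_status = (strcmp(action, "on") == 0) ? "off" : "unknown";
}

static const char *get_power_status_false(void)
{
    return plug_status;         /* what the post-action check reads back */
}
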
-- if options.has_key("-o") and options["-o"] == "list": -- result["fake_port_1"]=[plug_status, "fake"] -- result["fake_port_2"]=[plug_status, "fake"] -- elif (options.has_key("-n") == 0): -- fail_usage("Failed: You have to enter existing machine!") -- else: -- port=options["-n"] -- result[port]=[plug_status, "fake"] -- -- return result -- --def get_power_status(conn, options): -- outlets=get_outlets_status(conn,options) -- -- if len(outlets) == 0 or options.has_key("-n") == 0: -- fail_usage("Failed: You have to enter existing machine!") -- else: -- return outlets[options["-n"]][0] -- --def set_power_status(conn, options): -- global plug_status -- plug_status = "off" -- if options.has_key("-o") and options["-o"] == "on": -- plug_status = "on" -- --def main(): -- global all_opt -- device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug", "action", "port", -- "no_password", "power_wait", "power_timeout", "random_sleep_range"] -- -- all_opt["random_sleep_range"] = { -- "getopt" : "R:", -- "longopt" : "random_sleep_range", -- "help" : "--random_sleep-range=Issue a sleep between 1 and . Used for testing.", -- "order" : 1 } -- -- atexit.register(atexit_handler) -- -- pinput = process_input(device_opt) -- -- # Fake options to keep the library happy -- #pinput["-p"] = "none" -- pinput["-a"] = "localhost" -- pinput["-C"] = "," -- -- options = check_input(device_opt, pinput) -- -- # random sleep for testing -- if options.has_key("-R"): -- val = int(options["-R"]) -- ran = random.randint(1, val) -- print "random sleep for %d seconds" % ran -- time.sleep(ran) -- -- if options.has_key("-o") and (options["-o"] == "monitor"): -- sys.exit(0) -- -- ## Defaults for fence agent -- docs = { } -- docs["shortdesc"] = "Fake fence agent" -- docs["longdesc"] = "fence_true is a fake Fencing agent which always reports success without doing anything." -- show_docs(options, docs) -- -- ## Operate the fencing device -- result = fence_action(None, options, set_power_status, get_power_status, get_outlets_status) -- sys.exit(result) -- --if __name__ == "__main__": -- main() -diff --git a/fencing/internal.h b/fencing/internal.h -index de23e44..737fc18 100644 ---- a/fencing/internal.h -+++ b/fencing/internal.h -@@ -93,6 +93,8 @@ typedef struct remote_fencing_op_s { - char *originator; - /*! The local client id that initiated the fencing request */ - char *client_id; -+ /*! The client's call_id that initiated the fencing request */ -+ int client_callid; - /*! The name of client that initiated the fencing request */ - char *client_name; - /*! 
List of the received query results for all the nodes in the cpg group */ -@@ -158,6 +160,8 @@ extern void free_topology_entry(gpointer data); - - bool fencing_peer_active(crm_node_t *peer); - -+int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op); -+ - extern char *stonith_our_uname; - extern gboolean stand_alone; - extern GHashTable *device_list; -diff --git a/fencing/main.c b/fencing/main.c -index 728f3dd..c7b67a1 100644 ---- a/fencing/main.c -+++ b/fencing/main.c -@@ -42,12 +42,15 @@ - #include - - #include -+#include -+#include - - #include - - #include - - char *stonith_our_uname = NULL; -+char *stonith_our_uuid = NULL; - - GMainLoop *mainloop = NULL; - -@@ -56,6 +59,10 @@ gboolean no_cib_connect = FALSE; - gboolean stonith_shutdown_flag = FALSE; - - qb_ipcs_service_t *ipcs = NULL; -+xmlNode *local_cib = NULL; -+ -+static cib_t *cib_api = NULL; -+static void *cib_library = NULL; - - static void stonith_shutdown(int nsig); - static void stonith_cleanup(void); -@@ -89,7 +96,10 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) - xmlNode *request = NULL; - crm_client_t *c = crm_client_get(qbc); - -- CRM_CHECK(c != NULL, goto cleanup); -+ if (c == NULL) { -+ crm_info("Invalid client: %p", qbc); -+ return 0; -+ } - - request = crm_ipcs_recv(c, data, size, &id, &flags); - if (request == NULL) { -@@ -106,7 +116,9 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) - c->name = g_strdup_printf("%s.%u", value, c->pid); - } - -+ crm_trace("Flags %u for command %u from %s", flags, id, crm_client_name(c)); - if (flags & crm_ipc_client_response) { -+ crm_trace("Need response"); - CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ - c->request_id = id; /* Reply only to the last one */ - } -@@ -118,11 +130,6 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) - crm_log_xml_trace(request, "Client[inbound]"); - stonith_command(c, id, flags, request, NULL); - -- cleanup: -- if (c == NULL) { -- crm_log_xml_notice(request, "Invalid client"); -- } -- - free_xml(request); - return 0; - } -@@ -432,11 +439,7 @@ topology_register_helper(const char *node, int level, stonith_key_value_t * devi - static void - remove_cib_device(xmlXPathObjectPtr xpathObj) - { -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; -@@ -460,11 +463,7 @@ remove_cib_device(xmlXPathObjectPtr xpathObj) - static void - remove_fencing_topology(xmlXPathObjectPtr xpathObj) - { -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); -@@ -492,67 +491,9 @@ remove_fencing_topology(xmlXPathObjectPtr xpathObj) - } - - static void --register_cib_device(xmlXPathObjectPtr xpathObj, gboolean force) --{ -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -- -- for (lpc = 0; lpc < max; lpc++) { -- const char *rsc_id = NULL; -- const char *agent = NULL; -- const char *standard = NULL; -- const char *provider = NULL; -- stonith_key_value_t *params = NULL; -- xmlNode *match = getXpathResult(xpathObj, lpc); -- xmlNode *attributes; -- xmlNode *attr; -- xmlNode *data; -- -- CRM_CHECK(match != NULL, continue); 
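
The recurring cleanup in this file is the same one made throughout the
patch: open-coded checks of xpathObj->nodesetval give way to
numXpathResults() plus an unconditional freeXpathObject(). A sketch of the
contract those helpers provide, assuming libxml2 types; Pacemaker's real
implementations may carry extra bookkeeping:

#include <libxml/xpath.h>

static int num_xpath_results(xmlXPathObjectPtr obj)
{
    if (obj == NULL || obj->nodesetval == NULL) {
        return 0;               /* covers both "no object" and "no hits" */
    }
    return obj->nodesetval->nodeNr;
}

static void free_xpath_object(xmlXPathObjectPtr obj)
{
    if (obj != NULL) {
        xmlXPathFreeObject(obj);   /* NULL-safe, so every exit path can call it */
    }
}

Because both helpers tolerate NULL, callers can drop the "if (xpathObj)"
guards that previously wrapped every search result, which is exactly the
shape of the hunks above and below.
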
-- -- standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); -- agent = crm_element_value(match, XML_EXPR_ATTR_TYPE); -- provider = crm_element_value(match, XML_AGENT_ATTR_PROVIDER); -- -- if (safe_str_neq(standard, "stonith") || !agent) { -- continue; -- } -- -- rsc_id = crm_element_value(match, XML_ATTR_ID); -- attributes = find_xml_node(match, XML_TAG_ATTR_SETS, FALSE); -- -- for (attr = __xml_first_child(attributes); attr; attr = __xml_next(attr)) { -- const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); -- const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); -- -- if (!name || !value) { -- continue; -- } -- params = stonith_key_value_add(params, name, value); -- } -- -- data = create_device_registration_xml(rsc_id, provider, agent, params); -- -- if (force == FALSE && crm_element_value(match, XML_DIFF_MARKER)) { -- stonith_device_register(data, NULL, TRUE); -- } else { -- stonith_device_remove(rsc_id, TRUE); -- stonith_device_register(data, NULL, TRUE); -- } -- } --} -- --static void - register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) - { -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - int index = 0; -@@ -622,50 +563,158 @@ fencing_topology_init(xmlNode * msg) - - register_fencing_topology(xpathObj, TRUE); - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ freeXpathObject(xpathObj); -+} -+ -+#define rsc_name(x) x->clone_name?x->clone_name:x->id -+ -+static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) -+{ -+ node_t *node = NULL; -+ const char *value = NULL; -+ const char *rclass = NULL; -+ -+ if(rsc->children) { -+ GListPtr gIter = NULL; -+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { -+ cib_device_update(gIter->data, data_set); -+ if(rsc->variant == pe_clone || rsc->variant == pe_master) { -+ crm_trace("Only processing one copy of the clone %s", rsc->id); -+ break; -+ } -+ } -+ return; -+ } -+ -+ if(g_hash_table_lookup(device_list, rsc_name(rsc))) { -+ stonith_device_remove(rsc_name(rsc), TRUE); -+ } -+ -+ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -+ if(safe_str_neq(rclass, "stonith")) { -+ return; -+ } -+ -+ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); -+ if(value && strcmp(RSC_STOPPED, value) == 0) { -+ crm_info("Device %s has been disabled", rsc->id); -+ return; -+ -+ } else if(stonith_our_uname) { -+ GHashTableIter iter; -+ -+ g_hash_table_iter_init(&iter, rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -+ if(node && strcmp(node->details->uname, stonith_our_uname) == 0) { -+ break; -+ } -+ node = NULL; -+ } -+ } -+ -+ if(node == NULL) { -+ GHashTableIter iter; -+ -+ crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); -+ g_hash_table_iter_init(&iter, rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -+ crm_trace("Available: %s = %d", node->details->uname, node->weight); -+ } -+ -+ return; -+ -+ } else if(node->weight < 0) { -+ crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score2char(node->weight)); -+ return; -+ -+ } else { -+ xmlNode *data; -+ GHashTableIter gIter; -+ stonith_key_value_t *params = NULL; -+ -+ const char *name = NULL; -+ const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); -+ const char *provider = 
crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -+ -+ crm_info("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); -+ get_rsc_attributes(rsc->parameters, rsc, node, data_set); -+ -+ g_hash_table_iter_init(&gIter, rsc->parameters); -+ while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { -+ if (!name || !value) { -+ continue; -+ } -+ params = stonith_key_value_add(params, name, value); -+ crm_trace(" %s=%s", name, value); -+ } -+ -+ data = create_device_registration_xml(rsc_name(rsc), provider, agent, params); -+ stonith_device_register(data, NULL, TRUE); -+ -+ stonith_key_value_freeall(params, 1, 1); -+ free_xml(data); - } - } - -+extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now); -+extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); -+ - static void --cib_stonith_devices_init(xmlNode * msg) -+cib_devices_update(void) - { -- xmlXPathObjectPtr xpathObj = NULL; -- const char *xpath = "//" XML_CIB_TAG_RESOURCE; -+ GListPtr gIter = NULL; -+ pe_working_set_t data_set; - -- crm_trace("Pushing in stonith devices"); -+ set_working_set_defaults(&data_set); -+ data_set.input = local_cib; -+ data_set.now = crm_time_new(NULL); -+ data_set.flags |= pe_flag_quick_location; -+ data_set.localhost = stonith_our_uname; - -- /* Grab everything */ -- xpathObj = xpath_search(msg, xpath); -+ cluster_status(&data_set); -+ do_calculations(&data_set, NULL, NULL); - -- if (xpathObj) { -- register_cib_device(xpathObj, TRUE); -- xmlXPathFreeObject(xpathObj); -+ for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { -+ cib_device_update(gIter->data, &data_set); - } -+ data_set.input = NULL; /* Wasn't a copy */ -+ cleanup_alloc_calculations(&data_set); - } - - static void - update_cib_stonith_devices(const char *event, xmlNode * msg) - { -- -- const char *xpath_add = -- "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE; -- const char *xpath_del = -- "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE; -+ const char *reason = "none"; -+ gboolean needs_update = FALSE; - xmlXPathObjectPtr xpath_obj = NULL; - -+ /* process new constraints */ -+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); -+ if (numXpathResults(xpath_obj) > 0) { -+ /* Safest and simplest to always recompute */ -+ needs_update = TRUE; -+ reason = "new location constraint"; -+ } -+ freeXpathObject(xpath_obj); -+ - /* process deletions */ -- xpath_obj = xpath_search(msg, xpath_del); -- if (xpath_obj) { -+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); -+ if (numXpathResults(xpath_obj) > 0) { - remove_cib_device(xpath_obj); -- xmlXPathFreeObject(xpath_obj); - } -+ freeXpathObject(xpath_obj); - - /* process additions */ -- xpath_obj = xpath_search(msg, xpath_add); -- if (xpath_obj) { -- register_cib_device(xpath_obj, FALSE); -- xmlXPathFreeObject(xpath_obj); -+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); -+ if (numXpathResults(xpath_obj) > 0) { -+ needs_update = TRUE; -+ reason = "new resource"; -+ } -+ freeXpathObject(xpath_obj); -+ -+ if(needs_update) { -+ crm_info("Updating device list from the cib: %s", reason); -+ cib_devices_update(); - } - } - -@@ -680,25 +729,61 @@ update_fencing_topology(const char *event, xmlNode * msg) - xpathObj 
= xpath_search(msg, xpath); - - remove_fencing_topology(xpathObj); -- -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - - /* Process additions and changes */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - register_fencing_topology(xpathObj, FALSE); -- -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - } -+static bool have_cib_devices = FALSE; - - static void - update_cib_cache_cb(const char *event, xmlNode * msg) - { -+ int rc = pcmk_ok; -+ static int (*cib_apply_patch_event)(xmlNode *, xmlNode *, xmlNode **, int) = NULL; -+ -+ if(!have_cib_devices) { -+ crm_trace("Skipping updates until we get a full dump"); -+ return; -+ } -+ if (cib_apply_patch_event == NULL) { -+ cib_apply_patch_event = find_library_function(&cib_library, CIB_LIBRARY, "cib_apply_patch_event", TRUE); -+ } -+ -+ CRM_ASSERT(cib_apply_patch_event); -+ -+ /* Maintain a local copy of the CIB so that we have full access to the device definitions and location constraints */ -+ if (local_cib != NULL) { -+ xmlNode *cib_last = local_cib; -+ -+ local_cib = NULL; -+ rc = (*cib_apply_patch_event)(msg, cib_last, &local_cib, LOG_DEBUG); -+ free_xml(cib_last); -+ -+ switch (rc) { -+ case -pcmk_err_diff_resync: -+ case -pcmk_err_diff_failed: -+ crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); -+ case pcmk_ok: -+ break; -+ default: -+ crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); -+ } -+ } -+ -+ if (local_cib == NULL) { -+ crm_trace("Re-requesting the full cib after diff failure"); -+ rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); -+ if(rc != pcmk_ok) { -+ crm_err("Couldnt retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); -+ } -+ CRM_ASSERT(local_cib != NULL); -+ } -+ - update_fencing_topology(event, msg); - update_cib_stonith_devices(event, msg); - } -@@ -706,8 +791,12 @@ update_cib_cache_cb(const char *event, xmlNode * msg) - static void - init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) - { -+ crm_info("Updating device list from the cib: init"); -+ have_cib_devices = TRUE; -+ local_cib = copy_xml(output); -+ - fencing_topology_init(msg); -- cib_stonith_devices_init(msg); -+ cib_devices_update(); - } - - static void -@@ -719,12 +808,10 @@ stonith_shutdown(int nsig) - g_main_quit(mainloop); - } else { - stonith_cleanup(); -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - } - --cib_t *cib = NULL; -- - static void - cib_connection_destroy(gpointer user_data) - { -@@ -734,8 +821,8 @@ cib_connection_destroy(gpointer user_data) - } else { - crm_notice("Connection to the CIB terminated. 
Shutting down."); - } -- if (cib) { -- cib->cmds->signoff(cib); -+ if (cib_api) { -+ cib_api->cmds->signoff(cib_api); - } - stonith_shutdown(0); - } -@@ -743,8 +830,8 @@ cib_connection_destroy(gpointer user_data) - static void - stonith_cleanup(void) - { -- if (cib) { -- cib->cmds->signoff(cib); -+ if (cib_api) { -+ cib_api->cmds->signoff(cib_api); - } - - if (ipcs) { -@@ -753,6 +840,7 @@ stonith_cleanup(void) - crm_peer_destroy(); - crm_client_cleanup(); - free(stonith_our_uname); -+ free_xml(local_cib); - } - - /* *INDENT-OFF* */ -@@ -770,47 +858,39 @@ static struct crm_option long_options[] = { - static void - setup_cib(void) - { -- static void *cib_library = NULL; -- static cib_t *(*cib_new_fn) (void) = NULL; -- static const char *(*cib_err_fn) (int) = NULL; -- - int rc, retries = 0; -+ static cib_t *(*cib_new_fn) (void) = NULL; - -- if (cib_library == NULL) { -- cib_library = dlopen(CIB_LIBRARY, RTLD_LAZY); -- } -- if (cib_library && cib_new_fn == NULL) { -- cib_new_fn = dlsym(cib_library, "cib_new"); -- } -- if (cib_library && cib_err_fn == NULL) { -- cib_err_fn = dlsym(cib_library, "pcmk_strerror"); -+ if (cib_new_fn == NULL) { -+ cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE); - } -+ - if (cib_new_fn != NULL) { -- cib = (*cib_new_fn) (); -+ cib_api = (*cib_new_fn) (); - } - -- if (cib == NULL) { -+ if (cib_api == NULL) { - crm_err("No connection to the CIB"); - return; - } - - do { - sleep(retries); -- rc = cib->cmds->signon(cib, CRM_SYSTEM_CRMD, cib_command); -+ rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_CRMD, cib_command); - } while (rc == -ENOTCONN && ++retries < 5); - - if (rc != pcmk_ok) { -- crm_err("Could not connect to the CIB service: %d %p", rc, cib_err_fn); -+ crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc); - - } else if (pcmk_ok != -- cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { -+ cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { - crm_err("Could not set CIB notification callback"); - - } else { -- rc = cib->cmds->query(cib, NULL, NULL, cib_scope_local); -- cib->cmds->register_callback(cib, rc, 120, FALSE, NULL, "init_cib_cache_cb", -- init_cib_cache_cb); -- cib->cmds->set_connection_dnotify(cib, cib_connection_destroy); -+ rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); -+ cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", -+ init_cib_cache_cb); -+ cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); - crm_notice("Watching for stonith topology changes"); - } - } -@@ -904,7 +984,7 @@ main(int argc, char **argv) - - printf(" \n"); - printf -- (" The priority of the stonith resource. The lower the number, the higher the priority.\n"); -+ (" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n"); - printf(" \n"); - printf(" \n"); - -@@ -1011,11 +1091,10 @@ main(int argc, char **argv) - - if (crm_cluster_connect(&cluster) == FALSE) { - crm_crit("Cannot sign in to the cluster... 
terminating"); -- crm_exit(100); -- } else { -- stonith_our_uname = cluster.uname; -+ crm_exit(DAEMON_RESPAWN_STOP); - } - stonith_our_uname = cluster.uname; -+ stonith_our_uuid = cluster.uuid; - - if (no_cib_connect == FALSE) { - setup_cib(); -@@ -1031,11 +1110,8 @@ main(int argc, char **argv) - - topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); - -- ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, &ipc_callbacks); -- if (ipcs == NULL) { -- crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -- } -+ stonith_ipc_server_init(&ipcs, &ipc_callbacks); -+ - #if SUPPORT_STONITH_CONFIG - if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { - standalone_cfg_commit(); -diff --git a/fencing/regression.py.in b/fencing/regression.py.in -index 851ae17..6b203a2 100644 ---- a/fencing/regression.py.in -+++ b/fencing/regression.py.in -@@ -618,7 +618,7 @@ class Tests: - test = self.new_test("%s_monitor_timeout" % test_type["prefix"], - "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") -- test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 23) -+ test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 195) - test.add_stonith_log_pattern("Attempt 2 to execute") - - # Verify monitor occurs for duration of timeout period on failure, but stops at max retries -@@ -626,7 +626,7 @@ class Tests: - test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], - "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") -- test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15", 23) -+ test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15",195) - test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times") - - # simple register test -@@ -753,7 +753,7 @@ class Tests: - test = self.new_test("cpg_unfence_on_target_2", - "Verify failure unfencing with on_target = true", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) -- test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 194) -+ test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 143) - test.add_stonith_log_pattern("(on) to be executed on the target node") - - -diff --git a/fencing/remote.c b/fencing/remote.c -index 61ea0c2..a3431f7 100644 ---- a/fencing/remote.c -+++ b/fencing/remote.c -@@ -76,6 +76,7 @@ free_remote_query(gpointer data) - - crm_trace("Free'ing query result from %s", query->host); - free(query->host); -+ g_list_free_full(query->device_list, free); - g_hash_table_destroy(query->custom_action_timeouts); - g_hash_table_destroy(query->verified_devices); - free(query); -@@ -487,6 +488,24 @@ static uint32_t fencing_active_peers(void) - return count; - } - -+int -+stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) -+{ -+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); -+ -+ op->state = st_done; -+ op->completed = time(NULL); -+ op->delegate = strdup("a human"); -+ -+ crm_notice("Injecting manual confirmation that %s is safely off/down", -+ crm_element_value(dev, F_STONITH_TARGET)); -+ -+ remote_op_done(op, msg, pcmk_ok, FALSE); -+ -+ /* 
Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */ -+ return -EINPROGRESS; -+} -+ - /*! - * \internal - * \brief Create a new remote stonith op -@@ -533,11 +552,13 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) - - g_hash_table_replace(remote_op_list, op->id, op); - CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL); -+ crm_trace("Created %s", op->id); - - op->state = st_query; - op->replies_expected = fencing_active_peers(); - op->action = crm_element_value_copy(dev, F_STONITH_ACTION); - op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); -+ op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ - op->created = time(NULL); - - if (op->originator == NULL) { -@@ -555,6 +576,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) - op->target = crm_element_value_copy(dev, F_STONITH_TARGET); - op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ - crm_element_value_int(request, F_STONITH_CALLOPTS, (int *)&(op->call_options)); -+ crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid)); - - crm_trace("%s new stonith op: %s - %s of %s for %s", - (peer -@@ -584,6 +606,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) - remote_fencing_op_t * - initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack) - { -+ int query_timeout = 0; - xmlNode *query = NULL; - const char *client_id = NULL; - remote_fencing_op_t *op = NULL; -@@ -597,7 +620,12 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma - CRM_LOG_ASSERT(client_id != NULL); - op = create_remote_stonith_op(client_id, request, FALSE); - op->owner = TRUE; -- -+ if (manual_ack) { -+ crm_notice("Initiating manual confirmation for %s: %s", -+ op->target, op->id); -+ return op; -+ } -+ - CRM_CHECK(op->action, return NULL); - - if (stonith_topology_next(op) != pcmk_ok) { -@@ -621,16 +649,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma - op->id, op->state); - } - -- query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0); -- -- if (!manual_ack) { -- int query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; -- -- op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); -- -- } else { -- crm_xml_add(query, F_STONITH_DEVICE, "manual_ack"); -- } -+ query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0); - - crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); - crm_xml_add(query, F_STONITH_TARGET, op->target); -@@ -641,8 +660,11 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma - crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); - - send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); -- - free_xml(query); -+ -+ query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; -+ op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); -+ - return op; - } - -@@ -854,7 +876,7 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) - } - - /* The client is connected to another node, relay this update to them */ -- update = stonith_create_op(0, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); -+ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); - crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); - crm_xml_add(update, F_STONITH_CLIENTID, client_id); 
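
The stonith_manual_ack() moved into this file short-circuits the whole
query/exec state machine: no device is scheduled, the operation is marked
complete on the spot, and the reply rides the normal completion path. A
compressed sketch with stand-in types (the real remote_fencing_op_t carries
far more state):

#include <errno.h>
#include <string.h>
#include <time.h>

enum op_state { st_query, st_exec, st_done, st_failed };

struct remote_op {
    enum op_state state;
    time_t completed;
    char *delegate;             /* who actually carried out the action */
};

static int manual_ack_sketch(struct remote_op *op)
{
    op->state = st_done;
    op->completed = time(NULL);
    op->delegate = strdup("a human");   /* the marker value the patch uses */
    /* remote_op_done() would broadcast the result at this point */
    return -EINPROGRESS;        /* tells the caller a reply is already on
                                   its way, so none is built synchronously */
}
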
- crm_xml_add(update, F_STONITH_CALLID, call_id); -@@ -878,7 +900,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) - const char *device = NULL; - int timeout = op->base_timeout; - -- crm_trace("State for %s.%.8s: %d", op->target, op->client_name, op->id, op->state); -+ crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state); - if (peer == NULL && !is_set(op->call_options, st_opt_topology)) { - peer = stonith_choose_peer(op); - } -@@ -905,7 +927,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) - - if (peer) { - int timeout_one = 0; -- xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0); -+ xmlNode *query = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); - - crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); - crm_xml_add(query, F_STONITH_TARGET, op->target); -@@ -951,6 +973,25 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) - CRM_LOG_ASSERT(op->state < st_done); - remote_op_timeout(op); - -+ } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { -+ int rc = -EHOSTUNREACH; -+ -+ /* if the operation never left the query state, -+ * but we have all the expected replies, then no devices -+ * are available to execute the fencing operation. */ -+ if (op->state == st_query) { -+ crm_info("None of the %d peers have devices capable of terminating %s for %s (%d)", -+ op->replies, op->target, op->client_name, op->state); -+ -+ rc = -ENODEV; -+ } else { -+ crm_info("None of the %d peers are capable of terminating %s for %s (%d)", -+ op->replies, op->target, op->client_name, op->state); -+ } -+ -+ op->state = st_failed; -+ remote_op_done(op, NULL, rc, FALSE); -+ - } else if (device) { - crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s", - op->target, device, op->client_name, op->id); -@@ -1112,17 +1153,24 @@ process_remote_stonith_query(xmlNode * msg) - * query results. */ - if (op->state == st_query && all_topology_devices_found(op)) { - /* All the query results are in for the topology, start the fencing ops. */ -+ crm_trace("All topology devices found"); - call_remote_stonith(op, result); -+ -+ } else if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) { -+ crm_info("All topology queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies); -+ call_remote_stonith(op, NULL); - } -+ - } else if (op->state == st_query) { - /* We have a result for a non-topology fencing op that looks promising, - * go ahead and start fencing before query timeout */ - if (host_is_target == FALSE && g_hash_table_size(result->verified_devices)) { - /* we have a verified device living on a peer that is not the target */ -+ crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices)); - call_remote_stonith(op, result); - - } else if (safe_str_eq(op->action, "on")) { -- /* unfencing. */ -+ crm_trace("Unfencing %s", op->target); - call_remote_stonith(op, result); - - } else if(op->replies >= op->replies_expected || op->replies >= active) { -@@ -1182,15 +1230,15 @@ process_remote_stonith_exec(xmlNode * msg) - - if (op->devices && device && safe_str_neq(op->devices->data, device)) { - crm_err -- ("Received outdated reply for device %s to %s node %s. Operation already timed out at remote level.", -- device, op->action, op->target); -+ ("Received outdated reply for device %s (instead of %s) to %s node %s. 
Operation already timed out at remote level.", -+ device, op->devices->data, op->action, op->target); - return rc; - } - - if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) { - crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)", - op->action, op->target, op->client_name, op->id, op->originator, -- rc == pcmk_ok ? "passed" : "failed", rc); -+ pcmk_strerror(rc), rc); - if (rc == pcmk_ok) { - op->state = st_done; - } else { -@@ -1212,7 +1260,7 @@ process_remote_stonith_exec(xmlNode * msg) - - crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)", - device, op->target, op->client_name, op->originator, -- rc == pcmk_ok ? "passed" : "failed", rc); -+ pcmk_strerror(rc), rc); - - /* We own the op, and it is complete. broadcast the result to all nodes - * and notify our local clients. */ -@@ -1282,8 +1330,10 @@ stonith_fence_history(xmlNode * msg, xmlNode ** output) - } - } - } -- *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); - -+ crm_trace("Looking for operations on %s in %p", target, remote_op_list); -+ -+ *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); - if (remote_op_list) { - GHashTableIter iter; - remote_fencing_op_t *op = NULL; -@@ -1297,11 +1347,13 @@ stonith_fence_history(xmlNode * msg, xmlNode ** output) - } - - rc = 0; -+ crm_trace("Attaching op %s", op->id); - entry = create_xml_node(*output, STONITH_OP_EXEC); - crm_xml_add(entry, F_STONITH_TARGET, op->target); - crm_xml_add(entry, F_STONITH_ACTION, op->action); - crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); - crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); -+ crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name); - crm_xml_add_int(entry, F_STONITH_DATE, op->completed); - crm_xml_add_int(entry, F_STONITH_STATE, op->state); - } -diff --git a/fencing/test.c b/fencing/test.c -index 5ae83f5..afedba8 100644 ---- a/fencing/test.c -+++ b/fencing/test.c -@@ -55,8 +55,8 @@ typedef void (*mainloop_test_iteration_cb) (int check_event); - mainloop_iter++; \ - mainloop_set_trigger(trig); \ - } else { \ -- crm_info("FAILURE = %s async_callback %d", __PRETTY_FUNCTION__, callback_rc); \ -- crm_exit(-1); \ -+ crm_err("FAILURE = %s async_callback %d", __PRETTY_FUNCTION__, callback_rc); \ -+ crm_exit(pcmk_err_generic); \ - } \ - callback_rc = 0; \ - -@@ -114,7 +114,7 @@ static void - st_callback(stonith_t * st, stonith_event_t * e) - { - if (st->state == stonith_disconnected) { -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - crm_notice("Operation %s requested by %s %s for peer %s. 
%s reported: %s (ref=%s)", -@@ -161,12 +161,12 @@ passive_test(void) - dispatch_helper(500); \ - } \ - if (rc != expected_rc) { \ -- crm_info("FAILURE - expected rc %d != %d(%s) for cmd - %s\n", expected_rc, rc, pcmk_strerror(rc), str); \ -- crm_exit(-1); \ -+ crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s\n", expected_rc, rc, pcmk_strerror(rc), str); \ -+ crm_exit(pcmk_err_generic); \ - } else if (expected_notifications) { \ -- crm_info("FAILURE - expected %d notifications, got only %d for cmd - %s\n", \ -+ crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s\n", \ - num_notifications, num_notifications - expected_notifications, str); \ -- crm_exit(-1); \ -+ crm_exit(pcmk_err_generic); \ - } else { \ - if (verbose) { \ - crm_info("SUCCESS - %s: %d", str, rc); \ -@@ -251,7 +251,7 @@ run_standard_test(void) - "Status false_1_node1", 1, 0); - - single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0), -- "Fence unknown-host (expected failure)", 0, -113); -+ "Fence unknown-host (expected failure)", 0, -19); - - single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0), - "Fence false_1_node1", 1, 0); -@@ -427,7 +427,7 @@ test_async_fence_timeout(int check_event) - int rc = 0; - - if (check_event) { -- if (callback_rc != -ETIME) { -+ if (callback_rc != -EHOSTUNREACH) { - mainloop_test_done(FALSE); - } else { - mainloop_test_done(TRUE); -@@ -533,7 +533,7 @@ iterate_mainloop_tests(gboolean event_ready) - if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) { - /* all tests ran, everything passed */ - crm_info("ALL MAINLOOP TESTS PASSED!"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - callbacks[mainloop_iter] (event_ready); -@@ -560,7 +560,7 @@ test_shutdown(int nsig) - } - - if (rc) { -- crm_exit(-1); -+ crm_exit(pcmk_err_generic); - } - } - -diff --git a/include/crm/cib.h b/include/crm/cib.h -index 48e10af..7a694ac 100644 ---- a/include/crm/cib.h -+++ b/include/crm/cib.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
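
The new expected return codes in these tests are plain negated errno values,
matching the stonith_fence() change earlier in the patch: a request naming a
target that no registered device can fence now fails with -ENODEV rather
than -EHOSTUNREACH. The numbers the tests hard-code fall out directly
(values shown are Linux's):

#include <errno.h>
#include <stdio.h>

int main(void)
{
    printf("-ENODEV       = %d\n", -ENODEV);        /* -19, the new code */
    printf("-EHOSTUNREACH = %d\n", -EHOSTUNREACH);  /* -113, the old one */
    return 0;
}
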
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -175,6 +175,6 @@ void remove_cib_op_callback(int call_id, gboolean all_callbacks); - } while(0) - # include - --# define CIB_LIBRARY "libcib.so.2" -+# define CIB_LIBRARY "libcib.so.3" - - #endif -diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h -index 0c7dfe6..94d86dc 100644 ---- a/include/crm/cib/internal.h -+++ b/include/crm/cib/internal.h -@@ -18,6 +18,7 @@ - #ifndef CIB_INTERNAL__H - # define CIB_INTERNAL__H - # include -+# include - - # define CIB_OP_SLAVE "cib_slave" - # define CIB_OP_SLAVEALL "cib_slave_all" -@@ -188,4 +189,5 @@ int cib_internal_op(cib_t * cib, const char *op, const char *host, - const char *section, xmlNode * data, - xmlNode ** output_data, int call_options, const char *user_name); - -+ - #endif -diff --git a/include/crm/cib/util.h b/include/crm/cib/util.h -index 03bf426..2a00937 100644 ---- a/include/crm/cib/util.h -+++ b/include/crm/cib/util.h -@@ -59,7 +59,7 @@ int delete_attr_delegate(cib_t * the_cib, int options, - const char *attr_id, const char *attr_name, - const char *attr_value, gboolean to_console, const char *user_name); - --int query_node_uuid(cib_t * the_cib, const char *uname, char **uuid); -+int query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node); - - int query_node_uname(cib_t * the_cib, const char *uuid, char **uname); - -diff --git a/include/crm/cluster.h b/include/crm/cluster.h -index 5be940a..cac863f 100644 ---- a/include/crm/cluster.h -+++ b/include/crm/cluster.h -@@ -73,8 +73,7 @@ typedef struct crm_peer_node_s { - - void crm_peer_init(void); - void crm_peer_destroy(void); --char *get_corosync_uuid(uint32_t id, const char *uname); --const char *get_node_uuid(uint32_t id, const char *uname); -+char *get_corosync_uuid(crm_node_t *peer); - int get_corosync_id(int id, const char *uuid); - - typedef struct crm_cluster_s { -@@ -132,7 +131,6 @@ gboolean crm_is_peer_active(const crm_node_t * node); - guint reap_crm_member(uint32_t id, const char *name); - int crm_terminate_member(int nodeid, const char *uname, void *unused); - int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection); --gboolean crm_get_cluster_name(char **cname); - - # if SUPPORT_HEARTBEAT - gboolean crm_is_heartbeat_peer_active(const crm_node_t * node); -@@ -143,14 +141,11 @@ extern int ais_fd_sync; - gboolean crm_is_corosync_peer_active(const crm_node_t * node); - gboolean send_ais_text(int class, const char *data, gboolean local, - crm_node_t * node, enum crm_ais_msg_types dest); --gboolean get_ais_nodeid(uint32_t * id, char **uname); - # endif - --void empty_uuid_cache(void); --const char *get_uuid(const char *uname); --const char *get_uname(const char *uuid); --void set_uuid(xmlNode * node, const char *attr, const char *uname); --void unget_uuid(const char *uname); -+const char *crm_peer_uuid(crm_node_t *node); -+const char *crm_peer_uname(const char *uuid); -+void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node); - - enum crm_status_type { - crm_status_uname, -@@ -182,7 +177,7 @@ gboolean is_openais_cluster(void); - gboolean is_classic_ais_cluster(void); - gboolean is_heartbeat_cluster(void); - --char *get_local_node_name(void); -+const char *get_local_node_name(void); - char *get_node_name(uint32_t nodeid); - - #endif -diff --git a/include/crm/cluster/internal.h 
b/include/crm/cluster/internal.h -index ff0fbf2..2fa8e08 100644 ---- a/include/crm/cluster/internal.h -+++ b/include/crm/cluster/internal.h -@@ -379,8 +379,6 @@ enum crm_quorum_source { - - enum crm_quorum_source get_quorum_source(void); - --void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); -- - void crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); - - crm_node_t *crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, -diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am -index 48f871d..5d61df7 100644 ---- a/include/crm/common/Makefile.am -+++ b/include/crm/common/Makefile.am -@@ -22,3 +22,6 @@ headerdir=$(pkgincludedir)/crm/common - - header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h - noinst_HEADERS = ipcs.h -+if BUILD_CIBSECRETS -+noinst_HEADERS += cib_secrets.h -+endif -diff --git a/include/crm/common/cib_secrets.h b/include/crm/common/cib_secrets.h -new file mode 100644 -index 0000000..566f445 ---- /dev/null -+++ b/include/crm/common/cib_secrets.h -@@ -0,0 +1,25 @@ -+/* -+ * cib_secrets.h -+ * -+ * Author: Dejan Muhamedagic -+ * Copyright (c) 2011 SUSE, Attachmate -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public -+ * License as published by the Free Software Foundation; either -+ * version 2 of the License, or (at your option) any later version. -+ * -+ * This software is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+/* -+ * load parameters from an ini file (cib_secrets.c) -+ */ -+int replace_secret_params(char * rsc_id, GHashTable * params); -diff --git a/include/crm/common/logging.h b/include/crm/common/logging.h -index bed6cd9..22b1ad7 100644 ---- a/include/crm/common/logging.h -+++ b/include/crm/common/logging.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -39,6 +39,16 @@ extern gboolean crm_config_error; - extern gboolean crm_config_warning; - extern unsigned int crm_trace_nonlog; - -+enum xml_log_options -+{ -+ xml_log_option_filtered = 0x001, -+ xml_log_option_formatted = 0x002, -+ xml_log_option_diff_plus = 0x010, -+ xml_log_option_diff_minus = 0x020, -+ xml_log_option_diff_short = 0x040, -+ xml_log_option_diff_all = 0x100, -+}; -+ - void crm_enable_blackbox(int nsig); - void crm_enable_blackbox_tracing(int nsig); - void crm_write_blackbox(int nsig, struct qb_log_callsite *callsite); -@@ -123,7 +133,7 @@ unsigned int get_crm_log_level(void); - xml_cs = qb_log_callsite_get(__func__, __FILE__, "xml-blog", level, __LINE__, 0); \ - } \ - if (crm_is_callsite_active(xml_cs, level, 0)) { \ -- log_data_element(level, __FILE__, __PRETTY_FUNCTION__, __LINE__, text, xml, 0, TRUE); \ -+ log_data_element(level, __FILE__, __PRETTY_FUNCTION__, __LINE__, text, xml, 1, xml_log_option_formatted); \ - } \ - } while(0) - -diff --git a/include/crm/common/mainloop.h b/include/crm/common/mainloop.h -index d103107..0941f1b 100644 ---- a/include/crm/common/mainloop.h -+++ b/include/crm/common/mainloop.h -@@ -30,6 +30,8 @@ typedef struct trigger_s crm_trigger_t; - typedef struct mainloop_io_s mainloop_io_t; - typedef struct mainloop_child_s mainloop_child_t; - -+void mainloop_cleanup(void); -+ - crm_trigger_t *mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), - gpointer userdata); - -@@ -79,22 +81,18 @@ void mainloop_del_fd(mainloop_io_t * client); - * Create a new tracked process - * To track a process group, use -pid - */ --void -- -- --mainloop_add_child(pid_t pid, -- int timeout, -- const char *desc, -- void *userdata, -- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode)); -- --void *mainloop_get_child_userdata(mainloop_child_t * child); --int -- mainloop_get_child_timeout(mainloop_child_t * child); -- --pid_t mainloop_get_child_pid(mainloop_child_t * child); --void -- mainloop_clear_child_userdata(mainloop_child_t * child); -+void mainloop_child_add(pid_t pid, -+ int timeout, -+ const char *desc, -+ void *userdata, -+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)); -+ -+void *mainloop_child_userdata(mainloop_child_t * child); -+int mainloop_child_timeout(mainloop_child_t * child); -+const char *mainloop_child_name(mainloop_child_t * child); -+ -+pid_t mainloop_child_pid(mainloop_child_t * child); -+void mainloop_clear_child_userdata(mainloop_child_t * child); - - # define G_PRIORITY_MEDIUM (G_PRIORITY_HIGH/2) - -diff --git a/include/crm/common/util.h b/include/crm/common/util.h -index 966785e..f0dca36 100644 ---- a/include/crm/common/util.h -+++ b/include/crm/common/util.h -@@ -113,6 +113,8 @@ gboolean did_rsc_op_fail(lrmd_event_data_t * event, int target_rc); - char *crm_md5sum(const char *buffer); - - char *crm_generate_uuid(void); -+ -+void crm_build_path(const char *path_c, mode_t mode); - int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid); - - int crm_exit(int rc); -diff --git a/include/crm/common/xml.h b/include/crm/common/xml.h -index d5147c8..9b2ced9 100644 ---- a/include/crm/common/xml.h -+++ b/include/crm/common/xml.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free 
software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -68,7 +68,7 @@ xmlDoc *getDocPtr(xmlNode * node); - * - * Copy all the attributes/properties from src into target. - * -- * Not recursive, does not return anything. -+ * Not recursive, does not return anything. - * - */ - void copy_in_properties(xmlNode * target, xmlNode * src); -@@ -108,7 +108,7 @@ const char *crm_xml_add_int(xmlNode * node, const char *name, int value); - void unlink_xml_node(xmlNode * node); - - /* -- * -+ * - */ - void purge_diff_markers(xmlNode * a_node); - -@@ -201,6 +201,8 @@ gboolean validate_xml_verbose(xmlNode * xml_blob); - int update_validation(xmlNode ** xml_blob, int *best, gboolean transform, gboolean to_logs); - int get_schema_version(const char *name); - const char *get_schema_name(int version); -+ -+void crm_xml_init(void); - void crm_xml_cleanup(void); - - static inline xmlNode * -@@ -210,7 +212,7 @@ __xml_first_child(xmlNode * parent) - - if (parent) { - child = parent->children; -- while (child && child->type != XML_ELEMENT_NODE) { -+ while (child && child->type == XML_TEXT_NODE) { - child = child->next; - } - } -@@ -222,7 +224,7 @@ __xml_next(xmlNode * child) - { - if (child) { - child = child->next; -- while (child && child->type != XML_ELEMENT_NODE) { -+ while (child && child->type == XML_TEXT_NODE) { - child = child->next; - } - } -@@ -238,6 +240,15 @@ xmlXPathObjectPtr xpath_search(xmlNode * xml_top, const char *path); - gboolean cli_config_update(xmlNode ** xml, int *best_version, gboolean to_logs); - xmlNode *expand_idref(xmlNode * input, xmlNode * top); - -+void freeXpathObject(xmlXPathObjectPtr xpathObj); - xmlNode *getXpathResult(xmlXPathObjectPtr xpathObj, int index); - -+static inline int numXpathResults(xmlXPathObjectPtr xpathObj) -+{ -+ if(xpathObj == NULL || xpathObj->nodesetval == NULL) { -+ return 0; -+ } -+ return xpathObj->nodesetval->nodeNr; -+} -+ - #endif -diff --git a/include/crm/crm.h b/include/crm/crm.h -index 5d69231..9c66563 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -56,7 +56,7 @@ - - # define CRM_META "CRM_meta" - --extern const char *crm_system_name; -+extern char *crm_system_name; - - /* *INDENT-OFF* */ - -@@ -66,6 +66,8 @@ extern const char *crm_system_name; - # define MAX_IPC_FAIL 5 - # define MAX_IPC_DELAY 120 - -+# define DAEMON_RESPAWN_STOP 100 -+ - # define MSG_LOG 1 - # define DOT_FSA_ACTIONS 1 - # define DOT_ALL_FSA_INPUTS 1 -@@ -119,10 +121,11 @@ extern const char *crm_system_name; - # define CRM_OP_TETIMEOUT "te_timeout" - # define CRM_OP_TRANSITION "transition" - # define CRM_OP_REGISTER "register" -+# define CRM_OP_IPC_FWD "ipc_fwd" - # define CRM_OP_DEBUG_UP "debug_inc" - # define CRM_OP_DEBUG_DOWN "debug_dec" - # define CRM_OP_INVOKE_LRM "lrm_invoke" --# define CRM_OP_LRM_REFRESH "lrm_refresh" -+# define CRM_OP_LRM_REFRESH "lrm_refresh" /* Deprecated */ - # define CRM_OP_LRM_QUERY "lrm_query" - # define CRM_OP_LRM_DELETE "lrm_delete" - # define CRM_OP_LRM_FAIL "lrm_fail" -diff --git a/include/crm/error.h b/include/crm/error.h -index 2e8d6f5..1613d66 100644 ---- a/include/crm/error.h -+++ b/include/crm/error.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2012 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -39,17 +39,21 @@ - } while(0) - - # define pcmk_ok 0 --# define PCMK_ERROR_OFFSET 900 /* Replacements on non-linux systems, see include/portability.h */ --# define PCMK_CUSTOM_OFFSET 1000 /* Purely custom codes */ --# define pcmk_err_generic 1001 --# define pcmk_err_no_quorum 1002 --# define pcmk_err_dtd_validation 1003 --# define pcmk_err_transform_failed 1004 --# define pcmk_err_old_data 1005 --# define pcmk_err_diff_failed 1006 --# define pcmk_err_diff_resync 1007 -+# define PCMK_ERROR_OFFSET 190 /* Replacements on non-linux systems, see include/portability.h */ -+# define PCMK_CUSTOM_OFFSET 200 /* Purely custom codes */ -+# define pcmk_err_generic 201 -+# define pcmk_err_no_quorum 202 -+# define pcmk_err_dtd_validation 203 -+# define pcmk_err_transform_failed 204 -+# define pcmk_err_old_data 205 -+# define pcmk_err_diff_failed 206 -+# define pcmk_err_diff_resync 207 -+# define pcmk_err_cib_modified 208 -+# define pcmk_err_cib_backup 209 -+# define pcmk_err_cib_save 210 - - const char *pcmk_strerror(int rc); -+const char *pcmk_errorname(int rc); - const char *bz2_strerror(int rc); - - #endif -diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h -index 0d40729..f627373 100644 ---- a/include/crm/lrmd.h -+++ b/include/crm/lrmd.h -@@ -33,16 +33,19 @@ typedef struct lrmd_key_value_s { - struct lrmd_key_value_s *next; - } lrmd_key_value_t; - -+#define LRMD_PROTOCOL_VERSION "1.0" - - /* *INDENT-OFF* */ - #define DEFAULT_REMOTE_KEY_LOCATION "/etc/pacemaker/authkey" - #define ALT_REMOTE_KEY_LOCATION "/etc/corosync/authkey" --#define DEFAULT_REMOTE_PORT 1984 -+#define DEFAULT_REMOTE_PORT 3121 - #define DEFAULT_REMOTE_USERNAME "lrmd" - - #define F_LRMD_OPERATION "lrmd_op" - #define F_LRMD_CLIENTNAME "lrmd_clientname" -+#define F_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider" - #define F_LRMD_CLIENTID "lrmd_clientid" -+#define F_LRMD_PROTOCOL_VERSION "lrmd_protocol_version" - #define F_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type" - #define F_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id" - #define F_LRMD_CALLBACK_TOKEN "lrmd_async_id" -@@ -83,9 +86,19 @@ typedef struct lrmd_key_value_s { - #define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata" - #define LRMD_OP_POKE "lrmd_rsc_poke" - -+#define F_LRMD_IPC_OP "lrmd_ipc_op" -+#define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server" -+#define F_LRMD_IPC_SESSION "lrmd_ipc_session" -+#define F_LRMD_IPC_PROXY_NODE "lrmd_ipc_proxy_node" -+#define F_LRMD_IPC_USER "lrmd_ipc_user" -+#define F_LRMD_IPC_MSG "lrmd_ipc_msg" -+#define F_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id" -+#define F_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags" -+ - #define T_LRMD "lrmd" - #define T_LRMD_REPLY "lrmd_reply" - #define T_LRMD_NOTIFY "lrmd_notify" -+#define T_LRMD_IPC_PROXY "lrmd_ipc_proxy" - /* *INDENT-ON* */ - - /*! -diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h -index 57ec9bd..8575163 100644 ---- a/include/crm/msg_xml.h -+++ b/include/crm/msg_xml.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
-- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -256,6 +256,11 @@ - # define XML_LRM_ATTR_OP_RESTART "op-force-restart" - # define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest" - -+# define XML_RSC_OP_LAST_CHANGE "last-rc-change" -+# define XML_RSC_OP_LAST_RUN "last-run" -+# define XML_RSC_OP_T_EXEC "exec-time" -+# define XML_RSC_OP_T_QUEUE "queue-time" -+ - # define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source" - # define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target" - -diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h -index f7da80e..d654cb8 100644 ---- a/include/crm/pengine/internal.h -+++ b/include/crm/pengine/internal.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -51,6 +51,8 @@ typedef struct notify_data_s { - - } notify_data_t; - -+bool pe_can_fence(pe_working_set_t *data_set, node_t *node); -+ - int merge_weights(int w1, int w2); - void add_hash_param(GHashTable * hash, const char *name, const char *value); - void append_hashtable(gpointer key, gpointer value, gpointer user_data); -@@ -101,10 +103,12 @@ void common_free(resource_t * rsc); - extern pe_working_set_t *pe_dataset; - - extern node_t *node_copy(node_t * this_node); --extern time_t get_timet_now(pe_working_set_t * data_set); --extern int get_failcount(node_t * node, resource_t * rsc, int *last_failure, -+extern time_t get_effective_time(pe_working_set_t * data_set); -+extern int get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, - pe_working_set_t * data_set); --extern int get_failcount_all(node_t * node, resource_t * rsc, int *last_failure, -+extern int get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, -+ bool effective, pe_working_set_t * data_set); -+extern int get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, - pe_working_set_t * data_set); - - /* Binary like operators for lists of nodes */ -@@ -134,9 +138,6 @@ extern GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) - extern void pe_free_shallow(GListPtr alist); - extern void pe_free_shallow_adv(GListPtr alist, gboolean with_data); - --/* For creating the transition graph */ --extern xmlNode *action2xml(action_t * action, gboolean as_input); -- - /* Printing functions for debug */ - extern void print_node(const char *pre_text, node_t * node, gboolean details); - 
-diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h -index 95130c7..bd0a9ba 100644 ---- a/include/crm/pengine/status.h -+++ b/include/crm/pengine/status.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -58,19 +58,21 @@ enum pe_find { - # define pe_flag_is_managed_default 0x00000004ULL - # define pe_flag_maintenance_mode 0x00000008ULL - --# define pe_flag_stonith_enabled 0x00000010ULL -+# define pe_flag_stonith_enabled 0x00000010ULL - # define pe_flag_have_stonith_resource 0x00000020ULL - - # define pe_flag_stop_rsc_orphans 0x00000100ULL - # define pe_flag_stop_action_orphans 0x00000200ULL --# define pe_flag_stop_everything 0x00000400ULL -+# define pe_flag_stop_everything 0x00000400ULL - - # define pe_flag_start_failure_fatal 0x00001000ULL - # define pe_flag_remove_after_stop 0x00002000ULL - --# define pe_flag_startup_probes 0x00010000ULL -+# define pe_flag_startup_probes 0x00010000ULL - # define pe_flag_have_status 0x00020000ULL --# define pe_flag_have_remote_nodes 0x00040000ULL -+# define pe_flag_have_remote_nodes 0x00040000ULL -+ -+# define pe_flag_quick_location 0x00100000ULL - - typedef struct pe_working_set_s { - xmlNode *input; -@@ -114,6 +116,7 @@ typedef struct pe_working_set_s { - xmlNode *graph; - - GHashTable *template_rsc_sets; -+ const char *localhost; - - } pe_working_set_t; - -@@ -341,6 +344,7 @@ struct action_wrapper_s { - action_t *action; - }; - -+const char *rsc_printable_id(resource_t *rsc); - gboolean cluster_status(pe_working_set_t * data_set); - void set_working_set_defaults(pe_working_set_t * data_set); - void cleanup_calculations(pe_working_set_t * data_set); -diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h -index 355d1ae..35f6e5a 100644 ---- a/include/crm/stonith-ng.h -+++ b/include/crm/stonith-ng.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -86,6 +86,7 @@ typedef struct stonith_history_s { - int state; - - struct stonith_history_s *next; -+ char *client; - } stonith_history_t; - - typedef struct stonith_s stonith_t; -diff --git a/include/crm_internal.h b/include/crm_internal.h -index 6215a3a..62eb385 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -31,6 +31,7 @@ - - # include - # include -+# include - - /* Dynamic loading of libraries */ - void *find_library_function(void **handle, const char *lib, const char *fn, int fatal); -@@ -38,7 +39,7 @@ void *convert_const_pointer(const void *ptr); - - /* For ACLs */ - char *uid2username(uid_t uid); --void determine_request_user(char *user, xmlNode * request, const char *field); -+void determine_request_user(const char *user, xmlNode * request, const char *field); - - # if ENABLE_ACL - # include -@@ -309,4 +310,22 @@ typedef struct { - - # endif - -+void -+attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); -+void -+stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); -+ -+qb_ipcs_service_t * -+crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb); -+ -+void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, -+ qb_ipcs_service_t **ipcs_rw, -+ qb_ipcs_service_t **ipcs_shm, -+ struct qb_ipcs_service_handlers *ro_cb, -+ struct qb_ipcs_service_handlers *rw_cb); -+ -+void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, -+ qb_ipcs_service_t *ipcs_rw, -+ qb_ipcs_service_t *ipcs_shm); -+ - #endif /* CRM_INTERNAL__H */ -diff --git a/include/portability.h b/include/portability.h -index 681ddeb..b0f9f1c 100644 ---- a/include/portability.h -+++ b/include/portability.h -@@ -10,12 +10,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -74,9 +74,9 @@ char *strndup(const char *str, size_t len); - # define USE_GNU - # endif - --# if NEED_G_HASH_ITER -+# include <glib.h> -+# if !GLIB_CHECK_VERSION(2,14,0) - --# include <glib.h> - typedef struct fake_ghi { - GHashTable *hash; - int nth; /* current index over the iteration */ -@@ -102,6 +102,9 @@ g_hash_table_get_values(GHashTable * hash_table) - g_hash_table_foreach(hash_table, g_hash_prepend_value, &values); - return values; - } -+# endif -+ -+# if !GLIB_CHECK_VERSION(2,16,0) - - static inline gboolean - g_hash_table_nth_data(gpointer key, gpointer value, gpointer user_data) -@@ -116,7 +119,6 @@ g_hash_table_nth_data(gpointer key, gpointer value, gpointer user_data) - return FALSE; - } - --/* Since: 2.16 */ - static inline void - g_hash_table_iter_init(GHashTableIter * iter, GHashTable * hash_table) - { -@@ -146,7 +148,6 @@ g_hash_table_iter_next(GHashTableIter * iter, gpointer * key, gpointer * value) - return found; - } - --/* Since: 2.16 */ - static inline void - g_hash_table_iter_remove(GHashTableIter * iter) - { -@@ -154,7 +155,6 @@ g_hash_table_iter_remove(GHashTableIter * iter) - iter->nth--; /* Or zero to be safe? */ - } - --/* Since: 2.16 */ - static inline int - g_strcmp0(const char *str1, const char *str2) - { -@@ -166,8 +166,7 @@ g_strcmp0(const char *str1, const char *str2) - } - # endif /* !HAVE_LIBGLIB_2_0 */ - --# ifdef NEED_G_LIST_FREE_FULL --# include -+# if !GLIB_CHECK_VERSION(2,28,0) - # include - /* Since: 2.28 */ - static inline void -@@ -180,27 +179,27 @@ g_list_free_full(GList * list, GDestroyNotify free_func) - - /* Replacement error codes for non-linux */ - # ifndef ENOTUNIQ --# define ENOTUNIQ 900 -+# define ENOTUNIQ 190 - # endif - - # ifndef ECOMM --# define ECOMM 901 -+# define ECOMM 191 - # endif - - # ifndef ELIBACC --# define ELIBACC 902 -+# define ELIBACC 192 - # endif - - # ifndef EREMOTEIO --# define EREMOTEIO 903 -+# define EREMOTEIO 193 - # endif - - # ifndef EUNATCH --# define EUNATCH 904 -+# define EUNATCH 194 - # endif - - # ifndef ENOKEY --# define ENOKEY 905 -+# define ENOKEY 195 - # endif - - /* -diff --git a/lib/ais/plugin.c b/lib/ais/plugin.c -index 01f11eb..1e372de 100644 ---- a/lib/ais/plugin.c -+++ b/lib/ais/plugin.c -@@ -1,16 +1,16 @@ - /* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USAA -@@ -140,6 +140,7 @@ void pcmk_quorum(void *conn, ais_void_ptr * msg); - void pcmk_cluster_id_swab(void *msg); - void pcmk_cluster_id_callback(ais_void_ptr * message, unsigned int nodeid); - void ais_remove_peer(char *node_id); -+void ais_remove_peer_by_name(const char *node_name); - - static uint32_t - get_process_list(void) -@@ -203,14 +204,14 @@ struct corosync_service_engine pcmk_service_handler = { - .name = (char *)"Pacemaker Cluster Manager "PACKAGE_VERSION, - .id = PCMK_SERVICE_ID, - .private_data_size = 0, -- .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, -+ .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, - .allow_inquorate = CS_LIB_ALLOW_INQUORATE, - .lib_init_fn = pcmk_ipc_connect, - .lib_exit_fn = pcmk_ipc_exit, - .exec_init_fn = pcmk_startup, - .exec_exit_fn = pcmk_shutdown, - .config_init_fn = pcmk_config_init, -- .priority = 50, -+ .priority = 50, - .lib_engine = pcmk_lib_service, - .lib_engine_count = sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler), - .exec_engine = pcmk_exec_service, -@@ -492,7 +493,7 @@ pcmk_wait_dispatch(void *arg) - } - - /* Broadcast the fact that one of our processes died -- * -+ * - * Try to get some logging of the cause out first though - * because we're probably about to get fenced - * -@@ -674,13 +675,13 @@ pcmk_startup(struct corosync_api_v1 *init_with) - } - - /* -- static void ais_print_node(const char *prefix, struct totem_ip_address *host) -+ static void ais_print_node(const char *prefix, struct totem_ip_address *host) - { - int len = 0; - char *buffer = NULL; - - ais_malloc0(buffer, INET6_ADDRSTRLEN+1); -- -+ - inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); - - len = strlen(buffer); -@@ -1026,7 +1027,7 @@ pcmk_ipc(void *conn, ais_void_ptr * msg) - transient = FALSE; - } - #if 0 -- /* If this check fails, the order of pcmk_children probably -+ /* If this check fails, the order of pcmk_children probably - * doesn't match that of the crm_ais_msg_types enum - */ - AIS_CHECK(transient || mutable->sender.pid == pcmk_children[type].pid, -@@ -1493,6 +1494,7 @@ route_ais_message(const AIS_Message * msg, gboolean local_origin) - if (mutable->host.local) { - void *conn = NULL; - const char *lookup = NULL; -+ int children_index = 0; - - if (dest == crm_msg_ais) { - process_ais_message(mutable); -@@ -1530,10 +1532,15 @@ route_ais_message(const AIS_Message * msg, gboolean local_origin) - } - - lookup = msg_type2text(dest); -- conn = pcmk_children[dest].async_conn; - -- /* the cluster fails in weird and wonderfully obscure ways when this is not true */ -- AIS_ASSERT(ais_str_eq(lookup, pcmk_children[dest].name)); -+ if (dest == crm_msg_pe && ais_str_eq(pcmk_children[7].name, lookup)) { -+ children_index = 7; -+ -+ } else { -+ children_index = dest; -+ } -+ -+ conn = pcmk_children[children_index].async_conn; - - if (mutable->header.id == service_id) { - mutable->header.id = 0; /* reset this back to zero for IPC messages */ -@@ -1712,6 +1719,42 @@ ais_remove_peer(char *node_id) - } - } - -+void -+ais_remove_peer_by_name(const char *node_name) -+{ -+ GHashTableIter iter; -+ gpointer key = 0; -+ crm_node_t *node = NULL; -+ GList *node_list = NULL; -+ -+ g_hash_table_iter_init(&iter, membership_list); -+ -+ while (g_hash_table_iter_next(&iter, &key, (void **)&node)) { -+ if 
(ais_str_eq(node_name, node->uname)) { -+ uint32_t node_id = GPOINTER_TO_UINT(key); -+ char *node_id_s = NULL; -+ -+ ais_malloc0(node_id_s, 32); -+ snprintf(node_id_s, 31, "%u", node_id); -+ node_list = g_list_append(node_list, node_id_s); -+ } -+ } -+ -+ if (node_list) { -+ GList *gIter = NULL; -+ -+ for (gIter = node_list; gIter != NULL; gIter = gIter->next) { -+ char *node_id_s = gIter->data; -+ -+ ais_remove_peer(node_id_s); -+ } -+ g_list_free_full(node_list, free); -+ -+ } else { -+ ais_warn("Peer %s is unknown", node_name); -+ } -+} -+ - gboolean - process_ais_message(const AIS_Message * msg) - { -@@ -1728,7 +1771,7 @@ process_ais_message(const AIS_Message * msg) - if (data && len > 12 && strncmp("remove-peer:", data, 12) == 0) { - char *node = data + 12; - -- ais_remove_peer(node); -+ ais_remove_peer_by_name(node); - } - - ais_free(data); -diff --git a/lib/ais/utils.c b/lib/ais/utils.c -index a9774ad..465e381 100644 ---- a/lib/ais/utils.c -+++ b/lib/ais/utils.c -@@ -1,16 +1,16 @@ - /* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -63,7 +63,7 @@ log_ais_message(int level, const AIS_Message * msg) - } - - /* --static gboolean ghash_find_by_uname(gpointer key, gpointer value, gpointer user_data) -+static gboolean ghash_find_by_uname(gpointer key, gpointer value, gpointer user_data) - { - crm_node_t *node = value; - int id = GPOINTER_TO_INT(user_data); -@@ -140,6 +140,7 @@ spawn_child(crm_child_t * child) - { - int lpc = 0; - uid_t uid = 0; -+ gid_t gid = 0; - struct rlimit oflimits; - gboolean use_valgrind = FALSE; - gboolean use_callgrind = FALSE; -@@ -174,10 +175,11 @@ spawn_child(crm_child_t * child) - } - - if (child->uid) { -- if (pcmk_user_lookup(child->uid, &uid, NULL) < 0) { -+ if (pcmk_user_lookup(child->uid, &uid, &gid) < 0) { - ais_err("Invalid uid (%s) specified for %s", child->uid, child->name); - return FALSE; - } -+ ais_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); - } - - child->pid = fork(); -@@ -206,22 +208,8 @@ spawn_child(crm_child_t * child) - } - opts_default[0] = ais_strdup(child->command);; - --#if 0 -- /* Dont set the group for now - it prevents connection to the cluster */ -- if (gid && setgid(gid) < 0) { -- ais_perror("Could not set group to %d", gid); -- } --#endif -- -- if (uid) { -- struct passwd *pwent = getpwuid(uid); -- -- if (pwent == NULL) { -- ais_perror("Cannot get password entry of uid: %d", uid); -- -- } else if (initgroups(pwent->pw_name, pwent->pw_gid) < 0) { -- ais_perror("Cannot initalize groups for %s (uid=%d)", pwent->pw_name, uid); -- } -+ if (uid && initgroups(child->uid, gid) < 0) { -+ ais_perror("Cannot initialize groups for %s", child->uid); - } - - if (uid && setuid(uid) < 0) { -diff --git a/lib/cib/Makefile.am b/lib/cib/Makefile.am -index dacac2b..13cd596 
100644 ---- a/lib/cib/Makefile.am -+++ b/lib/cib/Makefile.am -@@ -32,9 +32,8 @@ if ENABLE_ACL - libcib_la_SOURCES += cib_acl.c - endif - --libcib_la_LDFLAGS = -version-info 2:1:0 $(top_builddir)/lib/common/libcrmcommon.la $(CRYPTOLIB) \ -- $(top_builddir)/lib/pengine/libpe_rules.la -- -+libcib_la_LDFLAGS = -version-info 3:1:0 -L$(top_builddir)/lib/pengine/.libs -+libcib_la_LIBADD = $(CRYPTOLIB) $(top_builddir)/lib/pengine/libpe_rules.la $(top_builddir)/lib/common/libcrmcommon.la - libcib_la_CFLAGS = -I$(top_srcdir) - - clean-generic: -diff --git a/lib/cib/cib_acl.c b/lib/cib/cib_acl.c -index 72dd6f2..1000345 100644 ---- a/lib/cib/cib_acl.c -+++ b/lib/cib/cib_acl.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2009 Yan Gao -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -518,6 +518,7 @@ search_xml_children(GListPtr * children, xmlNode * root, - static int - search_xpath_objects(GListPtr * objects, xmlNode * xml_obj, const char *xpath) - { -+ int lpc = 0, max = 0; - int match_found = 0; - xmlXPathObjectPtr xpathObj = NULL; - -@@ -526,28 +527,24 @@ search_xpath_objects(GListPtr * objects, xmlNode * xml_obj, const char *xpath) - } - - xpathObj = xpath_search(xml_obj, xpath); -+ max = numXpathResults(xpathObj); - -- if (xpathObj == NULL || xpathObj->nodesetval == NULL || xpathObj->nodesetval->nodeNr < 1) { -+ if (max <= 0) { - crm_debug("No match for %s in %s", xpath, xmlGetNodePath(xml_obj)); -+ } - -- } else if (xpathObj->nodesetval->nodeNr > 0) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -- -- for (lpc = 0; lpc < max; lpc++) { -- xmlNode *match = getXpathResult(xpathObj, lpc); -- -- if (match == NULL) { -- continue; -- } -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *match = getXpathResult(xpathObj, lpc); - -- *objects = g_list_append(*objects, match); -- match_found++; -+ if (match == NULL) { -+ continue; - } -- } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ *objects = g_list_append(*objects, match); -+ match_found++; - } -+ -+ freeXpathObject(xpathObj); - return match_found; - } - -diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c -index 5604d20..4af077c 100644 ---- a/lib/cib/cib_attrs.c -+++ b/lib/cib/cib_attrs.c -@@ -138,7 +138,7 @@ find_nvpair_attr_delegate(cib_t * the_cib, const char *attr, const char *section - if (xml_has_children(xml_search)) { - xmlNode *child = NULL; - -- rc = -EINVAL; -+ rc = -ENOTUNIQ; - attr_msg(LOG_WARNING, "Multiple attributes match name=%s", attr_name); - - for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { -@@ -180,7 +180,7 @@ update_attr_delegate(cib_t * the_cib, int call_options, - CRM_CHECK(attr_name != NULL || attr_id != NULL, return -EINVAL); - - rc = find_nvpair_attr_delegate(the_cib, XML_ATTR_ID, section, node_uuid, set_type, set_name, -- attr_id, attr_name, FALSE, &local_attr_id, user_name); 
-+ attr_id, attr_name, to_console, &local_attr_id, user_name); - if (rc == pcmk_ok) { - attr_id = local_attr_id; - goto do_modify; -@@ -196,6 +196,7 @@ update_attr_delegate(cib_t * the_cib, int call_options, - const char *node_type = NULL; - xmlNode *cib_top = NULL; - -+ crm_trace("%s does not exist, create it", attr_name); - rc = cib_internal_op(the_cib, CIB_OP_QUERY, NULL, "/cib", NULL, &cib_top, - cib_sync_call | cib_scope_local | cib_xpath | cib_no_children, - user_name); -@@ -413,8 +414,43 @@ delete_attr_delegate(cib_t * the_cib, int options, - return rc; - } - --int --query_node_uuid(cib_t * the_cib, const char *uname, char **uuid) -+static int -+get_remote_node_uuid(cib_t * the_cib, const char *uname, char **uuid) -+{ -+#define REMOTE_NODE_XPATH "//nvpair[@name='remote-node'][@value='%s']" -+#define REMOTE_NODE_XPATH2 "//primitive[@type='remote'][@provider='pacemaker'][@id='%s']" -+ int rc = pcmk_ok; -+ char *xpath_string = NULL; -+ size_t len = strlen(REMOTE_NODE_XPATH) + strlen(uname) + 1; -+ xmlNode *xml_search = NULL; -+ -+ xpath_string = calloc(1, len); -+ sprintf(xpath_string, REMOTE_NODE_XPATH, uname); -+ rc = cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, &xml_search, -+ cib_sync_call | cib_scope_local | cib_xpath, NULL); -+ free(xpath_string); -+ free(xml_search); -+ -+ if (rc != pcmk_ok) { -+ len = strlen(REMOTE_NODE_XPATH2) + strlen(uname) + 1; -+ xpath_string = calloc(1, len); -+ sprintf(xpath_string, REMOTE_NODE_XPATH2, uname); -+ rc = cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, &xml_search, -+ cib_sync_call | cib_scope_local | cib_xpath, NULL); -+ -+ free(xpath_string); -+ free(xml_search); -+ } -+ -+ if (rc == pcmk_ok) { -+ *uuid = strdup(uname); -+ } -+ -+ return rc; -+} -+ -+static int -+get_cluster_node_uuid(cib_t * the_cib, const char *uname, char **uuid) - { - int rc = pcmk_ok; - xmlNode *a_child = NULL; -@@ -422,9 +458,6 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid) - xmlNode *fragment = NULL; - const char *child_name = NULL; - -- CRM_ASSERT(uname != NULL); -- CRM_ASSERT(uuid != NULL); -- - rc = the_cib->cmds->query(the_cib, XML_CIB_TAG_NODES, &fragment, - cib_sync_call | cib_scope_local); - if (rc != pcmk_ok) { -@@ -453,13 +486,36 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid) - } - } - -+ free_xml(fragment); -+ return rc; -+} -+ -+int -+query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node) -+{ -+ int rc = pcmk_ok; -+ -+ CRM_ASSERT(uname != NULL); -+ CRM_ASSERT(uuid != NULL); -+ -+ rc = get_cluster_node_uuid(the_cib, uname, uuid); -+ if (rc != pcmk_ok) { -+ crm_debug("%s is not a cluster node, checking to see if remote-node", uname); -+ rc = get_remote_node_uuid(the_cib, uname, uuid); -+ if (rc != pcmk_ok) { -+ crm_debug("%s is not a remote node either", uname); -+ -+ } else if (is_remote_node) { -+ *is_remote_node = TRUE; -+ } -+ } -+ - if (rc != pcmk_ok) { - crm_debug("Could not map name=%s to a UUID: %s\n", uname, pcmk_strerror(rc)); - } else { - crm_info("Mapped %s to %s", uname, *uuid); - } - -- free_xml(fragment); - return rc; - } - -diff --git a/lib/cib/cib_client.c b/lib/cib/cib_client.c -index 51b589d..d1eaf87 100644 ---- a/lib/cib/cib_client.c -+++ b/lib/cib/cib_client.c -@@ -396,7 +396,10 @@ cib_new_variant(void) - void - cib_delete(cib_t * cib) - { -- GList *list = cib->notify_list; -+ GList *list = NULL; -+ if(cib) { -+ list = cib->notify_list; -+ } - - while (list != NULL) { - cib_notify_client_t *client = g_list_nth_data(list, 0); -@@ 
-405,10 +408,14 @@ cib_delete(cib_t * cib) - free(client); - } - -- g_hash_table_destroy(cib_op_callback_table); -- cib_op_callback_table = NULL; -- cib->cmds->free(cib); -- cib = NULL; -+ if(cib_op_callback_table) { -+ g_hash_table_destroy(cib_op_callback_table); -+ cib_op_callback_table = NULL; -+ } -+ -+ if(cib) { -+ cib->cmds->free(cib); -+ } - } - - int -diff --git a/lib/cib/cib_native.c b/lib/cib/cib_native.c -index 1366b4f..9553ba2 100644 ---- a/lib/cib/cib_native.c -+++ b/lib/cib/cib_native.c -@@ -385,8 +385,8 @@ cib_native_perform_op_delegate(cib_t * cib, const char *op, const char *host, co - free_xml(op_msg); - - if (rc < 0) { -- crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, -- cib->call_timeout, rc); -+ crm_err("Couldn't perform %s operation (timeout=%ds): %s (%d)", op, -+ cib->call_timeout, pcmk_strerror(rc), rc); - rc = -ECOMM; - goto done; - } -diff --git a/lib/cib/cib_ops.c b/lib/cib/cib_ops.c -index 03521da..ceed536 100644 ---- a/lib/cib/cib_ops.c -+++ b/lib/cib/cib_ops.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -295,8 +295,16 @@ cib_process_delete(const char *op, int options, const char *section, xmlNode * r - } - - obj_root = get_object_root(section, *result_cib); -- if (replace_xml_child(NULL, obj_root, input, TRUE) == FALSE) { -- crm_trace("No matching object to delete"); -+ if(safe_str_eq(crm_element_name(input), section)) { -+ xmlNode *child = NULL; -+ for(child = __xml_first_child(input); child; child = __xml_next(child)) { -+ if (replace_xml_child(NULL, obj_root, child, TRUE) == FALSE) { -+ crm_trace("No matching object to delete: %s=%s", child->name, ID(child)); -+ } -+ } -+ -+ } else if (replace_xml_child(NULL, obj_root, input, TRUE) == FALSE) { -+ crm_trace("No matching object to delete: %s=%s", input->name, ID(input)); - } - - return pcmk_ok; -@@ -614,9 +622,9 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else { - apply_diff = FALSE; -- log_level = LOG_ERR; -+ log_level = LOG_NOTICE; - reason = "+ and - versions in the diff did not change"; -- log_cib_diff(LOG_ERR, input, __FUNCTION__); -+ log_cib_diff(LOG_NOTICE, input, __FUNCTION__); - } - } - -@@ -628,7 +636,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else if (apply_diff && diff_del_admin_epoch < this_admin_epoch) { - apply_diff = FALSE; -- log_level = LOG_WARNING; -+ log_level = LOG_NOTICE; - reason = "current \"" XML_ATTR_GENERATION_ADMIN "\" is greater than required"; - - } else if (apply_diff && diff_del_epoch > this_epoch) { -@@ -639,7 +647,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else if (apply_diff && diff_del_epoch < this_epoch) { - apply_diff = 
FALSE; -- log_level = LOG_WARNING; -+ log_level = LOG_NOTICE; - reason = "current \"" XML_ATTR_GENERATION "\" is greater than required"; - - } else if (apply_diff && diff_del_updates > this_updates) { -@@ -650,7 +658,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else if (apply_diff && diff_del_updates < this_updates) { - apply_diff = FALSE; -- log_level = LOG_WARNING; -+ log_level = LOG_NOTICE; - reason = "current \"" XML_ATTR_NUMUPDATES "\" is greater than required"; - } - -@@ -658,7 +666,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - free_xml(*result_cib); - *result_cib = NULL; - if (apply_xml_diff(existing_cib, input, result_cib) == FALSE) { -- log_level = LOG_NOTICE; -+ log_level = LOG_WARNING; - reason = "Failed application of an update diff"; - - if (options & cib_force_diff) { -@@ -775,6 +783,7 @@ apply_cib_diff(xmlNode * old, xmlNode * diff, xmlNode ** new) - gboolean - cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff) - { -+ int lpc = 0, max = 0; - gboolean config_changes = FALSE; - xmlXPathObject *xpathObj = NULL; - -@@ -789,13 +798,11 @@ cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff) - } - - xpathObj = xpath_search(*diff, "//" XML_CIB_TAG_CONFIGURATION); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - config_changes = TRUE; - goto done; -- -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* - * Do not check XML_TAG_DIFF_ADDED "//" XML_TAG_CIB -@@ -803,44 +810,40 @@ cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff) - * every time if the checked value existed - */ - xpathObj = xpath_search(*diff, "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_CIB); -- if (xpathObj) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ max = numXpathResults(xpathObj); - -- for (lpc = 0; lpc < max; lpc++) { -- xmlNode *top = getXpathResult(xpathObj, lpc); -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *top = getXpathResult(xpathObj, lpc); - -- if (crm_element_value(top, XML_ATTR_GENERATION) != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, XML_ATTR_GENERATION_ADMIN) != NULL) { -- config_changes = TRUE; -- goto done; -- } -+ if (crm_element_value(top, XML_ATTR_GENERATION) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, XML_ATTR_GENERATION_ADMIN) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } - -- if (crm_element_value(top, XML_ATTR_VALIDATION) != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, XML_ATTR_CRM_VERSION) != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, "remote-clear-port") != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, "remote-tls-port") != NULL) { -- config_changes = TRUE; -- goto done; -- } -+ if (crm_element_value(top, XML_ATTR_VALIDATION) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, XML_ATTR_CRM_VERSION) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, "remote-clear-port") != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, "remote-tls-port") != NULL) { -+ config_changes = TRUE; -+ goto done; - } - } - - done: -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - return config_changes; - } - -@@ -874,9 +877,7 @@ 
cib_process_xpath(const char *op, int options, const char *section, xmlNode * re - xpathObj = xpath_search(*result_cib, section); - } - -- if (xpathObj != NULL && xpathObj->nodesetval != NULL) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ max = numXpathResults(xpathObj); - - if (max < 1 && safe_str_eq(op, CIB_OP_DELETE)) { - crm_debug("%s was already removed", section); -@@ -951,7 +952,10 @@ cib_process_xpath(const char *op, int options, const char *section, xmlNode * re - - path_len += extra; - new_path = malloc(path_len + 1); -- if (id) { -+ if(new_path == NULL) { -+ break; -+ -+ } else if (id) { - snprintf(new_path, path_len + 1, "/%s[@id='%s']%s", parent->name, id, - path ? path : ""); - } else { -@@ -991,10 +995,7 @@ cib_process_xpath(const char *op, int options, const char *section, xmlNode * re - } - } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -- -+ freeXpathObject(xpathObj); - return rc; - } - -diff --git a/lib/cib/cib_remote.c b/lib/cib/cib_remote.c -index 223695b..8847422 100644 ---- a/lib/cib/cib_remote.c -+++ b/lib/cib/cib_remote.c -@@ -218,9 +218,10 @@ cib_tls_signon(cib_t * cib, crm_remote_t * connection, gboolean event_channel) - connection->tls_session = NULL; - #endif - sock = crm_remote_tcp_connect(private->server, private->port); -- if (sock <= 0) { -+ if (sock < 0) { - crm_perror(LOG_ERR, "remote tcp connection to %s:%d failed", private->server, - private->port); -+ return -ENOTCONN; - } - - connection->tcp_socket = sock; -diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c -index aa3e08b..6353d1d 100644 ---- a/lib/cib/cib_utils.c -+++ b/lib/cib/cib_utils.c -@@ -5,12 +5,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -497,7 +497,7 @@ cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_quer - /* The diff calculation in cib_config_changed() accounts for 25% of the - * CIB's total CPU usage on the DC - * -- * RNG validation on the otherhand, accounts for only 9%... -+ * RNG validation on the otherhand, accounts for only 9%... 
- */ - *config_changed = cib_config_changed(current_cib, scratch, &local_diff); - -@@ -537,7 +537,7 @@ cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_quer - - static filter_t filter[] = { - { 0, XML_ATTR_ORIGIN }, -- { 0, XML_CIB_ATTR_WRITTEN }, -+ { 0, XML_CIB_ATTR_WRITTEN }, - { 0, XML_ATTR_UPDATE_ORIG }, - { 0, XML_ATTR_UPDATE_CLIENT }, - { 0, XML_ATTR_UPDATE_USER }, -@@ -881,13 +881,11 @@ cib_internal_config_changed(xmlNode * diff) - } - - xpathObj = xpath_search(diff, config_xpath); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - changed = TRUE; - } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - - return changed; - } -diff --git a/lib/cluster/Makefile.am b/lib/cluster/Makefile.am -index a97cfee..a5a70ff 100644 ---- a/lib/cluster/Makefile.am -+++ b/lib/cluster/Makefile.am -@@ -28,7 +28,7 @@ header_HEADERS = - lib_LTLIBRARIES = libcrmcluster.la - - libcrmcluster_la_SOURCES = cluster.c membership.c --libcrmcluster_la_LDFLAGS = -version-info 3:0:1 $(CLUSTERLIBS) -+libcrmcluster_la_LDFLAGS = -version-info 4:0:0 $(CLUSTERLIBS) - libcrmcluster_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la - libcrmcluster_la_DEPENDENCIES = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la - -diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c -index e5b85ea..9538816 100644 ---- a/lib/cluster/cluster.c -+++ b/lib/cluster/cluster.c -@@ -40,11 +40,8 @@ CRM_TRACE_INIT_DATA(cluster); - void *hb_library = NULL; - #endif - --static GHashTable *crm_uuid_cache = NULL; --static GHashTable *crm_uname_cache = NULL; -- - static char * --get_heartbeat_uuid(uint32_t unused, const char *uname) -+get_heartbeat_uuid(const char *uname) - { - char *uuid_calc = NULL; - -@@ -55,6 +52,8 @@ get_heartbeat_uuid(uint32_t unused, const char *uname) - if (heartbeat_cluster == NULL) { - crm_warn("No connection to heartbeat, using uuid=uname"); - return NULL; -+ } else if(uname == NULL) { -+ return NULL; - } - - if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) == -@@ -104,87 +103,62 @@ get_corosync_id(int id, const char *uuid) - } - - char * --get_corosync_uuid(uint32_t id, const char *uname) -+get_corosync_uuid(crm_node_t *node) - { -- if (!uname_is_uuid() && is_corosync_cluster()) { -- if (id <= 0) { -- /* Try the membership cache... 
*/ -- crm_node_t *node = g_hash_table_lookup(crm_peer_cache, uname); -- -- if (node != NULL) { -- id = node->id; -- } -- } -+ if(node == NULL) { -+ return NULL; - -- if (id > 0) { -+ } else if (!uname_is_uuid() && is_corosync_cluster()) { -+ if (node->id > 0) { - int len = 32; - char *buffer = NULL; - - buffer = calloc(1, (len + 1)); - if (buffer != NULL) { -- snprintf(buffer, len, "%u", id); -+ snprintf(buffer, len, "%u", node->id); - } - - return buffer; - - } else { -- crm_warn("Node %s is not yet known by corosync", uname); -+ crm_info("Node %s is not yet known by corosync", node->uname); - } - -- } else if (uname != NULL) { -- return strdup(uname); -+ } else if (node->uname != NULL) { -+ return strdup(node->uname); - } - - return NULL; - } - --void --set_node_uuid(const char *uname, const char *uuid) --{ -- CRM_CHECK(uuid != NULL, return); -- CRM_CHECK(uname != NULL, return); -- -- if (crm_uuid_cache == NULL) { -- crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, -- g_hash_destroy_str, g_hash_destroy_str); -- } -- -- g_hash_table_insert(crm_uuid_cache, strdup(uname), strdup(uuid)); --} -- - const char * --get_node_uuid(uint32_t id, const char *uname) -+crm_peer_uuid(crm_node_t *peer) - { - char *uuid = NULL; - enum cluster_type_e type = get_cluster_type(); - -- if (crm_uuid_cache == NULL) { -- crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, -- g_hash_destroy_str, g_hash_destroy_str); -- } -- - /* avoid blocking heartbeat calls where possible */ -- if (uname) { -- uuid = g_hash_table_lookup(crm_uuid_cache, uname); -- } -- if (uuid != NULL) { -- return uuid; -+ if(peer == NULL) { -+ return NULL; -+ -+ } else if (peer->uuid) { -+ return peer->uuid; - } - - switch (type) { - case pcmk_cluster_corosync: -- uuid = get_corosync_uuid(id, uname); -+ uuid = get_corosync_uuid(peer); - break; - - case pcmk_cluster_cman: - case pcmk_cluster_classic_ais: -- if (uname) { -- uuid = strdup(uname); -+ if (peer->uname) { -+ uuid = strdup(peer->uname); - } - break; - - case pcmk_cluster_heartbeat: -- uuid = get_heartbeat_uuid(id, uname); -+ uuid = get_heartbeat_uuid(peer->uname); - break; - - case pcmk_cluster_unknown: -@@ -193,18 +167,8 @@ get_node_uuid(uint32_t id, const char *uname) - break; - } - -- if (uuid == NULL) { -- return NULL; -- } -- -- if (uname) { -- g_hash_table_insert(crm_uuid_cache, strdup(uname), uuid); -- return g_hash_table_lookup(crm_uuid_cache, uname); -- } -- -- /* Memory leak! */ -- CRM_LOG_ASSERT(uuid != NULL); -- return uuid; -+ peer->uuid = uuid; -+ return peer->uuid; - } - - gboolean -@@ -321,73 +285,15 @@ send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode - return FALSE; - } - --void --empty_uuid_cache(void) --{ -- if (crm_uuid_cache != NULL) { -- g_hash_table_destroy(crm_uuid_cache); -- crm_uuid_cache = NULL; -- } --} -- --void --unget_uuid(const char *uname) --{ -- if (crm_uuid_cache == NULL) { -- return; -- } -- g_hash_table_remove(crm_uuid_cache, uname); --} -- - const char * --get_uuid(const char *uname) --{ -- return get_node_uuid(0, uname); --} -- --char * - get_local_node_name(void) - { -- char *name = NULL; -- enum cluster_type_e stack = get_cluster_type(); -- -- switch (stack) { -- --#if SUPPORT_CMAN -- case pcmk_cluster_cman: -- name = cman_node_name(0 /* AKA. 
CMAN_NODEID_US */ ); -- break; --#endif -+ static char *name = NULL; - --#if SUPPORT_COROSYNC --# if !SUPPORT_PLUGIN -- case pcmk_cluster_corosync: -- name = corosync_node_name(0, 0); -- break; --# endif --#endif -- case pcmk_cluster_heartbeat: -- case pcmk_cluster_classic_ais: -- break; -- default: -- crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); -- } -- -- if (name == NULL) { -- struct utsname res; -- int rc = uname(&res); -- -- if (rc == 0) { -- crm_notice("Defaulting to uname -n for the local %s node name", -- name_for_cluster_type(stack)); -- name = strdup(res.nodename); -- } -- } -- -- if (name == NULL) { -- crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack)); -- crm_exit(100); -+ if(name) { -+ return name; - } -+ name = get_node_name(0); - return name; - } - -@@ -423,6 +329,22 @@ get_node_name(uint32_t nodeid) - crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); - } - -+ if(name == NULL && nodeid == 0) { -+ struct utsname res; -+ int rc = uname(&res); -+ -+ if (rc == 0) { -+ crm_notice("Defaulting to uname -n for the local %s node name", -+ name_for_cluster_type(stack)); -+ name = strdup(res.nodename); -+ } -+ -+ if (name == NULL) { -+ crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack)); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+ } -+ - if (name == NULL) { - crm_notice("Could not obtain a node name for %s nodeid %u", - name_for_cluster_type(stack), nodeid); -@@ -432,36 +354,43 @@ get_node_name(uint32_t nodeid) - - /* Only used by update_failcount() in te_utils.c */ - const char * --get_uname(const char *uuid) -+crm_peer_uname(const char *uuid) - { -- char *uname = NULL; -- -- if (crm_uname_cache == NULL) { -- crm_uname_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, -- g_hash_destroy_str, g_hash_destroy_str); -- } -+ GHashTableIter iter; -+ crm_node_t *node = NULL; - - CRM_CHECK(uuid != NULL, return NULL); - - /* avoid blocking calls where possible */ -- uname = g_hash_table_lookup(crm_uname_cache, uuid); -- if (uname != NULL) { -- crm_trace("%s = %s (cached)", uuid, uname); -- return uname; -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->uuid && strcasecmp(node->uuid, uuid) == 0) { -+ if(node->uname) { -+ return node->uname; -+ } -+ break; -+ } - } -+ - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { -- if (!uname_is_uuid() && is_corosync_cluster()) { -+ if (uname_is_uuid() == FALSE && is_corosync_cluster()) { - uint32_t id = crm_int_helper(uuid, NULL); -- crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); - -- if (node && node->uname) { -- uname = strdup(node->uname); -- } -+ node = crm_get_peer(id, NULL); - - } else { -- uname = strdup(uuid); -+ node = crm_get_peer(0, uuid); -+ } -+ -+ if (node) { -+ crm_info("Setting uuid for node %s[%u] to '%s'", node->uname, node->id, uuid); -+ node->uuid = strdup(uuid); -+ if(node->uname) { -+ return node->uname; -+ } - } -+ return NULL; - } - #endif - -@@ -470,34 +399,41 @@ get_uname(const char *uuid) - if (heartbeat_cluster != NULL) { - cl_uuid_t uuid_raw; - char *uuid_copy = strdup(uuid); -+ char *uname = malloc(MAX_NAME); - - cl_uuid_parse(uuid_copy, &uuid_raw); -- uname = malloc(MAX_NAME); - - if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname, - MAX_NAME) == HA_FAIL) { - crm_err("Could not calculate uname for %s", uuid); -- free(uuid_copy); -- free(uname); -- uname 
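get_local_node_name() is reduced above to a memoized wrapper around get_node_name(0), and the uname(2) fallback moves into get_node_name() so every caller inherits it. A self-contained sketch of that fallback, assuming only POSIX:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/utsname.h>

    /* Fall back to the kernel's node name (what "uname -n" prints) when
     * the cluster stack cannot supply one; NULL only if uname(2) fails. */
    static char *
    local_node_name(void)
    {
        static char *name = NULL;   /* resolved once, then reused */
        struct utsname res;

        if (name != NULL) {
            return name;
        }
        if (uname(&res) == 0) {
            name = strdup(res.nodename);
        }
        return name;
    }

    int
    main(void)
    {
        const char *n = local_node_name();

        printf("local node name: %s\n", n ? n : "(unknown)");
        return 0;
    }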
= NULL; -+ } else { -+ node = crm_get_peer(0, uname); -+ } -+ -+ free(uuid_copy); -+ free(uname); -+ } -+ -+ if (node) { -+ crm_info("Setting uuid for node %s to '%s'", node->uname, uuid); -+ node->uuid = strdup(uuid); -+ if(node->uname) { -+ return node->uname; - } - } -+ return NULL; - } - #endif - -- if (uname) { -- crm_trace("Storing %s = %s", uuid, uname); -- g_hash_table_insert(crm_uname_cache, strdup(uuid), uname); -- } -- return uname; -+ return NULL; - } - - void --set_uuid(xmlNode * node, const char *attr, const char *uname) -+set_uuid(xmlNode *xml, const char *attr, crm_node_t *node) - { -- const char *uuid_calc = get_uuid(uname); -+ const char *uuid_calc = crm_peer_uuid(node); - -- crm_xml_add(node, attr, uuid_calc); -+ crm_xml_add(xml, attr, uuid_calc); - return; - } - -@@ -568,7 +504,7 @@ get_cluster_type(void) - - hb = (*new_cluster) ("heartbeat"); - -- crm_debug("Signing in with Heartbeat"); -+ crm_debug("Testing with Heartbeat"); - if (hb->llc_ops->signon(hb, crm_system_name) == HA_OK) { - hb->llc_ops->signoff(hb, FALSE); - -@@ -582,6 +518,7 @@ get_cluster_type(void) - #if SUPPORT_COROSYNC - /* If nothing is defined in the environment, try corosync (if supported) */ - if(cluster == NULL) { -+ crm_debug("Testing with Corosync"); - cluster_type = find_corosync_variant(); - if (cluster_type != pcmk_cluster_unknown) { - detected = TRUE; -@@ -615,6 +552,7 @@ get_cluster_type(void) - - } else { - cluster_type = pcmk_cluster_invalid; -+ goto done; /* Keep the compiler happy when no stacks are supported */ - } - - done: -@@ -624,7 +562,7 @@ get_cluster_type(void) - } else if (cluster_type == pcmk_cluster_invalid) { - crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.", - cluster); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - - } else { - crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type)); -diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c -index 02428cb..83a0c78 100644 ---- a/lib/cluster/corosync.c -+++ b/lib/cluster/corosync.c -@@ -52,10 +52,6 @@ quorum_handle_t pcmk_quorum_handle = 0; - - gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; - --static char *pcmk_uname = NULL; --static int pcmk_uname_len = 0; --static uint32_t pcmk_nodeid = 0; -- - #define cs_repeat(counter, max, code) do { \ - code; \ - if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ -@@ -67,6 +63,40 @@ static uint32_t pcmk_nodeid = 0; - } \ - } while(counter < max) - -+static uint32_t get_local_nodeid(cpg_handle_t handle) -+{ -+ int rc = CS_OK; -+ int retries = 0; -+ static uint32_t local_nodeid = 0; -+ cpg_handle_t local_handle = handle; -+ cpg_callbacks_t cb = { }; -+ -+ if(local_nodeid != 0) { -+ return local_nodeid; -+ } -+ -+ if(handle == 0) { -+ crm_trace("Creating connection"); -+ cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); -+ } -+ -+ if (rc == CS_OK) { -+ retries = 0; -+ crm_trace("Performing lookup"); -+ cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); -+ } -+ -+ if (rc != CS_OK) { -+ crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); -+ } -+ if(handle == 0) { -+ crm_trace("Closing connection"); -+ cpg_finalize(local_handle); -+ } -+ crm_debug("Local nodeid is %u", local_nodeid); -+ return local_nodeid; -+} -+ - /* - * CFG functionality stolen from node_name() in corosync-quorumtool.c - * This resolves the first address assigned to a node and returns the name or IP 
address. -@@ -78,30 +108,11 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) - int rc = CS_OK; - int retries = 0; - char *name = NULL; -- - cmap_handle_t local_handle = 0; - - /* nodeid == 0 == CMAN_NODEID_US */ -- if (nodeid == 0 && pcmk_nodeid) { -- nodeid = pcmk_nodeid; -- -- } else if (nodeid == 0) { -- /* Look it up */ -- int rc = -1; -- int retries = 0; -- cpg_handle_t handle = 0; -- cpg_callbacks_t cb = { }; -- -- cs_repeat(retries, 5, rc = cpg_initialize(&handle, &cb)); -- if (rc == CS_OK) { -- retries = 0; -- cs_repeat(retries, 5, rc = cpg_local_get(handle, &pcmk_nodeid)); -- } -- -- if (rc != CS_OK) { -- crm_err("Could not get local node id from the CPG API: %d", rc); -- } -- cpg_finalize(handle); -+ if (nodeid == 0) { -+ nodeid = get_local_nodeid(0); - } - - if (cmap_handle == 0 && local_handle == 0) { -@@ -221,19 +232,6 @@ text2msg_type(const char *text) - return type; - } - --static char *ais_cluster_name = NULL; -- --gboolean --crm_get_cluster_name(char **cname) --{ -- CRM_CHECK(cname != NULL, return FALSE); -- if (ais_cluster_name) { -- *cname = strdup(ais_cluster_name); -- return TRUE; -- } -- return FALSE; --} -- - GListPtr cs_message_queue = NULL; - int cs_message_timer = 0; - -@@ -247,6 +245,7 @@ crm_cs_flush_cb(gpointer data) - return FALSE; - } - -+#define CS_SEND_MAX 200 - static ssize_t - crm_cs_flush(void) - { -@@ -256,16 +255,25 @@ crm_cs_flush(void) - static unsigned int last_sent = 0; - - if (pcmk_cpg_handle == 0) { -+ crm_trace("Connection is dead"); - return pcmk_ok; -+ } -+ -+ queue_len = g_list_length(cs_message_queue); -+ if ((queue_len % 1000) == 0 && queue_len > 1) { -+ crm_err("CPG queue has grown to %d", queue_len); -+ -+ } else if (queue_len == CS_SEND_MAX) { -+ crm_warn("CPG queue has grown to %d", queue_len); -+ } - -- } else if (cs_message_timer) { -+ if (cs_message_timer) { - /* There is already a timer, wait until it goes off */ - crm_trace("Timer active %d", cs_message_timer); - return pcmk_ok; - } - -- queue_len = g_list_length(cs_message_queue); -- while (cs_message_queue && sent < 100) { -+ while (cs_message_queue && sent < CS_SEND_MAX) { - AIS_Message *header = NULL; - struct iovec *iov = cs_message_queue->data; - -@@ -294,19 +302,20 @@ crm_cs_flush(void) - - queue_len -= sent; - if (sent > 1 || cs_message_queue) { -- crm_info("Sent %d CPG messages (%d remaining, last=%u): %s", -- sent, queue_len, last_sent, ais_error2text(rc)); -+ crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); - } else { -- crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s", -- sent, queue_len, last_sent, ais_error2text(rc)); -+ crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); - } - - if (cs_message_queue) { -- if (queue_len % 100 == 0 && queue_len > 99) { -- crm_err("CPG queue has grown to %d", queue_len); -+ uint32_t delay_ms = 100; -+ if(rc != CS_OK) { -+ /* Proportionally more if sending failed but cap at 1s */ -+ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); - } -- -- cs_message_timer = g_timeout_add(1000 + 100 * queue_len, crm_cs_flush_cb, NULL); -+ cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, NULL); - } - - return rc; -@@ -318,6 +327,8 @@ send_ais_text(int class, const char *data, - { - static int msg_id = 0; - static int local_pid = 0; -+ static int local_name_len = 0; -+ static const char *local_name = NULL; - - char *target = NULL; - struct iovec *iov; 
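The new get_local_nodeid() memoizes the CPG node id and opens a private CPG connection only when the caller cannot lend one. A reduced sketch against the public corosync CPG API; the cs_repeat() retry wrapper and the logging are omitted here:

    #include <corosync/cpg.h>

    /* Resolve the local node id, borrowing 'handle' when the caller
     * already has a CPG connection, otherwise opening a temporary one. */
    static uint32_t
    local_nodeid(cpg_handle_t handle)
    {
        static uint32_t cached = 0;        /* memoized across calls */
        cpg_handle_t local_handle = handle;
        cpg_callbacks_t cb = { 0 };        /* no callbacks needed for a lookup */
        int opened = 0;
        cs_error_t rc;

        if (cached != 0) {
            return cached;
        }
        if (handle == 0) {
            if (cpg_initialize(&local_handle, &cb) != CS_OK) {
                return 0;
            }
            opened = 1;
        }
        rc = cpg_local_get(local_handle, &cached);
        if (opened) {
            cpg_finalize(local_handle);    /* only close what we opened */
        }
        return (rc == CS_OK) ? cached : 0;
    }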
-@@ -330,6 +341,13 @@ send_ais_text(int class, const char *data, - - CRM_CHECK(dest != crm_msg_ais, return FALSE); - -+ if(local_name == NULL) { -+ local_name = get_local_node_name(); -+ } -+ if(local_name_len == 0 && local_name) { -+ local_name_len = strlen(local_name); -+ } -+ - if (data == NULL) { - data = ""; - } -@@ -368,9 +386,9 @@ send_ais_text(int class, const char *data, - ais_msg->sender.id = 0; - ais_msg->sender.type = sender; - ais_msg->sender.pid = local_pid; -- ais_msg->sender.size = pcmk_uname_len; -+ ais_msg->sender.size = local_name_len; - memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); -+ memcpy(ais_msg->sender.uname, local_name, ais_msg->sender.size); - - ais_msg->size = 1 + strlen(data); - ais_msg->header.size = sizeof(AIS_Message) + ais_msg->size; -@@ -571,16 +589,20 @@ pcmk_cpg_deliver(cpg_handle_t handle, - uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { - AIS_Message *ais_msg = (AIS_Message *) msg; -+ uint32_t local_nodeid = get_local_nodeid(handle); -+ const char *local_name = get_local_node_name(); - - if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { - crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); - return; - -- } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { -+ } else if (ais_msg->host.id != 0 && (local_nodeid != ais_msg->host.id)) { - /* Not for us */ -+ crm_trace("Not for us: %u != %u", ais_msg->host.id, local_nodeid); - return; -- } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) { -+ } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, local_name)) { - /* Not for us */ -+ crm_trace("Not for us: %s != %s", ais_msg->host.uname, local_name); - return; - } - -@@ -615,6 +637,7 @@ pcmk_cpg_membership(cpg_handle_t handle, - int i; - gboolean found = FALSE; - static int counter = 0; -+ uint32_t local_nodeid = get_local_nodeid(handle); - - for (i = 0; i < left_list_entries; i++) { - crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); -@@ -631,14 +654,38 @@ pcmk_cpg_membership(cpg_handle_t handle, - crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); - - crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid); -+ -+ /* Anyone that is sending us CPG messages must also be a _CPG_ member. -+ * But its _not_ safe to assume its in the quorum membership. 
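The reworked crm_cs_flush() drains at most CS_SEND_MAX (200) messages per pass, warns the first time the backlog reaches that size, escalates to an error at every multiple of 1000, and re-arms its timer with a delay that only grows when the last send failed, capped at one second. A small standalone model of that retry-delay rule with a few worked values (QB_MIN from libqb is replaced by a local macro):

    #include <stdio.h>

    #define CS_SEND_MAX 200
    #define MIN_OF(a, b) ((a) < (b) ? (a) : (b))   /* stands in for QB_MIN */

    /* Delay before the next flush attempt: a fast fixed retry while sends
     * succeed, proportional back-off (capped at 1s) once they fail. */
    static unsigned int
    flush_delay_ms(int send_failed, int queue_len)
    {
        if (!send_failed) {
            return 100;
        }
        return MIN_OF(1000u, (unsigned int)(CS_SEND_MAX + 10 * queue_len));
    }

    int
    main(void)
    {
        /* 20 queued messages after a failure: 200 + 200 = 400ms;
         * 200 queued: 200 + 2000 = 2200ms, capped to 1000ms */
        printf("%u\n", flush_delay_ms(0, 20));    /* 100 */
        printf("%u\n", flush_delay_ms(1, 20));    /* 400 */
        printf("%u\n", flush_delay_ms(1, 200));   /* 1000 */
        return 0;
    }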
-+ * We may have just found out its dead and are processing the last couple of messages it sent -+ */ - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- if (pcmk_nodeid == member_list[i].nodeid) { -+ if(peer && peer->state && crm_is_peer_active(peer) == FALSE) { -+ time_t now = time(NULL); -+ -+ /* Co-opt the otherwise unused votes field */ -+ if(peer->votes == 0) { -+ peer->votes = now; -+ -+ } else if(now > (60 + peer->votes)) { -+ /* On the otherhand, if we're still getting messages, at a certain point -+ * we need to acknowledge our internal cache is probably wrong -+ * -+ * Set the threshold to 1 minute -+ */ -+ crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id); -+ crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); -+ peer->votes = 0; -+ } -+ } -+ -+ if (local_nodeid == member_list[i].nodeid) { - found = TRUE; - } - } - - if (!found) { -- crm_err("We're not part of CPG group %s anymore!", groupName->value); -+ crm_err("We're not part of CPG group '%s' anymore!", groupName->value); - cpg_evicted = TRUE; - } - -@@ -657,6 +704,7 @@ init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char - int rc = -1; - int fd = 0; - int retries = 0; -+ uint32_t id = 0; - crm_node_t *peer = NULL; - - struct mainloop_fd_callbacks cpg_fd_callbacks = { -@@ -674,11 +722,13 @@ init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char - goto bail; - } - -- retries = 0; -- cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); -- if (rc != CS_OK) { -+ id = get_local_nodeid(pcmk_cpg_handle); -+ if (id == 0) { - crm_err("Could not get local node id from the CPG API"); - goto bail; -+ -+ } else if(nodeid) { -+ *nodeid = id; - } - - retries = 0; -@@ -702,7 +752,7 @@ init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char - return FALSE; - } - -- peer = crm_get_peer(pcmk_nodeid, pcmk_uname); -+ peer = crm_get_peer(id, NULL); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); - return TRUE; - } -@@ -722,32 +772,13 @@ pcmk_quorum_dispatch(gpointer user_data) - } - - static void --corosync_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data) --{ -- int *seq = user_data; -- crm_node_t *node = value; -- -- if (node->last_seen != *seq && node->state -- && crm_str_eq(CRM_NODE_LOST, node->state, TRUE) == FALSE) { -- crm_notice("Node %d/%s was not seen in the previous transition", node->id, node->uname); -- crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0); -- } --} -- --static void --corosync_mark_node_unseen(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -- -- node->last_seen = 0; --} -- --static void - pcmk_quorum_notification(quorum_handle_t handle, - uint32_t quorate, - uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) - { - int i; -+ GHashTableIter iter; -+ crm_node_t *node = NULL; - static gboolean init_phase = TRUE; - - if (quorate != crm_have_quorum) { -@@ -766,14 +797,17 @@ pcmk_quorum_notification(quorum_handle_t handle, - } - - init_phase = FALSE; -- g_hash_table_foreach(crm_peer_cache, corosync_mark_node_unseen, NULL); -+ -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ node->last_seen = 0; -+ } - - for (i = 0; i < view_list_entries; i++) { - uint32_t id = view_list[i]; - char *name = NULL; -- crm_node_t *node = NULL; - -- crm_debug("Member[%d] %d ", i, 
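pcmk_cpg_membership() above handles peers that the cache believes are dead but that keep appearing in CPG views: the otherwise unused votes field is co-opted to record when the contradiction was first seen, and only after a full minute is the cached state forced back to member. A standalone sketch of that debounce (the real code goes through crm_update_peer_state(); the struct here is simplified):

    #include <stdio.h>
    #include <time.h>

    typedef struct {
        unsigned int id;
        int active;        /* what our cache believes */
        time_t votes;      /* co-opted: when we first saw the contradiction */
    } peer_t;

    /* Called whenever CPG reports 'peer' as a member. Returns 1 when the
     * cached "dead" verdict should be overturned. */
    static int
    seen_in_cpg(peer_t *peer, time_t now)
    {
        if (peer->active) {
            return 0;                 /* nothing contradictory to track */
        }
        if (peer->votes == 0) {
            peer->votes = now;        /* first contradiction: start the clock */
        } else if (now > peer->votes + 60) {
            peer->votes = 0;          /* a minute of sightings: give in */
            peer->active = 1;
            return 1;
        }
        return 0;
    }

    int
    main(void)
    {
        peer_t p = { 2, 0, 0 };
        time_t t0 = time(NULL);

        seen_in_cpg(&p, t0);                    /* starts the clock */
        if (seen_in_cpg(&p, t0 + 90)) {         /* >60s later: overturned */
            printf("node %u marked back online\n", p.id);
        }
        return 0;
    }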
id); -+ crm_debug("Member[%d] %u ", i, id); - - node = crm_get_peer(id, NULL); - if (node->uname == NULL) { -@@ -787,7 +821,14 @@ pcmk_quorum_notification(quorum_handle_t handle, - } - - crm_trace("Reaping unseen nodes..."); -- g_hash_table_foreach(crm_peer_cache, corosync_mark_unseen_peer_dead, &ring_id); -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if (node->last_seen != ring_id && node->state) { -+ crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0); -+ } else if (node->last_seen != ring_id) { -+ crm_info("State of node %s[%u] is still unknown", node->uname, node->id); -+ } -+ } - - if (quorum_app_callback) { - quorum_app_callback(ring_id, quorate); -@@ -887,6 +928,8 @@ init_cs_connection(crm_cluster_t * cluster) - gboolean - init_cs_connection_once(crm_cluster_t * cluster) - { -+ const char *uuid = NULL; -+ crm_node_t *peer = NULL; - enum cluster_type_e stack = get_cluster_type(); - - crm_peer_init(); -@@ -897,23 +940,30 @@ init_cs_connection_once(crm_cluster_t * cluster) - return FALSE; - } - -- if (init_cpg_connection(cluster->cs_dispatch, cluster->destroy, &pcmk_nodeid) == FALSE) { -+ if (init_cpg_connection(cluster->cs_dispatch, cluster->destroy, NULL) == FALSE) { - return FALSE; - } -- pcmk_uname = get_local_node_name(); - crm_info("Connection to '%s': established", name_for_cluster_type(stack)); - -- CRM_ASSERT(pcmk_uname != NULL); -- pcmk_uname_len = strlen(pcmk_uname); -+ cluster->nodeid = get_local_nodeid(0); -+ if(cluster->nodeid == 0) { -+ crm_err("Could not establish local nodeid"); -+ return FALSE; -+ } - -- if (pcmk_nodeid != 0) { -- /* Ensure the local node always exists */ -- crm_get_peer(pcmk_nodeid, pcmk_uname); -+ cluster->uname = get_node_name(0); -+ if(cluster->uname == NULL) { -+ crm_err("Could not establish local node name"); -+ return FALSE; - } - -- cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); -- cluster->uname = strdup(pcmk_uname); -- cluster->nodeid = pcmk_nodeid; -+ /* Ensure the local node always exists */ -+ peer = crm_get_peer(cluster->nodeid, cluster->uname); -+ uuid = get_corosync_uuid(peer); -+ -+ if(uuid) { -+ cluster->uuid = strdup(uuid); -+ } - - return TRUE; - } -@@ -1069,12 +1119,18 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml - - name = corosync_node_name(cmap_handle, nodeid); - if (name != NULL) { -- crm_node_t *node = g_hash_table_lookup(crm_peer_cache, name); -- -- if (node && node->id != nodeid) { -- crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, -- nodeid, name); -- crm_exit(100); -+ GHashTableIter iter; -+ crm_node_t *node = NULL; -+ -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->uname && strcasecmp(node->uname, name) == 0) { -+ if (node && node->id && node->id != nodeid) { -+ crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, -+ nodeid, name); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+ } - } - } - -diff --git a/lib/cluster/heartbeat.c b/lib/cluster/heartbeat.c -index 2dda61b..a801c8e 100644 ---- a/lib/cluster/heartbeat.c -+++ b/lib/cluster/heartbeat.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your 
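pcmk_quorum_notification() now does its own mark-and-sweep over crm_peer_cache: every entry's last_seen is cleared, members of the new view are restamped (in context not shown in this hunk), and a final pass declares anything left unstamped lost. A compact GLib sketch of the sweep, with a simplified peer struct:

    #include <glib.h>
    #include <stdio.h>

    typedef struct {
        char *uname;
        int lost;
        unsigned long long last_seen;   /* ring id of the last view it was in */
    } peer_t;

    /* Sweep pass: anything not stamped with the current ring id was absent
     * from the membership view and is declared lost. */
    static void
    reap_unseen(GHashTable *peers, unsigned long long ring_id)
    {
        GHashTableIter iter;
        peer_t *node = NULL;

        g_hash_table_iter_init(&iter, peers);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (node->last_seen != ring_id) {
                node->lost = 1;
                printf("node %s lost in ring %llu\n", node->uname, ring_id);
            }
        }
    }

    int
    main(void)
    {
        GHashTable *peers = g_hash_table_new(g_str_hash, g_str_equal);
        peer_t a = { "node1", 0, 7 };   /* seen in ring 7 */
        peer_t b = { "node2", 0, 6 };   /* last seen one ring ago */

        g_hash_table_insert(peers, a.uname, &a);
        g_hash_table_insert(peers, b.uname, &b);
        reap_unseen(peers, 7);          /* only node2 is reaped */
        g_hash_table_destroy(peers);
        return 0;
    }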
option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -110,7 +110,7 @@ convert_ha_field(xmlNode * parent, void *msg_v, int lpc) - memset(uncompressed, 0, size); - used = size - 1; /* always leave room for a trailing '\0' - * BZ2_bzBuffToBuffDecompress wont say anything if -- * the uncompressed data is exactly 'size' bytes -+ * the uncompressed data is exactly 'size' bytes - */ - - rc = BZ2_bzBuffToBuffDecompress(uncompressed, &used, compressed, orig_len, 1, 0); -@@ -300,7 +300,7 @@ convert_xml_child(HA_Message * msg, xmlNode * xml) - ); - crm_debug("rc=%d, used=%d", rc, used); - if (rc != BZ_OK) { -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - crm_debug("Original %s, decompressed %s", buffer, uncompressed); - free(uncompressed); -@@ -368,8 +368,11 @@ crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state - const char *uuid = NULL; - - CRM_CHECK(oc->m_array[offset].node_uname != NULL, return NULL); -- uuid = get_uuid(oc->m_array[offset].node_uname); -- peer = crm_update_peer(__FUNCTION__, oc->m_array[offset].node_id, -+ -+ peer = crm_get_peer(0, oc->m_array[offset].node_uname); -+ uuid = crm_peer_uuid(peer); -+ -+ crm_update_peer(__FUNCTION__, oc->m_array[offset].node_id, - oc->m_array[offset].node_born_on, seq, -1, 0, - uuid, oc->m_array[offset].node_uname, NULL, state); - -@@ -402,10 +405,13 @@ send_ha_message(ll_cluster_t * hb_conn, xmlNode * xml, const char *node, gboolea - all_is_good = FALSE; - - } else if (node != NULL) { -- if (hb_conn->llc_ops->send_ordered_nodemsg(hb_conn, msg, node) != HA_OK) { -+ char *host_lowercase = g_ascii_strdown(node, -1); -+ -+ if (hb_conn->llc_ops->send_ordered_nodemsg(hb_conn, msg, host_lowercase) != HA_OK) { - all_is_good = FALSE; - crm_err("Send failed"); - } -+ free(host_lowercase); - - } else if (force_ordered) { - if (hb_conn->llc_ops->send_ordered_clustermsg(hb_conn, msg) != HA_OK) { -@@ -482,6 +488,7 @@ ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data) - gboolean - register_heartbeat_conn(crm_cluster_t * cluster) - { -+ crm_node_t *peer = NULL; - const char *const_uuid = NULL; - const char *const_uname = NULL; - -@@ -516,7 +523,9 @@ register_heartbeat_conn(crm_cluster_t * cluster) - const_uname = cluster->hb_conn->llc_ops->get_mynodeid(cluster->hb_conn); - CRM_CHECK(const_uname != NULL, return FALSE); - -- const_uuid = get_uuid(const_uname); -+ peer = crm_get_peer(0, const_uname); -+ const_uuid = crm_peer_uuid(peer); -+ - CRM_CHECK(const_uuid != NULL, return FALSE); - - crm_info("Hostname: %s", const_uname); -@@ -578,6 +587,7 @@ heartbeat_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xm - - do { - xmlNode *node = NULL; -+ crm_node_t *peer = NULL; - const char *ha_node_type = NULL; - const char *ha_node_uuid = NULL; - -@@ -592,7 +602,9 @@ heartbeat_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xm - continue; - } - -- ha_node_uuid = get_uuid(ha_node); -+ peer = crm_get_peer(0, ha_node); -+ ha_node_uuid = crm_peer_uuid(peer); -+ - if (ha_node_uuid == NULL) { - crm_warn("Node %s: no uuid 
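send_ha_message() above lowercases the destination with g_ascii_strdown() before handing it to heartbeat, presumably because heartbeat tracks node names in lower case. A trivial sketch of the normalization (GLib allocates the copy, so it must be released):

    #include <glib.h>
    #include <stdio.h>

    /* Normalize a node name before using it as a messaging destination.
     * g_ascii_strdown() returns a new string; release it with g_free(). */
    static void
    send_to(const char *node)
    {
        gchar *host_lowercase = g_ascii_strdown(node, -1);

        printf("sending to %s\n", host_lowercase);
        g_free(host_lowercase);
    }

    int
    main(void)
    {
        send_to("Node-ONE");   /* prints "sending to node-one" */
        return 0;
    }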
found", ha_node); - continue; -diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c -index 05394d9..14749e4 100644 ---- a/lib/cluster/legacy.c -+++ b/lib/cluster/legacy.c -@@ -48,9 +48,6 @@ struct cpg_name pcmk_cpg_group = { - cman_handle_t pcmk_cman_handle = NULL; - #endif - --static char *pcmk_uname = NULL; --static int pcmk_uname_len = 0; --static uint32_t pcmk_nodeid = 0; - int ais_membership_timer = 0; - gboolean ais_membership_force = FALSE; - int ais_dispatch(gpointer user_data); -@@ -101,7 +98,7 @@ text2msg_type(const char *text) - */ - int scan_rc = sscanf(text, "%d", &type); - -- if (scan_rc != 1) { -+ if (scan_rc != 1 || type <= crm_msg_stonith_ng) { - /* Ensure its sane */ - type = crm_msg_none; - } -@@ -140,10 +137,9 @@ int ais_fd_async = -1; /* never send messages via this channel */ - void *ais_ipc_ctx = NULL; - - hdb_handle_t ais_ipc_handle = 0; --static char *ais_cluster_name = NULL; - --gboolean --get_ais_nodeid(uint32_t * id, char **uname) -+static gboolean -+get_ais_details(uint32_t * id, char **uname) - { - struct iovec iov; - int retries = 0; -@@ -151,6 +147,15 @@ get_ais_nodeid(uint32_t * id, char **uname) - cs_ipc_header_response_t header; - struct crm_ais_nodeid_resp_s answer; - -+ static uint32_t local_id = 0; -+ static char *local_uname = NULL; -+ -+ if(local_id) { -+ if(id) *id = local_id; -+ if(uname) *uname = strdup(local_uname); -+ return TRUE; -+ } -+ - header.error = CS_OK; - header.id = crm_class_nodeid; - header.size = sizeof(cs_ipc_header_response_t); -@@ -190,47 +195,225 @@ get_ais_nodeid(uint32_t * id, char **uname) - - crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); - -- *id = answer.id; -- *uname = strdup(answer.uname); -- ais_cluster_name = strdup(answer.cname); -+ local_id = answer.id; -+ local_uname = strdup(answer.uname); - -+ if(id) *id = local_id; -+ if(uname) *uname = strdup(local_uname); - return TRUE; - } - --gboolean --crm_get_cluster_name(char **cname) -+static uint32_t get_local_nodeid(cpg_handle_t handle) - { -- CRM_CHECK(cname != NULL, return FALSE); -- if (ais_cluster_name) { -- *cname = strdup(ais_cluster_name); -- return TRUE; -+ int rc = CS_OK; -+ int retries = 0; -+ static uint32_t local_nodeid = 0; -+ cpg_handle_t local_handle = handle; -+ cpg_callbacks_t cb = { }; -+ -+ if(local_nodeid != 0) { -+ return local_nodeid; -+ } -+ -+#if 0 -+ /* Should not be necessary */ -+ if(get_cluster_type() == pcmk_cluster_classic_ais) { -+ get_ais_details(&local_nodeid, NULL); -+ goto done; -+ } -+#endif -+ -+ if(local_handle == 0) { -+ crm_trace("Creating connection"); -+ cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); -+ } -+ -+ if (rc == CS_OK) { -+ retries = 0; -+ crm_trace("Performing lookup"); -+ cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); - } -+ -+ if (rc != CS_OK) { -+ crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); -+ } -+ -+ if(handle != local_handle) { -+ crm_trace("Closing connection %u", local_handle); -+ cpg_finalize(local_handle); -+ } -+ -+ crm_debug("Local nodeid is %u", local_nodeid); -+ return local_nodeid; -+} -+ -+GListPtr cs_message_queue = NULL; -+int cs_message_timer = 0; -+ -+static ssize_t crm_cs_flush(void); -+ -+static gboolean -+crm_cs_flush_cb(gpointer data) -+{ -+ cs_message_timer = 0; -+ crm_cs_flush(); - return FALSE; - } - -+#define CS_SEND_MAX 200 -+static ssize_t -+crm_cs_flush(void) -+{ -+ int sent = 0; -+ ssize_t rc = 0; -+ int queue_len = 0; -+ static unsigned int last_sent = 
0; -+ -+ if (pcmk_cpg_handle == 0) { -+ crm_trace("Connection is dead"); -+ return pcmk_ok; -+ } -+ -+ queue_len = g_list_length(cs_message_queue); -+ if ((queue_len % 1000) == 0 && queue_len > 1) { -+ crm_err("CPG queue has grown to %d", queue_len); -+ -+ } else if (queue_len == CS_SEND_MAX) { -+ crm_warn("CPG queue has grown to %d", queue_len); -+ } -+ -+ if (cs_message_timer) { -+ /* There is already a timer, wait until it goes off */ -+ crm_trace("Timer active %d", cs_message_timer); -+ return pcmk_ok; -+ } -+ -+ while (cs_message_queue && sent < CS_SEND_MAX) { -+ AIS_Message *header = NULL; -+ struct iovec *iov = cs_message_queue->data; -+ -+ errno = 0; -+ rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, iov, 1); -+ -+ if (rc != CS_OK) { -+ break; -+ } -+ -+ sent++; -+ header = iov->iov_base; -+ last_sent = header->id; -+ if (header->compressed_size) { -+ crm_trace("CPG message %d (%d compressed bytes) sent", -+ header->id, header->compressed_size); -+ } else { -+ crm_trace("CPG message %d (%d bytes) sent: %.200s", -+ header->id, header->size, header->data); -+ } -+ -+ cs_message_queue = g_list_remove(cs_message_queue, iov); -+ free(iov[0].iov_base); -+ free(iov); -+ } -+ -+ queue_len -= sent; -+ if (sent > 1 || cs_message_queue) { -+ crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); -+ } else { -+ crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); -+ } -+ -+ if (cs_message_queue) { -+ uint32_t delay_ms = 100; -+ if(rc != CS_OK) { -+ /* Proportionally more if sending failed but cap at 1s */ -+ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); -+ } -+ cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, NULL); -+ } -+ -+ return rc; -+} -+ -+static bool -+send_plugin_text(int class, struct iovec *iov) -+{ -+ int rc = CS_OK; -+ int retries = 0; -+ int buf_len = sizeof(cs_ipc_header_response_t); -+ char *buf = malloc(buf_len); -+ AIS_Message *ais_msg = (AIS_Message*)iov[0].iov_base; -+ cs_ipc_header_response_t *header = (cs_ipc_header_response_t *) buf; -+ -+ /* There are only 6 handlers registered to crm_lib_service in plugin.c */ -+ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); -+ return FALSE); -+ -+ do { -+ if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { -+ retries++; -+ crm_info("Peer overloaded or membership in flux:" -+ " Re-sending message (Attempt %d of 20)", retries); -+ sleep(retries); /* Proportional back off */ -+ } -+ -+ errno = 0; -+ rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, iov, 1, buf, buf_len); -+ -+ } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); -+ -+ if (rc == CS_OK) { -+ CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), -+ crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", -+ header->id, header->size, class, header->error)); -+ -+ CRM_ASSERT(buf_len >= header->size); -+ CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, -+ crm_err("Bad response id (%d) for request (%d)", header->id, -+ ais_msg->header.id)); -+ CRM_CHECK(header->error == CS_OK, rc = header->error); -+ -+ } else { -+ crm_perror(LOG_ERR, "Sending plugin message %d FAILED: %s (%d)", -+ ais_msg->id, ais_error2text(rc), rc); -+ } -+ -+ free(iov[0].iov_base); -+ free(iov); -+ free(buf); -+ -+ return (rc == CS_OK); -+} -+ - gboolean - send_ais_text(int class, const char *data, - gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) - { - static int msg_id 
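The lookup helpers in this file retry transient corosync errors through the cs_repeat() macro shown (in part) in corosync.c above: run the call, and on CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL sleep proportionally and try again up to a limit. A standalone macro of the same shape, with the corosync call stubbed so the control flow can be exercised; the real macro's middle is elided in this hunk, so this is an approximation:

    #include <stdio.h>
    #include <unistd.h>

    enum { CS_OK = 1, CS_ERR_TRY_AGAIN = 6 };   /* subset, for this sketch */

    /* Retry 'code' while it reports transient congestion, backing off an
     * extra second per attempt. Like the original, it assumes a variable
     * named rc is in scope at the call site. */
    #define cs_repeat(counter, max, code) do {              \
            code;                                           \
            if (rc == CS_ERR_TRY_AGAIN) {                   \
                (counter)++;                                \
                sleep(counter);                             \
            }                                               \
        } while (rc == CS_ERR_TRY_AGAIN && (counter) < (max))

    static int attempts = 0;

    /* Stub: fails twice with TRY_AGAIN, then succeeds */
    static int
    flaky_call(void)
    {
        return (++attempts < 3) ? CS_ERR_TRY_AGAIN : CS_OK;
    }

    int
    main(void)
    {
        int rc = CS_OK;
        int retries = 0;

        cs_repeat(retries, 5, rc = flaky_call());
        printf("rc=%d after %d retries\n", rc, retries);  /* rc=1 after 2 */
        return 0;
    }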
= 0; - static int local_pid = 0; -- enum cluster_type_e cluster_type = get_cluster_type(); -+ static int local_name_len = 0; -+ static const char *local_name = NULL; - -- int retries = 0; -- int rc = CS_OK; -- int buf_len = sizeof(cs_ipc_header_response_t); -- -- char *buf = NULL; -- struct iovec iov; -- const char *transport = "pcmk"; -- cs_ipc_header_response_t *header = NULL; -+ char *target = NULL; -+ struct iovec *iov; - AIS_Message *ais_msg = NULL; -+ enum cluster_type_e cluster_type = get_cluster_type(); - enum crm_ais_msg_types sender = text2msg_type(crm_system_name); - - /* There are only 6 handlers registered to crm_lib_service in plugin.c */ - CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); - return FALSE); - -+ CRM_CHECK(dest != crm_msg_ais, return FALSE); -+ -+ if(local_name == NULL) { -+ local_name = get_local_node_name(); -+ } -+ if(local_name_len == 0 && local_name) { -+ local_name_len = strlen(local_name); -+ } -+ - if (data == NULL) { - data = ""; - } -@@ -254,140 +437,80 @@ send_ais_text(int class, const char *data, - - if (node) { - if (node->uname) { -+ target = strdup(node->uname); - ais_msg->host.size = strlen(node->uname); - memset(ais_msg->host.uname, 0, MAX_NAME); - memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size); -+ } else { -+ target = g_strdup_printf("%u", node->id); - } - ais_msg->host.id = node->id; -+ } else { -+ target = strdup("all"); - } - - ais_msg->sender.id = 0; - ais_msg->sender.type = sender; - ais_msg->sender.pid = local_pid; -- ais_msg->sender.size = pcmk_uname_len; -+ ais_msg->sender.size = local_name_len; - memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); -+ memcpy(ais_msg->sender.uname, local_name, ais_msg->sender.size); - - ais_msg->size = 1 + strlen(data); -+ ais_msg->header.size = sizeof(AIS_Message) + ais_msg->size; - - if (ais_msg->size < CRM_BZ2_THRESHOLD) { -- failback: -- ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); -+ ais_msg = realloc(ais_msg, ais_msg->header.size); - memcpy(ais_msg->data, data, ais_msg->size); - - } else { - char *compressed = NULL; -+ unsigned int new_size = 0; - char *uncompressed = strdup(data); -- unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ - -- crm_trace("Compressing message payload"); -- compressed = malloc(len); -+ if (crm_compress_string(uncompressed, ais_msg->size, 0, &compressed, &new_size)) { - -- rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, -- 0, CRM_BZ2_WORK); -+ ais_msg->header.size = sizeof(AIS_Message) + new_size + 1; -+ ais_msg = realloc(ais_msg, ais_msg->header.size); -+ memcpy(ais_msg->data, compressed, new_size); -+ ais_msg->data[new_size] = 0; - -- free(uncompressed); -+ ais_msg->is_compressed = TRUE; -+ ais_msg->compressed_size = new_size; - -- if (rc != BZ_OK) { -- crm_err("Compression failed: %d", rc); -- free(compressed); -- goto failback; -+ } else { -+ ais_msg = realloc(ais_msg, ais_msg->header.size); -+ memcpy(ais_msg->data, data, ais_msg->size); - } - -- ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1); -- memcpy(ais_msg->data, compressed, len); -- ais_msg->data[len] = 0; -+ free(uncompressed); - free(compressed); -- -- ais_msg->is_compressed = TRUE; -- ais_msg->compressed_size = len; -- -- crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); - } - -- ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); -- -- crm_trace("Sending%s message %d to %s.%s 
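The compression rework above delegates to crm_compress_string() but keeps the old contract: on success the payload travels compressed with is_compressed/compressed_size set, on failure the raw bytes are copied as before. The deleted code shows the underlying libbz2 call; a standalone compress-or-fallback sketch on top of BZ2_bzBuffToBuffCompress() (bzip2's documented worst case is input plus 1% plus 600 bytes):

    #include <bzlib.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Try to bzip2-compress 'data'; on any failure report "not compressed"
     * so the caller can fall back to sending the raw bytes. */
    static int
    compress_or_fallback(const char *data, unsigned int len,
                         char **out, unsigned int *out_len)
    {
        unsigned int max = (unsigned int)(len * 1.01) + 600;
        char *buf = malloc(max);
        int rc;

        if (buf == NULL) {
            return 0;
        }
        *out_len = max;
        rc = BZ2_bzBuffToBuffCompress(buf, out_len, (char *) data, len,
                                      9 /* block size */, 0,
                                      30 /* default work factor */);
        if (rc != BZ_OK) {
            free(buf);
            return 0;          /* caller keeps the uncompressed payload */
        }
        *out = buf;
        return 1;
    }

    int
    main(void)
    {
        const char *msg = "some repetitive payload payload payload payload";
        char *compressed = NULL;
        unsigned int clen = 0;

        if (compress_or_fallback(msg, (unsigned int) strlen(msg) + 1,
                                 &compressed, &clen)) {
            printf("compressed %zu -> %u bytes\n", strlen(msg) + 1, clen);
            free(compressed);
        } else {
            printf("sending uncompressed\n");
        }
        return 0;
    }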
(data=%d, total=%d)", -- ais_msg->is_compressed ? " compressed" : "", -- ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), -- ais_data_len(ais_msg), ais_msg->header.size); -- -- iov.iov_base = ais_msg; -- iov.iov_len = ais_msg->header.size; -- buf = realloc(buf, buf_len); -- -- do { -- if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { -- retries++; -- crm_info("Peer overloaded or membership in flux:" -- " Re-sending message (Attempt %d of 20)", retries); -- sleep(retries); /* Proportional back off */ -- } -- -- errno = 0; -- switch (cluster_type) { -- case pcmk_cluster_corosync: -- CRM_ASSERT(FALSE /*Not supported here */ ); -- break; -- -- case pcmk_cluster_classic_ais: -- rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); -- header = (cs_ipc_header_response_t *) buf; -- if (rc == CS_OK) { -- CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), -- crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", -- header->id, header->size, class, header->error)); -- -- CRM_ASSERT(buf_len >= header->size); -- CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, -- crm_err("Bad response id (%d) for request (%d)", header->id, -- ais_msg->header.id)); -- CRM_CHECK(header->error == CS_OK, rc = header->error); -- } -- break; -- -- case pcmk_cluster_cman: -- transport = "cpg"; -- CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; -- goto bail); -- rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); -- if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { -- cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; -- int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); -- -- if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { -- crm_warn("Connection overloaded, cannot send messages"); -- goto bail; -- -- } else if (rc2 != CS_OK) { -- crm_warn("Could not determin the connection state: %s (%d)", -- ais_error2text(rc2), rc2); -- goto bail; -- } -- } -- break; -- -- case pcmk_cluster_unknown: -- case pcmk_cluster_invalid: -- case pcmk_cluster_heartbeat: -- CRM_ASSERT(is_openais_cluster()); -- break; -- } -- -- } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); -- -- bail: -- if (rc != CS_OK) { -- crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", -- ais_msg->id, transport, rc, ais_error2text(rc)); -+ iov = calloc(1, sizeof(struct iovec)); -+ iov->iov_base = ais_msg; -+ iov->iov_len = ais_msg->header.size; - -+ if (ais_msg->compressed_size) { -+ crm_trace("Queueing %s message %u to %s (%d compressed bytes)", -+ cluster_type == pcmk_cluster_classic_ais?"plugin":"CPG", -+ ais_msg->id, target, ais_msg->compressed_size); - } else { -- crm_trace("Message %d: sent", ais_msg->id); -+ crm_trace("Queueing %s message %u to %s (%d bytes)", -+ cluster_type == pcmk_cluster_classic_ais?"plugin":"CPG", -+ ais_msg->id, target, ais_msg->size); - } - -- free(buf); -- free(ais_msg); -- return (rc == CS_OK); -+ /* The plugin is the only time we dont use CPG messaging */ -+ if(cluster_type == pcmk_cluster_classic_ais) { -+ return send_plugin_text(class, iov); -+ } -+ -+ cs_message_queue = g_list_append(cs_message_queue, iov); -+ crm_cs_flush(); -+ -+ free(target); -+ return TRUE; - } - - gboolean -@@ -427,6 +550,7 @@ terminate_cs_connection(void) - if (pcmk_cpg_handle) { - crm_info("Disconnecting CPG"); - if (cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group) == CS_OK) { -+ crm_info("Destroying CPG"); - cpg_finalize(pcmk_cpg_handle); - } - pcmk_cpg_handle = 0; -@@ -441,6 +565,7 @@ 
terminate_cs_connection(void) - if (pcmk_cman_handle) { - crm_info("Disconnecting cman"); - if (cman_stop_notification(pcmk_cman_handle) >= 0) { -+ crm_info("Destroying cman"); - cman_finish(pcmk_cman_handle); - } - -@@ -635,7 +760,7 @@ ais_destroy(gpointer user_data) - { - crm_err("AIS connection terminated"); - ais_fd_sync = -1; -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - # if SUPPORT_CMAN -@@ -744,13 +869,6 @@ init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (* - goto cman_bail; - } - -- rc = cman_get_cluster(pcmk_cman_handle, &cluster); -- if (rc < 0) { -- crm_err("Couldn't query cman cluster details: %d %d", rc, errno); -- goto cman_bail; -- } -- ais_cluster_name = strdup(cluster.ci_name); -- - rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); - if (rc < 0) { - crm_err("Couldn't register for cman notifications: %d %d", rc, errno); -@@ -772,7 +890,7 @@ init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (* - } - # else - crm_err("cman qorum is not supported in this build"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - # endif - return TRUE; - } -@@ -806,15 +924,18 @@ pcmk_cpg_deliver(cpg_handle_t handle, - uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { - AIS_Message *ais_msg = (AIS_Message *) msg; -+ uint32_t local_nodeid = get_local_nodeid(handle); -+ const char *local_name = get_local_node_name(); - - if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { - crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); - return; - -- } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { -+ } else if (ais_msg->host.id != 0 && (local_nodeid != ais_msg->host.id)) { - /* Not for us */ - return; -- } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) { -+ -+ } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, local_name)) { - /* Not for us */ - return; - } -@@ -850,6 +971,7 @@ pcmk_cpg_membership(cpg_handle_t handle, - int i; - gboolean found = FALSE; - static int counter = 0; -+ uint32_t local_nodeid = get_local_nodeid(handle); - - for (i = 0; i < left_list_entries; i++) { - crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); -@@ -867,7 +989,7 @@ pcmk_cpg_membership(cpg_handle_t handle, - - crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- if (pcmk_nodeid == member_list[i].nodeid) { -+ if (local_nodeid == member_list[i].nodeid) { - found = TRUE; - } - } -@@ -938,12 +1060,12 @@ init_cpg_connection(crm_cluster_t * cluster) - return FALSE; - } - -- peer = crm_get_peer(cluster->nodeid, pcmk_uname); -+ peer = crm_get_peer(cluster->nodeid, NULL); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); - - # else - crm_err("The Corosync CPG API is not supported in this build"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - # endif - return TRUE; - } -@@ -953,7 +1075,7 @@ init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), - void (*destroy) (gpointer)) - { - crm_err("The Corosync quorum API is not supported in this build"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - return TRUE; - } - -@@ -963,7 +1085,7 @@ init_cs_connection_classic(crm_cluster_t * cluster) - int rc; - int pid = 0; - char *pid_s = NULL; -- struct utsname name; -+ const char *name = NULL; - - struct mainloop_fd_callbacks ais_fd_callbacks = { - 
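Both copies of pcmk_cpg_deliver() now filter by numeric node id before falling back to the name comparison: a message is dropped when its claimed sender id conflicts with the delivering node id, or when it is addressed (by id or name) to someone else. A condensed sketch of that filter; pacemaker's safe_str_neq() also tolerates NULLs, plain strcasecmp() is used here:

    #include <stdint.h>
    #include <stdio.h>
    #include <strings.h>

    typedef struct {
        uint32_t sender_id;   /* id the sender claims for itself */
        uint32_t host_id;     /* destination id, 0 = broadcast */
        const char *host;     /* destination name, "" = broadcast */
    } msg_t;

    /* Returns 1 when 'msg', delivered from 'from_id', should be processed
     * by the node identified by 'my_id' and 'my_name'. */
    static int
    accept_msg(const msg_t *msg, uint32_t from_id,
               uint32_t my_id, const char *my_name)
    {
        if (msg->sender_id > 0 && msg->sender_id != from_id) {
            return 0;   /* sender is lying about its id */
        }
        if (msg->host_id != 0 && msg->host_id != my_id) {
            return 0;   /* addressed to another node id */
        }
        if (msg->host[0] != '\0' && strcasecmp(msg->host, my_name) != 0) {
            return 0;   /* addressed to another node name */
        }
        return 1;
    }

    int
    main(void)
    {
        msg_t to_us = { 7, 0, "" };     /* broadcast from node 7 */
        msg_t not_us = { 7, 9, "" };    /* directed at node 9 */

        printf("%d %d\n", accept_msg(&to_us, 7, 3, "node3"),
               accept_msg(&not_us, 7, 3, "node3"));   /* 1 0 */
        return 0;
    }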
.dispatch = ais_dispatch, -@@ -1007,22 +1129,18 @@ init_cs_connection_classic(crm_cluster_t * cluster) - send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); - free(pid_s); - -- if (uname(&name) < 0) { -- crm_perror(LOG_ERR, "Could not determin the current host"); -- crm_exit(100); -- } -+ cluster->nodeid = get_local_nodeid(0); - -- get_ais_nodeid(&pcmk_nodeid, &pcmk_uname); -- if (safe_str_neq(name.nodename, pcmk_uname)) { -- crm_crit("Node name mismatch! Corosync supplied %s, our lookup returned %s", -- pcmk_uname, name.nodename); -+ name = get_local_node_name(); -+ get_ais_details(NULL, &(cluster->uname)); -+ if (safe_str_neq(name, cluster->uname)) { -+ crm_crit("Node name mismatch! Corosync supplied %s but our lookup returned %s", -+ cluster->uname, name); - crm_notice - ("Node name mismatches usually occur when assigned automatically by DHCP servers"); -- crm_notice("If this node was part of the cluster with a different name," -- " you will need to remove the old entry with crm_node --remove"); -+ crm_exit(ENOTUNIQ); - } -- -- cluster->nodeid = pcmk_nodeid; -+ - - return TRUE; - } -@@ -1080,10 +1198,9 @@ init_cs_connection(crm_cluster_t * cluster) - int rc = init_cs_connection_once(cluster); - - retries++; -- - switch (rc) { - case CS_OK: -- if (getenv("HA_mcp")) { -+ if (getenv("HA_mcp") && get_cluster_type() != pcmk_cluster_cman) { - xmlNode *poke = create_xml_node(NULL, "poke"); - mainloop_io_t *ipc = - mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_MEDIUM, 0, -@@ -1144,6 +1261,8 @@ extern int set_cluster_type(enum cluster_type_e type); - gboolean - init_cs_connection_once(crm_cluster_t * cluster) - { -+ const char *uuid = NULL; -+ crm_node_t *peer = NULL; - enum cluster_type_e stack = get_cluster_type(); - - crm_peer_init(); -@@ -1159,7 +1278,7 @@ init_cs_connection_once(crm_cluster_t * cluster) - if (init_cpg_connection(cluster) == FALSE) { - return FALSE; - } -- pcmk_uname = cman_node_name(0 /* CMAN_NODEID_US */ ); -+ cluster->uname = cman_node_name(0 /* CMAN_NODEID_US */ ); - break; - case pcmk_cluster_heartbeat: - crm_info("Could not find an active corosync based cluster"); -@@ -1173,17 +1292,25 @@ init_cs_connection_once(crm_cluster_t * cluster) - - crm_info("Connection to '%s': established", name_for_cluster_type(stack)); - -- CRM_ASSERT(pcmk_uname != NULL); -- pcmk_uname_len = strlen(pcmk_uname); -+ cluster->nodeid = get_local_nodeid(0); -+ if(cluster->nodeid == 0) { -+ crm_err("Could not establish local nodeid"); -+ return FALSE; -+ } - -- pcmk_nodeid = cluster->nodeid; -- if (pcmk_nodeid != 0) { -- /* Ensure the local node always exists */ -- crm_get_peer(pcmk_nodeid, pcmk_uname); -+ cluster->uname = get_node_name(0); -+ if(cluster->uname == NULL) { -+ crm_err("Could not establish local node name"); -+ return FALSE; - } - -- cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); -- cluster->uname = strdup(pcmk_uname); -+ /* Ensure the local node always exists */ -+ peer = crm_get_peer(cluster->nodeid, cluster->uname); -+ uuid = get_corosync_uuid(peer); -+ -+ if(uuid) { -+ cluster->uuid = strdup(uuid); -+ } - - return TRUE; - } -diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c -index b9ca70c..a1e044c 100644 ---- a/lib/cluster/membership.c -+++ b/lib/cluster/membership.c -@@ -32,7 +32,6 @@ - #include - #include - --GHashTable *crm_peer_id_cache = NULL; - GHashTable *crm_peer_cache = NULL; - unsigned long long crm_peer_seq = 0; - gboolean crm_have_quorum = FALSE; -@@ -40,6 +39,9 @@ gboolean crm_have_quorum = FALSE; - gboolean - 
crm_is_peer_active(const crm_node_t * node) - { -+ if(node == NULL) { -+ return FALSE; -+ } - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { - return crm_is_corosync_peer_active(node); -@@ -80,41 +82,24 @@ guint - reap_crm_member(uint32_t id, const char *name) - { - int matches = 0; -- crm_node_t *node = NULL; -+ crm_node_t search; - -- if (crm_peer_cache == NULL || crm_peer_id_cache == NULL) { -+ if (crm_peer_cache == NULL) { - crm_trace("Nothing to do, cache not initialized"); - return 0; - } - -- if (name) { -- node = g_hash_table_lookup(crm_peer_cache, name); -- } -- -- if (node == NULL && id > 0) { -- node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); -- } -- -- if (node == NULL) { -- crm_info("Peer %u/%s cannot be purged: does not exist", id, name); -- return 0; -- } -- -- if (crm_is_peer_active(node)) { -- crm_warn("Peer %u/%s cannot be purged: still active", id, name); -+ search.id = id; -+ search.uname = strdup(name); -+ matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); -+ if(matches) { -+ crm_notice("Purged %d peers with id=%u and/or uname=%s from the membership cache", matches, id, name); - - } else { -- if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { -- crm_notice("Purged dead peer %u/%s from the uuid cache", id, name); -- -- } else if (id) { -- crm_warn("Peer %u/%s was not found in the ID cache", id, name); -- } -- -- matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); -- crm_notice("Purged %d dead peers with id=%u from the membership cache", matches, id); -+ crm_info("No peers with id=%u and/or uname=%s exist", id, name); - } - -+ free(search.uname); - return matches; - } - -@@ -151,6 +136,7 @@ destroy_crm_node(gpointer data) - free(node->uname); - free(node->state); - free(node->uuid); -+ free(node->expected); - free(node); - } - -@@ -166,11 +152,7 @@ crm_peer_init(void) - - crm_peer_destroy(); - if (crm_peer_cache == NULL) { -- crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node); -- } -- -- if (crm_peer_id_cache == NULL) { -- crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); -+ crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_crm_node); - } - } - -@@ -178,14 +160,10 @@ void - crm_peer_destroy(void) - { - if (crm_peer_cache != NULL) { -+ crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); - g_hash_table_destroy(crm_peer_cache); - crm_peer_cache = NULL; - } -- -- if (crm_peer_id_cache != NULL) { -- g_hash_table_destroy(crm_peer_id_cache); -- crm_peer_id_cache = NULL; -- } - } - - void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; -@@ -196,77 +174,136 @@ crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, co - crm_status_callback = dispatch; - } - -+static void crm_dump_peer_hash(int level, const char *caller) -+{ -+ GHashTableIter iter; -+ const char *id = NULL; -+ crm_node_t *node = NULL; -+ -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { -+ do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); -+ } -+} -+ - /* coverity[-alloc] Memory is referenced in one or both hashtables */ - crm_node_t * - crm_get_peer(unsigned int id, const char *uname) - { -+ GHashTableIter iter; - crm_node_t *node = NULL; -+ crm_node_t *by_id = NULL; -+ crm_node_t *by_name = NULL; 
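reap_crm_member() above now fills in a search key and lets g_hash_table_foreach_remove() purge matching entries in one pass. The crm_reap_dead_member() predicate is not part of this hunk; the sketch below is a plausible, purely illustrative shape consistent with the "id and/or uname" semantics of the new log message:

    #include <glib.h>
    #include <stdio.h>
    #include <strings.h>

    typedef struct {
        unsigned int id;
        char *uname;
        int active;
    } node_t;

    /* Illustrative predicate: TRUE removes the entry. Matches on id
     * and/or name, but never removes a peer that is still active. */
    static gboolean
    reap_dead_member(gpointer key, gpointer value, gpointer user_data)
    {
        node_t *node = value;
        node_t *search = user_data;

        if (node->active) {
            return FALSE;
        }
        if (search->id != 0 && node->id != search->id) {
            return FALSE;
        }
        if (search->uname != NULL && node->uname != NULL
            && strcasecmp(node->uname, search->uname) != 0) {
            return FALSE;
        }
        return TRUE;
    }

    int
    main(void)
    {
        GHashTable *peers = g_hash_table_new(g_str_hash, g_str_equal);
        node_t dead = { 3, "node3", 0 };
        node_t live = { 4, "node4", 1 };
        node_t search = { 3, "node3", 0 };

        g_hash_table_insert(peers, "k3", &dead);
        g_hash_table_insert(peers, "k4", &live);
        printf("purged %u entries\n",
               g_hash_table_foreach_remove(peers, reap_dead_member, &search));
        g_hash_table_destroy(peers);
        return 0;
    }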
- - CRM_ASSERT(id > 0 || uname != NULL); - - crm_peer_init(); - -- if (node == NULL && uname != NULL) { -- node = g_hash_table_lookup(crm_peer_cache, uname); -+ if (uname != NULL) { -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->uname && strcasecmp(node->uname, uname) == 0) { -+ crm_trace("Name match: %s = %p", node->uname, node); -+ by_name = node; -+ break; -+ } -+ } -+ } -+ -+ if (id > 0) { -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->id == id) { -+ crm_trace("ID match: %u = %p", node->id, node); -+ by_id = node; -+ break; -+ } -+ } - } - -- if (node == NULL && id > 0) { -- node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); -+ node = by_id; /* Good default */ -+ if(by_id == by_name) { -+ /* Nothing to do if they match (both NULL counts) */ -+ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); -+ -+ } else if(by_id == NULL && by_name) { -+ crm_trace("Only one: %p for %u/%s", by_name, id, uname); - -- if (node && node->uname && uname) { -- crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); -+ if(id && by_name->id) { -+ crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); -+ crm_crit("Node %u and %u share the same name '%s'", -+ id, by_name->id, uname); -+ node = NULL; /* Create a new one */ - -- /* NOTE: Calling crm_new_peer() means the entry in -- * crm_peer_id_cache will point to the new entity -- * -- * TO-DO: Replace the old uname instead? -- */ -- node = NULL; -+ } else { -+ node = by_name; -+ } -+ -+ } else if(by_name == NULL && by_id) { -+ crm_trace("Only one: %p for %u/%s", by_id, id, uname); -+ -+ if(uname && by_id->uname) { -+ crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); -+ crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", -+ uname, by_id->uname, id, uname); - } -+ -+ } else if(uname && by_id->uname) { -+ crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id); -+ -+ } else if(id && by_name->id) { -+ crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); -+ -+ } else { -+ /* Simple merge */ -+ -+ /* Only corosync based clusters use nodeid's -+ * -+ * The functions that call crm_update_peer_state() only know nodeid -+ * so 'by_id' is authorative when merging -+ * -+ * Same for crm_update_peer_proc() -+ */ -+ crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__); -+ -+ crm_info("Merging %p into %p", by_name, by_id); -+ g_hash_table_remove(crm_peer_cache, by_name); - } - - if (node == NULL) { -- crm_debug("Creating entry for node %s/%u", uname, id); -+ char *uniqueid = crm_generate_uuid(); - - node = calloc(1, sizeof(crm_node_t)); - CRM_ASSERT(node); -+ -+ crm_info("Created entry %s/%p for node %s/%u (%d total)", -+ uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); -+ g_hash_table_replace(crm_peer_cache, uniqueid, node); - } - -- if (id > 0 && node->id != id) { -- crm_node_t *old = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); -+ if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { -+ crm_info("Node %u is now known as %s", id, uname); -+ } - -+ if(id > 0 && node->id == 0) { - node->id = id; -- crm_info("Node %s now has id: %u", crm_str(uname), id); -- if (old && old->state) { -- /* Only corosync based clusters use nodeid's -- * The functions that call crm_update_peer_state() only know nodeid so 'old' is 
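The rewritten crm_get_peer() performs two lookups, by id and by name, then reconciles them; per the comment retained above, the id-based entry is authoritative when merging because only corosync-based stacks use nodeids. A condensed decision sketch (the real code also dumps the cache and logs each conflict case):

    #include <stdio.h>
    #include <stddef.h>

    typedef struct {
        unsigned int id;
        const char *uname;
    } node_t;

    /* Condensed reconciliation: returns the entry to keep, or NULL when a
     * fresh entry must be created (the name-collision case). */
    static node_t *
    reconcile(node_t *by_id, node_t *by_name, unsigned int id)
    {
        if (by_id == by_name) {
            return by_id;             /* consistent (possibly both NULL) */
        }
        if (by_id == NULL) {
            if (id != 0 && by_name->id != 0) {
                return NULL;          /* two ids claim one name: start over */
            }
            return by_name;           /* name-only entry can absorb the id */
        }
        return by_id;                 /* id match is authoritative */
    }

    int
    main(void)
    {
        node_t named = { 0, "node5" };    /* cached by name, id unknown */

        /* A lookup for id=5/"node5" finds no id entry, so the name entry
         * wins and will be assigned id 5 by the caller. */
        node_t *keep = reconcile(NULL, &named, 5);
        printf("keep: %s\n", keep ? keep->uname : "(new entry)");
        return 0;
    }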
authorative when merging -- * Same for crm_update_peer_proc() -- */ -- crm_update_peer_state(__FUNCTION__, node, old->state, 0); -- crm_update_peer_proc(__FUNCTION__, node, old->processes, NULL); -- } -- g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); - } - -- if (uname && node->uname == NULL) { -+ if(uname && node->uname == NULL) { - node->uname = strdup(uname); -- if (node->id) { -- crm_info("Node %u is now known as %s", node->id, uname); -- } -- g_hash_table_replace(crm_peer_cache, node->uname, node); - if (crm_status_callback) { - crm_status_callback(crm_status_uname, node, NULL); - } - } - -- if (node && node->uname && node->uuid == NULL) { -- const char *uuid = get_node_uuid(id, node->uname); -+ if(node->uuid == NULL) { -+ const char *uuid = crm_peer_uuid(node); - - if (uuid) { -- node->uuid = strdup(uuid); -- crm_info("Node %u has uuid %s", id, node->uuid); -+ crm_info("Node %u has uuid %s", id, uuid); -+ - } else { -- crm_warn("Cannot obtain a UUID for node %d/%s", id, node->uname); -+ crm_info("Cannot obtain a UUID for node %d/%s", id, node->uname); - } - } - -@@ -292,7 +329,7 @@ crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t see - if (node->uuid == NULL) { - if (is_openais_cluster()) { - /* Yes, overrule whatever was passed in */ -- node->uuid = get_corosync_uuid(id, uname); -+ crm_peer_uuid(node); - - } else if (uuid != NULL) { - node->uuid = strdup(uuid); -@@ -342,38 +379,6 @@ crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t see - } - - void --crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) --{ -- enum crm_join_phase last = 0; -- -- if(node == NULL) { -- crm_err("%s: Could not set join to %d for NULL", source, phase); -- return; -- } -- -- last = node->join; -- -- if(phase == last) { -- crm_trace("%s: Node %s[%u] - join phase still %u", -- source, node->uname, node->id, last); -- -- } else if (phase <= crm_join_none) { -- node->join = phase; -- crm_info("%s: Node %s[%u] - join phase %u -> %u", -- source, node->uname, node->id, last, phase); -- -- } else if(phase == last + 1) { -- node->join = phase; -- crm_info("%s: Node %s[%u] - join phase %u -> %u", -- source, node->uname, node->id, last, phase); -- } else { -- crm_err("%s: Node %s[%u] - join phase cannot transition from %u to %u", -- source, node->uname, node->id, last, phase); -- -- } --} -- --void - crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) - { - uint32_t last = 0; -@@ -470,7 +475,7 @@ crm_update_peer_state(const char *source, crm_node_t * node, const char *state, - } - - if (changed) { -- crm_notice("%s: Node %s[%u] - state is now %s", source, node->uname, node->id, state); -+ crm_notice("%s: Node %s[%u] - state is now %s (was %s)", source, node->uname, node->id, state, last); - if (crm_status_callback) { - crm_status_callback(crm_status_nstate, node, last); - } -diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am -index 318d1fa..87c3f1f 100644 ---- a/lib/common/Makefile.am -+++ b/lib/common/Makefile.am -@@ -33,8 +33,11 @@ lib_LTLIBRARIES = libcrmcommon.la - CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC - - libcrmcommon_la_SOURCES = ipc.c utils.c xml.c iso8601.c remote.c mainloop.c logging.c -+if BUILD_CIBSECRETS -+libcrmcommon_la_SOURCES += cib_secrets.c -+endif - --libcrmcommon_la_LDFLAGS = -version-info 4:0:1 -+libcrmcommon_la_LDFLAGS = -version-info 5:0:2 - libcrmcommon_la_LIBADD = -ldl $(GNUTLSLIBS) - libcrmcommon_la_SOURCES += 
$(top_builddir)/lib/gnu/md5.c - -diff --git a/lib/common/cib_secrets.c b/lib/common/cib_secrets.c -new file mode 100644 -index 0000000..d1b60d3 ---- /dev/null -+++ b/lib/common/cib_secrets.c -@@ -0,0 +1,222 @@ -+/* -+ * cib_secrets.c -+ * -+ * Author: Dejan Muhamedagic -+ * Copyright (c) 2011 SUSE, Attachmate -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public -+ * License as published by the Free Software Foundation; either -+ * version 2 of the License, or (at your option) any later version. -+ * -+ * This software is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+ -+static int do_replace_secret_params(char *rsc_id, GHashTable *params, gboolean from_legacy_dir); -+static int is_magic_value(char *p); -+static int check_md5_hash(char *hash, char *value); -+static void add_secret_params(gpointer key, gpointer value, gpointer user_data); -+static char *read_local_file(char *local_file); -+ -+#define MAX_VALUE_LEN 255 -+#define MAGIC "lrm://" -+ -+static int -+is_magic_value(char *p) -+{ -+ return !strcmp(p, MAGIC); -+} -+ -+static int -+check_md5_hash(char *hash, char *value) -+{ -+ int rc = FALSE; -+ char *hash2 = NULL; -+ -+ hash2 = crm_md5sum(value); -+ crm_debug("hash: %s, calculated hash: %s", hash, hash2); -+ if (safe_str_eq(hash, hash2)) { -+ rc = TRUE; -+ } -+ -+ free(hash2); -+ return rc; -+} -+ -+static char * -+read_local_file(char *local_file) -+{ -+ FILE *fp = fopen(local_file, "r"); -+ char buf[MAX_VALUE_LEN+1]; -+ char *p; -+ -+ if (!fp) { -+ if (errno != ENOENT) { -+ crm_perror(LOG_ERR, "cannot open %s" , local_file); -+ } -+ return NULL; -+ } -+ -+ if (!fgets(buf, MAX_VALUE_LEN, fp)) { -+ crm_perror(LOG_ERR, "cannot read %s", local_file); -+ return NULL; -+ } -+ -+ /* strip white space */ -+ for (p = buf+strlen(buf)-1; p >= buf && isspace(*p); p--) -+ ; -+ *(p+1) = '\0'; -+ return g_strdup(buf); -+} -+ -+/* -+ * returns 0 on success or no replacements necessary -+ * returns -1 if replacement failed for whatever reasone -+ */ -+ -+int -+replace_secret_params(char *rsc_id, GHashTable *params) -+{ -+ if (do_replace_secret_params(rsc_id, params, FALSE) < 0 -+ && do_replace_secret_params(rsc_id, params, TRUE) < 0) { -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int -+do_replace_secret_params(char *rsc_id, GHashTable *params, gboolean from_legacy_dir) -+{ -+ char local_file[FILENAME_MAX+1], *start_pname; -+ char hash_file[FILENAME_MAX+1], *hash; -+ GList *secret_params = NULL, *l; -+ char *key, *pvalue, *secret_value; -+ int rc = 0; -+ const char *dir_prefix = NULL; -+ -+ if (params == NULL) { -+ return 0; -+ } -+ -+ if (from_legacy_dir) { -+ dir_prefix = LRM_LEGACY_CIBSECRETS_DIR; -+ -+ } else { -+ dir_prefix = LRM_CIBSECRETS_DIR; -+ } -+ -+ /* secret_params could be cached with the resource; -+ * there are also parameters sent with operations -+ * which cannot be cached -+ */ -+ g_hash_table_foreach(params, add_secret_params, &secret_params); 
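cib_secrets.c resolves each "lrm://" placeholder by reading <param> and <param>.sign under the per-resource secrets directory and accepting the value only when its MD5 digest matches the signature file. check_md5_hash() relies on pacemaker's crm_md5sum(); a standalone equivalent using GLib's GChecksum:

    #include <glib.h>
    #include <stdio.h>

    /* Accept 'value' only if its MD5 digest equals the recorded 'hash'
     * (the content of the matching .sign file). */
    static gboolean
    md5_matches(const char *hash, const char *value)
    {
        gchar *digest =
            g_compute_checksum_for_string(G_CHECKSUM_MD5, value, -1);
        gboolean ok = (g_strcmp0(hash, digest) == 0);

        g_free(digest);
        return ok;
    }

    int
    main(void)
    {
        /* MD5("secret") */
        const char *sign = "5ebe2294ecd0e0f08eab7690d2a6ee69";

        printf("match: %d\n", md5_matches(sign, "secret"));     /* 1 */
        printf("match: %d\n", md5_matches(sign, "tampered"));   /* 0 */
        return 0;
    }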
-+ if (!secret_params) { /* none found? */ -+ return 0; -+ } -+ -+ crm_debug("replace secret parameters for resource %s", rsc_id); -+ -+ if (snprintf(local_file, FILENAME_MAX, -+ "%s/%s/", dir_prefix, rsc_id) > FILENAME_MAX) { -+ crm_err("filename size exceeded for resource %s", rsc_id); -+ return -1; -+ } -+ start_pname = local_file + strlen(local_file); -+ -+ for (l = g_list_first(secret_params); l; l = g_list_next(l)) { -+ key = (char *)(l->data); -+ pvalue = g_hash_table_lookup(params, key); -+ if (!pvalue) { /* this cannot really happen */ -+ crm_err("odd, no parameter %s for rsc %s found now", key, rsc_id); -+ continue; -+ } -+ -+ if ((strlen(key) + strlen(local_file)) >= FILENAME_MAX-2) { -+ crm_err("%d: parameter name %s too big", key); -+ rc = -1; -+ continue; -+ } -+ -+ strcpy(start_pname, key); -+ secret_value = read_local_file(local_file); -+ if (!secret_value) { -+ if (from_legacy_dir == FALSE) { -+ crm_debug("secret for rsc %s parameter %s not found in %s. " -+ "will try "LRM_LEGACY_CIBSECRETS_DIR, rsc_id, key, dir_prefix); -+ -+ } else { -+ crm_err("secret for rsc %s parameter %s not found in %s", -+ rsc_id, key, dir_prefix); -+ } -+ rc = -1; -+ continue; -+ } -+ -+ strcpy(hash_file, local_file); -+ if (strlen(hash_file) + 5 > FILENAME_MAX) { -+ crm_err("cannot build such a long name " -+ "for the sign file: %s.sign", hash_file); -+ g_free(secret_value); -+ rc = -1; -+ continue; -+ -+ } else { -+ strncat(hash_file, ".sign", 5); -+ hash = read_local_file(hash_file); -+ if (hash == NULL) { -+ crm_err("md5 sum for rsc %s parameter %s " -+ "cannot be read from %s", rsc_id, key, hash_file); -+ g_free(secret_value); -+ rc = -1; -+ continue; -+ -+ } else if (!check_md5_hash(hash, secret_value)) { -+ crm_err("md5 sum for rsc %s parameter %s " -+ "does not match", rsc_id, key); -+ g_free(secret_value); -+ g_free(hash); -+ rc = -1; -+ continue; -+ } -+ g_free(hash); -+ } -+ g_hash_table_replace(params, g_strdup(key), secret_value); -+ } -+ g_list_free(secret_params); -+ return rc; -+} -+ -+static void -+add_secret_params(gpointer key, gpointer value, gpointer user_data) -+{ -+ GList **lp = (GList **)user_data; -+ -+ if (is_magic_value((char *)value)) { -+ *lp = g_list_append(*lp, (char *)key); -+ } -+} -diff --git a/lib/common/ipc.c b/lib/common/ipc.c -index 88a73c0..2cd42bf 100644 ---- a/lib/common/ipc.c -+++ b/lib/common/ipc.c -@@ -147,16 +147,21 @@ create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const - const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE); - - if (type == NULL) { -- crm_err("Cannot create new_message," " no message type in original message"); -+ crm_err("Cannot create new_message, no message type in original message"); - CRM_ASSERT(type != NULL); - return NULL; - #if 0 - } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) { -- crm_err("Cannot create new_message," " original message was not a request"); -+ crm_err("Cannot create new_message, original message was not a request"); - return NULL; - #endif - } - reply = create_xml_node(NULL, __FUNCTION__); -+ if (reply == NULL) { -+ crm_err("Cannot create new_message, malloc failed"); -+ return NULL; -+ } -+ - crm_xml_add(reply, F_CRM_ORIGIN, origin); - crm_xml_add(reply, F_TYPE, T_CRM); - crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET); -@@ -243,19 +248,22 @@ crm_client_init(void) - void - crm_client_cleanup(void) - { -- if (client_connections == NULL) { -+ if (client_connections != NULL) { - int active = g_hash_table_size(client_connections); - - if (active) 
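The cib_secrets.c code above substitutes each parameter whose value is the magic "lrm://" with the contents of <dir>/<rsc>/<param>, and only accepts the secret when it matches the MD5 digest stored in the companion .sign file. A self-contained sketch of that check, using GLib's g_compute_checksum_for_string() as a stand-in for pacemaker's crm_md5sum() (the function name here is illustrative, not from the patch):

    #include <glib.h>
    #include <stdio.h>
    #include <string.h>

    static gboolean
    secret_matches_sign(const char *value, const char *sign_file)
    {
        char buf[256];
        gchar *calculated = NULL;
        gboolean ok = FALSE;
        FILE *fp = fopen(sign_file, "r");

        if (fp == NULL || fgets(buf, sizeof(buf), fp) == NULL) {
            if (fp) {
                fclose(fp);
            }
            return FALSE;               /* missing or unreadable .sign file */
        }
        fclose(fp);
        buf[strcspn(buf, "\r\n")] = '\0';   /* strip the trailing newline */

        calculated = g_compute_checksum_for_string(G_CHECKSUM_MD5, value, -1);
        ok = (g_strcmp0(buf, calculated) == 0);
        g_free(calculated);
        return ok;
    }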
{ - crm_err("Exiting with %d active connections", active); - } -- g_hash_table_destroy(client_connections); -+ g_hash_table_destroy(client_connections); client_connections = NULL; - } - } - - crm_client_t * --crm_client_new(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+crm_client_new(qb_ipcs_connection_t * c, uid_t uid_client, gid_t gid_client) - { -+ static uid_t uid_server = 0; -+ static gid_t gid_cluster = 0; -+ - crm_client_t *client = NULL; - - CRM_LOG_ASSERT(c); -@@ -263,6 +271,29 @@ crm_client_new(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) - return NULL; - } - -+ if (gid_cluster == 0) { -+ uid_server = getuid(); -+ if(crm_user_lookup(CRM_DAEMON_USER, NULL, &gid_cluster) < 0) { -+ static bool have_error = FALSE; -+ if(have_error == FALSE) { -+ crm_warn("Could not find group for user %s", CRM_DAEMON_USER); -+ have_error = TRUE; -+ } -+ } -+ } -+ -+ if(gid_cluster != 0 && gid_client != 0) { -+ uid_t best_uid = -1; /* Passing -1 to chown(2) means don't change */ -+ -+ if(uid_client == 0 || uid_server == 0) { /* Someone is priveliged, but the other may not be */ -+ best_uid = QB_MAX(uid_client, uid_server); -+ crm_trace("Allowing user %u to clean up after disconnect", best_uid); -+ } -+ -+ crm_trace("Giving access to group %u", gid_cluster); -+ qb_ipcs_connection_auth_set(c, best_uid, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); -+ } -+ - crm_client_init(); - - client = calloc(1, sizeof(crm_client_t)); -@@ -273,19 +304,10 @@ crm_client_new(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) - - client->id = crm_generate_uuid(); - -- crm_info("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid, gid, client->pid, client->id); -+ crm_info("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id); - - #if ENABLE_ACL -- { -- struct group *crm_grp = NULL; -- -- crm_grp = getgrnam(CRM_DAEMON_GROUP); -- if (crm_grp) { -- qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, -- S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); -- } -- client->user = uid2username(uid); -- } -+ client->user = uid2username(uid_client); - #endif - - g_hash_table_insert(client_connections, c, client); -@@ -358,7 +380,7 @@ crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t - struct crm_ipc_response_header *header = data; - - if (id) { -- *id = ((struct qb_ipc_request_header *)data)->id; -+ *id = ((struct qb_ipc_response_header *)data)->id; - } - if (flags) { - *flags = header->flags; -@@ -481,6 +503,7 @@ crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result) - - CRM_ASSERT(result != NULL); - -+ *result = NULL; - iov = calloc(2, sizeof(struct iovec)); - - crm_ipc_init(); -@@ -583,7 +606,7 @@ crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_server_flags f - crm_trace("Response %d sent, %d bytes to %p[%d]", header->qb.id, rc, c->ipcs, c->pid); - } - -- if (header->flags & crm_ipc_server_free) { -+ if (flags & crm_ipc_server_free) { - free(iov[0].iov_base); - free(iov[1].iov_base); - free(iov); -@@ -608,13 +631,20 @@ crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message, - enum crm_ipc_server_flags flags) - { - struct iovec *iov = NULL; -- ssize_t rc = crm_ipc_prepare(request, message, &iov); -+ ssize_t rc = 0; - -+ if(c == NULL) { -+ return -EDESTADDRREQ; -+ } -+ -+ rc = crm_ipc_prepare(request, message, &iov); - if (rc > 0) { - rc = crm_ipcs_sendv(c, iov, flags | crm_ipc_server_free); - - } else { -- crm_notice("Message to %p[%d] failed: %s (%d)", c->ipcs, c->pid, pcmk_strerror(rc), rc); -+ 
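The crm_client_new() hunk above replaces the per-connection getgrnam() lookup with one cached crm_user_lookup() call and then grants the cluster group access to the connection, letting whichever side is unprivileged clean up the socket files after disconnect. A minimal sketch of that grant, assuming libqb's qb_ipcs_connection_auth_set() (the helper name is invented):

    #include <qb/qbipcs.h>
    #include <sys/stat.h>
    #include <sys/types.h>
    #include <unistd.h>

    static void
    grant_ipc_access(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_cluster)
    {
        uid_t best_uid = -1;                /* -1 leaves the owner unchanged */
        uid_t uid_server = getuid();

        if (uid_client == 0 || uid_server == 0) {
            /* one side is root, so the larger uid is the unprivileged peer
             * that must be able to remove the socket files afterwards */
            best_uid = (uid_client > uid_server) ? uid_client : uid_server;
        }
        qb_ipcs_connection_auth_set(c, best_uid, gid_cluster,
                                    S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
    }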
free(iov); -+ crm_notice("Message to %p[%d] failed: %s (%d)", -+ c->ipcs, c->pid, pcmk_strerror(rc), rc); - } - - return rc; -@@ -820,7 +850,7 @@ crm_ipc_decompress(crm_ipc_t * client) - if (rc != BZ_OK) { - crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc); - free(uncompressed); -- return -EREMOTEIO; -+ return -EILSEQ; - } - - CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max); -@@ -975,7 +1005,6 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - struct iovec *iov; - static uint32_t id = 0; - struct crm_ipc_response_header *header; -- char *buffer = NULL; - - crm_ipc_init(); - -@@ -995,8 +1024,7 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - if (rc < 0) { - crm_warn("Sending to %s (%p) is disabled until pending reply is recieved", client->name, - client->ipc); -- free(buffer); -- return -EREMOTEIO; -+ return -EALREADY; - - } else { - crm_notice("Lost reply from %s (%p) finally arrived, sending re-enabled", client->name, -@@ -1017,21 +1045,21 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - ms_timeout = 5000; - } - -- crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg: %.200s...", -- client->name, header->qb.id, header->qb.size, ms_timeout, buffer); -+ crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...", -+ client->name, header->qb.id, header->qb.size, ms_timeout); - - if (ms_timeout > 0) { - - rc = internal_ipc_send_request(client, iov, ms_timeout); - - if (rc <= 0) { -- crm_trace("Failed to send from client %s request %d with %u bytes: %.200s...", -- client->name, header->qb.id, header->qb.size, buffer); -+ crm_trace("Failed to send from client %s request %d with %u bytes...", -+ client->name, header->qb.id, header->qb.size); - goto send_cleanup; - - } else if (is_not_set(flags, crm_ipc_client_response)) { -- crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes: %.200s...", -- header->qb.id, client->name, header->qb.size, buffer); -+ crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes...", -+ header->qb.id, client->name, header->qb.size); - - goto send_cleanup; - } -@@ -1073,16 +1101,16 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - } else if (rc == -ETIMEDOUT) { - crm_warn("Request %d to %s (%p) failed: %s (%ld) after %dms", - header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc, ms_timeout); -- crm_info("Request was %.120s", buffer); - crm_write_blackbox(0, NULL); - - } else if (rc <= 0) { - crm_warn("Request %d to %s (%p) failed: %s (%ld)", - header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc); -- crm_info("Request was %.120s", buffer); - } - -- free(buffer); -+ free(header); -+ free(iov[1].iov_base); -+ free(iov); - return rc; - } - -diff --git a/lib/common/logging.c b/lib/common/logging.c -index c3bce72..a1b01f2 100644 ---- a/lib/common/logging.c -+++ b/lib/common/logging.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
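The crm_ipcs_sendv()/crm_ipcs_send() changes above free the iovec based on the flags the caller passed in, not on bits read from a header that may already have been handed to libqb, and they stop leaking the vector on the failure path. The ownership rule reduced to a sketch, with writev() standing in for the real libqb send and illustrative names throughout:

    #include <stdlib.h>
    #include <sys/uio.h>

    enum server_flags { SERVER_FREE = 0x01 };

    static long
    sendv_and_maybe_free(int fd, struct iovec *iov, int iov_len, int flags)
    {
        long rc = writev(fd, iov, iov_len);   /* stand-in for the libqb send */

        if (flags & SERVER_FREE) {            /* caller handed us ownership */
            int i;

            for (i = 0; i < iov_len; i++) {
                free(iov[i].iov_base);
            }
            free(iov);
        }
        return rc;
    }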
-- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -42,6 +42,7 @@ - #include - #include - -+unsigned int crm_log_priority = LOG_NOTICE; - unsigned int crm_log_level = LOG_INFO; - static gboolean crm_tracing_enabled(void); - unsigned int crm_trace_nonlog = 0; -@@ -221,9 +222,8 @@ crm_add_logfile(const char *filename) - return FALSE; /* Nothing to do */ - } - -+ /* Check the parent directory */ - filename_cp = strdup(filename); -- -- /* Check the parent directory and attempt to open */ - parent_dir = dirname(filename_cp); - rc = stat(parent_dir, &parent); - -@@ -231,27 +231,19 @@ crm_add_logfile(const char *filename) - crm_err("Directory '%s' does not exist: logging to '%s' is disabled", parent_dir, filename); - free(filename_cp); - return FALSE; -+ } -+ free(filename_cp); - -- } else if (parent.st_uid == geteuid() && (parent.st_mode & (S_IRUSR | S_IWUSR))) { -- /* all good - user */ -- logfile = fopen(filename, "a"); -- -- } else if (parent.st_gid == getegid() && (parent.st_mode & S_IXGRP)) { -- /* all good - group */ -- logfile = fopen(filename, "a"); -- -- } else { -- crm_err -- ("We (uid=%u, gid=%u) do not have permission to access '%s': logging to '%s' is disabled", -- geteuid(), getegid(), parent_dir, filename); -- free(filename_cp); -+ errno = 0; -+ logfile = fopen(filename, "a"); -+ if(logfile == NULL) { -+ crm_err("%s (%d): Logging to '%s' as uid=%u, gid=%u is disabled", -+ pcmk_strerror(errno), errno, filename, geteuid(), getegid()); - return FALSE; - } -- free(filename_cp); -- filename_cp = NULL; - - /* Check/Set permissions if we're root */ -- if (logfile && geteuid() == 0) { -+ if (geteuid() == 0) { - struct stat st; - uid_t pcmk_uid = 0; - gid_t pcmk_gid = 0; -@@ -265,13 +257,14 @@ crm_add_logfile(const char *filename) - return FALSE; - } - -- crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid); -- if (st.st_gid != pcmk_gid) { -- /* Wrong group */ -- fix = TRUE; -- } else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) { -- /* Not read/writable by the correct group */ -- fix = TRUE; -+ if(crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) == 0) { -+ if (st.st_gid != pcmk_gid) { -+ /* Wrong group */ -+ fix = TRUE; -+ } else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) { -+ /* Not read/writable by the correct group */ -+ fix = TRUE; -+ } - } - - if (fix) { -@@ -293,11 +286,9 @@ crm_add_logfile(const char *filename) - } - } - } -- if (logfile) { -- fclose(logfile); -- } - -- /* Now open with libqb */ -+ /* Close and reopen with libqb */ -+ fclose(logfile); - fd = qb_log_file_open(filename); - - if (fd < 0) { -@@ -307,6 +298,7 @@ crm_add_logfile(const char *filename) - - crm_notice("Additional logging available in %s", filename); - qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_TRUE); -+ /* qb_log_ctl(fd, QB_LOG_CONF_FILE_SYNC, 1); Turn on synchronous writes */ - - /* Enable callsites */ - crm_update_callsites(); -@@ -320,7 +312,11 @@ static char *blackbox_file_prefix = NULL; - static void - blackbox_logger(int32_t t, struct qb_log_callsite *cs, time_t timestamp, const char *msg) - { -- crm_write_blackbox(0, cs); -+ if(cs && 
cs->priority < LOG_ERR) { -+ crm_write_blackbox(SIGTRAP, cs); /* Bypass the over-dumping logic */ -+ } else { -+ crm_write_blackbox(0, cs); -+ } - } - - void -@@ -339,10 +335,16 @@ crm_enable_blackbox(int nsig) - qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */ - - crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix); -+ -+ /* Save to disk on abnormal termination */ - crm_signal(SIGSEGV, crm_trigger_blackbox); -+ crm_signal(SIGABRT, crm_trigger_blackbox); -+ crm_signal(SIGILL, crm_trigger_blackbox); -+ crm_signal(SIGBUS, crm_trigger_blackbox); -+ - crm_update_callsites(); - -- /* Original meanings from signal(7) -+ /* Original meanings from signal(7) - * - * Signal Value Action Comment - * SIGTRAP 5 Core Trace/breakpoint trap -@@ -353,8 +355,8 @@ crm_enable_blackbox(int nsig) - - blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); - qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); -- crm_info("Trigger: %d is %d %d", blackbox_trigger, -- qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); -+ crm_trace("Trigger: %d is %d %d", blackbox_trigger, -+ qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); - - crm_update_callsites(); - } -@@ -375,7 +377,6 @@ crm_write_blackbox(int nsig, struct qb_log_callsite *cs) - - switch (nsig) { - case 0: -- case SIGABRT: - case SIGTRAP: - /* The graceful case - such as assertion failure or user request */ - -@@ -461,7 +462,7 @@ crm_log_filter_source(int source, const char *trace_files, const char *trace_fns - } - - } else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */ -- if (cs->priority <= LOG_NOTICE && cs->priority <= crm_log_level) { -+ if (cs->priority <= crm_log_priority && cs->priority <= crm_log_level) { - qb_bit_set(cs->targets, source); - } - /* Log file tracing options... 
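crm_enable_blackbox() above now arms the recorder for SIGABRT, SIGILL and SIGBUS as well as SIGSEGV. Installing one handler for a set of fatal signals is the usual sigaction() loop; a minimal sketch (the handler body is elided, in pacemaker it triggers the blackbox dump):

    #include <signal.h>
    #include <string.h>

    static void
    on_fatal_signal(int signo)
    {
        /* pacemaker would dump the blackbox here; kept empty because only
         * async-signal-safe calls are allowed inside a handler */
        (void)signo;
    }

    static void
    install_fatal_handlers(void)
    {
        const int signals[] = { SIGSEGV, SIGABRT, SIGILL, SIGBUS };
        struct sigaction sa;
        size_t i;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_fatal_signal;
        sigemptyset(&sa.sa_mask);
        for (i = 0; i < sizeof(signals) / sizeof(signals[0]); i++) {
            sigaction(signals[i], &sa, NULL);
        }
    }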
*/ -@@ -584,6 +585,34 @@ crm_tracing_enabled(void) - return FALSE; - } - -+static int -+crm_priority2int(const char *name) -+{ -+ struct syslog_names { -+ const char *name; -+ int priority; -+ }; -+ static struct syslog_names p_names[] = { -+ {"emerg", LOG_EMERG}, -+ {"alert", LOG_ALERT}, -+ {"crit", LOG_CRIT}, -+ {"error", LOG_ERR}, -+ {"warning", LOG_WARNING}, -+ {"notice", LOG_NOTICE}, -+ {"info", LOG_INFO}, -+ {"debug", LOG_DEBUG}, -+ {NULL, -1} -+ }; -+ int lpc; -+ -+ for (lpc = 0; name != NULL && p_names[lpc].name != NULL; lpc++) { -+ if (crm_str_eq(p_names[lpc].name, name, TRUE)) { -+ return p_names[lpc].priority; -+ } -+ } -+ return crm_log_priority; -+} -+ - gboolean - crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - int argc, char **argv, gboolean quiet) -@@ -616,18 +645,23 @@ crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - } - - if (entity) { -- crm_system_name = entity; -+ free(crm_system_name); -+ crm_system_name = strdup(entity); - - } else if (argc > 0 && argv != NULL) { - char *mutable = strdup(argv[0]); -+ char *modified = basename(mutable); - -- crm_system_name = basename(mutable); -- if (strstr(crm_system_name, "lt-") == crm_system_name) { -- crm_system_name += 3; -+ if (strstr(modified, "lt-") == modified) { -+ modified += 3; - } - -+ free(crm_system_name); -+ crm_system_name = strdup(modified); -+ free(mutable); -+ - } else if (crm_system_name == NULL) { -- crm_system_name = "Unknown"; -+ crm_system_name = strdup("Unknown"); - } - - setenv("PCMK_service", crm_system_name, 1); -@@ -642,6 +676,8 @@ crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - to_stderr = TRUE; - } - -+ crm_log_priority = crm_priority2int(daemon_option("logpriority")); -+ - crm_log_level = level; - qb_log_init(crm_system_name, qb_log_facility2int(facility), level); - qb_log_tags_stringify_fn_set(crm_quark_to_string); -@@ -729,6 +765,7 @@ crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - mainloop_add_signal(SIGUSR1, crm_enable_blackbox); - } - -+ crm_xml_init(); /* Sets buffer allocation strategy */ - return TRUE; - } - -@@ -822,6 +859,148 @@ crm_log_args(int argc, char **argv) - } - - const char * -+pcmk_errorname(int rc) -+{ -+ int error = ABS(rc); -+ -+ switch (error) { -+ case E2BIG: return "E2BIG"; -+ case EACCES: return "EACCES"; -+ case EADDRINUSE: return "EADDRINUSE"; -+ case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; -+ case EAFNOSUPPORT: return "EAFNOSUPPORT"; -+ case EAGAIN: return "EAGAIN"; -+ case EALREADY: return "EALREADY"; -+ case EBADE: return "EBADE"; -+ case EBADF: return "EBADF"; -+ case EBADFD: return "EBADFD"; -+ case EBADMSG: return "EBADMSG"; -+ case EBADR: return "EBADR"; -+ case EBADRQC: return "EBADRQC"; -+ case EBADSLT: return "EBADSLT"; -+ case EBUSY: return "EBUSY"; -+ case ECANCELED: return "ECANCELED"; -+ case ECHILD: return "ECHILD"; -+ case ECHRNG: return "ECHRNG"; -+ case ECOMM: return "ECOMM"; -+ case ECONNABORTED: return "ECONNABORTED"; -+ case ECONNREFUSED: return "ECONNREFUSED"; -+ case ECONNRESET: return "ECONNRESET"; -+ /* case EDEADLK: return "EDEADLK"; */ -+ case EDEADLOCK: return "EDEADLOCK"; -+ case EDESTADDRREQ: return "EDESTADDRREQ"; -+ case EDOM: return "EDOM"; -+ case EDQUOT: return "EDQUOT"; -+ case EEXIST: return "EEXIST"; -+ case EFAULT: return "EFAULT"; -+ case EFBIG: return "EFBIG"; -+ case EHOSTDOWN: return "EHOSTDOWN"; -+ case EHOSTUNREACH: return "EHOSTUNREACH"; -+ case EIDRM: return "EIDRM"; -+ case EILSEQ: return 
"EILSEQ"; -+ case EINPROGRESS: return "EINPROGRESS"; -+ case EINTR: return "EINTR"; -+ case EINVAL: return "EINVAL"; -+ case EIO: return "EIO"; -+ case EISCONN: return "EISCONN"; -+ case EISDIR: return "EISDIR"; -+ case EISNAM: return "EISNAM"; -+ case EKEYEXPIRED: return "EKEYEXPIRED"; -+ case EKEYREJECTED: return "EKEYREJECTED"; -+ case EKEYREVOKED: return "EKEYREVOKED"; -+ case EL2HLT: return "EL2HLT"; -+ case EL2NSYNC: return "EL2NSYNC"; -+ case EL3HLT: return "EL3HLT"; -+ case EL3RST: return "EL3RST"; -+ case ELIBACC: return "ELIBACC"; -+ case ELIBBAD: return "ELIBBAD"; -+ case ELIBMAX: return "ELIBMAX"; -+ case ELIBSCN: return "ELIBSCN"; -+ case ELIBEXEC: return "ELIBEXEC"; -+ case ELOOP: return "ELOOP"; -+ case EMEDIUMTYPE: return "EMEDIUMTYPE"; -+ case EMFILE: return "EMFILE"; -+ case EMLINK: return "EMLINK"; -+ case EMSGSIZE: return "EMSGSIZE"; -+ case EMULTIHOP: return "EMULTIHOP"; -+ case ENAMETOOLONG: return "ENAMETOOLONG"; -+ case ENETDOWN: return "ENETDOWN"; -+ case ENETRESET: return "ENETRESET"; -+ case ENETUNREACH: return "ENETUNREACH"; -+ case ENFILE: return "ENFILE"; -+ case ENOBUFS: return "ENOBUFS"; -+ case ENODATA: return "ENODATA"; -+ case ENODEV: return "ENODEV"; -+ case ENOENT: return "ENOENT"; -+ case ENOEXEC: return "ENOEXEC"; -+ case ENOKEY: return "ENOKEY"; -+ case ENOLCK: return "ENOLCK"; -+ case ENOLINK: return "ENOLINK"; -+ case ENOMEDIUM: return "ENOMEDIUM"; -+ case ENOMEM: return "ENOMEM"; -+ case ENOMSG: return "ENOMSG"; -+ case ENONET: return "ENONET"; -+ case ENOPKG: return "ENOPKG"; -+ case ENOPROTOOPT: return "ENOPROTOOPT"; -+ case ENOSPC: return "ENOSPC"; -+ case ENOSR: return "ENOSR"; -+ case ENOSTR: return "ENOSTR"; -+ case ENOSYS: return "ENOSYS"; -+ case ENOTBLK: return "ENOTBLK"; -+ case ENOTCONN: return "ENOTCONN"; -+ case ENOTDIR: return "ENOTDIR"; -+ case ENOTEMPTY: return "ENOTEMPTY"; -+ case ENOTSOCK: return "ENOTSOCK"; -+ /* case ENOTSUP: return "ENOTSUP"; */ -+ case ENOTTY: return "ENOTTY"; -+ case ENOTUNIQ: return "ENOTUNIQ"; -+ case ENXIO: return "ENXIO"; -+ case EOPNOTSUPP: return "EOPNOTSUPP"; -+ case EOVERFLOW: return "EOVERFLOW"; -+ case EPERM: return "EPERM"; -+ case EPFNOSUPPORT: return "EPFNOSUPPORT"; -+ case EPIPE: return "EPIPE"; -+ case EPROTO: return "EPROTO"; -+ case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; -+ case EPROTOTYPE: return "EPROTOTYPE"; -+ case ERANGE: return "ERANGE"; -+ case EREMCHG: return "EREMCHG"; -+ case EREMOTE: return "EREMOTE"; -+ case EREMOTEIO: return "EREMOTEIO"; -+ case ERESTART: return "ERESTART"; -+ case EROFS: return "EROFS"; -+ case ESHUTDOWN: return "ESHUTDOWN"; -+ case ESPIPE: return "ESPIPE"; -+ case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; -+ case ESRCH: return "ESRCH"; -+ case ESTALE: return "ESTALE"; -+ case ESTRPIPE: return "ESTRPIPE"; -+ case ETIME: return "ETIME"; -+ case ETIMEDOUT: return "ETIMEDOUT"; -+ case ETXTBSY: return "ETXTBSY"; -+ case EUCLEAN: return "EUCLEAN"; -+ case EUNATCH: return "EUNATCH"; -+ case EUSERS: return "EUSERS"; -+ /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ -+ case EXDEV: return "EXDEV"; -+ case EXFULL: return "EXFULL"; -+ -+ case pcmk_err_generic: return "pcmk_err_generic"; -+ case pcmk_err_no_quorum: return "pcmk_err_no_quorum"; -+ case pcmk_err_dtd_validation: return "pcmk_err_dtd_validation"; -+ case pcmk_err_transform_failed: return "pcmk_err_transform_failed"; -+ case pcmk_err_old_data: return "pcmk_err_old_data"; -+ case pcmk_err_diff_failed: return "pcmk_err_diff_failed"; -+ case pcmk_err_diff_resync: return "pcmk_err_diff_resync"; -+ case 
pcmk_err_cib_modified: return "pcmk_err_cib_modified"; -+ case pcmk_err_cib_backup: return "pcmk_err_cib_backup"; -+ case pcmk_err_cib_save: return "pcmk_err_cib_save"; -+ } -+ return "Unknown"; -+} -+ -+ -+const char * - pcmk_strerror(int rc) - { - int error = rc; -@@ -851,6 +1030,12 @@ pcmk_strerror(int rc) - return "Application of an update diff failed"; - case pcmk_err_diff_resync: - return "Application of an update diff failed, requesting a full refresh"; -+ case pcmk_err_cib_modified: -+ return "The on-disk configuration was manually modified"; -+ case pcmk_err_cib_backup: -+ return "Could not archive the previous configuration"; -+ case pcmk_err_cib_save: -+ return "Could not save the new configuration to disk"; - - /* The following cases will only be hit on systems for which they are non-standard */ - /* coverity[dead_error_condition] False positive on non-Linux */ -@@ -917,17 +1102,20 @@ crm_log_output_fn(const char *file, const char *function, int line, int level, c - const char *next = NULL; - const char *offset = NULL; - -- if (output) { -- next = output; -- do { -- offset = next; -- next = strchrnul(offset, '\n'); -- do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", prefix, -- (int)(next - offset), offset); -- if (next[0] != 0) { -- next++; -- } -- -- } while (next != NULL && next[0] != 0); -+ if (output == NULL) { -+ level = LOG_DEBUG; -+ output = "-- empty --"; - } -+ -+ next = output; -+ do { -+ offset = next; -+ next = strchrnul(offset, '\n'); -+ do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", prefix, -+ (int)(next - offset), offset); -+ if (next[0] != 0) { -+ next++; -+ } -+ -+ } while (next != NULL && next[0] != 0); - } -diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c -index c038174..e95d1d8 100644 ---- a/lib/common/mainloop.c -+++ b/lib/common/mainloop.c -@@ -42,7 +42,7 @@ struct mainloop_child_s { - void *privatedata; - - /* Called when a process dies */ -- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode); -+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); - }; - - struct trigger_s { -@@ -112,11 +112,59 @@ crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) - return rc; - } - -+static void -+crm_trigger_finalize(GSource * source) -+{ -+ crm_trace("Trigger %p destroyed", source); -+} -+ -+#if 0 -+struct _GSourceCopy -+{ -+ gpointer callback_data; -+ GSourceCallbackFuncs *callback_funcs; -+ -+ const GSourceFuncs *source_funcs; -+ guint ref_count; -+ -+ GMainContext *context; -+ -+ gint priority; -+ guint flags; -+ guint source_id; -+ -+ GSList *poll_fds; -+ -+ GSource *prev; -+ GSource *next; -+ -+ char *name; -+ -+ void *priv; -+}; -+ -+static int -+g_source_refcount(GSource * source) -+{ -+ /* Duplicating the contents of private header files is a necessary evil */ -+ if (source) { -+ struct _GSourceCopy *evil = (struct _GSourceCopy*)source; -+ return evil->ref_count; -+ } -+ return 0; -+} -+#else -+static int g_source_refcount(GSource * source) -+{ -+ return 0; -+} -+#endif -+ - static GSourceFuncs crm_trigger_funcs = { - crm_trigger_prepare, - crm_trigger_check, - crm_trigger_dispatch, -- NULL -+ crm_trigger_finalize, - }; - - static crm_trigger_t * -@@ -138,7 +186,10 @@ mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer - g_source_set_priority(source, priority); - g_source_set_can_recurse(source, FALSE); - -+ crm_trace("Setup %p with ref-count=%u", source, g_source_refcount(source)); - trigger->id = 
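pcmk_errorname() added above returns the symbolic name for both system errno values and the pcmk_err_* extensions, and works on the absolute value so the negative return-code convention is handled too. A cut-down, self-contained illustration of the call pattern (a two-entry stand-in, not the full table):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    static const char *
    errname(int rc)
    {
        switch (abs(rc)) {          /* rc may arrive negated */
            case ENOENT:    return "ENOENT";
            case ETIMEDOUT: return "ETIMEDOUT";
            default:        return "Unknown";
        }
    }

    int
    main(void)
    {
        printf("rc=%d -> %s\n", -ETIMEDOUT, errname(-ETIMEDOUT));
        return 0;
    }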
g_source_attach(source, NULL); -+ crm_trace("Attached %p with ref-count=%u", source, g_source_refcount(source)); -+ - return trigger; - } - -@@ -169,17 +220,35 @@ mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointe - void - mainloop_set_trigger(crm_trigger_t * source) - { -- source->trigger = TRUE; -+ if(source) { -+ source->trigger = TRUE; -+ } - } - - gboolean - mainloop_destroy_trigger(crm_trigger_t * source) - { -- source->trigger = FALSE; -- if (source->id > 0) { -- g_source_remove(source->id); -- source->id = 0; -+ GSource *gs = NULL; -+ -+ if(source == NULL) { -+ return TRUE; -+ } -+ -+ gs = (GSource *)source; -+ -+ if(g_source_refcount(gs) > 2) { -+ crm_info("Trigger %p is still referenced %u times", gs, g_source_refcount(gs)); - } -+ -+ g_source_destroy(gs); /* Remove from mainloop, ref_count-- */ -+ g_source_unref(gs); /* The caller no longer carries a reference to source -+ * -+ * At this point the source should be free'd, -+ * unless we're currently processing said -+ * source, in which case mainloop holds an -+ * additional reference and it will be free'd -+ * once our processing completes -+ */ - return TRUE; - } - -@@ -197,7 +266,9 @@ crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) - { - crm_signal_t *sig = (crm_signal_t *) source; - -- crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal)); -+ if(sig->signal != SIGCHLD) { -+ crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal)); -+ } - - sig->trigger.trigger = FALSE; - if (sig->handler) { -@@ -218,7 +289,7 @@ static GSourceFuncs crm_signal_funcs = { - crm_trigger_prepare, - crm_trigger_check, - crm_signal_dispatch, -- NULL -+ crm_trigger_finalize, - }; - - gboolean -@@ -321,6 +392,7 @@ mainloop_destroy_signal(int sig) - return TRUE; - } - -+ crm_trace("Destroying signal %d", sig); - tmp = crm_signals[sig]; - crm_signals[sig] = NULL; - mainloop_destroy_trigger((crm_trigger_t *) tmp); -@@ -329,6 +401,14 @@ mainloop_destroy_signal(int sig) - - static qb_array_t *gio_map = NULL; - -+void -+mainloop_cleanup(void) -+{ -+ if(gio_map) { -+ qb_array_free(gio_map); -+ } -+} -+ - /* - * libqb... - */ -@@ -591,7 +671,7 @@ mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data) - } while (keep && rc > 0 && --max > 0); - - } else { -- crm_trace("New message from %s[%p]", client->name, client); -+ crm_trace("New message from %s[%p] %u", client->name, client, condition); - if (client->dispatch_fn_io) { - if (client->dispatch_fn_io(client->userdata) < 0) { - crm_trace("Connection to %s no longer required", client->name); -@@ -651,29 +731,37 @@ static void - mainloop_gio_destroy(gpointer c) - { - mainloop_io_t *client = c; -+ char *c_name = strdup(client->name); - - /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c - * client->channel will still have ref_count > 0... 
should be == 1 - */ -- crm_trace("Destroying client %s[%p] %d", client->name, c, mainloop_gio_refcount(client)); -+ crm_trace("Destroying client %s[%p] %d", c_name, c, mainloop_gio_refcount(client)); - - if (client->ipc) { - crm_ipc_close(client->ipc); - } - - if (client->destroy_fn) { -- client->destroy_fn(client->userdata); -+ void (*destroy_fn) (gpointer userdata) = client->destroy_fn; -+ -+ client->destroy_fn = NULL; -+ destroy_fn(client->userdata); - } - - if (client->ipc) { -- crm_ipc_destroy(client->ipc); -+ crm_ipc_t *ipc = client->ipc; -+ -+ client->ipc = NULL; -+ crm_ipc_destroy(ipc); - } - -- crm_trace("Destroyed client %s[%p] %d", client->name, c, mainloop_gio_refcount(client)); -- free(client->name); -+ crm_trace("Destroyed client %s[%p] %d", c_name, c, mainloop_gio_refcount(client)); - -- memset(client, 0, sizeof(mainloop_io_t)); /* A bit of pointless paranoia */ -+ free(client->name); client->name = NULL; - free(client); -+ -+ free(c_name); - } - - mainloop_io_t * -@@ -770,19 +858,25 @@ mainloop_del_fd(mainloop_io_t * client) - } - - pid_t --mainloop_get_child_pid(mainloop_child_t * child) -+mainloop_child_pid(mainloop_child_t * child) - { - return child->pid; - } - -+const char * -+mainloop_child_name(mainloop_child_t * child) -+{ -+ return child->desc; -+} -+ - int --mainloop_get_child_timeout(mainloop_child_t * child) -+mainloop_child_timeout(mainloop_child_t * child) - { - return child->timeout; - } - - void * --mainloop_get_child_userdata(mainloop_child_t * child) -+mainloop_child_userdata(mainloop_child_t * child) - { - return child->privatedata; - } -@@ -819,70 +913,108 @@ child_timeout_callback(gpointer p) - return FALSE; - } - -+static GListPtr child_list = NULL; -+ - static void --mainloop_child_destroy(mainloop_child_t * child) -+child_death_dispatch(int signal) - { -- if (child->timerid != 0) { -- crm_trace("Removing timer %d", child->timerid); -- g_source_remove(child->timerid); -- child->timerid = 0; -- } -+ GListPtr iter = child_list; - -- free(child->desc); -- g_free(child); --} -+ while(iter) { -+ int rc = 0; -+ int core = 0; -+ int signo = 0; -+ int status = 0; -+ int exitcode = 0; - --static void --child_death_dispatch(GPid pid, gint status, gpointer user_data) --{ -- int signo = 0; -- int exitcode = 0; -- mainloop_child_t *child = user_data; -+ GListPtr saved = NULL; -+ mainloop_child_t *child = iter->data; - -- crm_trace("Managed process %d exited: %p", pid, child); -+ rc = waitpid(child->pid, &status, WNOHANG); -+ if(rc == 0) { -+ iter = iter->next; -+ continue; - -- if (WIFEXITED(status)) { -- exitcode = WEXITSTATUS(status); -- crm_trace("Managed process %d (%s) exited with rc=%d", pid, child->desc, exitcode); -+ } else if(rc != child->pid) { -+ signo = signal; -+ exitcode = 1; -+ status = 1; -+ crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid); - -- } else if (WIFSIGNALED(status)) { -- signo = WTERMSIG(status); -- crm_trace("Managed process %d (%s) exited with signal=%d", pid, child->desc, signo); -- } -+ } else { -+ crm_trace("Managed process %d exited: %p", child->pid, child); -+ -+ if (WIFEXITED(status)) { -+ exitcode = WEXITSTATUS(status); -+ crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode); -+ -+ } else if (WIFSIGNALED(status)) { -+ signo = WTERMSIG(status); -+ crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo); -+ } - #ifdef WCOREDUMP -- if (WCOREDUMP(status)) { -- crm_err("Managed process %d (%s) dumped core", pid, child->desc); -- } -+ if 
(WCOREDUMP(status)) { -+ core = 1; -+ crm_err("Managed process %d (%s) dumped core", child->pid, child->desc); -+ } - #endif -+ } - -- if (child->callback) { -- child->callback(child, status, signo, exitcode); -- } -- crm_trace("Removed process entry for %d", pid); -+ if (child->callback) { -+ child->callback(child, child->pid, core, signo, exitcode); -+ } -+ -+ crm_trace("Removing process entry %p for %d", child, child->pid); -+ -+ saved = iter; -+ iter = iter->next; -+ -+ child_list = g_list_remove_link(child_list, saved); -+ g_list_free(saved); - -- mainloop_child_destroy(child); -- return; -+ if (child->timerid != 0) { -+ crm_trace("Removing timer %d", child->timerid); -+ g_source_remove(child->timerid); -+ child->timerid = 0; -+ } -+ free(child->desc); -+ free(child); -+ } - } - - /* Create/Log a new tracked process - * To track a process group, use -pid - */ - void --mainloop_add_child(pid_t pid, int timeout, const char *desc, void *privatedata, -- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode)) -+mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata, -+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) - { -+ static bool need_init = TRUE; - mainloop_child_t *child = g_new(mainloop_child_t, 1); - - child->pid = pid; - child->timerid = 0; - child->timeout = FALSE; -- child->desc = strdup(desc); - child->privatedata = privatedata; - child->callback = callback; - -+ if(desc) { -+ child->desc = strdup(desc); -+ } -+ - if (timeout) { - child->timerid = g_timeout_add(timeout, child_timeout_callback, child); - } - -- child->watchid = g_child_watch_add(pid, child_death_dispatch, child); -+ child_list = g_list_append(child_list, child); -+ -+ if(need_init) { -+ need_init = FALSE; -+ -+ /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ -+ mainloop_add_signal(SIGCHLD, child_death_dispatch); -+ -+ /* In case they terminated before the signal handler was installed */ -+ child_death_dispatch(SIGCHLD); -+ } - } -diff --git a/lib/common/remote.c b/lib/common/remote.c -index ef198e3..8b00f16 100644 ---- a/lib/common/remote.c -+++ b/lib/common/remote.c -@@ -692,7 +692,7 @@ check_connect_finished(gpointer userdata) - if ((time(NULL) - cb_data->start) < (cb_data->timeout / 1000)) { - goto reschedule; - } -- crm_err("fd %d: timeout during select", sock); -+ crm_debug("fd %d: timeout during select", sock); - rc = -ETIMEDOUT; - goto dispatch_done; - } else { -@@ -728,11 +728,11 @@ check_connect_finished(gpointer userdata) - } else { - close(sock); - } -- free(cb_data); - - if (cb_data->callback) { - cb_data->callback(cb_data->userdata, rc); - } -+ free(cb_data); - return FALSE; - - reschedule: -@@ -748,7 +748,7 @@ internal_tcp_connect_async(int sock, - { - int rc = 0; - int flag = 0; -- int interval = 1000; -+ int interval = 500; - struct tcp_async_cb_data *cb_data = NULL; - - if ((flag = fcntl(sock, F_GETFL)) >= 0) { -@@ -821,12 +821,12 @@ int - crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - void *userdata, void (*callback) (void *userdata, int sock)) - { -- struct addrinfo *res; -- struct addrinfo *rp; -+ struct addrinfo *res = NULL; -+ struct addrinfo *rp = NULL; - struct addrinfo hints; - const char *server = host; - int ret_ga; -- int sock; -+ int sock = -1; - - /* getaddrinfo */ - memset(&hints, 0, sizeof(struct addrinfo)); -@@ -843,7 +843,7 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - - if (!res || !res->ai_addr) { - 
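The mainloop_child_add()/child_death_dispatch() rewrite above drops g_child_watch_add() (which relies on pthreads) in favour of a SIGCHLD handler that polls every tracked pid with waitpid(WNOHANG); because SIGCHLD deliveries coalesce, each dispatch must sweep the whole list. The core of that sweep for a single child, as a standalone sketch:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    static void
    reap_child(pid_t pid)
    {
        int status = 0;
        pid_t rc = waitpid(pid, &status, WNOHANG);

        if (rc == 0) {
            /* still running: leave it on the tracked list */
        } else if (rc == pid) {
            if (WIFEXITED(status)) {
                printf("pid %d exited rc=%d\n", (int)pid, WEXITSTATUS(status));
            } else if (WIFSIGNALED(status)) {
                printf("pid %d killed by signal %d\n", (int)pid, WTERMSIG(status));
            }
        } else {
            perror("waitpid");      /* pid no longer ours to reap */
        }
    }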
crm_err("getaddrinfo failed"); -- return -1; -+ goto async_cleanup; - } - - for (rp = res; rp != NULL; rp = rp->ai_next) { -@@ -879,7 +879,8 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - if (callback) { - if (internal_tcp_connect_async - (sock, rp->ai_addr, rp->ai_addrlen, timeout, userdata, callback) == 0) { -- return 0; /* Success for now, we'll hear back later in the callback */ -+ sock = 0; -+ goto async_cleanup; /* Success for now, we'll hear back later in the callback */ - } - - } else { -@@ -891,8 +892,12 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - close(sock); - sock = -1; - } -- freeaddrinfo(res); - -+async_cleanup: -+ -+ if (res) { -+ freeaddrinfo(res); -+ } - return sock; - } - -diff --git a/lib/common/utils.c b/lib/common/utils.c -index 503abce..adf0a6b 100644 ---- a/lib/common/utils.c -+++ b/lib/common/utils.c -@@ -47,6 +47,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -71,26 +72,44 @@ CRM_TRACE_INIT_DATA(common); - - gboolean crm_config_error = FALSE; - gboolean crm_config_warning = FALSE; --const char *crm_system_name = "unknown"; -+char *crm_system_name = NULL; - - int node_score_red = 0; - int node_score_green = 0; - int node_score_yellow = 0; - int node_score_infinity = INFINITY; - -+static struct crm_option *crm_long_options = NULL; -+static const char *crm_app_description = NULL; -+static char *crm_short_options = NULL; -+static const char *crm_app_usage = NULL; -+ - int - crm_exit(int rc) - { -- crm_trace("exit %d", rc); -+ mainloop_cleanup(); -+ - #if HAVE_LIBXML2 -+ crm_trace("cleaning up libxml"); - crm_xml_cleanup(); - #endif -+ -+ crm_trace("exit %d", rc); - qb_log_fini(); -- exit(rc); -- return rc; /* Can never happen, but allows return crm_exit(rc) -- * where "return rc" was used previously -- * - which keeps compilers happy. -- */ -+ -+ free(crm_short_options); -+ free(crm_system_name); -+ -+ exit(ABS(rc)); /* Always exit with a positive value so that it can be passed to crm_error -+ * -+ * Otherwise the system wraps it around and people -+ * have to jump through hoops figuring out what the -+ * error was -+ */ -+ return rc; /* Can never happen, but allows return crm_exit(rc) -+ * where "return rc" was used previously - which -+ * keeps compilers happy. 
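The crm_remote_tcp_connect_async() change above routes every exit through a single async_cleanup label so freeaddrinfo() can neither be skipped on an error path nor reached with an uninitialised pointer. The same shape in isolation:

    #include <netdb.h>
    #include <string.h>
    #include <sys/socket.h>

    static int
    resolve_host(const char *host)
    {
        int rc = -1;
        struct addrinfo hints;
        struct addrinfo *res = NULL;

        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_UNSPEC;
        hints.ai_socktype = SOCK_STREAM;

        if (getaddrinfo(host, NULL, &hints, &res) != 0 || res == NULL) {
            goto cleanup;
        }
        rc = 0;                 /* ... res->ai_addr would be used here ... */

    cleanup:
        if (res) {
            freeaddrinfo(res);  /* safe: res stays NULL unless resolution worked */
        }
        return rc;
    }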
-+ */ - } - - gboolean -@@ -315,9 +334,11 @@ crm_concat(const char *prefix, const char *suffix, char join) - CRM_ASSERT(suffix != NULL); - len = strlen(prefix) + strlen(suffix) + 2; - -- new_str = calloc(1, (len)); -- sprintf(new_str, "%s%c%s", prefix, join, suffix); -- new_str[len - 1] = 0; -+ new_str = malloc(len); -+ if(new_str) { -+ sprintf(new_str, "%s%c%s", prefix, join, suffix); -+ new_str[len - 1] = 0; -+ } - return new_str; - } - -@@ -336,7 +357,7 @@ crm_itoa(int an_int) - int len = 32; - char *buffer = NULL; - -- buffer = calloc(1, (len + 1)); -+ buffer = malloc(len + 1); - if (buffer != NULL) { - snprintf(buffer, len, "%d", an_int); - } -@@ -344,6 +365,29 @@ crm_itoa(int an_int) - return buffer; - } - -+void -+crm_build_path(const char *path_c, mode_t mode) -+{ -+ int offset = 1, len = 0; -+ char *path = strdup(path_c); -+ -+ CRM_CHECK(path != NULL, return); -+ for (len = strlen(path); offset < len; offset++) { -+ if (path[offset] == '/') { -+ path[offset] = 0; -+ if (mkdir(path, mode) < 0 && errno != EEXIST) { -+ crm_perror(LOG_ERR, "Could not create directory '%s'", path); -+ break; -+ } -+ path[offset] = '/'; -+ } -+ } -+ if (mkdir(path, mode) < 0 && errno != EEXIST) { -+ crm_perror(LOG_ERR, "Could not create directory '%s'", path); -+ } -+ free(path); -+} -+ - int - crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) - { -@@ -693,7 +737,7 @@ generate_op_key(const char *rsc_id, const char *op_type, int interval) - - len += strlen(op_type); - len += strlen(rsc_id); -- op_id = calloc(1, len); -+ op_id = malloc(len); - CRM_CHECK(op_id != NULL, return NULL); - sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval); - return op_id; -@@ -785,7 +829,7 @@ generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_ - len += strlen(op_type); - len += strlen(rsc_id); - len += strlen(notify_type); -- op_id = calloc(1, len); -+ op_id = malloc(len); - if (op_id != NULL) { - sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type); - } -@@ -802,7 +846,7 @@ generate_transition_magic_v202(const char *transition_key, int op_status) - - len += strlen(transition_key); - -- fail_state = calloc(1, len); -+ fail_state = malloc(len); - if (fail_state != NULL) { - snprintf(fail_state, len, "%d:%s", op_status, transition_key); - } -@@ -819,7 +863,7 @@ generate_transition_magic(const char *transition_key, int op_status, int op_rc) - - len += strlen(transition_key); - -- fail_state = calloc(1, len); -+ fail_state = malloc(len); - if (fail_state != NULL) { - snprintf(fail_state, len, "%d:%d;%s", op_status, op_rc, transition_key); - } -@@ -864,9 +908,9 @@ generate_transition_key(int transition_id, int action_id, int target_rc, const c - - len += strlen(node); - -- fail_state = calloc(1, len); -+ fail_state = malloc(len); - if (fail_state != NULL) { -- snprintf(fail_state, len, "%d:%d:%d:%s", action_id, transition_id, target_rc, node); -+ snprintf(fail_state, len, "%d:%d:%d:%-*s", action_id, transition_id, target_rc, 36, node); - } - return fail_state; - } -@@ -1025,11 +1069,13 @@ filter_reload_parameters(xmlNode * param_set, const char *restart_string) - name = NULL; - len = strlen(prop_name) + 3; - -- name = calloc(1, len); -- sprintf(name, " %s ", prop_name); -- name[len - 1] = 0; -+ name = malloc(len); -+ if(name) { -+ sprintf(name, " %s ", prop_name); -+ name[len - 1] = 0; -+ match = strstr(restart_string, name); -+ } - -- match = strstr(restart_string, name); - if (match == NULL) { - crm_trace("%s not found in %s", prop_name, restart_string); - 
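crm_build_path() added above is a mkdir -p equivalent: it creates each missing path component in turn and tolerates EEXIST. A usage sketch, assuming the caller links against the library that provides it (the path is an example, not from the patch):

    #include <sys/stat.h>
    #include <sys/types.h>

    extern void crm_build_path(const char *path_c, mode_t mode); /* from above */

    int
    main(void)
    {
        /* creates /tmp/pcmk-demo, then a, b and c beneath it as needed;
         * 0750 keeps the tree readable by owner and group only */
        crm_build_path("/tmp/pcmk-demo/a/b/c", 0750);
        return 0;
    }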
xml_remove_prop(param_set, prop_name); -@@ -1075,7 +1121,7 @@ crm_abort(const char *file, const char *function, int line, - default: /* Parent */ - crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s", - function, pid, file, line, assert_condition); -- crm_write_blackbox(SIGABRT, NULL); -+ crm_write_blackbox(SIGTRAP, NULL); - - do { - rc = waitpid(pid, &status, 0); -@@ -1099,9 +1145,13 @@ generate_series_filename(const char *directory, const char *series, int sequence - CRM_CHECK(directory != NULL, return NULL); - CRM_CHECK(series != NULL, return NULL); - -+#if !HAVE_BZLIB_H -+ bzip = FALSE; -+#endif -+ - len += strlen(directory); - len += strlen(series); -- filename = calloc(1, len); -+ filename = malloc(len); - CRM_CHECK(filename != NULL, return NULL); - - if (bzip) { -@@ -1127,7 +1177,7 @@ get_last_sequence(const char *directory, const char *series) - - len += strlen(directory); - len += strlen(series); -- series_file = calloc(1, len); -+ series_file = malloc(len); - CRM_CHECK(series_file != NULL, return 0); - sprintf(series_file, "%s/%s.last", directory, series); - -@@ -1193,21 +1243,23 @@ write_last_sequence(const char *directory, const char *series, int sequence, int - - len += strlen(directory); - len += strlen(series); -- series_file = calloc(1, len); -- sprintf(series_file, "%s/%s.last", directory, series); -+ series_file = malloc(len); - -- file_strm = fopen(series_file, "w"); -- if (file_strm == NULL) { -- crm_err("Cannout open series file %s for writing", series_file); -- goto bail; -+ if(series_file) { -+ sprintf(series_file, "%s/%s.last", directory, series); -+ file_strm = fopen(series_file, "w"); - } - -- rc = fprintf(file_strm, "%d", sequence); -- if (rc < 0) { -- crm_perror(LOG_ERR, "Cannot write to series file %s", series_file); -+ if (file_strm != NULL) { -+ rc = fprintf(file_strm, "%d", sequence); -+ if (rc < 0) { -+ crm_perror(LOG_ERR, "Cannot write to series file %s", series_file); -+ } -+ -+ } else { -+ crm_err("Cannout open series file %s for writing", series_file); - } - -- bail: - if (file_strm != NULL) { - fflush(file_strm); - fclose(file_strm); -@@ -1308,15 +1360,22 @@ crm_pidfile_inuse(const char *filename, long mypid) - } - if (read(fd, buf, sizeof(buf)) > 0) { - if (sscanf(buf, "%lu", &pid) > 0) { -+ crm_trace("Got pid %lu from %s\n", pid, filename); - if (pid <= 1) { - /* Invalid pid */ - rc = -ENOENT; -+ unlink(filename); - - } else if (mypid && pid == mypid) { - /* In use by us */ - rc = pcmk_ok; - -- } else if (mypid && pid != mypid && crm_pid_active(pid)) { -+ } else if (crm_pid_active(pid) == FALSE) { -+ /* Contains a stale value */ -+ unlink(filename); -+ rc = -ENOENT; -+ -+ } else if (mypid && pid != mypid) { - /* locked by existing process - give up */ - rc = -EEXIST; - } -@@ -1337,9 +1396,12 @@ crm_lock_pidfile(const char *filename) - mypid = (unsigned long)getpid(); - - rc = crm_pidfile_inuse(filename, 0); -- if (rc != pcmk_ok && rc != -ENOENT) { -+ if (rc == -ENOENT) { -+ /* exists but the process is not active */ -+ -+ } else if (rc != pcmk_ok) { - /* locked by existing process - give up */ -- return -1; -+ return rc; - } - - if ((fd = open(filename, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { -@@ -1354,7 +1416,6 @@ crm_lock_pidfile(const char *filename) - if (rc != LOCKSTRLEN) { - crm_perror(LOG_ERR, "Incomplete write to %s", filename); - return -errno; -- - } - - return crm_pidfile_inuse(filename, mypid); -@@ -1363,6 +1424,7 @@ crm_lock_pidfile(const char *filename) - void - crm_make_daemon(const char *name, gboolean daemonize, 
const char *pidfile) - { -+ int rc; - long pid; - const char *devnull = "/dev/null"; - -@@ -1370,22 +1432,30 @@ crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) - return; - } - -+ /* Check before we even try... */ -+ rc = crm_pidfile_inuse(pidfile, 1); -+ if(rc < pcmk_ok && rc != -ENOENT) { -+ pid = crm_read_pidfile(pidfile); -+ crm_err("%s: already running [pid %ld in %s]", name, pid, pidfile); -+ printf("%s: already running [pid %ld in %s]\n", name, pid, pidfile); -+ crm_exit(rc); -+ } -+ - pid = fork(); - if (pid < 0) { - fprintf(stderr, "%s: could not start daemon\n", name); - crm_perror(LOG_ERR, "fork"); -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - - } else if (pid > 0) { -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - -- if (crm_lock_pidfile(pidfile) < 0) { -- pid = crm_read_pidfile(pidfile); -- if (crm_pid_active(pid) > 0) { -- crm_warn("%s: already running [pid %ld] (%s).\n", name, pid, pidfile); -- crm_exit(EX_OK); -- } -+ rc = crm_lock_pidfile(pidfile); -+ if(rc < pcmk_ok) { -+ crm_err("Could not lock '%s' for %s: %s (%d)", pidfile, name, pcmk_strerror(rc), rc); -+ printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_strerror(rc), rc); -+ crm_exit(rc); - } - - umask(S_IWGRP | S_IWOTH | S_IROTH); -@@ -1527,11 +1597,6 @@ crm_meta_value(GHashTable * hash, const char *field) - return value; - } - --static struct crm_option *crm_long_options = NULL; --static const char *crm_app_description = NULL; --static const char *crm_short_options = NULL; --static const char *crm_app_usage = NULL; -- - static struct option * - crm_create_long_opts(struct crm_option *long_options) - { -@@ -1585,7 +1650,7 @@ crm_set_options(const char *short_options, const char *app_usage, struct crm_opt - const char *app_desc) - { - if (short_options) { -- crm_short_options = short_options; -+ crm_short_options = strdup(short_options); - - } else if (long_options) { - int lpc = 0; -@@ -1742,12 +1807,68 @@ crm_help(char cmd, int exit_code) - } - } - -+void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, -+ qb_ipcs_service_t **ipcs_rw, -+ qb_ipcs_service_t **ipcs_shm, -+ struct qb_ipcs_service_handlers *ro_cb, -+ struct qb_ipcs_service_handlers *rw_cb) -+{ -+ *ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, ro_cb); -+ *ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, rw_cb); -+ *ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, rw_cb); -+ -+ if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { -+ crm_err("Failed to create cib servers: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ -+void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, -+ qb_ipcs_service_t *ipcs_rw, -+ qb_ipcs_service_t *ipcs_shm) -+{ -+ qb_ipcs_destroy(ipcs_ro); -+ qb_ipcs_destroy(ipcs_rw); -+ qb_ipcs_destroy(ipcs_shm); -+} -+ -+qb_ipcs_service_t * -+crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb) -+{ -+ return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); -+} -+ -+void -+attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) -+{ -+ *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); -+ -+ if (*ipcs == NULL) { -+ crm_err("Failed to create attrd servers: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ -+void -+stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct 
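The crm_pidfile_inuse()/crm_make_daemon() changes above make a pidfile block startup only when it names a live process; invalid or stale files are unlinked so the lock can be retaken. A compact sketch of that test, with illustrative names and kill(pid, 0) as the liveness probe:

    #include <errno.h>
    #include <signal.h>
    #include <stdio.h>
    #include <sys/types.h>
    #include <unistd.h>

    static int
    pid_active(long pid)
    {
        /* signal 0 probes existence; EPERM still means "it exists" */
        return (kill((pid_t)pid, 0) == 0 || errno == EPERM);
    }

    static int
    pidfile_inuse(const char *filename)
    {
        long pid = 0;
        FILE *fp = fopen(filename, "r");

        if (fp == NULL) {
            return 0;                   /* no file: free to take the lock */
        }
        if (fscanf(fp, "%ld", &pid) != 1 || pid <= 1 || !pid_active(pid)) {
            fclose(fp);
            unlink(filename);           /* invalid or stale: discard it */
            return 0;
        }
        fclose(fp);
        return 1;                       /* a live daemon already holds it */
    }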
qb_ipcs_service_handlers *cb) -+{ -+ *ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb); -+ -+ if (*ipcs == NULL) { -+ crm_err("Failed to create stonith-ng servers: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ - int - attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const char *name, - const char *value, const char *section, const char *set, const char *dampen, - const char *user_name) - { -- int rc = 0; -+ int rc = -ENOTCONN; - int max = 5; - enum crm_ipc_flags flags = crm_ipc_client_none; - xmlNode *update = create_xml_node(NULL, __FUNCTION__); -@@ -1822,7 +1943,7 @@ attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const cha - } else if (rc > 0) { - break; - -- } else if (rc == -EAGAIN || rc == -EREMOTEIO) { -+ } else if (rc == -EAGAIN || rc == -EALREADY) { - sleep(5 - max); - max--; - -@@ -1837,6 +1958,8 @@ attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const cha - free_xml(update); - if (rc > 0) { - crm_debug("Sent update: %s=%s for %s", name, value, host ? host : "localhost"); -+ rc = pcmk_ok; -+ - } else { - crm_debug("Could not send update %s=%s for %s: %s (%d)", name, value, - host ? host : "localhost", pcmk_strerror(rc), rc); -@@ -2032,11 +2155,21 @@ create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char *ca - op->t_run, op->t_rcchange, op->exec_time, op->queue_time); - - if (op->interval == 0) { -- crm_xml_add_int(xml_op, "last-run", op->t_run); -+ /* The values are the same for non-recurring ops */ -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run); -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); -+ -+ } else if(op->t_rcchange) { -+ /* last-run is not accurate for recurring ops */ -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange); -+ -+ } else { -+ /* ...but is better than nothing otherwise */ -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); - } -- crm_xml_add_int(xml_op, "last-rc-change", op->t_rcchange); -- crm_xml_add_int(xml_op, "exec-time", op->exec_time); -- crm_xml_add_int(xml_op, "queue-time", op->queue_time); -+ -+ crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); -+ crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); - } - } - -@@ -2081,7 +2214,7 @@ uid2username(uid_t uid) - } - - void --determine_request_user(char *user, xmlNode * request, const char *field) -+determine_request_user(const char *user, xmlNode * request, const char *field) - { - /* Get our internal validation out of the way first */ - CRM_CHECK(user != NULL && request != NULL && field != NULL, return); -@@ -2140,7 +2273,7 @@ find_library_function(void **handle, const char *lib, const char *fn, gboolean f - if (!(*handle)) { - crm_err("%sCould not open %s: %s", fatal ? "Fatal: " : "", lib, dlerror()); - if (fatal) { -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - return NULL; - } -@@ -2149,7 +2282,7 @@ find_library_function(void **handle, const char *lib, const char *fn, gboolean f - if ((error = dlerror()) != NULL) { - crm_err("%sCould not find %s in %s: %s", fatal ? 
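attrd_update_delegate() above now starts from -ENOTCONN and treats -EAGAIN and -EALREADY as transient, backing off a little longer on each of its five attempts before giving up. The retry shape on its own (illustrative stand-alone version, not the patch's exact control flow):

    #include <errno.h>
    #include <unistd.h>

    static int
    send_with_retry(int (*try_send)(void))
    {
        int max = 5;
        int rc = -ENOTCONN;

        while (max > 0) {
            rc = try_send();
            if (rc == -EAGAIN || rc == -EALREADY) {
                sleep(5 - max);         /* back off longer on each retry */
                max--;
            } else {
                break;                  /* success, or a permanent error */
            }
        }
        return rc;
    }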
"Fatal: " : "", fn, lib, error); - if (fatal) { -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - } - -@@ -2202,18 +2335,27 @@ crm_generate_uuid(void) - char * - crm_md5sum(const char *buffer) - { -- int lpc = 0; -+ int lpc = 0, len = 0; - char *digest = NULL; - unsigned char raw_digest[MD5_DIGEST_SIZE]; - -- crm_trace("Beginning digest"); -+ if(buffer != NULL) { -+ len = strlen(buffer); -+ } -+ -+ crm_trace("Beginning digest of %d bytes", len); - digest = malloc(2 * MD5_DIGEST_SIZE + 1); -- md5_buffer(buffer, strlen(buffer), raw_digest); -- for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { -- sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); -+ if(digest) { -+ md5_buffer(buffer, len, raw_digest); -+ for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { -+ sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); -+ } -+ digest[(2 * MD5_DIGEST_SIZE)] = 0; -+ crm_trace("Digest %s.", digest); -+ -+ } else { -+ crm_err("Could not create digest"); - } -- digest[(2 * MD5_DIGEST_SIZE)] = 0; -- crm_trace("Digest %s\n", digest); - return digest; - } - -@@ -2233,7 +2375,10 @@ crm_compress_string(const char *data, int length, int max, char **result, unsign - max = (length * 1.1) + 600; /* recomended size */ - } - -+#ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &before_t); -+#endif -+ - /* coverity[returned_null] Ignore */ - compressed = malloc(max); - -@@ -2249,7 +2394,10 @@ crm_compress_string(const char *data, int length, int max, char **result, unsign - return FALSE; - } - -+#ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &after_t); -+#endif -+ - crm_info("Compressed %d bytes into %d (ratio %d:1) in %dms", - length, *result_len, length / (*result_len), - (after_t.tv_sec - before_t.tv_sec) * 1000 + (after_t.tv_nsec - -diff --git a/lib/common/xml.c b/lib/common/xml.c -index 9832321..d467ce4 100644 ---- a/lib/common/xml.c -+++ b/lib/common/xml.c -@@ -89,15 +89,6 @@ typedef struct { - } filter_t; - - /* *INDENT-OFF* */ --enum xml_log_options --{ -- xml_log_option_filtered = 0x001, -- xml_log_option_formatted = 0x002, -- xml_log_option_diff_plus = 0x010, -- xml_log_option_diff_minus = 0x020, -- xml_log_option_diff_short = 0x040, -- xml_log_option_diff_all = 0x100, --}; - - struct schema_s known_schemas[] = { - /* 0 */ { 0, NULL, NULL, NULL, 1 }, -@@ -125,8 +116,10 @@ static int max_schemas = DIMOF(known_schemas) - 2; /* skip back past 'none' - #define CHUNK_SIZE 1024 - - #define buffer_print(buffer, max, offset, fmt, args...) 
do { \ -- int rc; \ -- rc = snprintf((buffer) + (offset), (max) - (offset), fmt, ##args); \ -+ int rc = (max); \ -+ if(buffer) { \ -+ rc = snprintf((buffer) + (offset), (max) - (offset), fmt, ##args); \ -+ } \ - if(rc < 0) { \ - crm_perror(LOG_ERR, "snprintf failed"); \ - (buffer)[(offset)] = 0; \ -@@ -146,7 +139,7 @@ insert_prefix(int options, char **buffer, int *offset, int *max, int depth) - if (options & xml_log_option_formatted) { - size_t spaces = 2 * depth; - -- if (spaces >= ((*max) - (*offset))) { -+ if ((*buffer) == NULL || spaces >= ((*max) - (*offset))) { - (*max) = QB_MAX(CHUNK_SIZE, (*max) * 2); - (*buffer) = realloc((*buffer), (*max) + 1); - } -@@ -177,7 +170,6 @@ gboolean can_prune_leaf(xmlNode * xml_node); - void diff_filter_context(int context, int upper_bound, int lower_bound, - xmlNode * xml_node, xmlNode * parent); - int in_upper_context(int depth, int context, xmlNode * xml_node); --int write_file(const char *string, const char *filename); - int add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as_diff); - - static inline const char * -@@ -516,44 +508,6 @@ static void - crm_xml_err(void *ctx, const char *msg, ...) - G_GNUC_PRINTF(2, 3); - --int --write_file(const char *string, const char *filename) --{ -- int rc = 0; -- FILE *file_output_strm = NULL; -- -- CRM_CHECK(filename != NULL, return -1); -- -- if (string == NULL) { -- crm_err("Cannot write NULL to %s", filename); -- return -1; -- } -- -- file_output_strm = fopen(filename, "w"); -- if (file_output_strm == NULL) { -- crm_perror(LOG_ERR, "Cannot open %s for writing", filename); -- return -1; -- } -- -- rc = fprintf(file_output_strm, "%s", string); -- if (rc < 0) { -- crm_perror(LOG_ERR, "Cannot write output to %s", filename); -- } -- -- if (fflush(file_output_strm) != 0) { -- crm_perror(LOG_ERR, "fflush for %s failed:", filename); -- rc = -1; -- } -- -- if (fsync(fileno(file_output_strm)) < 0) { -- crm_perror(LOG_ERR, "fsync for %s failed:", filename); -- rc = -1; -- } -- -- fclose(file_output_strm); -- return rc; --} -- - static void - crm_xml_err(void *ctx, const char *msg, ...) 
- { -@@ -644,9 +598,13 @@ string2xml(const char *input) - - } else { - int len = strlen(input); -+ int lpc = 0; -+ -+ while(lpc < len) { -+ crm_warn("Parse error[+%.3d]: %.80s", lpc, input+lpc); -+ lpc += 80; -+ } - -- crm_warn("String start: %.50s", input); -- crm_warn("String start+%d: %s", len - 50, input + len - 50); - crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, "String parsing error", TRUE, TRUE); - } - } -@@ -836,9 +794,7 @@ static int - write_xml_stream(xmlNode * xml_node, const char *filename, FILE * stream, gboolean compress) - { - int res = 0; -- time_t now; - char *buffer = NULL; -- char *now_str = NULL; - unsigned int out = 0; - static mode_t cib_mode = S_IRUSR | S_IWUSR; - -@@ -851,15 +807,20 @@ write_xml_stream(xmlNode * xml_node, const char *filename, FILE * stream, gboole - return -1; - } - -- /* establish the correct permissions */ -- fchmod(fileno(stream), cib_mode); - - crm_log_xml_trace(xml_node, "Writing out"); - -- now = time(NULL); -- now_str = ctime(&now); -- now_str[24] = EOS; /* replace the newline */ -- crm_xml_add(xml_node, XML_CIB_ATTR_WRITTEN, now_str); -+ if(strstr(filename, "cib") != NULL) { -+ /* Only CIB's need this field written */ -+ time_t now = time(NULL); -+ char *now_str = ctime(&now); -+ -+ now_str[24] = EOS; /* replace the newline */ -+ crm_xml_add(xml_node, XML_CIB_ATTR_WRITTEN, now_str); -+ -+ /* establish the correct permissions */ -+ fchmod(fileno(stream), cib_mode); -+ } - - buffer = dump_xml_formatted(xml_node); - CRM_CHECK(buffer != NULL && strlen(buffer) > 0, crm_log_xml_warn(xml_node, "dump:failed"); -@@ -936,7 +897,9 @@ write_xml_fd(xmlNode * xml_node, const char *filename, int fd, gboolean compress - int - write_xml_file(xmlNode * xml_node, const char *filename, gboolean compress) - { -- FILE *stream = fopen(filename, "w"); -+ FILE *stream = NULL; -+ -+ stream = fopen(filename, "w"); - - return write_xml_stream(xml_node, filename, stream, compress); - } -@@ -1000,6 +963,8 @@ crm_xml_escape(const char *text) - - for (index = 0; index < length; index++) { - switch (copy[index]) { -+ case 0: -+ break; - case '<': - copy = crm_xml_escape_shuffle(copy, index, &length, "<"); - changes++; -@@ -1020,6 +985,35 @@ crm_xml_escape(const char *text) - copy = crm_xml_escape_shuffle(copy, index, &length, "&"); - changes++; - break; -+ case '\t': -+ /* Might as well just expand to a few spaces... */ -+ copy = crm_xml_escape_shuffle(copy, index, &length, " "); -+ changes++; -+ break; -+ case '\n': -+ /* crm_trace("Convert: \\%.3o", copy[index]); */ -+ copy = crm_xml_escape_shuffle(copy, index, &length, "\\n"); -+ changes++; -+ break; -+ case '\r': -+ copy = crm_xml_escape_shuffle(copy, index, &length, "\\r"); -+ changes++; -+ break; -+ /* For debugging... 
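/*
 * A minimal sketch of the string2xml() logging change above: instead of
 * printing only the head and tail of an unparsable document, walk the whole
 * input in 80-character windows so the offset of the bad content is visible
 * in the log.  printf() stands in for crm_warn() here.
 */
#include <stdio.h>
#include <string.h>

static void
log_parse_error(const char *input)
{
    int len = (int) strlen(input);
    int lpc = 0;

    while (lpc < len) {
        /* "+%.3d" tags each chunk with its offset, "%.80s" caps the width */
        printf("Parse error[+%.3d]: %.80s\n", lpc, input + lpc);
        lpc += 80;
    }
}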
-+ case '\\': -+ crm_trace("Passthrough: \\%c", copy[index+1]); -+ break; -+ */ -+ default: -+ /* Check for and replace non-printing characters with their octal equivalent */ -+ if(copy[index] < ' ' || copy[index] > '~') { -+ char *replace = g_strdup_printf("\\%.3o", copy[index]); -+ -+ /* crm_trace("Convert to octal: \\%.3o", copy[index]); */ -+ copy = crm_xml_escape_shuffle(copy, index, &length, replace); -+ free(replace); -+ changes++; -+ } - } - } - -@@ -1033,12 +1027,13 @@ static inline void - dump_xml_attr(xmlAttrPtr attr, int options, char **buffer, int *offset, int *max) - { - char *p_value = NULL; -- const char *p_name = (const char *)attr->name; -+ const char *p_name = NULL; - - if (attr == NULL || attr->children == NULL) { - return; - } - -+ p_name = (const char *)attr->name; - p_value = crm_xml_escape((const char *)attr->children->content); - buffer_print(*buffer, *max, *offset, " %s=\"%s\"", p_name, p_value); - free(p_value); -@@ -1065,8 +1060,8 @@ log_data_element(int log_level, const char *file, const char *function, int line - - /* Since we use the same file and line, to avoid confusing libqb, we need to use the same format strings */ - if (data == NULL) { -- do_crm_log_alias(log_level, file, function, line, "%s%s", prefix, -- ": No data to dump as XML"); -+ do_crm_log_alias(log_level, file, function, line, "%s: %s", prefix, -+ "No data to dump as XML"); - return; - - } else if (is_set(options, xml_log_option_diff_short) -@@ -1098,7 +1093,14 @@ log_data_element(int log_level, const char *file, const char *function, int line - } - - insert_prefix(options, &buffer, &offset, &max, depth); -- buffer_print(buffer, max, offset, "<%s", name); -+ if(data->type == XML_COMMENT_NODE) { -+ buffer_print(buffer, max, offset, ""); -+ -+ } else { -+ buffer_print(buffer, max, offset, "<%s", name); -+ } - - hidden = crm_element_value(data, "hidden"); - for (pIter = crm_first_attr(data); pIter != NULL; pIter = pIter->next) { -@@ -1128,9 +1130,9 @@ log_data_element(int log_level, const char *file, const char *function, int line - buffer_print(buffer, max, offset, "/>"); - } - -- do_crm_log_alias(log_level, file, function, line, "%s%s", prefix, buffer); -+ do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); - -- if (data->children) { -+ if (data->children && data->type != XML_COMMENT_NODE) { - offset = 0; - max = 0; - free(buffer); -@@ -1143,7 +1145,7 @@ log_data_element(int log_level, const char *file, const char *function, int line - insert_prefix(options, &buffer, &offset, &max, depth); - buffer_print(buffer, max, offset, "", name); - -- do_crm_log_alias(log_level, file, function, line, "%s%s", prefix, buffer); -+ do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); - } - - free(prefix_m); -@@ -1180,7 +1182,10 @@ dump_filtered_xml(xmlNode * data, int options, char **buffer, int *offset, int * - } - - static void --dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) -+dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth); -+ -+static void -+dump_xml_element(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) - { - const char *name = NULL; - -@@ -1208,13 +1213,73 @@ dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int - dump_filtered_xml(data, options, buffer, offset, max); - - } else { --#if 1 - xmlAttrPtr xIter = NULL; - - for (xIter = crm_first_attr(data); xIter != NULL; xIter = xIter->next) { - dump_xml_attr(xIter, options, 
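/*
 * A minimal sketch of the crm_xml_escape() additions above: tabs expand to
 * spaces, newlines and carriage returns get fixed replacements, and any
 * other non-printing character is rendered as a three-digit octal escape
 * ("\ooo").  This standalone version prints the escaped form rather than
 * shuffling the string in place as crm_xml_escape_shuffle() does.
 */
#include <stdio.h>

static void
print_escaped(const char *text)
{
    for (; *text != '\0'; text++) {
        unsigned char c = (unsigned char) *text;

        if (c == '\t') {
            fputs("    ", stdout);          /* expand tabs to spaces */
        } else if (c == '\n') {
            fputs("\\n", stdout);
        } else if (c == '\r') {
            fputs("\\r", stdout);
        } else if (c < ' ' || c > '~') {
            printf("\\%.3o", c);            /* non-printing -> octal */
        } else {
            putchar(c);
        }
    }
    putchar('\n');
}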
buffer, offset, max); - } --#else -+ } -+ -+ if (data->children == NULL) { -+ buffer_print(*buffer, *max, *offset, "/>"); -+ -+ } else { -+ buffer_print(*buffer, *max, *offset, ">"); -+ } -+ -+ if (options & xml_log_option_formatted) { -+ buffer_print(*buffer, *max, *offset, "\n"); -+ } -+ -+ if (data->children) { -+ xmlNode *xChild = NULL; -+ -+ for (xChild = __xml_first_child(data); xChild != NULL; xChild = __xml_next(xChild)) { -+ dump_xml(xChild, options, buffer, offset, max, depth + 1); -+ } -+ -+ insert_prefix(options, buffer, offset, max, depth); -+ buffer_print(*buffer, *max, *offset, "", name); -+ -+ if (options & xml_log_option_formatted) { -+ buffer_print(*buffer, *max, *offset, "\n"); -+ } -+ } -+} -+ -+static void -+dump_xml_comment(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) -+{ -+ CRM_ASSERT(max != NULL); -+ CRM_ASSERT(offset != NULL); -+ CRM_ASSERT(buffer != NULL); -+ -+ if (data == NULL) { -+ crm_trace("Nothing to dump"); -+ return; -+ } -+ -+ if (*buffer == NULL) { -+ *offset = 0; -+ *max = 0; -+ } -+ -+ insert_prefix(options, buffer, offset, max, depth); -+ -+ buffer_print(*buffer, *max, *offset, ""); -+ -+ if (options & xml_log_option_formatted) { -+ buffer_print(*buffer, *max, *offset, "\n"); -+ } -+} -+ -+static void -+dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) -+{ -+#if 0 -+ if (is_not_set(options, xml_log_option_filtered)) { - /* Turning this code on also changes the PE tests for some reason - * (not just newlines). Figure out why before considering to - * enable this permanently. -@@ -1263,34 +1328,46 @@ dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int - - xmlBufferFree(xml_buffer); - return; --#endif -- } -- -- if (data->children == NULL) { -- buffer_print(*buffer, *max, *offset, "/>"); -- -- } else { -- buffer_print(*buffer, *max, *offset, ">"); - } -+#endif - -- if (options & xml_log_option_formatted) { -- buffer_print(*buffer, *max, *offset, "\n"); -+ switch(data->type) { -+ case XML_ELEMENT_NODE: -+ /* Handle below */ -+ dump_xml_element(data, options, buffer, offset, max, depth); -+ break; -+ case XML_TEXT_NODE: -+ /* Ignore */ -+ return; -+ case XML_COMMENT_NODE: -+ dump_xml_comment(data, options, buffer, offset, max, depth); -+ break; -+ default: -+ crm_warn("Unhandled type: %d", data->type); -+ return; -+ -+ /* -+ XML_ATTRIBUTE_NODE = 2 -+ XML_CDATA_SECTION_NODE = 4 -+ XML_ENTITY_REF_NODE = 5 -+ XML_ENTITY_NODE = 6 -+ XML_PI_NODE = 7 -+ XML_DOCUMENT_NODE = 9 -+ XML_DOCUMENT_TYPE_NODE = 10 -+ XML_DOCUMENT_FRAG_NODE = 11 -+ XML_NOTATION_NODE = 12 -+ XML_HTML_DOCUMENT_NODE = 13 -+ XML_DTD_NODE = 14 -+ XML_ELEMENT_DECL = 15 -+ XML_ATTRIBUTE_DECL = 16 -+ XML_ENTITY_DECL = 17 -+ XML_NAMESPACE_DECL = 18 -+ XML_XINCLUDE_START = 19 -+ XML_XINCLUDE_END = 20 -+ XML_DOCB_DOCUMENT_NODE = 21 -+ */ - } - -- if (data->children) { -- xmlNode *xChild = NULL; -- -- for (xChild = __xml_first_child(data); xChild != NULL; xChild = __xml_next(xChild)) { -- dump_xml(xChild, options, buffer, offset, max, depth + 1); -- } -- -- insert_prefix(options, buffer, offset, max, depth); -- buffer_print(*buffer, *max, *offset, "", name); -- -- if (options & xml_log_option_formatted) { -- buffer_print(*buffer, *max, *offset, "\n"); -- } -- } - } - - static void -@@ -1446,9 +1523,6 @@ static void - save_xml_to_file(xmlNode * xml, const char *desc, const char *filename) - { - char *f = NULL; -- FILE *st = NULL; -- xmlDoc *doc = getDocPtr(xml); -- xmlBuffer *xml_buffer = 
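/*
 * A minimal sketch of the dump_xml() restructuring above: serialisation is
 * dispatched on the libxml2 node type, handling elements and comments,
 * silently skipping text nodes, and warning on anything else instead of
 * walking into it blindly.  Compile against libxml2 (libxml/tree.h).
 */
#include <libxml/tree.h>
#include <stdio.h>

static void
dump_node(xmlNodePtr node)
{
    switch (node->type) {
        case XML_ELEMENT_NODE:
            printf("<%s .../>\n", (const char *) node->name);
            break;
        case XML_TEXT_NODE:
            break;                          /* ignored, as in the patch */
        case XML_COMMENT_NODE:
            printf("<!-- %s -->\n", (const char *) node->content);
            break;
        default:
            fprintf(stderr, "Unhandled type: %d\n", node->type);
            break;
    }
}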
xmlBufferCreate(); - - if (filename == NULL) { - char *uuid = crm_generate_uuid(); -@@ -1459,17 +1533,7 @@ save_xml_to_file(xmlNode * xml, const char *desc, const char *filename) - } - - crm_info("Saving %s to %s", desc, filename); -- xmlNodeDump(xml_buffer, doc, xml, 0, FALSE); -- -- st = fopen(filename, "w"); -- if (st) { -- fprintf(st, "%s", xml_buffer->content); -- /* fflush(st); */ -- /* fsync(fileno(st)); */ -- fclose(st); -- } -- -- xmlBufferFree(xml_buffer); -+ write_xml_file(xml, filename, FALSE); - g_free(f); - } - -@@ -1550,6 +1614,9 @@ apply_xml_diff(xmlNode * old, xmlNode * diff, xmlNode ** new) - crm_trace("Digest matched: expected %s, calculated %s", digest, new_digest); - } - free(new_digest); -+ -+ } else if (result) { -+ purge_diff_markers(*new); /* Purge now so the diff is ok */ - } - - return result; -@@ -2293,7 +2360,7 @@ calculate_xml_digest_v2(xmlNode * source, gboolean do_filter) - - static struct qb_log_callsite *digest_cs = NULL; - -- crm_trace("Begin digest"); -+ crm_trace("Begin digest %s", do_filter?"filtered":""); - if (do_filter && BEST_EFFORT_STATUS) { - /* Exclude the status calculation from the digest - * -@@ -2385,34 +2452,33 @@ validate_with_dtd(xmlDocPtr doc, gboolean to_logs, const char *dtd_file) - CRM_CHECK(dtd_file != NULL, return FALSE); - - dtd = xmlParseDTD(NULL, (const xmlChar *)dtd_file); -- CRM_CHECK(dtd != NULL, crm_err("Could not find/parse %s", dtd_file); -- goto cleanup); -- -- cvp = xmlNewValidCtxt(); -- CRM_CHECK(cvp != NULL, goto cleanup); -- -- if (to_logs) { -- cvp->userData = (void *)LOG_ERR; -- cvp->error = (xmlValidityErrorFunc) xml_log; -- cvp->warning = (xmlValidityWarningFunc) xml_log; -- } else { -- cvp->userData = (void *)stderr; -- cvp->error = (xmlValidityErrorFunc) fprintf; -- cvp->warning = (xmlValidityWarningFunc) fprintf; -+ if(dtd == NULL) { -+ crm_err("Could not locate/parse DTD: %s", dtd_file); -+ return TRUE; - } - -- if (!xmlValidateDtd(cvp, doc, dtd)) { -- valid = FALSE; -- } -+ cvp = xmlNewValidCtxt(); -+ if(cvp) { -+ if (to_logs) { -+ cvp->userData = (void *)LOG_ERR; -+ cvp->error = (xmlValidityErrorFunc) xml_log; -+ cvp->warning = (xmlValidityWarningFunc) xml_log; -+ } else { -+ cvp->userData = (void *)stderr; -+ cvp->error = (xmlValidityErrorFunc) fprintf; -+ cvp->warning = (xmlValidityWarningFunc) fprintf; -+ } - -- cleanup: -- if (cvp) { -+ if (!xmlValidateDtd(cvp, doc, dtd)) { -+ valid = FALSE; -+ } - xmlFreeValidCtxt(cvp); -- } -- if (dtd) { -- xmlFreeDtd(dtd); -+ -+ } else { -+ crm_err("Internal error: No valid context"); - } - -+ xmlFreeDtd(dtd); - return valid; - } - -@@ -2546,6 +2612,22 @@ validate_with_relaxng(xmlDocPtr doc, gboolean to_logs, const char *relaxng_file, - } - - void -+crm_xml_init(void) -+{ -+ static bool init = TRUE; -+ -+ if(init) { -+ init = FALSE; -+ /* The default allocator XML_BUFFER_ALLOC_EXACT does far too many -+ * realloc()s and it can take upwards of 18 seconds (yes, seconds) -+ * to dump a 28kb tree which XML_BUFFER_ALLOC_DOUBLEIT can do in -+ * less than 1 second. -+ */ -+ xmlSetBufferAllocationScheme(XML_BUFFER_ALLOC_DOUBLEIT); -+ } -+} -+ -+void - crm_xml_cleanup(void) - { - int lpc = 0; -@@ -2873,42 +2955,69 @@ update_validation(xmlNode ** xml_blob, int *best, gboolean transform, gboolean t - return rc; - } - -+/* -+ * From xpath2.c -+ * -+ * All the elements returned by an XPath query are pointers to -+ * elements from the tree *except* namespace nodes where the XPath -+ * semantic is different from the implementation in libxml2 tree. 
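/*
 * A minimal sketch of the crm_xml_init() hunk above: switch libxml2 from
 * XML_BUFFER_ALLOC_EXACT to XML_BUFFER_ALLOC_DOUBLEIT once, early, because
 * exact-fit buffers realloc() on nearly every append -- the patch cites
 * ~18 seconds versus under 1 second to dump a 28kb tree.
 */
#include <libxml/tree.h>
#include <stdbool.h>

static void
xml_init_once(void)
{
    static bool done = false;

    if (!done) {
        done = true;
        /* grow buffers geometrically instead of exactly */
        xmlSetBufferAllocationScheme(XML_BUFFER_ALLOC_DOUBLEIT);
    }
}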
-+ * As a result when a returned node set is freed when -+ * xmlXPathFreeObject() is called, that routine must check the -+ * element type. But node from the returned set may have been removed -+ * by xmlNodeSetContent() resulting in access to freed data. -+ * -+ * This can be exercised by running -+ * valgrind xpath2 test3.xml '//discarded' discarded -+ * -+ * There is 2 ways around it: -+ * - make a copy of the pointers to the nodes from the result set -+ * then call xmlXPathFreeObject() and then modify the nodes -+ * or -+ * - remove the references from the node set, if they are not -+ namespace nodes, before calling xmlXPathFreeObject(). -+ */ -+void -+freeXpathObject(xmlXPathObjectPtr xpathObj) -+{ -+ int lpc, max = numXpathResults(xpathObj); -+ -+ if(xpathObj == NULL) { -+ return; -+ } -+ -+ for(lpc = 0; lpc < max; lpc++) { -+ if (xpathObj->nodesetval->nodeTab[lpc] && xpathObj->nodesetval->nodeTab[lpc]->type != XML_NAMESPACE_DECL) { -+ xpathObj->nodesetval->nodeTab[lpc] = NULL; -+ } -+ } -+ -+ /* _Now_ its safe to free it */ -+ xmlXPathFreeObject(xpathObj); -+} -+ - xmlNode * - getXpathResult(xmlXPathObjectPtr xpathObj, int index) - { - xmlNode *match = NULL; -+ int max = numXpathResults(xpathObj); - - CRM_CHECK(index >= 0, return NULL); - CRM_CHECK(xpathObj != NULL, return NULL); - -- if (index >= xpathObj->nodesetval->nodeNr) { -- crm_err("Requested index %d of only %d items", index, xpathObj->nodesetval->nodeNr); -+ if (index >= max) { -+ crm_err("Requested index %d of only %d items", index, max); -+ return NULL; -+ -+ } else if(xpathObj->nodesetval->nodeTab[index] == NULL) { -+ /* Previously requested */ - return NULL; - } - - match = xpathObj->nodesetval->nodeTab[index]; - CRM_CHECK(match != NULL, return NULL); - -- /* -- * From xpath2.c -- * -- * All the elements returned by an XPath query are pointers to -- * elements from the tree *except* namespace nodes where the XPath -- * semantic is different from the implementation in libxml2 tree. -- * As a result when a returned node set is freed when -- * xmlXPathFreeObject() is called, that routine must check the -- * element type. But node from the returned set may have been removed -- * by xmlNodeSetContent() resulting in access to freed data. -- * This can be exercised by running -- * valgrind xpath2 test3.xml '//discarded' discarded -- * There is 2 ways around it: -- * - make a copy of the pointers to the nodes from the result set -- * then call xmlXPathFreeObject() and then modify the nodes -- * or -- * - remove the reference to the modified nodes from the node set -- * as they are processed, if they are not namespace nodes. 
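/*
 * A minimal sketch of the freeXpathObject() pattern above: before calling
 * xmlXPathFreeObject(), drop every non-namespace node from the result set,
 * because those nodes belong to the document tree and may already have been
 * modified or freed by the caller.  Namespace nodes are the one kind the
 * XPath layer owns and must free itself.
 */
#include <libxml/xpath.h>

static void
xpath_free_result(xmlXPathObjectPtr obj)
{
    int lpc;

    if (obj == NULL) {
        return;
    }
    if (obj->nodesetval != NULL) {
        for (lpc = 0; lpc < obj->nodesetval->nodeNr; lpc++) {
            xmlNodePtr node = obj->nodesetval->nodeTab[lpc];

            if (node != NULL && node->type != XML_NAMESPACE_DECL) {
                obj->nodesetval->nodeTab[lpc] = NULL;   /* tree owns it */
            }
        }
    }
    xmlXPathFreeObject(obj);    /* now only namespace nodes remain to free */
}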
-- */ - if (xpathObj->nodesetval->nodeTab[index]->type != XML_NAMESPACE_DECL) { -+ /* See the comment for freeXpathObject() */ - xpathObj->nodesetval->nodeTab[index] = NULL; - } - -@@ -3090,6 +3199,7 @@ get_xpath_object_relative(const char *xpath, xmlNode * xml_obj, int error_level) - xmlNode * - get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) - { -+ int max; - xmlNode *result = NULL; - xmlXPathObjectPtr xpathObj = NULL; - char *nodePath = NULL; -@@ -3101,12 +3211,14 @@ get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) - - xpathObj = xpath_search(xml_obj, xpath); - nodePath = (char *)xmlGetNodePath(xml_obj); -- if (xpathObj == NULL || xpathObj->nodesetval == NULL || xpathObj->nodesetval->nodeNr < 1) { -+ max = numXpathResults(xpathObj); -+ -+ if (max < 1) { - do_crm_log(error_level, "No match for %s in %s", xpath, crm_str(nodePath)); - crm_log_xml_explicit(xml_obj, "Unexpected Input"); - -- } else if (xpathObj->nodesetval->nodeNr > 1) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ } else if (max > 1) { -+ int lpc = 0; - - do_crm_log(error_level, "Too many matches for %s in %s", xpath, crm_str(nodePath)); - -@@ -3125,9 +3237,7 @@ get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) - result = getXpathResult(xpathObj, 0); - } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - free(nodePath); - - return result; -diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am -index 30215b8..3297961 100644 ---- a/lib/fencing/Makefile.am -+++ b/lib/fencing/Makefile.am -@@ -25,7 +25,7 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - lib_LTLIBRARIES = libstonithd.la - - libstonithd_la_SOURCES = st_client.c --libstonithd_la_LDFLAGS = -version-info 2:1:0 -+libstonithd_la_LDFLAGS = -version-info 3:0:1 - libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la - - AM_CFLAGS = $(INCLUDES) -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index d464708..c87f2d5 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -604,6 +604,9 @@ read_output(int fd) - more = read(fd, buffer, READ_MAX - 1); - - if (more > 0) { -+ buffer[more] = 0; /* Make sure its nul-terminated for logging -+ * 'more' is always less than our buffer size -+ */ - crm_trace("Got %d more bytes: %.200s...", more, buffer); - output = realloc(output, len + more + 1); - snprintf(output + len, more + 1, "%s", buffer); -@@ -635,10 +638,9 @@ update_remaining_timeout(stonith_action_t * action) - } - - static void --stonith_action_async_done(GPid pid, gint status, gpointer user_data) -+stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- int rc = -pcmk_err_generic; -- stonith_action_t *action = user_data; -+ stonith_action_t *action = mainloop_child_userdata(p); - - if (action->timer_sigterm > 0) { - g_source_remove(action->timer_sigterm); -@@ -648,26 +650,25 @@ stonith_action_async_done(GPid pid, gint status, gpointer user_data) - } - - if (action->last_timeout_signo) { -- rc = -ETIME; -+ action->rc = -ETIME; - crm_notice("Child process %d performing action '%s' timed out with signal %d", - pid, action->action, action->last_timeout_signo); -- } else if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); - -- rc = -ECONNABORTED; -+ } else if (signo) { -+ action->rc = -ECONNABORTED; - crm_notice("Child process %d performing action '%s' timed out with signal %d", - pid, action->action, signo); -- } else if 
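/*
 * A minimal sketch of the read_output() fix above: read() does not
 * NUL-terminate, so terminate each chunk explicitly before handing it to
 * any "%s"-style logging.  Reading at most READ_MAX - 1 bytes guarantees
 * there is room for the terminator.
 */
#include <stdio.h>
#include <unistd.h>

#define READ_MAX 256

static void
log_stream(int fd)
{
    char buffer[READ_MAX];
    ssize_t more;

    while ((more = read(fd, buffer, READ_MAX - 1)) > 0) {
        buffer[more] = '\0';            /* safe: more <= READ_MAX - 1 */
        fprintf(stderr, "Got %zd bytes: %.200s\n", more, buffer);
    }
}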
(WIFEXITED(status)) { -- rc = WEXITSTATUS(status); -+ -+ } else { -+ action->rc = exitcode; - crm_debug("Child process %d performing action '%s' exited with rc %d", -- pid, action->action, rc); -+ pid, action->action, exitcode); - } - -- action->rc = rc; - action->output = read_output(action->fd_stdout); - - if (action->rc != pcmk_ok && update_remaining_timeout(action)) { -- rc = internal_stonith_action_execute(action); -+ int rc = internal_stonith_action_execute(action); - if (rc == pcmk_ok) { - return; - } -@@ -778,17 +779,17 @@ internal_stonith_action_execute(stonith_action_t * action) - if (total != len) { - crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len); - if (ret >= 0) { -- rc = -EREMOTEIO; -+ rc = -ECOMM; - } - goto fail; - } - -- close(p_write_fd); -+ close(p_write_fd); p_write_fd = -1; - - /* async */ - if (action->async) { - action->fd_stdout = p_read_fd; -- g_child_watch_add(pid, stonith_action_async_done, action); -+ mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done); - crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid, - action->remaining_timeout); - action->last_timeout_signo = 0; -@@ -821,12 +822,18 @@ internal_stonith_action_execute(stonith_action_t * action) - } - - if (timeout == 0) { -- int killrc = kill(pid, 9 /*SIGKILL*/); -+ int killrc = kill(pid, SIGKILL); - - if (killrc && errno != ESRCH) { - crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno); - } -- p = waitpid(pid, &status, WNOHANG); -+ /* -+ * From sigprocmask(2): -+ * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. -+ * -+ * This makes it safe to skip WNOHANG here -+ */ -+ p = waitpid(pid, &status, 0); - } - - if (p <= 0) { -@@ -1077,13 +1084,15 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a - xmlXPathObject *xpathObj = NULL; - - xpathObj = xpath_search(xml, "//actions"); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - actions = getXpathResult(xpathObj, 0); - } - -+ freeXpathObject(xpathObj); -+ - /* Now fudge the metadata so that the start/stop actions appear */ - xpathObj = xpath_search(xml, "//action[@name='stop']"); -- if (xpathObj == NULL || xpathObj->nodesetval->nodeNr <= 0) { -+ if (numXpathResults(xpathObj) <= 0) { - xmlNode *tmp = NULL; - - tmp = create_xml_node(actions, "action"); -@@ -1095,15 +1104,18 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a - crm_xml_add(tmp, "timeout", "20s"); - } - -+ freeXpathObject(xpathObj); -+ - /* Now fudge the metadata so that the port isn't required in the configuration */ - xpathObj = xpath_search(xml, "//parameter[@name='port']"); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - /* We'll fill this in */ - xmlNode *tmp = getXpathResult(xpathObj, 0); - - crm_xml_add(tmp, "required", "0"); - } - -+ freeXpathObject(xpathObj); - free(buffer); - buffer = dump_xml_formatted(xml); - free_xml(xml); -@@ -1232,7 +1244,7 @@ stonith_api_query(stonith_t * stonith, int call_options, const char *target, - - xpathObj = xpath_search(output, "//@agent"); - if (xpathObj) { -- max = xpathObj->nodesetval->nodeNr; -+ max = numXpathResults(xpathObj); - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); -@@ -1242,6 +1254,8 @@ stonith_api_query(stonith_t * stonith, int call_options, const char *target, - crm_info("%s[%d] = %s", 
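/*
 * A minimal sketch of the timeout handling above: after kill(pid, SIGKILL),
 * a *blocking* waitpid() is safe because, as the patch quotes from
 * sigprocmask(2), SIGKILL cannot be blocked or ignored -- the child is
 * guaranteed to terminate, so WNOHANG (and the risk of missing the reap)
 * is unnecessary.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>

static int
kill_and_reap(pid_t pid)
{
    int status = 0;

    if (kill(pid, SIGKILL) < 0 && errno != ESRCH) {
        perror("kill(SIGKILL)");
        return -1;
    }
    if (waitpid(pid, &status, 0) < 0) {   /* blocks, but only briefly */
        perror("waitpid");
        return -1;
    }
    return status;
}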
"//@agent", lpc, xmlGetNodePath(match)); - *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); - } -+ -+ freeXpathObject(xpathObj); - } - - free_xml(output); -@@ -1366,6 +1380,7 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node, - kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); - kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); - kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); -+ kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); - crm_element_value_int(op, F_STONITH_DATE, &kvp->completed); - crm_element_value_int(op, F_STONITH_STATE, &kvp->state); - -@@ -1519,6 +1534,7 @@ stonith_api_signoff(stonith_t * stonith) - crm_ipc_destroy(ipc); - } - -+ free(native->token); native->token = NULL; - stonith->state = stonith_disconnected; - return pcmk_ok; - } -@@ -2201,18 +2217,28 @@ stonith_api_free(stonith_t * stonith) - { - int rc = pcmk_ok; - -+ crm_trace("Destroying %p", stonith); -+ - if (stonith->state != stonith_disconnected) { -+ crm_trace("Disconnecting %p first", stonith); - rc = stonith->cmds->disconnect(stonith); - } - - if (stonith->state == stonith_disconnected) { - stonith_private_t *private = stonith->private; - -+ crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); - g_hash_table_destroy(private->stonith_op_callback_table); -- free(private->token); -+ -+ crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); -+ g_list_free_full(private->notify_list, free); -+ - free(stonith->private); - free(stonith->cmds); - free(stonith); -+ -+ } else { -+ crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); - } - - return rc; -@@ -2221,18 +2247,10 @@ stonith_api_free(stonith_t * stonith) - void - stonith_api_delete(stonith_t * stonith) - { -- stonith_private_t *private = stonith->private; -- GList *list = private->notify_list; -- -- while (list != NULL) { -- stonith_notify_client_t *client = g_list_nth_data(list, 0); -- -- list = g_list_remove(list, client); -- free(client); -+ crm_trace("Destroying %p", stonith); -+ if(stonith) { -+ stonith->cmds->free(stonith); - } -- -- stonith->cmds->free(stonith); -- stonith = NULL; - } - - stonith_t * -diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am -index d727f80..38ff108 100644 ---- a/lib/lrmd/Makefile.am -+++ b/lib/lrmd/Makefile.am -@@ -25,7 +25,7 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - lib_LTLIBRARIES = liblrmd.la - - liblrmd_la_SOURCES = lrmd_client.c --liblrmd_la_LDFLAGS = -version-info 1:0:0 -+liblrmd_la_LDFLAGS = -version-info 2:1:1 - liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/fencing/libstonithd.la -diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c -index d7cbbc0..5fd9efb 100644 ---- a/lib/lrmd/lrmd_client.c -+++ b/lib/lrmd/lrmd_client.c -@@ -58,10 +58,15 @@ static stonith_t *stonith_api = NULL; - static int lrmd_api_disconnect(lrmd_t * lrmd); - static int lrmd_api_is_connected(lrmd_t * lrmd); - -+/* IPC proxy functions */ -+int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); -+static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); -+void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); -+ - #ifdef HAVE_GNUTLS_GNUTLS_H - # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ - 
gnutls_psk_client_credentials_t psk_cred_s; --int lrmd_tls_set_key(gnutls_datum_t * key, const char *location); -+int lrmd_tls_set_key(gnutls_datum_t * key); - static void lrmd_tls_disconnect(lrmd_t * lrmd); - static int global_remote_msg_id = 0; - int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); -@@ -92,6 +97,9 @@ typedef struct lrmd_private_s { - - lrmd_event_callback callback; - -+ /* Internal IPC proxy msg passing for remote guests */ -+ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); -+ void *proxy_callback_userdata; - } lrmd_private_t; - - static lrmd_list_t * -@@ -227,9 +235,16 @@ static int - lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) - { - const char *type; -+ const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); - lrmd_private_t *native = lrmd->private; - lrmd_event_data_t event = { 0, }; - -+ if (proxy_session != NULL) { -+ /* this is proxy business */ -+ lrmd_internal_proxy_dispatch(lrmd, msg); -+ return 1; -+ } -+ - if (!native->callback) { - /* no callback set */ - crm_trace("notify event received but client has not set callback"); -@@ -834,6 +849,12 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) - crm_xml_add(hello, F_TYPE, T_LRMD); - crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); - crm_xml_add(hello, F_LRMD_CLIENTNAME, name); -+ crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); -+ -+ /* advertise that we are a proxy provider */ -+ if (native->proxy_callback) { -+ crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); -+ } - - rc = lrmd_send_xml(lrmd, hello, -1, &reply); - -@@ -847,7 +868,14 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) - const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); - const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); - -- if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { -+ crm_element_value_int(reply, F_LRMD_RC, &rc); -+ -+ if (rc == -EPROTO) { -+ crm_err("LRMD protocol mismatch client version %s, server version %s", -+ LRMD_PROTOCOL_VERSION, crm_element_value(reply, F_LRMD_PROTOCOL_VERSION)); -+ crm_log_xml_err(reply, "Protocol Error"); -+ -+ } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { - crm_err("Invalid registration message: %s", msg_type); - crm_log_xml_err(reply, "Bad reply"); - rc = -EPROTO; -@@ -906,8 +934,8 @@ lrmd_ipc_connect(lrmd_t * lrmd, int *fd) - } - - #ifdef HAVE_GNUTLS_GNUTLS_H --int --lrmd_tls_set_key(gnutls_datum_t * key, const char *location) -+static int -+set_key(gnutls_datum_t * key, const char *location) - { - FILE *stream; - int read_len = 256; -@@ -917,6 +945,10 @@ lrmd_tls_set_key(gnutls_datum_t * key, const char *location) - static size_t key_cache_len = 0; - static time_t key_cache_updated; - -+ if (location == NULL) { -+ return -1; -+ } -+ - if (key_cache) { - time_t now = time(NULL); - -@@ -943,7 +975,7 @@ lrmd_tls_set_key(gnutls_datum_t * key, const char *location) - - key->data = gnutls_malloc(read_len); - while (!feof(stream)) { -- char next; -+ int next; - - if (cur_len == buf_len) { - buf_len = cur_len + read_len; -@@ -977,22 +1009,25 @@ lrmd_tls_set_key(gnutls_datum_t * key, const char *location) - return 0; - } - --static int --lrmd_tls_key_cb(gnutls_session_t session, char **username, gnutls_datum_t * key) -+int -+lrmd_tls_set_key(gnutls_datum_t * key) - { - int rc = 0; -+ const char *specific_location = getenv("PCMK_authkey_location"); - -- if (lrmd_tls_set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { -- rc = lrmd_tls_set_key(key, 
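/*
 * A minimal sketch of two fixes visible above: (1) the PSK key location can
 * be overridden through the PCMK_authkey_location environment variable
 * before falling back to the built-in paths, and (2) the byte returned by
 * fgetc() must be stored in an int, not a char, or a 0xFF byte in a binary
 * key file becomes indistinguishable from EOF.  read_key_file() is an
 * illustrative helper, not the patch's set_key(); the caching it does is
 * omitted here.
 */
#include <stdio.h>
#include <stdlib.h>

static long
read_key_file(const char *location, unsigned char **out)
{
    FILE *stream;
    long len = 0, max = 256;
    int next;                   /* int, so EOF (-1) != byte 0xFF (255) */
    unsigned char *buf;

    if (location == NULL || (stream = fopen(location, "r")) == NULL) {
        return -1;
    }
    buf = malloc(max);
    while (buf != NULL && (next = fgetc(stream)) != EOF) {
        if (len == max) {
            unsigned char *tmp = realloc(buf, max *= 2);

            if (tmp == NULL) {
                free(buf);
                buf = NULL;
                break;
            }
            buf = tmp;
        }
        buf[len++] = (unsigned char) next;
    }
    fclose(stream);
    if (buf == NULL) {
        return -1;
    }
    *out = buf;
    return len;
}

/* usage: honour the override first, as lrmd_tls_set_key() does above:
 *   const char *loc = getenv("PCMK_authkey_location");
 *   if (loc == NULL || read_key_file(loc, &key) < 0) { ...fall back... }
 */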
ALT_REMOTE_KEY_LOCATION); -+ if (set_key(key, specific_location) == 0) { -+ crm_debug("Using custom authkey location %s", specific_location); -+ return 0; -+ } -+ -+ if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { -+ rc = set_key(key, ALT_REMOTE_KEY_LOCATION); - } - if (rc) { - crm_err("No lrmd remote key found"); - return -1; - } - -- *username = gnutls_malloc(strlen(DEFAULT_REMOTE_USERNAME) + 1); -- strcpy(*username, DEFAULT_REMOTE_USERNAME); -- - return rc; - } - -@@ -1034,6 +1069,7 @@ lrmd_tcp_connect_cb(void *userdata, int sock) - .destroy = lrmd_tls_connection_destroy, - }; - int rc = sock; -+ gnutls_datum_t psk_key = { NULL, 0 }; - - if (rc < 0) { - lrmd_tls_connection_destroy(lrmd); -@@ -1045,8 +1081,16 @@ lrmd_tcp_connect_cb(void *userdata, int sock) - /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon - * to avoid all blocking code in the client. */ - native->sock = sock; -+ -+ if (lrmd_tls_set_key(&psk_key) != 0) { -+ lrmd_tls_connection_destroy(lrmd); -+ return; -+ } -+ - gnutls_psk_allocate_client_credentials(&native->psk_cred_c); -- gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); -+ gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); -+ gnutls_free(psk_key.data); -+ - native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); - - if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { -@@ -1099,19 +1143,28 @@ lrmd_tls_connect(lrmd_t * lrmd, int *fd) - - lrmd_private_t *native = lrmd->private; - int sock; -+ gnutls_datum_t psk_key = { NULL, 0 }; - - lrmd_gnutls_global_init(); - - sock = crm_remote_tcp_connect(native->server, native->port); -- if (sock <= 0) { -+ if (sock < 0) { - crm_warn("Could not establish remote lrmd connection to %s", native->server); - lrmd_tls_connection_destroy(lrmd); - return -ENOTCONN; - } - - native->sock = sock; -+ -+ if (lrmd_tls_set_key(&psk_key) != 0) { -+ lrmd_tls_connection_destroy(lrmd); -+ return -1; -+ } -+ - gnutls_psk_allocate_client_credentials(&native->psk_cred_c); -- gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); -+ gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); -+ gnutls_free(psk_key.data); -+ - native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); - - if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { -@@ -1400,6 +1453,38 @@ lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) - native->callback = callback; - } - -+void -+lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) -+{ -+ lrmd_private_t *native = lrmd->private; -+ -+ native->proxy_callback = callback; -+ native->proxy_callback_userdata = userdata; -+} -+ -+void -+lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) -+{ -+ lrmd_private_t *native = lrmd->private; -+ -+ if (native->proxy_callback) { -+ crm_log_xml_trace(msg, "PROXY_INBOUND"); -+ native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); -+ } -+} -+ -+int -+lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) -+{ -+ if (lrmd == NULL) { -+ return -ENOTCONN; -+ } -+ crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); -+ -+ crm_log_xml_trace(msg, "PROXY_OUTBOUND"); -+ return lrmd_send_xml_no_reply(lrmd, msg); -+} -+ - static int - 
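/*
 * A minimal sketch of the credential change above: instead of registering a
 * callback via gnutls_psk_set_client_credentials_function(), the key is
 * loaded up front and handed to GnuTLS directly as a raw PSK.  Error
 * handling is trimmed, and the "lrmd" literal merely stands in for the
 * DEFAULT_REMOTE_USERNAME constant the patch uses.
 */
#include <gnutls/gnutls.h>

static int
setup_psk(gnutls_psk_client_credentials_t *cred, gnutls_datum_t *psk_key)
{
    int rc = gnutls_psk_allocate_client_credentials(cred);

    if (rc == GNUTLS_E_SUCCESS) {
        rc = gnutls_psk_set_client_credentials(*cred, "lrmd", psk_key,
                                               GNUTLS_PSK_KEY_RAW);
        /* GnuTLS copies the key, so the caller's copy can be released */
        gnutls_free(psk_key->data);
        psk_key->data = NULL;
    }
    return rc;
}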
stonith_get_metadata(const char *provider, const char *type, char **output) - { -@@ -1412,40 +1497,36 @@ stonith_get_metadata(const char *provider, const char *type, char **output) - return rc; - } - --static int --lsb_get_metadata(const char *type, char **output) --{ -- - #define lsb_metadata_template \ --"\n"\ --"\n"\ --"\n"\ --" 1.0\n"\ --" \n"\ --" %s"\ --" \n"\ --" %s\n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" \n"\ --"\n" -+ "\n" \ -+ "\n" \ -+ "\n" \ -+ " 1.0\n" \ -+ " \n" \ -+ " %s\n" \ -+ " \n" \ -+ " %s\n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " \n" \ -+ "\n" - - #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" - #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" -@@ -1471,6 +1552,9 @@ lsb_get_metadata(const char *type, char **output) - continue; \ - } - -+static int -+lsb_get_metadata(const char *type, char **output) -+{ - char ra_pathname[PATH_MAX] = { 0, }; - FILE *fp; - GString *meta_data = NULL; -@@ -1486,11 +1570,15 @@ lsb_get_metadata(const char *type, char **output) - char *xml_l_dscrpt = NULL; - GString *l_dscrpt = NULL; - -- snprintf(ra_pathname, sizeof(ra_pathname), "%s%s%s", -- type[0] == '/' ? "" : LSB_ROOT_DIR, type[0] == '/' ? "" : "/", type); -+ if(type[0] == '/') { -+ snprintf(ra_pathname, sizeof(ra_pathname), "%s", type); -+ } else { -+ snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type); -+ } - -+ crm_trace("Looking into %s", ra_pathname); - if (!(fp = fopen(ra_pathname, "r"))) { -- return -EIO; -+ return -errno; - } - - /* Enter into the lsb-compliant comment block */ -@@ -1559,6 +1647,7 @@ lsb_get_metadata(const char *type, char **output) - *output = strdup(meta_data->str); - g_string_free(meta_data, TRUE); - -+ crm_trace("Created fake metadata: %d", strlen(*output)); - return pcmk_ok; - } - -@@ -1874,13 +1963,19 @@ lrmd_remote_api_new(const char *nodename, const char *server, int port) - lrmd_private_t *native = new_lrmd->private; - - if (!nodename && !server) { -+ lrmd_api_delete(new_lrmd); - return NULL; - } - - native->type = CRM_CLIENT_TLS; - native->remote_nodename = nodename ? strdup(nodename) : strdup(server); - native->server = server ? strdup(server) : strdup(nodename); -- native->port = port ? port : DEFAULT_REMOTE_PORT; -+ native->port = port; -+ if (native->port == 0) { -+ const char *remote_port_str = getenv("PCMK_remote_port"); -+ native->port = remote_port_str ? 
atoi(remote_port_str) : DEFAULT_REMOTE_PORT; -+ } -+ - return new_lrmd; - #else - crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); -@@ -1906,6 +2001,12 @@ lrmd_api_delete(lrmd_t * lrmd) - free(native->remote_nodename); - free(native->remote); - } -+ -+ if (stonith_api) { -+ stonith_api->cmds->free(stonith_api); -+ stonith_api = NULL; -+ } -+ - free(lrmd->private); - free(lrmd); - } -diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am -index cd5e2b4..6c0bb32 100644 ---- a/lib/pengine/Makefile.am -+++ b/lib/pengine/Makefile.am -@@ -26,12 +26,13 @@ lib_LTLIBRARIES = libpe_rules.la libpe_status.la - ## SOURCES - noinst_HEADERS = unpack.h variant.h - --libpe_rules_la_LDFLAGS = -version-info 2:1:0 -+libpe_rules_la_LDFLAGS = -version-info 2:2:0 - libpe_rules_la_SOURCES = rules.c common.c -+libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la - --libpe_status_la_LDFLAGS = -version-info 4:1:0 -+libpe_status_la_LDFLAGS = -version-info 6:0:2 - libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c group.c clone.c rules.c common.c --libpe_status_la_LIBADD = @CURSESLIBS@ -+libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la - - clean-generic: - rm -f *.log *.debug *~ -diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c -index 2ff092e..38ff23a 100644 ---- a/lib/pengine/clone.c -+++ b/lib/pengine/clone.c -@@ -206,7 +206,7 @@ clone_unpack(resource_t * rsc, pe_working_set_t * data_set) - clone_data->ordered = TRUE; - } - if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) { -- crm_config_err("Anonymous clones (%s) may only support one copy" " per node", rsc->id); -+ crm_config_err("Anonymous clones (%s) may only support one copy per node", rsc->id); - clone_data->clone_node_max = 1; - } - -@@ -510,6 +510,32 @@ clone_print(resource_t * rsc, const char *pre_text, long options, void *print_da - list_text = NULL; - - /* Stopped */ -+ if(is_not_set(rsc->flags, pe_rsc_unique)) { -+ -+ GListPtr nIter; -+ GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); -+ -+ /* Custom stopped list for non-unique clones */ -+ free(stopped_list); stopped_list = NULL; -+ -+ if(g_list_length(list) == 0) { -+ /* Clusters with symmetrical=false haven't calculated allowed_nodes yet -+ * If we've not probed for them yet, the Stopped list will be empty -+ */ -+ list = g_hash_table_get_values(rsc->known_on); -+ } -+ -+ list = g_list_sort(list, sort_node_uname); -+ for (nIter = list; nIter != NULL; nIter = nIter->next) { -+ node_t *node = (node_t *)nIter->data; -+ -+ if(pe_find_node(rsc->running_on, node->details->uname) == NULL) { -+ stopped_list = add_list_element(stopped_list, node->details->uname); -+ } -+ } -+ g_list_free(list); -+ } -+ - short_print(stopped_list, child_text, "Stopped", options, print_data); - free(stopped_list); - -diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c -index b13d511..9110585 100644 ---- a/lib/pengine/complex.c -+++ b/lib/pengine/complex.c -@@ -22,7 +22,6 @@ - #include - #include - --extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); - void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length); - - resource_object_functions_t resource_class_functions[] = { -@@ -220,9 +219,9 @@ unpack_template(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * d - return FALSE; - } - -- cib_resources = get_object_root(XML_CIB_TAG_RESOURCES, data_set->input); -+ cib_resources = 
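/*
 * A minimal sketch of the clone_print() change above: for anonymous clones
 * the "Stopped" list is derived by walking the allowed (or, when that set
 * is still empty, the known) nodes and keeping those where no instance is
 * running, rather than trusting a precomputed list.  Plain string arrays
 * stand in for the GList/GHashTable structures the patch works with.
 */
#include <stdio.h>
#include <string.h>

static void
print_stopped(const char **allowed, int n_allowed,
              const char **running, int n_running)
{
    int i, j;

    for (i = 0; i < n_allowed; i++) {
        int is_running = 0;

        for (j = 0; j < n_running; j++) {
            if (strcmp(allowed[i], running[j]) == 0) {
                is_running = 1;
                break;
            }
        }
        if (!is_running) {
            printf("Stopped: %s\n", allowed[i]);
        }
    }
}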
get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); - if (cib_resources == NULL) { -- pe_err("Cannot get the root of object '%s'", XML_CIB_TAG_RESOURCES); -+ pe_err("No resources configured"); - return FALSE; - } - -diff --git a/lib/pengine/native.c b/lib/pengine/native.c -index ad73f25..110c210 100644 ---- a/lib/pengine/native.c -+++ b/lib/pengine/native.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -34,7 +34,6 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) - GListPtr gIter = rsc->running_on; - - CRM_CHECK(node != NULL, return); -- - for (; gIter != NULL; gIter = gIter->next) { - node_t *a_node = (node_t *) gIter->data; - -@@ -44,7 +43,8 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) - } - } - -- pe_rsc_trace(rsc, "Adding %s to %s", rsc->id, node->details->uname); -+ pe_rsc_trace(rsc, "Adding %s to %s %s", rsc->id, node->details->uname, -+ is_set(rsc->flags, pe_rsc_managed)?"":"(unmanaged)"); - - rsc->running_on = g_list_append(rsc->running_on, node); - if (rsc->variant == pe_native) { -@@ -52,8 +52,16 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) - } - - if (is_not_set(rsc->flags, pe_rsc_managed)) { -+ resource_t *p = rsc->parent; -+ - pe_rsc_info(rsc, "resource %s isnt managed", rsc->id); - resource_location(rsc, node, INFINITY, "not_managed_default", data_set); -+ -+ while(p && node->details->online) { -+ /* add without the additional location constraint */ -+ p->running_on = g_list_append(p->running_on, node); -+ p = p->parent; -+ } - return; - } - -@@ -287,7 +295,7 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri - - /* resource information. */ - status_print("%sid); -+ status_print("id=\"%s\" ", rsc_printable_id(rsc)); - status_print("resource_agent=\"%s%s%s:%s\" ", - class, - prov ? "::" : "", prov ? 
prov : "", crm_element_value(rsc->xml, XML_ATTR_TYPE)); -@@ -329,12 +337,19 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri - } - } - -+ - void - native_print(resource_t * rsc, const char *pre_text, long options, void *print_data) - { - node_t *node = NULL; -- const char *prov = NULL; - const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -+ const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); -+ -+ int offset = 0; -+ char buffer[LINE_MAX]; -+ -+ CRM_ASSERT(rsc->variant == pe_native); -+ CRM_ASSERT(kind != NULL); - - if (rsc->meta) { - const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC); -@@ -353,13 +368,12 @@ native_print(resource_t * rsc, const char *pre_text, long options, void *print_d - return; - } - -- if (safe_str_eq(class, "ocf")) { -- prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -- } -- - if (rsc->running_on != NULL) { - node = rsc->running_on->data; - } -+ if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -+ node = NULL; -+ } - - if (options & pe_print_html) { - if (is_not_set(rsc->flags, pe_rsc_managed)) { -@@ -382,40 +396,51 @@ native_print(resource_t * rsc, const char *pre_text, long options, void *print_d - } - } - -+ if(pre_text) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text); -+ } -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(rsc)); -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class); -+ if (safe_str_eq(class, "ocf")) { -+ const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); -+ } -+ offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind); -+ if(is_set(rsc->flags, pe_rsc_orphan)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, " ORPHANED "); -+ } -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s ", role2text(rsc->role)); -+ if(node) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s ", node->details->uname); -+ } -+ if(is_not_set(rsc->flags, pe_rsc_managed)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "(unmanaged) "); -+ } -+ if(is_set(rsc->flags, pe_rsc_failed)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED "); -+ } -+ if(is_set(rsc->flags, pe_rsc_failure_ignored)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "(failure ignored)"); -+ } -+ - if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- const char *desc = NULL; -- -- desc = crm_element_value(rsc->xml, XML_ATTR_DESC); -- status_print("%s%s\t(%s%s%s:%s%s):\t%s %s%s%s%s%s", -- pre_text ? pre_text : "", rsc->id, -- class, prov ? "::" : "", prov ? prov : "", -- crm_element_value(rsc->xml, XML_ATTR_TYPE), -- is_set(rsc->flags, pe_rsc_orphan) ? " ORPHANED" : "", -- (rsc->variant != pe_native) ? "" : role2text(rsc->role), -- is_set(rsc->flags, pe_rsc_managed) ? "" : "(unmanaged) ", -- is_set(rsc->flags, pe_rsc_failed) ? "FAILED " : "", -- is_set(rsc->flags, pe_rsc_failure_ignored) ? "(failure ignored) " : "", -- desc ? ": " : "", desc ? desc : ""); -+ const char *desc = crm_element_value(rsc->xml, XML_ATTR_DESC); -+ if(desc) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", desc); -+ } -+ } - -- } else { -- status_print("%s%s\t(%s%s%s:%s):\t%s%s %s%s%s%s", -- pre_text ? pre_text : "", rsc->id, -- class, prov ? "::" : "", prov ? 
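/*
 * A minimal sketch of the native_print() rewrite above: one status line is
 * built by accumulating `offset += snprintf(buffer + offset, ...)` calls.
 * A caveat worth noting with this idiom: snprintf() returns the length it
 * *wanted* to write, so after truncation the offset can run past the
 * buffer; clamping it, as below, keeps any later appends safe.
 */
#include <limits.h>
#include <stdio.h>

#ifndef LINE_MAX
#define LINE_MAX 2048
#endif

static void
describe(const char *id, const char *class, const char *kind, int failed)
{
    char buffer[LINE_MAX];
    int offset = 0;

    offset += snprintf(buffer + offset, sizeof(buffer) - offset,
                       "%s\t(%s:%s):\t", id, class, kind);
    if (offset > (int) sizeof(buffer)) {
        offset = (int) sizeof(buffer);      /* clamp after truncation */
    }
    if (failed && offset < (int) sizeof(buffer)) {
        offset += snprintf(buffer + offset, sizeof(buffer) - offset,
                           "FAILED ");
    }
    puts(buffer);                           /* snprintf always terminates */
}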
prov : "", -- crm_element_value(rsc->xml, XML_ATTR_TYPE), -- is_set(rsc->flags, pe_rsc_orphan) ? " ORPHANED " : "", -- (rsc->variant != pe_native) ? "" : role2text(rsc->role), -- (rsc->variant != pe_native) ? "" : node != NULL ? node->details->uname : "", -- is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)", -- is_set(rsc->flags, pe_rsc_failed) ? " FAILED" : "", -- is_set(rsc->flags, pe_rsc_failure_ignored) ? " (failure ignored)" : ""); -+ status_print("%s", buffer); - - #if CURSES_ENABLED -- if (options & pe_print_ncurses) { -- /* coverity[negative_returns] False positive */ -- move(-1, 0); -- } --#endif -+ if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -+ /* Done */ -+ -+ } else if (options & pe_print_ncurses) { -+ /* coverity[negative_returns] False positive */ -+ move(-1, 0); - } -+#endif - - if (options & pe_print_html) { - status_print(" "); -diff --git a/lib/pengine/status.c b/lib/pengine/status.c -index f0449de..bb9dfcb 100644 ---- a/lib/pengine/status.c -+++ b/lib/pengine/status.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -29,14 +29,12 @@ - #include - #include - --extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); -- - #define MEMCHECK_STAGE_0 0 - - #define check_and_exit(stage) cleanup_calculations(data_set); \ - crm_mem_stats(NULL); \ - crm_err("Exiting: stage %d", stage); \ -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - - /* - * Unpack everything -@@ -52,11 +50,11 @@ extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); - gboolean - cluster_status(pe_working_set_t * data_set) - { -- xmlNode *config = get_object_root(XML_CIB_TAG_CRMCONFIG, data_set->input); -- xmlNode *cib_nodes = get_object_root(XML_CIB_TAG_NODES, data_set->input); -- xmlNode *cib_resources = get_object_root(XML_CIB_TAG_RESOURCES, data_set->input); -- xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); -- xmlNode *cib_domains = get_object_root(XML_CIB_TAG_DOMAINS, data_set->input); -+ xmlNode *config = get_xpath_object("//"XML_CIB_TAG_CRMCONFIG, data_set->input, LOG_TRACE); -+ xmlNode *cib_nodes = get_xpath_object("//"XML_CIB_TAG_NODES, data_set->input, LOG_TRACE); -+ xmlNode *cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); -+ xmlNode *cib_status = get_xpath_object("//"XML_CIB_TAG_STATUS, data_set->input, LOG_TRACE); -+ xmlNode *cib_domains = get_xpath_object("//"XML_CIB_TAG_DOMAINS, data_set->input, LOG_TRACE); - const char *value = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); - - crm_trace("Beginning unpack"); -@@ -73,7 +71,9 @@ cluster_status(pe_working_set_t * data_set) - data_set->now = crm_time_new(NULL); - } - -- if (data_set->input != NULL && 
crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { -+ if (data_set->dc_uuid == NULL -+ && data_set->input != NULL -+ && crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { - /* this should always be present */ - data_set->dc_uuid = crm_element_value_copy(data_set->input, XML_ATTR_DC_UUID); - } -@@ -83,21 +83,29 @@ cluster_status(pe_working_set_t * data_set) - set_bit(data_set->flags, pe_flag_have_quorum); - } - -- data_set->op_defaults = get_object_root(XML_CIB_TAG_OPCONFIG, data_set->input); -- data_set->rsc_defaults = get_object_root(XML_CIB_TAG_RSCCONFIG, data_set->input); -+ data_set->op_defaults = get_xpath_object("//"XML_CIB_TAG_OPCONFIG, data_set->input, LOG_TRACE); -+ data_set->rsc_defaults = get_xpath_object("//"XML_CIB_TAG_RSCCONFIG, data_set->input, LOG_TRACE); - - unpack_config(config, data_set); - -- if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE -- && data_set->no_quorum_policy != no_quorum_ignore) { -- crm_warn("We do not have quorum" " - fencing and resource management disabled"); -+ if (is_not_set(data_set->flags, pe_flag_quick_location) -+ && is_not_set(data_set->flags, pe_flag_have_quorum) -+ && data_set->no_quorum_policy != no_quorum_ignore) { -+ crm_warn("We do not have quorum - fencing and resource management disabled"); - } - - unpack_nodes(cib_nodes, data_set); - unpack_domains(cib_domains, data_set); -- unpack_remote_nodes(cib_resources, data_set); -+ -+ if(is_not_set(data_set->flags, pe_flag_quick_location)) { -+ unpack_remote_nodes(cib_resources, data_set); -+ } -+ - unpack_resources(cib_resources, data_set); -- unpack_status(cib_status, data_set); -+ -+ if(is_not_set(data_set->flags, pe_flag_quick_location)) { -+ unpack_status(cib_status, data_set); -+ } - - set_bit(data_set->flags, pe_flag_have_status); - return TRUE; -@@ -225,6 +233,7 @@ set_working_set_defaults(pe_working_set_t * data_set) - pe_dataset = data_set; - memset(data_set, 0, sizeof(pe_working_set_t)); - -+ data_set->dc_uuid = NULL; - data_set->order_id = 1; - data_set->action_id = 1; - data_set->no_quorum_policy = no_quorum_freeze; -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 4257579..7216545 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -61,7 +61,7 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) - set_bit(rsc->flags, pe_rsc_failed); - } - } else if (node->details->unclean == FALSE) { -- if (is_set(data_set->flags, pe_flag_stonith_enabled)) { -+ if(pe_can_fence(data_set, node)) { - crm_warn("Node %s will be fenced %s", node->details->uname, reason); - } else { - crm_warn("Node %s is unclean %s", node->details->uname, reason); -@@ -85,8 +85,9 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) - verify_pe_options(data_set->config_hash); - - set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); -- crm_info("Startup probes: %s", -- is_set(data_set->flags, pe_flag_startup_probes) ? 
"enabled" : "disabled (dangerous)"); -+ if(is_not_set(data_set->flags, pe_flag_startup_probes)) { -+ crm_info("Startup probes: disabled (dangerous)"); -+ } - - value = pe_pref(data_set->config_hash, "stonith-timeout"); - data_set->stonith_timeout = crm_get_msec(value); -@@ -190,7 +191,7 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) - node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); - node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); - -- crm_info("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", -+ crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", - pe_pref(data_set->config_hash, "node-health-red"), - pe_pref(data_set->config_hash, "node-health-yellow"), - pe_pref(data_set->config_hash, "node-health-green")); -@@ -292,6 +293,7 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_ - const char *remote_name = NULL; - const char *remote_server = NULL; - const char *remote_port = NULL; -+ const char *connect_timeout = "60s"; - char *tmp_id = NULL; - - for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next(attr_set)) { -@@ -309,6 +311,8 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_ - remote_server = value; - } else if (safe_str_eq(name, "remote-port")) { - remote_port = value; -+ } else if (safe_str_eq(name, "remote-connect-timeout")) { -+ connect_timeout = value; - } - } - } -@@ -369,6 +373,16 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_ - crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor"); - free(tmp_id); - -+ if (connect_timeout) { -+ attr = create_xml_node(xml_tmp, XML_ATTR_OP); -+ tmp_id = crm_concat(remote_name, "start-interval-0", '_'); -+ crm_xml_add(attr, XML_ATTR_ID, tmp_id); -+ crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout); -+ crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0"); -+ crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start"); -+ free(tmp_id); -+ } -+ - if (remote_port || remote_server) { - xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS); - tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_'); -@@ -459,6 +473,11 @@ unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) - } - } - -+ if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { -+ crm_info("Creating a fake local node"); -+ create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); -+ } -+ - return TRUE; - } - -@@ -478,7 +497,7 @@ unpack_domains(xmlNode * xml_domains, pe_working_set_t * data_set) - xmlNode *xml_node = NULL; - xmlNode *xml_domain = NULL; - -- crm_info("Unpacking domains"); -+ crm_debug("Unpacking domains"); - data_set->domains = - g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, - g_hash_destroy_node_list); -@@ -562,12 +581,6 @@ setup_container(resource_t * rsc, pe_working_set_t * data_set) - rsc->container = container; - container->fillers = g_list_append(container->fillers, rsc); - pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); -- if (rsc->is_remote_node) { -- node_t *node = g_hash_table_lookup(container->allowed_nodes, rsc->id); -- if (node) { -- node->weight = -INFINITY; -- } -- } - } else { - pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); - } -@@ -665,7 +678,8 @@ unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) - - data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); - -- 
if (is_set(data_set->flags, pe_flag_stonith_enabled) -+ if (is_not_set(data_set->flags, pe_flag_quick_location) -+ && is_set(data_set->flags, pe_flag_stonith_enabled) - && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { - crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); - crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); -@@ -1464,7 +1478,7 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa - } - - if (safe_str_neq(rsc_id, rsc->id)) { -- pe_rsc_info(rsc, "Internally renamed %s on %s to %s%s", -+ pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", - rsc_id, node->details->uname, rsc->id, - is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); - } -@@ -1644,7 +1658,7 @@ process_rsc_state(resource_t * rsc, node_t * node, - /* treat it as if it is still running - * but also mark the node as unclean - */ -- pe_fence_node(data_set, node, "to recover from resource failure(s)"); -+ pe_fence_node(data_set, node, "because of resource failure(s)"); - break; - - case action_fail_standby: -@@ -2021,7 +2035,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - int task_status_i = -2; - int actual_rc_i = 0; - int target_rc = -1; -- int last_failure = 0; -+ time_t last_failure = 0; - int clear_failcount = 0; - - action_t *action = NULL; -@@ -2044,6 +2058,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); - magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); - key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); -+ actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); - - crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); - -@@ -2064,8 +2079,8 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - if (rsc->failure_timeout > 0) { - int last_run = 0; - -- if (crm_element_value_int(xml_op, "last-rc-change", &last_run) == 0) { -- time_t now = get_timet_now(data_set); -+ if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { -+ time_t now = get_effective_time(data_set); - - if (now > (last_run + rsc->failure_timeout)) { - expired = TRUE; -@@ -2073,8 +2088,8 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - } - } - -- pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%s) on %s (role=%s)", -- id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); -+ pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%s, rc=%s) on %s (role=%s)", -+ id, task, task_id, task_status, actual_rc, node->details->uname, role2text(rsc->role)); - - interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); - interval = crm_parse_int(interval_s, "0"); -@@ -2089,7 +2104,6 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - node->details->uname, rsc->id); - } - -- actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); - CRM_CHECK(actual_rc != NULL, return FALSE); - actual_rc_i = crm_parse_int(actual_rc, NULL); - -@@ -2123,13 +2137,14 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - } - - if (expired) { -- int fc = get_failcount(node, rsc, &last_failure, data_set); -- -- if (rsc->failure_timeout > 0 && last_failure > 0 && fc == 0) { -- -- clear_failcount = 1; -- crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); -+ if (rsc->failure_timeout > 0) { -+ int fc = get_failcount_full(node, rsc, &last_failure, 
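/*
 * A minimal sketch of the failure-expiry test above: a recorded failure is
 * considered expired once `now > last_run + failure_timeout`, keyed off the
 * operation's last-rc-change timestamp and the cluster's effective time
 * rather than separate bookkeeping.
 */
#include <stdbool.h>
#include <time.h>

static bool
failure_expired(time_t last_run, int failure_timeout_s, time_t now)
{
    if (failure_timeout_s <= 0) {
        return false;               /* no failure-timeout configured */
    }
    return now > last_run + failure_timeout_s;
}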
FALSE, data_set); -+ if(fc && get_failcount_full(node, rsc, &last_failure, TRUE, data_set) == 0) { -+ clear_failcount = 1; -+ crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); -+ } - } -+ - } else if (strstr(id, "last_failure") && - ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { - -@@ -2159,8 +2174,16 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - if (expired - && actual_rc_i != PCMK_EXECRA_NOT_RUNNING - && actual_rc_i != PCMK_EXECRA_RUNNING_MASTER && actual_rc_i != PCMK_EXECRA_OK) { -- crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", -- id, actual_rc_i, magic, node->details->uname); -+ if(interval == 0) { -+ crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ -+ } else if(node->details->online && node->details->unclean == FALSE) { -+ crm_notice("Re-initiated expired failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ -+ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "failure-timeout"); -+ } - goto done; - } - -@@ -2221,6 +2244,14 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - case PCMK_EXECRA_NOT_INSTALLED: - case PCMK_EXECRA_INVALID_PARAM: - effective_node = node; -+ if(pe_can_fence(data_set, node) == FALSE -+ && safe_str_eq(task, CRMD_ACTION_STOP)) { -+ /* If a stop fails and we can't fence, there's nothing else we can do */ -+ pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", -+ rsc->id, task, lrmd_event_rc2str(actual_rc_i), actual_rc_i); -+ clear_bit(rsc->flags, pe_rsc_managed); -+ set_bit(rsc->flags, pe_rsc_block); -+ } - /* fall through */ - case PCMK_EXECRA_NOT_CONFIGURED: - failed = rsc; -@@ -2280,8 +2311,16 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - - action = custom_action(rsc, strdup(action_key), task, NULL, TRUE, FALSE, data_set); - if (expired) { -- crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", -- id, actual_rc_i, magic, node->details->uname); -+ if(interval == 0) { -+ crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ -+ } else if(node->details->online && node->details->unclean == FALSE) { -+ crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ -+ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); -+ } - goto done; - - } else if ((action->on_fail == action_fail_ignore) || -@@ -2329,6 +2368,12 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - if (actual_rc_i == PCMK_EXECRA_NOT_RUNNING) { - clear_past_failure = TRUE; - -+ } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { -+ clear_past_failure = TRUE; -+ if (rsc->role < RSC_ROLE_STARTED) { -+ set_active(rsc); -+ } -+ - } else if (safe_str_eq(task, CRMD_ACTION_START)) { - rsc->role = RSC_ROLE_STARTED; - clear_past_failure = TRUE; -@@ -2436,7 +2481,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - } - - } else if (rsc->role < RSC_ROLE_STARTED) { -- /* start, migrate_to and migrate_from will land here */ -+ /* migrate_to and migrate_from will land here */ - pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); - 
set_active(rsc); - } -@@ -2444,7 +2489,6 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - /* clear any previous failure actions */ - if (clear_past_failure) { - switch (*on_fail) { -- case action_fail_block: - case action_fail_stop: - case action_fail_fence: - case action_fail_migrate: -@@ -2453,6 +2497,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - rsc->id, fail2text(*on_fail)); - break; - -+ case action_fail_block: - case action_fail_ignore: - case action_fail_recover: - *on_fail = action_fail_ignore; -@@ -2571,9 +2616,14 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - * setting role=slave is not dangerous because no master will be - * promoted until the failed resource has been fully stopped - */ -- crm_warn("Forcing %s to stop after a failed demote action", rsc->id); - rsc->next_role = RSC_ROLE_STOPPED; -- rsc->role = RSC_ROLE_SLAVE; -+ if (action->on_fail == action_fail_block) { -+ rsc->role = RSC_ROLE_MASTER; -+ -+ } else { -+ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); -+ rsc->role = RSC_ROLE_SLAVE; -+ } - - } else if (compare_version("2.0", op_version) > 0 - && safe_str_eq(task, CRMD_ACTION_START)) { -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 57236ff..1e3877d 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -36,6 +36,32 @@ void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * contain - static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, - gboolean include_disabled); - -+bool pe_can_fence(pe_working_set_t * data_set, node_t *node) -+{ -+ if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { -+ return FALSE; /* Turned off */ -+ -+ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { -+ return FALSE; /* No devices */ -+ -+ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { -+ return TRUE; -+ -+ } else if (data_set->no_quorum_policy == no_quorum_ignore) { -+ return TRUE; -+ -+ } else if(node == NULL) { -+ return FALSE; -+ -+ } else if(node->details->online) { -+ crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); -+ return TRUE; -+ } -+ -+ crm_trace("Cannot fence %s", node->details->uname); -+ return FALSE; -+} -+ - node_t * - node_copy(node_t * this_node) - { -@@ -385,7 +411,7 @@ custom_action(resource_t * rsc, char *key, const char *task, - Implied by calloc()... 
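pe_can_fence(), added above, is the predicate behind the new stop-failure handling in unpack_rsc_op(). A condensed reading aid for its decision order (not a replacement for the function; every name here appears in the patch itself):

    /* Checks in pe_can_fence(), first match wins:
     *   stonith-enabled is false        -> FALSE  (fencing switched off)
     *   no STONITH device defined       -> FALSE  (nothing to fence with)
     *   partition has quorum            -> TRUE
     *   no-quorum-policy is "ignore"    -> TRUE
     *   target node unknown (NULL)      -> FALSE
     *   target node in our membership   -> TRUE   (safe without quorum)
     *   otherwise                       -> FALSE
     */

    /* Used above in unpack_rsc_op(): a failed stop that cannot be escalated
     * to fencing leaves the resource blocked and unmanaged instead. */
    if (pe_can_fence(data_set, node) == FALSE && safe_str_eq(task, CRMD_ACTION_STOP)) {
        clear_bit(rsc->flags, pe_rsc_managed);
        set_bit(rsc->flags, pe_rsc_block);
    }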
- action->actions_before = NULL; - action->actions_after = NULL; -- -+ - action->pseudo = FALSE; - action->dumped = FALSE; - action->processed = FALSE; -@@ -1281,9 +1307,10 @@ sort_op_by_callid(gconstpointer a, gconstpointer b) - int last_a = -1; - int last_b = -1; - -- crm_element_value_const_int(xml_a, "last-rc-change", &last_a); -- crm_element_value_const_int(xml_b, "last-rc-change", &last_b); -+ crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); -+ crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); - -+ crm_trace("rc-change: %d vs %d", last_a, last_b); - if (last_a >= 0 && last_a < last_b) { - sort_return(-1, "rc-change"); - -@@ -1347,25 +1374,18 @@ sort_op_by_callid(gconstpointer a, gconstpointer b) - } - - time_t --get_timet_now(pe_working_set_t * data_set) -+get_effective_time(pe_working_set_t * data_set) - { -- time_t now = 0; -- -- /* if (data_set && data_set->now) { */ -- /* now = data_set->now->tm_now; */ -- /* } */ -- -- if (now == 0) { -- /* eventually we should convert data_set->now into time_tm -- * for now, its only triggered by PE regression tests -- */ -- now = time(NULL); -- crm_crit("Defaulting to 'now'"); -- /* if (data_set && data_set->now) { */ -- /* data_set->now->tm_now = now; */ -- /* } */ -+ if(data_set) { -+ if (data_set->now == NULL) { -+ crm_trace("Recording a new 'now'"); -+ data_set->now = crm_time_new(NULL); -+ } -+ return crm_time_get_seconds_since_epoch(data_set->now); - } -- return now; -+ -+ crm_trace("Defaulting to 'now'"); -+ return time(NULL); - } - - struct fail_search { -@@ -1395,7 +1415,13 @@ get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data) - } - - int --get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set_t * data_set) -+get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) -+{ -+ return get_failcount_full(node, rsc, last_failure, TRUE, data_set); -+} -+ -+int -+get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, bool effective, pe_working_set_t * data_set) - { - char *key = NULL; - const char *value = NULL; -@@ -1429,9 +1455,32 @@ get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set - *last_failure = search.last; - } - -- if (search.count != 0 && search.last != 0 && rsc->failure_timeout) { -+ if(search.count && rsc->failure_timeout) { -+ /* Never time-out if blocking failures are configured */ -+ char *xml_name = clone_strip(rsc->id); -+ char *xpath = g_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name); -+ xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); -+ -+ free(xml_name); -+ free(xpath); -+ -+ if (numXpathResults(xpathObj) > 0) { -+ xmlNode *pref = getXpathResult(xpathObj, 0); -+ pe_warn("Setting %s.failure_timeout=%d in %s conflicts with on-fail=block: ignoring timeout", rsc->id, rsc->failure_timeout, ID(pref)); -+ rsc->failure_timeout = 0; -+#if 0 -+ /* A good idea? 
*/ -+ } else if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { -+ /* In this case, stop.on-fail defaults to block in unpack_operation() */ -+ rsc->failure_timeout = 0; -+#endif -+ } -+ freeXpathObject(xpathObj); -+ } -+ -+ if (effective && search.count != 0 && search.last != 0 && rsc->failure_timeout) { - if (search.last > 0) { -- time_t now = get_timet_now(data_set); -+ time_t now = get_effective_time(data_set); - - if (now > (search.last + rsc->failure_timeout)) { - crm_debug("Failcount for %s on %s has expired (limit was %ds)", -@@ -1453,7 +1502,7 @@ get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set - - /* If it's a resource container, get its failcount plus all the failcounts of the resources within it */ - int --get_failcount_all(node_t * node, resource_t * rsc, int *last_failure, pe_working_set_t * data_set) -+get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) - { - int failcount_all = 0; - -@@ -1464,7 +1513,7 @@ get_failcount_all(node_t * node, resource_t * rsc, int *last_failure, pe_working - - for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { - resource_t *filler = (resource_t *) gIter->data; -- int filler_last_failure = 0; -+ time_t filler_last_failure = 0; - - failcount_all += get_failcount(node, filler, &filler_last_failure, data_set); - -@@ -1719,3 +1768,11 @@ rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, - - return data; - } -+ -+const char *rsc_printable_id(resource_t *rsc) -+{ -+ if (is_not_set(rsc->flags, pe_rsc_unique)) { -+ return ID(rsc->xml); -+ } -+ return rsc->id; -+} -diff --git a/lib/services/Makefile.am b/lib/services/Makefile.am -index 3ee3347..67d7237 100644 ---- a/lib/services/Makefile.am -+++ b/lib/services/Makefile.am -@@ -25,8 +25,8 @@ noinst_HEADERS = upstart.h systemd.h services_private.h - - libcrmservice_la_SOURCES = services.c services_linux.c - libcrmservice_la_LDFLAGS = -version-info 1:0:0 --libcrmservice_la_CFLAGS = $(GIO_CFLAGS) --libcrmservice_la_LIBADD = $(GIO_LIBS) -+libcrmservice_la_CFLAGS = $(GIO_CFLAGS) -DOCF_ROOT_DIR=\"@OCF_ROOT_DIR@\" -+libcrmservice_la_LIBADD = $(GIO_LIBS) $(top_builddir)/lib/common/libcrmcommon.la - - if BUILD_UPSTART - libcrmservice_la_SOURCES += upstart.c -diff --git a/lib/services/services.c b/lib/services/services.c -index 200fc3f..adfc508 100644 ---- a/lib/services/services.c -+++ b/lib/services/services.c -@@ -61,7 +61,7 @@ resources_action_create(const char *name, const char *standard, const char *prov - const char *agent, const char *action, int interval, int timeout, - GHashTable * params) - { -- svc_action_t *op; -+ svc_action_t *op = NULL; - - /* - * Do some up front sanity checks before we go off and -@@ -70,27 +70,27 @@ resources_action_create(const char *name, const char *standard, const char *prov - - if (crm_strlen_zero(name)) { - crm_err("A service or resource action must have a name."); -- return NULL; -+ goto return_error; - } - - if (crm_strlen_zero(standard)) { - crm_err("A service action must have a valid standard."); -- return NULL; -+ goto return_error; - } - - if (!strcasecmp(standard, "ocf") && crm_strlen_zero(provider)) { - crm_err("An OCF resource action must have a provider."); -- return NULL; -+ goto return_error; - } - - if (crm_strlen_zero(agent)) { - crm_err("A service or resource action must have an agent."); -- return NULL; -+ goto return_error; - } - - if (crm_strlen_zero(action)) { - crm_err("A service or resource action must 
specify an action."); -- return NULL; -+ goto return_error; - } - - if (safe_str_eq(action, "monitor") -@@ -163,8 +163,10 @@ resources_action_create(const char *name, const char *standard, const char *prov - if (strcasecmp(op->standard, "ocf") == 0) { - op->provider = strdup(provider); - op->params = params; -+ params = NULL; - - if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { -+ crm_err("Internal error: cannot create agent path"); - goto return_error; - } - op->opaque->args[0] = strdup(op->opaque->exec); -@@ -176,6 +178,7 @@ resources_action_create(const char *name, const char *standard, const char *prov - * of tacking on the LSB_ROOT_DIR path to the front */ - op->opaque->exec = strdup(op->agent); - } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { -+ crm_err("Internal error: cannot create agent path"); - goto return_error; - } - op->opaque->args[0] = strdup(op->opaque->exec); -@@ -206,6 +209,7 @@ resources_action_create(const char *name, const char *standard, const char *prov - op->opaque->exec = strdup(op->agent); - - } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { -+ crm_err("Internal error: cannot create agent path"); - goto return_error; - } - -@@ -253,9 +257,15 @@ resources_action_create(const char *name, const char *standard, const char *prov - op = NULL; - } - -+ if(params) { -+ g_hash_table_destroy(params); -+ } - return op; - - return_error: -+ if(params) { -+ g_hash_table_destroy(params); -+ } - services_action_free(op); - - return NULL; -@@ -311,6 +321,7 @@ services_action_free(svc_action_t * op) - free(op->opaque->args[i]); - } - -+ free(op->opaque); - free(op->rsc); - free(op->action); - -diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c -index 3402397..868dc5b 100644 ---- a/lib/services/services_linux.c -+++ b/lib/services/services_linux.c -@@ -30,6 +30,7 @@ - #include - #include - #include -+#include - - #include "crm/crm.h" - #include "crm/common/mainloop.h" -@@ -37,6 +38,10 @@ - - #include "services_private.h" - -+#if SUPPORT_CIBSECRETS -+# include "crm/common/cib_secrets.h" -+#endif -+ - static inline void - set_fd_opts(int fd, int opts) - { -@@ -250,10 +255,9 @@ operation_finalize(svc_action_t * op) - } - - static void --operation_finished(mainloop_child_t * p, int status, int signo, int exitcode) -+operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- svc_action_t *op = mainloop_get_child_userdata(p); -- pid_t pid = mainloop_get_child_pid(p); -+ svc_action_t *op = mainloop_child_userdata(p); - char *prefix = g_strdup_printf("%s:%d", op->id, op->pid); - - mainloop_clear_child_userdata(p); -@@ -275,7 +279,7 @@ operation_finished(mainloop_child_t * p, int status, int signo, int exitcode) - } - - if (signo) { -- if (mainloop_get_child_timeout(p)) { -+ if (mainloop_child_timeout(p)) { - crm_warn("%s - timed out after %dms", prefix, op->timeout); - op->status = PCMK_LRM_OP_TIMEOUT; - op->rc = PCMK_OCF_TIMEOUT; -@@ -291,7 +295,12 @@ operation_finished(mainloop_child_t * p, int status, int signo, int exitcode) - crm_debug("%s - exited with rc=%d", prefix, exitcode); - } - -+ g_free(prefix); -+ prefix = g_strdup_printf("%s:%d:stderr", op->id, op->pid); - crm_log_output(LOG_NOTICE, prefix, op->stderr_data); -+ -+ g_free(prefix); -+ prefix = g_strdup_printf("%s:%d:stdout", op->id, op->pid); - crm_log_output(LOG_DEBUG, prefix, op->stdout_data); - - g_free(prefix); -@@ -304,6 +313,8 @@ 
services_os_action_execute(svc_action_t * op, gboolean synchronous) - int rc, lpc; - int stdout_fd[2]; - int stderr_fd[2]; -+ sigset_t mask; -+ sigset_t old_mask; - - if (pipe(stdout_fd) < 0) { - crm_err("pipe() failed"); -@@ -313,6 +324,16 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - crm_err("pipe() failed"); - } - -+ if (synchronous) { -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGCHLD); -+ sigemptyset(&old_mask); -+ -+ if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) { -+ crm_perror(LOG_ERR, "sigprocmask() failed"); -+ } -+ } -+ - op->pid = fork(); - switch (op->pid) { - case -1: -@@ -349,6 +370,20 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - close(lpc); - } - -+#if SUPPORT_CIBSECRETS -+ if (replace_secret_params(op->rsc, op->params) < 0) { -+ /* replacing secrets failed! */ -+ if (safe_str_eq(op->action,"stop")) { -+ /* don't fail on stop! */ -+ crm_info("proceeding with the stop operation for %s", op->rsc); -+ -+ } else { -+ crm_err("failed to get secrets for %s, " -+ "considering resource not configured", op->rsc); -+ _exit(PCMK_OCF_NOT_CONFIGURED); -+ } -+ } -+#endif - /* Setup environment correctly */ - add_OCF_env_vars(op); - -@@ -392,27 +427,101 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - - if (synchronous) { - int status = 0; -- int timeout = (1 + op->timeout) / 1000; -+ int timeout = op->timeout; -+ int sfd = -1; -+ time_t start = -1; -+ struct pollfd fds[3]; -+ int wait_rc = 0; -+ -+ sfd = signalfd(-1, &mask, 0); -+ if (sfd < 0) { -+ crm_perror(LOG_ERR, "signalfd() failed"); -+ } -+ -+ fds[0].fd = op->opaque->stdout_fd; -+ fds[0].events = POLLIN; -+ fds[0].revents = 0; -+ -+ fds[1].fd = op->opaque->stderr_fd; -+ fds[1].events = POLLIN; -+ fds[1].revents = 0; -+ -+ fds[2].fd = sfd; -+ fds[2].events = POLLIN; -+ fds[2].revents = 0; - - crm_trace("Waiting for %d", op->pid); -- while ((op->timeout < 0 || timeout > 0) && waitpid(op->pid, &status, WNOHANG) <= 0) { -- sleep(1); -- read_output(op->opaque->stdout_fd, op); -- read_output(op->opaque->stderr_fd, op); -- timeout--; -- } -+ start = time(NULL); -+ do { -+ int poll_rc = poll(fds, 3, timeout); -+ -+ if (poll_rc > 0) { -+ if (fds[0].revents & POLLIN) { -+ read_output(op->opaque->stdout_fd, op); -+ } -+ -+ if (fds[1].revents & POLLIN) { -+ read_output(op->opaque->stderr_fd, op); -+ } -+ -+ if (fds[2].revents & POLLIN) { -+ struct signalfd_siginfo fdsi; -+ ssize_t s; -+ -+ s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); -+ if (s != sizeof(struct signalfd_siginfo)) { -+ crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd); -+ -+ } else if (fdsi.ssi_signo == SIGCHLD) { -+ wait_rc = waitpid(op->pid, &status, WNOHANG); -+ -+ if (wait_rc < 0){ -+ crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid); -+ -+ } else if (wait_rc > 0) { -+ break; -+ } -+ } -+ } -+ -+ } else if (poll_rc == 0) { -+ timeout = 0; -+ break; -+ -+ } else if (poll_rc < 0) { -+ if (errno != EINTR) { -+ crm_perror(LOG_ERR, "poll() failed"); -+ break; -+ } -+ } -+ -+ timeout = op->timeout - (time(NULL) - start) * 1000; -+ -+ } while ((op->timeout < 0 || timeout > 0)); - - crm_trace("Child done: %d", op->pid); -- if (timeout == 0) { -- int killrc = kill(op->pid, 9 /*SIGKILL*/); -+ if (wait_rc <= 0) { -+ int killrc = kill(op->pid, SIGKILL); - - op->rc = PCMK_OCF_UNKNOWN_ERROR; -- op->status = PCMK_LRM_OP_TIMEOUT; -- crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); -+ if (op->timeout > 0 && timeout <= 0) { -+ op->status = PCMK_LRM_OP_TIMEOUT; -+ 
crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); -+ -+ } else { -+ op->status = PCMK_LRM_OP_ERROR; -+ } - - if (killrc && errno != ESRCH) { - crm_err("kill(%d, KILL) failed: %d", op->pid, errno); - } -+ /* -+ * From sigprocmask(2): -+ * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. -+ * -+ * This makes it safe to skip WNOHANG here -+ */ -+ waitpid(op->pid, &status, 0); - - } else if (WIFEXITED(status)) { - op->status = PCMK_LRM_OP_DONE; -@@ -434,9 +543,19 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - read_output(op->opaque->stdout_fd, op); - read_output(op->opaque->stderr_fd, op); - -+ close(op->opaque->stdout_fd); -+ close(op->opaque->stderr_fd); -+ close(sfd); -+ -+ if (sigismember(&old_mask, SIGCHLD) == 0) { -+ if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { -+ crm_perror(LOG_ERR, "sigprocmask() to unblocked failed"); -+ } -+ } -+ - } else { - crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); -- mainloop_add_child(op->pid, op->timeout, op->id, op, operation_finished); -+ mainloop_child_add(op->pid, op->timeout, op->id, op, operation_finished); - - op->opaque->stdout_gsource = mainloop_add_fd(op->id, - G_PRIORITY_LOW, -diff --git a/lib/services/systemd.c b/lib/services/systemd.c -index 6bc67c5..886cb35 100644 ---- a/lib/services/systemd.c -+++ b/lib/services/systemd.c -@@ -201,7 +201,7 @@ systemd_unit_property(const char *obj, const gchar * iface, const char *name) - GVariant *_ret = NULL; - char *output = NULL; - -- crm_info("Calling GetAll on %s", obj); -+ crm_trace("Calling GetAll on %s", obj); - proxy = get_proxy(obj, BUS_PROPERTY_IFACE); - - if (!proxy) { -@@ -218,7 +218,7 @@ systemd_unit_property(const char *obj, const gchar * iface, const char *name) - g_object_unref(proxy); - return NULL; - } -- crm_info("Call to GetAll passed: type '%s' %d\n", g_variant_get_type_string(_ret), -+ crm_debug("Call to GetAll passed: type '%s' %d\n", g_variant_get_type_string(_ret), - g_variant_n_children(_ret)); - - asv = g_variant_get_child_value(_ret, 0); -@@ -226,7 +226,7 @@ systemd_unit_property(const char *obj, const gchar * iface, const char *name) - - value = g_variant_lookup_value(asv, name, NULL); - if (value && g_variant_is_of_type(value, G_VARIANT_TYPE_STRING)) { -- crm_debug("Got value '%s' for %s[%s]", g_variant_get_string(value, NULL), obj, name); -+ crm_info("Got value '%s' for %s[%s]", g_variant_get_string(value, NULL), obj, name); - output = g_variant_dup_string(value, NULL); - - } else { -@@ -443,7 +443,7 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous) - if (safe_str_eq(op->action, "monitor") || safe_str_eq(action, "status")) { - char *state = systemd_unit_property(unit, BUS_NAME ".Unit", "ActiveState"); - -- if (!g_strcmp0(state, "active")) { -+ if (g_strcmp0(state, "active") == 0) { - op->rc = PCMK_EXECRA_OK; - } else { - op->rc = PCMK_EXECRA_NOT_RUNNING; -@@ -452,11 +452,11 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous) - free(state); - goto cleanup; - -- } else if (!g_strcmp0(action, "start")) { -+ } else if (g_strcmp0(action, "start") == 0) { - action = "StartUnit"; -- } else if (!g_strcmp0(action, "stop")) { -+ } else if (g_strcmp0(action, "stop") == 0) { - action = "StopUnit"; -- } else if (!g_strcmp0(action, "restart")) { -+ } else if (g_strcmp0(action, "restart") == 0) { - action = "RestartUnit"; - } else { - op->rc = PCMK_EXECRA_UNIMPLEMENT_FEATURE; -diff --git a/lib/transition/Makefile.am b/lib/transition/Makefile.am -index 
49c7113..da87e61 100644 ---- a/lib/transition/Makefile.am -+++ b/lib/transition/Makefile.am -@@ -27,8 +27,9 @@ lib_LTLIBRARIES = libtransitioner.la - noinst_HEADERS = - libtransitioner_la_SOURCES = unpack.c graph.c utils.c - --libtransitioner_la_LDFLAGS = -version-info 2:0:0 -+libtransitioner_la_LDFLAGS = -version-info 2:1:0 - libtransitioner_la_CFLAGS = -I$(top_builddir) -+libtransitioner_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la - - clean-generic: - rm -f *~ -diff --git a/lib/transition/unpack.c b/lib/transition/unpack.c -index 3187d21..90b7a96 100644 ---- a/lib/transition/unpack.c -+++ b/lib/transition/unpack.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -166,10 +166,10 @@ unpack_graph(xmlNode * xml_graph, const char *reference) - - - - op->rc = rc; - op->op_status = status; -+ op->t_run = time(NULL); -+ op->t_rcchange = op->t_run; - op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, - g_hash_destroy_str, g_hash_destroy_str); -diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am -index 86f2a35..73f1d7e 100644 ---- a/lrmd/Makefile.am -+++ b/lrmd/Makefile.am -@@ -4,12 +4,12 @@ - # modify it under the terms of the GNU Lesser General Public - # License as published by the Free Software Foundation; either - # version 2.1 of the License, or (at your option) any later version. --# -+# - # This library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Lesser General Public License for more details. 
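The t_run/t_rcchange stamps on the synthesized operation above are not cosmetic: elsewhere in this patch, failure-timeout expiry is computed from the operation's last-rc-change, so a record with no timestamp would look infinitely old and expire immediately. The consuming side, quoting the unpack_rsc_op() change earlier in this patch (XML_RSC_OP_LAST_CHANGE and get_effective_time() are the patch's own names):

    if (rsc->failure_timeout > 0) {
        int last_run = 0;

        /* a failure only expires once failure-timeout has elapsed
         * since the operation's last-rc-change */
        if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
            time_t now = get_effective_time(data_set);

            if (now > (last_run + rsc->failure_timeout)) {
                expired = TRUE;
            }
        }
    }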
--# -+# - # You should have received a copy of the GNU Lesser General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -21,16 +21,38 @@ testdir = $(datadir)/$(PACKAGE)/tests/lrmd - test_SCRIPTS = regression.py - - lrmdlibdir = $(CRM_DAEMON_DIR) -- --## binary progs - lrmdlib_PROGRAMS = lrmd lrmd_test - --lrmd_SOURCES = main.c lrmd.c tls_backend.c -+initdir = $(INITDIR) -+init_SCRIPTS = pacemaker_remote -+sbin_PROGRAMS = pacemaker_remoted -+ -+if HAVE_SYSTEMD -+systemdunit_DATA = pacemaker_remote.service -+endif -+ -+if BUILD_HELP -+man8_MANS = $(sbin_PROGRAMS:%=%.8) -+endif -+ -+%.8: % -+ echo Creating $@ -+ chmod a+x $(top_builddir)/lrmd/$< -+ $(top_builddir)/lrmd/$< --help -+ $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/lrmd/$< -+ -+lrmd_SOURCES = main.c lrmd.c - lrmd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/lrmd/liblrmd.la \ - $(top_builddir)/lib/fencing/libstonithd.la - -+ -+pacemaker_remoted_SOURCES = main.c lrmd.c tls_backend.c ipc_proxy.c -+pacemaker_remoted_CFLAGS = -DSUPPORT_REMOTE -+pacemaker_remoted_LDADD = $(lrmd_LDADD) -+ -+ - lrmd_test_SOURCES = test.c - lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/lrmd/liblrmd.la \ -diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c -new file mode 100644 -index 0000000..bbf9b24 ---- /dev/null -+++ b/lrmd/ipc_proxy.c -@@ -0,0 +1,374 @@ -+/* -+ * Copyright (c) 2012 David Vossel -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
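One structural point in the Makefile.am change above: lrmd and pacemaker_remoted are built from the same main.c and lrmd.c. The remote daemon only adds tls_backend.c and ipc_proxy.c and compiles with -DSUPPORT_REMOTE, which, combined with a GnuTLS build, switches on the remote code paths. This is the gate main.c introduces later in this same patch:

    #if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE)
    #  define ENABLE_PCMK_REMOTE
    #endif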
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ */ -+ -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static qb_ipcs_service_t *cib_ro = NULL; -+static qb_ipcs_service_t *cib_rw = NULL; -+static qb_ipcs_service_t *cib_shm = NULL; -+ -+static qb_ipcs_service_t *attrd_ipcs = NULL; -+static qb_ipcs_service_t *crmd_ipcs = NULL; -+static qb_ipcs_service_t *stonith_ipcs = NULL; -+ -+/* ipc providers == crmd clients connecting from cluster nodes */ -+GHashTable *ipc_providers; -+/* ipc clients == things like cibadmin, crm_resource, connecting locally */ -+GHashTable *ipc_clients; -+ -+static int32_t -+ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel) -+{ -+ void *key = NULL; -+ void *value = NULL; -+ crm_client_t *client; -+ crm_client_t *ipc_proxy = NULL; -+ GHashTableIter iter; -+ xmlNode *msg; -+ -+ crm_trace("Connection %p on channel %s", c, ipc_channel); -+ -+ if (g_hash_table_size(ipc_providers) == 0) { -+ crm_err("No ipc providers available for uid %d gid %d", uid, gid); -+ return -EREMOTEIO; -+ } -+ -+ g_hash_table_iter_init(&iter, ipc_providers); -+ if (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { -+ /* grab the first provider available, any provider in this -+ * table will work. Usually there will only be one. These are -+ * lrmd client connections originating for a cluster node's crmd. */ -+ ipc_proxy = value; -+ } else { -+ crm_err("No ipc providers available for uid %d gid %d", uid, gid); -+ return -EREMOTEIO; -+ } -+ -+ /* this new client is a local ipc client on a remote -+ * guest wanting to access the ipc on any available cluster nodes */ -+ client = crm_client_new(c, uid, gid); -+ if (client == NULL) { -+ return -EREMOTEIO; -+ } -+ -+ /* This ipc client is bound to a single ipc provider. 
If the -+ * provider goes away, this client is disconnected */ -+ client->userdata = strdup(ipc_proxy->id); -+ -+ g_hash_table_insert(ipc_clients, client->id, client); -+ -+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "new"); -+ crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ crm_debug("created new ipc proxy with session id %s", client->id); -+ return 0; -+} -+ -+static int32_t -+crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD); -+} -+ -+static int32_t -+attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, T_ATTRD); -+} -+ -+static int32_t -+stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, "stonith-ng"); -+} -+ -+static int32_t -+cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, cib_channel_rw); -+} -+ -+static int32_t -+cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, cib_channel_ro); -+} -+ -+static void -+ipc_proxy_created(qb_ipcs_connection_t * c) -+{ -+ crm_trace("Connection %p", c); -+} -+ -+void -+ipc_proxy_forward_client(crm_client_t *ipc_proxy, xmlNode *xml) -+{ -+ const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION); -+ const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP); -+ xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG); -+ crm_client_t *ipc_client = crm_client_get_by_id(session); -+ int rc = 0; -+ -+ if (ipc_client == NULL) { -+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, session); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ return; -+ } -+ -+ /* This is an event or response from the ipc provider -+ * going to the local ipc client. -+ * -+ * Looking at the chain of events. -+ * -+ * -----remote node----------------|---- cluster node ------ -+ * ipc_client <--1--> this code <--2--> crmd <----3----> ipc server -+ * -+ * This function is receiving a msg from connection 2 -+ * and forwarding it to connection 1. -+ */ -+ if (safe_str_eq(msg_type, "event")) { -+ rc = crm_ipcs_send(ipc_client, 0, msg, TRUE); -+ } else if (safe_str_eq(msg_type, "response")) { -+ int msg_id = 0; -+ crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id); -+ rc = crm_ipcs_send(ipc_client, msg_id, msg, FALSE); -+ } else if (safe_str_eq(msg_type, "destroy")) { -+ qb_ipcs_disconnect(ipc_client->ipcs); -+ } else { -+ crm_err("Unknown ipc proxy msg type %s" , msg_type); -+ } -+ -+ if (rc < 0) { -+ crm_warn("IPC Proxy send to ipc client %s failed, rc = %d", ipc_client->id, rc); -+ } -+} -+ -+static int32_t -+ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) -+{ -+ uint32_t id = 0; -+ uint32_t flags = 0; -+ crm_client_t *client = crm_client_get(c); -+ crm_client_t *ipc_proxy = crm_client_get_by_id(client->userdata); -+ xmlNode *request = NULL; -+ xmlNode *msg = NULL; -+ -+ if (!ipc_proxy) { -+ qb_ipcs_disconnect(client->ipcs); -+ return 0; -+ } -+ -+ /* This is a request from the local ipc client going -+ * to the ipc provider. -+ * -+ * Looking at the chain of events. 
-+ * -+ * -----remote node----------------|---- cluster node ------ -+ * ipc_client <--1--> this code <--2--> crmd <----3----> ipc server -+ * -+ * This function is receiving a request from connection -+ * 1 and forwarding it to connection 2. -+ */ -+ request = crm_ipcs_recv(client, data, size, &id, &flags); -+ -+ if (!request) { -+ return 0; -+ } -+ -+ CRM_CHECK(client != NULL, crm_err("Invalid client"); -+ return FALSE); -+ CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); -+ return FALSE); -+ -+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "request"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); -+ crm_xml_add(msg, F_LRMD_IPC_USER, client->user); -+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id); -+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags); -+ add_message_xml(msg, F_LRMD_IPC_MSG, request); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ -+ return 0; -+} -+ -+static int32_t -+ipc_proxy_closed(qb_ipcs_connection_t * c) -+{ -+ crm_client_t *client = crm_client_get(c); -+ crm_client_t *ipc_proxy = crm_client_get_by_id(client->userdata); -+ -+ crm_trace("Connection %p", c); -+ -+ if (ipc_proxy) { -+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ } -+ -+ g_hash_table_remove(ipc_clients, client->id); -+ -+ free(client->userdata); -+ client->userdata = NULL; -+ crm_client_destroy(client); -+ return 0; -+} -+ -+static void -+ipc_proxy_destroy(qb_ipcs_connection_t * c) -+{ -+ crm_trace("Connection %p", c); -+} -+ -+static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { -+ .connection_accept = crmd_proxy_accept, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { -+ .connection_accept = attrd_proxy_accept, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { -+ .connection_accept = stonith_proxy_accept, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { -+ .connection_accept = cib_proxy_accept_ro, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { -+ .connection_accept = cib_proxy_accept_rw, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+void -+ipc_proxy_add_provider(crm_client_t *ipc_proxy) -+{ -+ if (ipc_providers == NULL) { -+ return; -+ } -+ g_hash_table_insert(ipc_providers, ipc_proxy->id, ipc_proxy); -+} -+ -+void -+ipc_proxy_remove_provider(crm_client_t *ipc_proxy) -+{ -+ GHashTableIter iter; -+ crm_client_t *ipc_client = NULL; -+ char *key = NULL; -+ -+ if (ipc_providers == NULL) { -+ return; -+ } -+ -+ g_hash_table_remove(ipc_providers, 
ipc_proxy->id); -+ -+ g_hash_table_iter_init(&iter, ipc_clients); -+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) { -+ const char *proxy_id = ipc_client->userdata; -+ if (safe_str_eq(proxy_id, ipc_proxy->id)) { -+ crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.", -+ ipc_client->id, ipc_client->pid); -+ qb_ipcs_disconnect(ipc_client->ipcs); -+ } -+ } -+} -+ -+void -+ipc_proxy_init(void) -+{ -+ ipc_clients = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); -+ ipc_providers = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); -+ -+ cib_ipc_servers_init(&cib_ro, -+ &cib_rw, -+ &cib_shm, -+ &cib_proxy_callbacks_ro, -+ &cib_proxy_callbacks_rw); -+ -+ attrd_ipc_server_init(&attrd_ipcs, &attrd_proxy_callbacks); -+ stonith_ipc_server_init(&stonith_ipcs, &stonith_proxy_callbacks); -+ crmd_ipcs = crmd_ipc_server_init(&crmd_proxy_callbacks); -+ if (crmd_ipcs == NULL) { -+ crm_err("Failed to create crmd server: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ -+void -+ipc_proxy_cleanup(void) -+{ -+ if (ipc_providers) { -+ g_hash_table_destroy(ipc_providers); -+ } -+ if (ipc_clients) { -+ g_hash_table_destroy(ipc_clients); -+ } -+ cib_ipc_servers_destroy(cib_ro, cib_rw, cib_shm); -+ qb_ipcs_destroy(attrd_ipcs); -+ qb_ipcs_destroy(stonith_ipcs); -+ qb_ipcs_destroy(crmd_ipcs); -+ cib_ro = NULL; -+ cib_rw = NULL; -+ cib_shm = NULL; -+ ipc_providers = NULL; -+ ipc_clients = NULL; -+} -diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c -index 46c0eac..a4747cb 100644 ---- a/lrmd/lrmd.c -+++ b/lrmd/lrmd.c -@@ -5,12 +5,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -760,8 +760,8 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) - } - } - -- /* Stonith automatically registers devices from the CIB when changes occur, -- * but to avoid a possible race condition between stonith receiving the CIB update -+ /* Stonith automatically registers devices from the IPC when changes occur, -+ * but to avoid a possible race condition between stonith receiving the IPC update - * and the lrmd requesting that resource, the lrmd still registers the device as well. - * Stonith knows how to handle duplicate device registrations correctly. */ - rc = stonith_api->cmds->register_device(stonith_api, -@@ -870,10 +870,11 @@ lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) - - action->cb_data = cmd; - -- /* 'cmd' may not be valid after this point -+ /* 'cmd' may not be valid after this point if -+ * services_action_async() returned TRUE - * - * Upstart and systemd both synchronously determine monitor/status -- * results and call action_complete (which may free 'cmd') if necessary -+ * results and call action_complete (which may free 'cmd') if necessary. 
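The comment above pins down an ownership rule that is easy to get wrong: once services_action_async() returns TRUE, the completion callback may already have run and freed 'cmd', so 'cmd' must not be touched on that path. A minimal sketch of the rule; cmd_finalize() is an assumed stand-in for whatever cleanup the synchronous fallback performs, not a name from this patch:

    if (services_action_async(action, action_complete)) {
        return TRUE;             /* 'cmd' may already be freed: hands off */
    }

    /* synchronous fallback only: this caller still owns 'cmd' */
    cmd_finalize(cmd, rsc);      /* assumed helper */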
- */ - if (services_action_async(action, action_complete)) { - return TRUE; -@@ -996,11 +997,25 @@ static int - process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request) - { - xmlNode *reply = create_xml_node(NULL, "reply"); -+ const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); -+ const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); -+ -+ if (safe_str_neq(protocol_version, LRMD_PROTOCOL_VERSION)) { -+ crm_xml_add_int(reply, F_LRMD_RC, -EPROTO); -+ crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); -+ } - - crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); - crm_xml_add(reply, F_LRMD_CLIENTID, client->id); - lrmd_server_send_reply(client, id, reply); - -+ if (crm_is_true(is_ipc_provider)) { -+ /* this is a remote connection from a cluster nodes crmd */ -+#ifdef SUPPORT_REMOTE -+ ipc_proxy_add_provider(client); -+#endif -+ } -+ - free_xml(reply); - return pcmk_ok; - } -@@ -1250,12 +1265,16 @@ process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) - const char *op = crm_element_value(request, F_LRMD_OPERATION); - int do_reply = 0; - int do_notify = 0; -- int exit = 0; - - crm_trace("Processing %s operation from %s", op, client->id); - crm_element_value_int(request, F_LRMD_CALLID, &call_id); - -- if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { -+ if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { -+#ifdef SUPPORT_REMOTE -+ ipc_proxy_forward_client(client, request); -+#endif -+ do_reply = 1; -+ } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { - rc = process_lrmd_signon(client, id, request); - } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { - rc = process_lrmd_rsc_register(client, id, request); -@@ -1295,8 +1314,4 @@ process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) - if (do_notify) { - send_generic_notify(rc, request); - } -- -- if (exit) { -- lrmd_shutdown(0); -- } - } -diff --git a/lrmd/lrmd_private.h b/lrmd/lrmd_private.h -index 84805bf..f625293 100644 ---- a/lrmd/lrmd_private.h -+++ b/lrmd/lrmd_private.h -@@ -64,7 +64,7 @@ void lrmd_tls_server_destroy(void); - /* Hidden in lrmd client lib */ - extern int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, - const char *msg_type); --extern int lrmd_tls_set_key(gnutls_datum_t * key, const char *location); -+extern int lrmd_tls_set_key(gnutls_datum_t * key); - # endif - - int lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply); -@@ -92,4 +92,13 @@ stonith_t *get_stonith_connection(void); - */ - void stonith_connection_failed(void); - -+#ifdef SUPPORT_REMOTE -+void ipc_proxy_init(void); -+void ipc_proxy_cleanup(void); -+void ipc_proxy_add_provider(crm_client_t *client); -+void ipc_proxy_remove_provider(crm_client_t *client); -+void ipc_proxy_forward_client(crm_client_t *client, xmlNode *xml); - #endif -+ -+#endif -+ -diff --git a/lrmd/main.c b/lrmd/main.c -index 59ee22c..38c5466 100644 ---- a/lrmd/main.c -+++ b/lrmd/main.c -@@ -5,12 +5,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
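process_lrmd_signon() above now gates registration on LRMD_PROTOCOL_VERSION, answering a mismatched client with rc -EPROTO plus the server's own version in the reply. A sketch of what the peer could do with such a reply; this client half is hypothetical (it is not part of this hunk), while F_LRMD_RC, F_LRMD_PROTOCOL_VERSION and LRMD_PROTOCOL_VERSION are the patch's names:

    int rc = pcmk_ok;

    crm_element_value_int(reply, F_LRMD_RC, &rc);
    if (rc == -EPROTO) {
        /* hypothetical handling: report both versions and give up */
        crm_err("lrmd protocol mismatch: server speaks %s, client speaks %s",
                crm_element_value(reply, F_LRMD_PROTOCOL_VERSION),
                LRMD_PROTOCOL_VERSION);
    }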
-- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -31,11 +31,13 @@ - - #include - -+#if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE) -+# define ENABLE_PCMK_REMOTE -+#endif -+ - GMainLoop *mainloop = NULL; --qb_ipcs_service_t *ipcs = NULL; -+static qb_ipcs_service_t *ipcs = NULL; - stonith_t *stonith_api = NULL; --static gboolean enable_remote = FALSE; --static int remote_port = 0; - int lrmd_call_id = 0; - - static void -@@ -149,6 +151,9 @@ lrmd_ipc_closed(qb_ipcs_connection_t * c) - - crm_trace("Connection %p", c); - client_disconnect_cleanup(client->id); -+#ifdef ENABLE_PCMK_REMOTE -+ ipc_proxy_remove_provider(client); -+#endif - crm_client_destroy(client); - return 0; - } -@@ -175,7 +180,7 @@ lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply) - switch (client->kind) { - case CRM_CLIENT_IPC: - return crm_ipcs_send(client, id, reply, FALSE); --#ifdef HAVE_GNUTLS_GNUTLS_H -+#ifdef ENABLE_PCMK_REMOTE - case CRM_CLIENT_TLS: - return lrmd_tls_send_msg(client->remote, reply, id, "reply"); - #endif -@@ -196,7 +201,7 @@ lrmd_server_send_notify(crm_client_t * client, xmlNode * msg) - return -1; - } - return crm_ipcs_send(client, 0, msg, TRUE); --#ifdef HAVE_GNUTLS_GNUTLS_H -+#ifdef ENABLE_PCMK_REMOTE - case CRM_CLIENT_TLS: - if (client->remote == NULL) { - crm_trace("Asked to send event to disconnected remote client"); -@@ -217,7 +222,7 @@ lrmd_shutdown(int nsig) - if (ipcs) { - mainloop_del_ipc_server(ipcs); - } -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - /* *INDENT-OFF* */ -@@ -226,10 +231,11 @@ static struct crm_option long_options[] = { - {"help", 0, 0, '?', "\tThis text"}, - {"version", 0, 0, '$', "\tVersion information" }, - {"verbose", 0, 0, 'V', "\tIncrease debug output"}, -- {"tls_enable", 0, 0, 't', "\tEnable TLS connection."}, -- {"tls_port", 1, 0, 'p', "\tTLS port to listen to, defaults to 1984"}, - - {"logfile", 1, 0, 'l', "\tSend logs to the additional named logfile"}, -+ -+ /* For compatibility with the original lrmd */ -+ {"dummy", 0, 0, 'r', NULL, 1}, - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -241,9 +247,15 @@ main(int argc, char **argv) - int flag = 0; - int index = 0; - -- crm_log_init("lrmd", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); -+#ifdef ENABLE_PCMK_REMOTE -+ crm_log_init("pacemaker_remoted", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_set_options(NULL, "[options]", long_options, - "Daemon for controlling services confirming to different standards"); -+#else -+ crm_log_init("lrmd", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); -+ crm_set_options(NULL, "[options]", long_options, -+ "Pacemaker Remote daemon for extending pacemaker functionality to remote nodes."); -+#endif - - while (1) { - flag = crm_get_option(argc, argv, &index); -@@ -252,16 +264,13 @@ main(int argc, char **argv) - } - - switch (flag) { -+ case 'r': -+ break; - case 'l': - crm_add_logfile(optarg); - break; -- case 't': -- enable_remote = TRUE; -- break; -- case 'p': -- remote_port = atoi(optarg); - case 'V': -- set_crm_log_level(crm_log_level + 1); -+ crm_bump_log_level(argc, argv); - break; - case '?': - case '$': -@@ -273,28 +282,31 @@ main(int argc, char **argv) - } - } - -- if (enable_remote && !remote_port) { -- remote_port = DEFAULT_REMOTE_PORT; -- } -+ /* Used by RAs - Leave owned by root */ -+ crm_build_path(CRM_RSCTMP_DIR, 0755); -+ -+ /* Legacy: Used by RAs - Leave owned 
by root */ -+ crm_build_path(HA_STATE_DIR"/heartbeat/rsctmp", 0755); - - rsc_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc); - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); - if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - -- if (enable_remote) { --#ifdef HAVE_GNUTLS_GNUTLS_H -+#ifdef ENABLE_PCMK_REMOTE -+ { -+ const char *remote_port_str = getenv("PCMK_remote_port"); -+ int remote_port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; -+ - if (lrmd_init_remote_tls_server(remote_port) < 0) { -- crm_err("Failed to create TLS server: shutting down and inhibiting respawn"); -- crm_exit(100); -+ crm_err("Failed to create TLS server on port %d: shutting down and inhibiting respawn", remote_port); -+ crm_exit(DAEMON_RESPAWN_STOP); - } --#else -- crm_err("GNUTLS not enabled in this build, can not establish remote server"); -- crm_exit(100); --#endif -+ ipc_proxy_init(); - } -+#endif - - mainloop_add_signal(SIGTERM, lrmd_shutdown); - mainloop = g_main_new(FALSE); -@@ -302,12 +314,11 @@ main(int argc, char **argv) - g_main_run(mainloop); - - mainloop_del_ipc_server(ipcs); -- crm_client_cleanup(); -- if (enable_remote) { --#ifdef HAVE_GNUTLS_GNUTLS_H -- lrmd_tls_server_destroy(); -+#ifdef ENABLE_PCMK_REMOTE -+ lrmd_tls_server_destroy(); -+ ipc_proxy_cleanup(); - #endif -- } -+ crm_client_cleanup(); - - g_hash_table_destroy(rsc_list); - -diff --git a/lrmd/pacemaker_remote.in b/lrmd/pacemaker_remote.in -new file mode 100644 -index 0000000..ba89087 ---- /dev/null -+++ b/lrmd/pacemaker_remote.in -@@ -0,0 +1,155 @@ -+#!/bin/bash -+ -+# Authors: -+# Andrew Beekhof -+# -+# License: Revised BSD -+ -+# chkconfig: - 99 01 -+# description: Pacemaker Cluster Manager -+# processname: pacemaker_remoted -+# -+### BEGIN INIT INFO -+# Provides: pacemaker_remoted -+# Required-Start: $network -+# Should-Start: $syslog -+# Required-Stop: $network -+# Default-Start: -+# Default-Stop: -+# Short-Description: Starts and stops the Pacemaker remote agent for non-cluster nodes -+# Description: Starts and stops the Pacemaker remote agent for non-cluster nodes -+### END INIT INFO -+ -+desc="Pacemaker Remote Agent" -+prog="pacemaker_remoted" -+cman=0 -+ -+# set secure PATH -+PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@" -+ -+checkrc() { -+ if [ $? = 0 ]; then -+ success -+ else -+ failure -+ fi -+} -+ -+success() -+{ -+ echo -ne "[ OK ]\r" -+} -+ -+failure() -+{ -+ echo -ne "[FAILED]\r" -+} -+ -+status() -+{ -+ pid=$(pidof $1 2>/dev/null) -+ rtrn=$? -+ if [ $rtrn -ne 0 ]; then -+ echo "$1 is stopped" -+ else -+ echo "$1 (pid $pid) is running..." -+ fi -+ return $rtrn -+} -+ -+# rpm based distros -+if [ -d @sysconfdir@/sysconfig ]; then -+ [ -f @INITDIR@/functions ] && . @INITDIR@/functions -+ [ -f @sysconfdir@/sysconfig/pacemaker ] && . @sysconfdir@/sysconfig/pacemaker -+ [ -z "$LOCK_FILE" ] && LOCK_FILE="@localstatedir@/lock/subsys/pacemaker_remote" -+fi -+ -+# deb based distros -+if [ -d @sysconfdir@/default ]; then -+ [ -f @sysconfdir@/default/pacemaker ] && . @sysconfdir@/default/pacemaker -+ [ -z "$LOCK_FILE" ] && LOCK_FILE="@localstatedir@/lock/pacemaker_remote" -+fi -+ -+start() -+{ -+ echo -n "Starting $desc: " -+ -+ # most recent distributions use tmpfs for $@localstatedir@/run -+ # to avoid to clean it up on every boot. 
-+ # they also assume that init scripts will create -+ # required subdirectories for proper operations -+ mkdir -p @localstatedir@/run -+ -+ if status $prog > /dev/null 2>&1; then -+ success -+ else -+ $prog > /dev/null 2>&1 & -+ -+ # Time to connect to corosync and fail -+ sleep 5 -+ -+ if status $prog > /dev/null 2>&1; then -+ touch $LOCK_FILE -+ pidof $prog > @localstatedir@/run/$prog.pid -+ success -+ else -+ failure -+ rtrn=1 -+ fi -+ fi -+ echo -+} -+ -+stop() -+{ -+ if status $prog > /dev/null 2>&1; then -+ echo -n "Signaling $desc to terminate: " -+ kill -TERM $(pidof $prog) > /dev/null 2>&1 -+ success -+ echo -+ -+ echo -n "Waiting for $desc to unload:" -+ while status $prog > /dev/null 2>&1; do -+ sleep 1 -+ echo -n "." -+ done -+ fi -+ -+ rm -f $LOCK_FILE -+ rm -f @localstatedir@/run/$prog.pid -+ success -+ echo -+} -+ -+rtrn=0 -+ -+case "$1" in -+start) -+ start -+;; -+restart|reload|force-reload) -+ stop -+ start -+;; -+condrestart|try-restart) -+ if status $prog > /dev/null 2>&1; then -+ stop -+ start -+ rtrn=$? -+ fi -+;; -+status) -+ status $prog -+ rtrn=$? -+;; -+stop) -+ stop -+ rtrn=$? -+;; -+*) -+ echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" -+ rtrn=2 -+;; -+esac -+ -+exit $rtrn -diff --git a/lrmd/pacemaker_remote.service.in b/lrmd/pacemaker_remote.service.in -new file mode 100644 -index 0000000..f73e943 ---- /dev/null -+++ b/lrmd/pacemaker_remote.service.in -@@ -0,0 +1,22 @@ -+[Unit] -+Description=Pacemaker Remote Service -+After=network.target -+Requires=network.target -+ -+[Install] -+WantedBy=multi-user.target -+ -+[Service] -+Type=simple -+KillMode=process -+NotifyAccess=none -+SysVStartPriority=99 -+EnvironmentFile=-/etc/sysconfig/pacemaker -+ -+ExecStart=@sbindir@/pacemaker_remoted -+ -+TimeoutStopSec=30s -+TimeoutStartSec=30s -+ -+# Restart options include: no, on-success, on-failure, on-abort or always -+Restart=on-failure -diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in -index 7c33d9c..8d5fea4 100755 ---- a/lrmd/regression.py.in -+++ b/lrmd/regression.py.in -@@ -36,7 +36,12 @@ class Test: - self.name = name - self.description = description - self.cmds = [] -- self.daemon_location = "@CRM_DAEMON_DIR@/lrmd" -+ -+ if tls: -+ self.daemon_location = "/usr/sbin/pacemaker_remoted" -+ else: -+ self.daemon_location = "@CRM_DAEMON_DIR@/lrmd" -+ - self.test_tool_location = "@CRM_DAEMON_DIR@/lrmd_test" - self.verbose = verbose - self.tls = tls -@@ -72,17 +77,17 @@ class Test: - - def start_environment(self): - ### make sure we are in full control here ### -- cmd = shlex.split("killall -q -9 stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test") -+ cmd = shlex.split("killall -q -9 stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test pacemaker_remoted") - test = subprocess.Popen(cmd, stdout=subprocess.PIPE) - test.wait() - - additional_args = "" - -- self.stonith_process = subprocess.Popen(shlex.split("@CRM_DAEMON_DIR@/stonithd -s")) -- if self.tls: -- additional_args = additional_args + " -t " -+ if self.tls == 0: -+ self.stonith_process = subprocess.Popen(shlex.split("@CRM_DAEMON_DIR@/stonithd -s")) -+ - if self.verbose: -- additional_args = additional_args + " -VVV " -+ additional_args = additional_args + " -VV " - - self.lrmd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/lrmd-regression.log" % (self.daemon_location, additional_args))) - -@@ -123,7 +128,7 @@ class Test: - self.__new_cmd(self.test_tool_location, args, 0, "", kill=killProc) - - def add_expected_fail_cmd(self, args): -- self.__new_cmd(self.test_tool_location, 
args, 255, "") -+ self.__new_cmd(self.test_tool_location, args, 1, "") - - def get_exitcode(self): - return self.result_exitcode -@@ -170,6 +175,12 @@ class Test: - def run(self): - res = 0 - i = 1 -+ -+ if self.tls and self.name.count("stonith") != 0: -+ self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) -+ print self.result_txt -+ return res -+ - self.start_environment() - - if self.verbose: -@@ -204,13 +215,13 @@ class Tests: - self.tests = [] - self.verbose = verbose - self.tls = tls; -- self.rsc_classes = output_from_command("crm_resource --list-standards") -+ self.rsc_classes = output_from_command("crm_resource --list-standards") - self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line -- print "Testing "+repr(self.rsc_classes) -+ self.need_authkey = 0 -+ if self.tls: -+ self.rsc_classes.remove("stonith") - -- if not os.path.isfile("/etc/pacemaker/authkey"): -- os.system("mkdir -p /etc/pacemaker") -- os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") -+ print "Testing "+repr(self.rsc_classes) - - self.common_cmds = { - "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc -t 3000 -C ocf -P pacemaker -T Dummy", -@@ -252,7 +263,7 @@ class Tests: - "upstart_cancel_line" : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", - "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", - -- "service_reg_line" : "-c register_rsc -r service_test_rsc -t 3000 -C service -T lrmd_dummy_daemon", -+ "service_reg_line" : "-c register_rsc -r service_test_rsc -t 3000 -C service -T LSBDummy", - "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", - "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" -t 3000", - "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", -@@ -265,7 +276,7 @@ class Tests: - "service_cancel_line" : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", - "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", - -- "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc -t 3000 -C lsb -T lrmd_dummy_daemon", -+ "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc -t 3000 -C lsb -T LSBDummy", - "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", - "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" -t 3000", - "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", -@@ -298,8 +309,14 @@ class Tests: - return test - - def setup_test_environment(self): -+ os.system("service pacemaker_remote stop") - self.cleanup_test_environment() - -+ if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): -+ self.need_authkey = 1 -+ os.system("mkdir -p /etc/pacemaker") -+ os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") -+ - ### Make fake systemd daemon and unit file ### - dummy_daemon = "#!/bin/bash\nwhile true\ndo\nsleep 5\ndone" - dummy_service_file = ("[Unit]\n" -@@ -370,14 +387,17 @@ if __name__ == "__main__": - os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor\n%s\nEND" % (dummy_fence_agent)) - os.system("chmod 711 /usr/sbin/fence_dummy_monitor") - -- os.system("cp /usr/share/pacemaker/tests/cts/LSBDummy 
/etc/init.d/lrmd_dummy_daemon") -+ os.system("cp /usr/share/pacemaker/tests/cts/LSBDummy /etc/init.d/LSBDummy") - os.system("mkdir -p @CRM_CORE_DIR@/root") - - os.system("systemctl daemon-reload") - - def cleanup_test_environment(self): -+ if self.need_authkey: -+ os.system("rm -f /etc/pacemaker/authkey") -+ - os.system("rm -f /lib/systemd/system/lrmd_dummy_daemon.service") -- os.system("rm -f /etc/init.d/lrmd_dummy_daemon") -+ os.system("rm -f /etc/init.d/LSBDummy") - os.system("rm -f /usr/sbin/lrmd_dummy_daemon") - os.system("rm -f /usr/sbin/fence_dummy_monitor") - os.system("rm -f /usr/sbin/fence_dummy_sleep") -@@ -505,7 +525,7 @@ if __name__ == "__main__": - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") - test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") - test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") -- test.add_cmd_and_kill("rm -f /var/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") -+ test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") - test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") -@@ -522,7 +542,7 @@ if __name__ == "__main__": - test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 -o " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") -- test.add_cmd_and_kill("rm -f /var/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") -+ test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") - test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") -@@ -651,6 +671,19 @@ if __name__ == "__main__": - - ### These are tests that target specific cases ### - def build_custom_tests(self): -+ -+ ### verify resource temporary folder is created and used by heartbeat agents. 
### -+ test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") -+ test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") -+ test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " -+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" -t 3000") -+ test.add_cmd("-c exec -r test_rsc -a start -t 4000") -+ test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") -+ test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") -+ test.add_cmd("-c exec -r test_rsc -a stop -t 4000") -+ test.add_cmd("-c unregister_rsc -r test_rsc -t 3000 " -+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") -+ - ### start delay then stop test ### - test = self.new_test("start_delay", "Verify start delay works as expected.") - test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " -@@ -715,7 +748,7 @@ if __name__ == "__main__": - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 -n " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") -- # this will fail because the monitor notifications should only go to the original caller, which no longer exists. -+ # this will fail because the monitor notifications should only go to the original caller, which no longer exists. - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") - test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" ") - test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " -@@ -731,8 +764,8 @@ if __name__ == "__main__": - - ### get metadata ### - test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") -- test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"lrmd_dummy_daemon\"" -- ,"resource-agent name=\"lrmd_dummy_daemon\"") -+ test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"" -+ ,"resource-agent name='LSBDummy'") - - ### get stonith metadata ### - test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") -@@ -762,9 +795,9 @@ if __name__ == "__main__": - test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") - test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist -- test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### init.d ### -- test.add_cmd_check_stdout("-c list_agents -C lsb", "lrmd_dummy_daemon") -- test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") -+ test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### -+ test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") -+ test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") - test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist - - test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist -@@ -774,15 +807,15 @@ if __name__ == "__main__": - - if "systemd" in self.rsc_classes: - test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### systemd ### -- test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") -- test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should 
not exist -+ test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") -+ test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon") - test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist - - if "upstart" in self.rsc_classes: - test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### upstart ### -- test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") -- test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist -+ test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") -+ test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon") - test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist - -@@ -855,7 +888,7 @@ class TestOptions: - self.options['verbose'] = 0 - self.options['invalid-arg'] = "" - self.options['show-usage'] = 0 -- self.options['tls-backend'] = 0 -+ self.options['pacemaker-remote'] = 0 - - def build_options(self, argv): - args = argv[1:] -@@ -870,8 +903,8 @@ class TestOptions: - self.options['list-tests'] = 1 - elif args[i] == "-V" or args[i] == "--verbose": - self.options['verbose'] = 1 -- elif args[i] == "-S" or args[i] == "--tls-backend": -- self.options['tls-backend'] = 1 -+ elif args[i] == "-R" or args[i] == "--pacemaker-remote": -+ self.options['pacemaker-remote'] = 1 - elif args[i] == "-r" or args[i] == "--run-only": - self.options['run-only'] = args[i+1] - skip = 1 -@@ -887,7 +920,7 @@ class TestOptions: - print "\t [--list-tests | -l] Print out all registered tests." - print "\t [--run-only | -r 'testname'] Run a specific test" - print "\t [--verbose | -V] Verbose output" -- print "\t [--tls-backend | -S Use tls backend" -+ print "\t [--pacemaker-remote | -R Test pacemaker-remote binary instead of lrmd." 
- print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" - print "\n\tExample: Run only the test 'start_top'" - print "\t\t python ./regression.py --run-only start_stop" -@@ -899,7 +932,7 @@ def main(argv): - o = TestOptions() - o.build_options(argv) - -- tests = Tests(o.options['verbose'], o.options['tls-backend']) -+ tests = Tests(o.options['verbose'], o.options['pacemaker-remote']) - - tests.build_generic_tests() - tests.build_multi_rsc_tests() -diff --git a/lrmd/tls_backend.c b/lrmd/tls_backend.c -index 8ee16d4..780d17b 100644 ---- a/lrmd/tls_backend.c -+++ b/lrmd/tls_backend.c -@@ -137,6 +137,7 @@ lrmd_remote_client_destroy(gpointer user_data) - return; - } - -+ ipc_proxy_remove_provider(client); - client_disconnect_cleanup(client->id); - - crm_notice("LRMD client disconnecting remote client - name: %s id: %s", -@@ -249,17 +250,7 @@ lrmd_remote_connection_destroy(gpointer user_data) - static int - lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) - { -- int rc = 0; -- -- if (lrmd_tls_set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { -- rc = lrmd_tls_set_key(key, ALT_REMOTE_KEY_LOCATION); -- } -- if (rc) { -- crm_err("No lrmd remote key found"); -- return -1; -- } -- -- return rc; -+ return lrmd_tls_set_key(key); - } - - int -diff --git a/mcp/Makefile.am b/mcp/Makefile.am -index f3a0f86..73a71c4 100644 ---- a/mcp/Makefile.am -+++ b/mcp/Makefile.am -@@ -5,17 +5,17 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - # --MAINTAINERCLEANFILES = Makefile.in -+MAINTAINERCLEANFILES = Makefile.in - - if BUILD_CS_SUPPORT - INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -@@ -23,7 +23,7 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - - initdir = $(INITDIR) - init_SCRIPTS = pacemaker --sbin_PROGRAMS = pacemakerd -+sbin_PROGRAMS = pacemakerd - - if BUILD_HELP - man8_MANS = $(sbin_PROGRAMS:%=%.8) -@@ -35,7 +35,7 @@ endif - - ## SOURCES - --noinst_HEADERS = -+noinst_HEADERS = - - pacemakerd_SOURCES = pacemaker.c corosync.c - pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la -@@ -44,6 +44,7 @@ pacemakerd_LDFLAGS = $(CLUSTERLIBS) - %.8: % - echo Creating $@ - chmod a+x $(top_builddir)/mcp/$< -+ $(top_builddir)/mcp/$< --help - $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/mcp/$< - - clean-generic: -diff --git a/mcp/corosync.c b/mcp/corosync.c -index 28a7ff7..64d6eb5 100644 ---- a/mcp/corosync.c -+++ b/mcp/corosync.c -@@ -174,7 +174,7 @@ cpg_connection_destroy(gpointer user_data) - { - crm_err("Connection destroyed"); - cpg_handle = 0; -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - static void -@@ -527,7 +527,7 @@ read_config(void) - } else { - crm_err("We can only start Pacemaker from init if using version 1" - " of the Pacemaker plugin for Corosync. 
Terminating."); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - break; - } -@@ -581,6 +581,19 @@ read_config(void) - #if HAVE_CONFDB - confdb_finalize(config); - #elif HAVE_CMAP -+ if(local_handle){ -+ gid_t gid = 0; -+ if (crm_user_lookup(CRM_DAEMON_USER, NULL, &gid) < 0) { -+ crm_warn("No group found for user %s", CRM_DAEMON_USER); -+ -+ } else { -+ char key[PATH_MAX]; -+ snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); -+ rc = cmap_set_uint8(local_handle, key, 1); -+ crm_notice("Configured corosync to accept connections from group %u: %s (%d)", -+ gid, ais_error2text(rc), rc); -+ } -+ } - cmap_finalize(local_handle); - #endif - -@@ -624,6 +637,7 @@ read_config(void) - } - - set_daemon_option("logfacility", logging_syslog_facility); -+ setenv("HA_LOGFACILITY", logging_syslog_facility, 1); - - free(logging_debug); - free(logging_logfile); -diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c -index ea97851..47fdd68 100644 ---- a/mcp/pacemaker.c -+++ b/mcp/pacemaker.c -@@ -20,6 +20,7 @@ - #include - - #include -+#include - #include - #include - #include -@@ -38,7 +39,7 @@ GHashTable *peers = NULL; - - #define PCMK_PROCESS_CHECK_INTERVAL 5 - --char *local_name = NULL; -+const char *local_name = NULL; - uint32_t local_nodeid = 0; - crm_trigger_t *shutdown_trigger = NULL; - const char *pid_file = "/var/run/pacemaker.pid"; -@@ -160,27 +161,23 @@ pcmk_process_exit(pcmk_child_t * child) - } - - static void --pcmk_child_exit(GPid pid, gint status, gpointer user_data) -+pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- int exitcode = 0; -- pcmk_child_t *child = user_data; -- -- if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); -- int core = WCOREDUMP(status); -+ pcmk_child_t *child = mainloop_child_userdata(p); -+ const char *name = mainloop_child_name(p); - -+ if (signo) { - crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)", -- child->name, signo, child->pid, core); -+ name, signo, pid, core); - -- } else if (WIFEXITED(status)) { -- exitcode = WEXITSTATUS(status); -+ } else { - do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR, -- "Child process %s exited (pid=%d, rc=%d)", child->name, child->pid, exitcode); -+ "Child process %s (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); - } - - if (exitcode == 100) { - crm_warn("Pacemaker child process %s no longer wishes to be respawned. 
" -- "Shutting ourselves down.", child->name); -+ "Shutting ourselves down.", name); - child->respawn = FALSE; - fatal_error = TRUE; - pcmk_shutdown(15); -@@ -226,12 +223,14 @@ start_child(pcmk_child_t * child) - { - int lpc = 0; - uid_t uid = 0; -+ gid_t gid = 0; - struct rlimit oflimits; - gboolean use_valgrind = FALSE; - gboolean use_callgrind = FALSE; - const char *devnull = "/dev/null"; - const char *env_valgrind = getenv("PCMK_valgrind_enabled"); - const char *env_callgrind = getenv("PCMK_callgrind_enabled"); -+ enum cluster_type_e stack = get_cluster_type(); - - child->active_before_startup = FALSE; - -@@ -261,12 +260,20 @@ start_child(pcmk_child_t * child) - use_valgrind = FALSE; - } - -+ if (child->uid) { -+ if (crm_user_lookup(child->uid, &uid, &gid) < 0) { -+ crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); -+ return FALSE; -+ } -+ crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); -+ } -+ - child->pid = fork(); - CRM_ASSERT(child->pid != -1); - - if (child->pid > 0) { - /* parent */ -- g_child_watch_add(child->pid, pcmk_child_exit, child); -+ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); - - crm_info("Forked child %d for process %s%s", child->pid, child->name, - use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); -@@ -292,17 +299,21 @@ start_child(pcmk_child_t * child) - } - opts_default[0] = strdup(child->command);; - --#if 0 -- /* Dont set the group for now - it prevents connection to the cluster */ -- if (gid && setgid(gid) < 0) { -- crm_perror("Could not set group to %d", gid); -- } --#endif -+ if(gid) { -+ if(stack == pcmk_cluster_corosync) { -+ /* Drop root privileges completely -+ * -+ * We can do this because we set uidgid.gid.${gid}=1 -+ * via CMAP which allows these processes to connect to -+ * corosync -+ */ -+ if (setgid(gid) < 0) { -+ crm_perror(LOG_ERR, "Could not set group to %d", gid); -+ } - -- if (child->uid) { -- if (crm_user_lookup(child->uid, &uid, NULL) < 0) { -- crm_err("Invalid uid (%s) specified for %s", child->uid, child->name); -- return TRUE; -+ /* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */ -+ } else if (initgroups(child->uid, gid) < 0) { -+ crm_err("Cannot initalize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno); - } - } - -@@ -326,7 +337,7 @@ start_child(pcmk_child_t * child) - (void)execvp(child->command, opts_default); - } - crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - return TRUE; /* never reached */ - } -@@ -403,7 +414,7 @@ pcmk_shutdown_worker(gpointer user_data) - - if (fatal_error) { - crm_notice("Attempting to inhibit respawning after fatal error"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - return TRUE; -@@ -418,29 +429,6 @@ pcmk_shutdown(int nsig) - mainloop_set_trigger(shutdown_trigger); - } - --static void --build_path(const char *path_c, mode_t mode) --{ -- int offset = 1, len = 0; -- char *path = strdup(path_c); -- -- CRM_CHECK(path != NULL, return); -- for (len = strlen(path); offset < len; offset++) { -- if (path[offset] == '/') { -- path[offset] = 0; -- if (mkdir(path, mode) < 0 && errno != EEXIST) { -- crm_perror(LOG_ERR, "Could not create directory '%s'", path); -- break; -- } -- path[offset] = '/'; -- } -- } -- if (mkdir(path, mode) < 0 && errno != EEXIST) { -- crm_perror(LOG_ERR, "Could not create directory '%s'", path); -- } -- free(path); --} -- - static int32_t - 
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) - { -@@ -641,8 +629,8 @@ static struct crm_option long_options[] = { - {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"}, - - {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, -- {"foreground", 0, 0, 'f', "\tRun in the foreground instead of as a daemon"}, -- {"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"}, -+ {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"}, -+ {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"}, - - {NULL, 0, 0, 0} - }; -@@ -809,7 +797,6 @@ main(int argc, char **argv) - const char *facility = daemon_option("logfacility"); - - setenv("LC_ALL", "C", 1); -- setenv("HA_LOGFACILITY", facility, 1); - setenv("HA_LOGD", "no", 1); - - set_daemon_option("mcp", "true"); -@@ -844,9 +831,9 @@ main(int argc, char **argv) - shutdown = TRUE; - break; - case 'F': -- printf("Pacemaker %s (Build: %s)\n Supporting: %s\n", VERSION, BUILD_VERSION, -- CRM_FEATURES); -- crm_exit(0); -+ printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", VERSION, BUILD_VERSION, -+ CRM_FEATURE_SET, CRM_FEATURES); -+ crm_exit(pcmk_ok); - default: - printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); - ++argerr; -@@ -882,13 +869,13 @@ main(int argc, char **argv) - } - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - } else if (crm_ipc_connected(old_instance)) { - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); - crm_err("Pacemaker is already active, aborting startup"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - crm_ipc_close(old_instance); -@@ -896,7 +883,7 @@ main(int argc, char **argv) - - if (read_config() == FALSE) { - crm_notice("Could not obtain corosync config data, exiting"); -- crm_exit(1); -+ crm_exit(ENODATA); - } - - crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES); -@@ -932,51 +919,46 @@ main(int argc, char **argv) - - if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { - crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); -- crm_exit(1); -+ crm_exit(ENOKEY); - } - - mkdir(CRM_STATE_DIR, 0750); - mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); - -- /* Used by stonithd */ -- build_path(HA_STATE_DIR "/heartbeat", 0755); -- mcp_chown(HA_STATE_DIR "/heartbeat", pcmk_uid, pcmk_gid); -- -- /* Used by RAs - Leave owned by root */ -- build_path(CRM_RSCTMP_DIR, 0755); -- - /* Used to store core files in */ -- build_path(CRM_CORE_DIR, 0755); -+ crm_build_path(CRM_CORE_DIR, 0775); - mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid); - - /* Used to store blackbox dumps in */ -- build_path(CRM_BLACKBOX_DIR, 0755); -+ crm_build_path(CRM_BLACKBOX_DIR, 0755); - mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid); - - /* Used to store policy engine inputs in */ -- build_path(PE_STATE_DIR, 0755); -+ crm_build_path(PE_STATE_DIR, 0755); - mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid); - - /* Used to store the cluster configuration */ -- build_path(CRM_CONFIG_DIR, 0755); -+ crm_build_path(CRM_CONFIG_DIR, 0755); - mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid); - -+ /* Resource agent paths are constructed by the lrmd */ -+ - peers = g_hash_table_new(g_direct_hash, g_direct_equal); - - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &ipc_callbacks); - if (ipcs == NULL) { - crm_err("Couldn't start IPC server"); -- crm_exit(1); -+ crm_exit(EIO); - } - - if 
(cluster_connect_cfg(&local_nodeid) == FALSE) { - crm_err("Couldn't connect to Corosync's CFG service"); -- crm_exit(1); -+ crm_exit(ENOPROTOOPT); - } - - if (cluster_connect_cpg() == FALSE) { - crm_err("Couldn't connect to Corosync's CPG service"); -- crm_exit(1); -+ crm_exit(ENOPROTOOPT); - } - - local_name = get_local_node_name(); -@@ -1005,5 +987,5 @@ main(int argc, char **argv) - - crm_info("Exiting %s", crm_system_name); - -- crm_exit(0); -+ crm_exit(pcmk_ok); - } -diff --git a/mcp/pacemaker.combined.upstart.in b/mcp/pacemaker.combined.upstart.in -new file mode 100644 -index 0000000..df055f7 ---- /dev/null -+++ b/mcp/pacemaker.combined.upstart.in -@@ -0,0 +1,57 @@ -+# pacemaker-corosync - High-Availability cluster -+# -+# Starts Corosync cluster engine and Pacemaker cluster manager. -+ -+kill timeout 3600 -+ -+env prog=pacemakerd -+env rpm_sysconf=@sysconfdir@/sysconfig/pacemaker -+env rpm_lockfile=@localstatedir@/lock/subsys/pacemaker -+env deb_sysconf=@sysconfdir@/default/pacemaker -+env deb_lockfile=@localstatedir@/lock/pacemaker -+ -+script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ exec $prog -+end script -+ -+pre-start script -+ # setup the software watchdog which corosync uses in post-stop. -+ # rewrite according to environment. -+ modprobe softdog soft_margin=60 -+ start corosync -+ -+ # if you use corosync-notifyd, uncomment the line below. -+ #start corosync-notifyd -+ -+ # give it time to fail. -+ sleep 2 -+ pidof corosync || { exit 1; } -+end script -+ -+post-start script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ touch $LOCK_FILE -+ pidof $prog > @localstatedir@/run/$prog.pid -+end script -+ -+post-stop script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ rm -f $LOCK_FILE -+ rm -f @localstatedir@/run/$prog.pid -+ -+ # when pacemakerd disappeared unexpectedly, a machine is rebooted -+ # by the watchdog of corosync. -+ pidof crmd && killall -q -9 corosync -+ stop corosync || true -+ -+ # if you use corosync-notifyd, uncomment the line below. -+ #stop corosync-notifyd || true -+end script -diff --git a/mcp/pacemaker.in b/mcp/pacemaker.in -index 0e613e3..a6647fe 100644 ---- a/mcp/pacemaker.in -+++ b/mcp/pacemaker.in -@@ -8,15 +8,15 @@ - - # chkconfig: - 99 01 - # description: Pacemaker Cluster Manager --# processname: pacemaker -+# processname: pacemakerd - # - ### BEGIN INIT INFO - # Provides: pacemaker - # Required-Start: $network corosync - # Should-Start: $syslog --# Required-Stop: $network --# Default-Start: --# Default-Stop: -+# Required-Stop: $network corosync -+# Default-Start: 2 3 4 5 -+# Default-Stop: 0 1 6 - # Short-Description: Starts and stops Pacemaker Cluster Manager. - # Description: Starts and stops Pacemaker Cluster Manager. 
- ### END INIT INFO -@@ -71,6 +71,11 @@ if [ -d @sysconfdir@/default ]; then - [ -z "$LOCK_FILE" ] && LOCK_FILE="@localstatedir@/lock/pacemaker" - fi - -+# Unless specified otherwise, assume cman is in use if cluster.conf exists -+if [ x = "x$PCMK_STACK" -a -f @sysconfdir@/cluster/cluster.conf ]; then -+ PCMK_STACK=cman -+fi -+ - start() - { - echo -n "Starting $desc: " -@@ -101,8 +106,21 @@ start() - echo - } - -+cman_pre_start() -+{ -+ pid=$(pidof corosync 2>/dev/null) -+ if [ $? -ne 0 ]; then -+ service cman start -+ fi -+} -+ - cman_pre_stop() - { -+ pid=$(pidof fenced 2>/dev/null) -+ if [ $? -ne 0 ]; then -+ : CMAN is not running, nothing to do here -+ return -+ fi - cname=`crm_node --name` - crm_attribute -N $cname -n standby -v true -l reboot - logger -t pacemaker -p daemon.notice "Waiting for shutdown of managed resources" -@@ -151,7 +169,7 @@ stop() - echo -n "." - done - else -- echo -n "$desc is already stopped, cleaning up any stale processes and files" -+ echo -n "$desc is already stopped" - fi - - rm -f $LOCK_FILE -@@ -166,7 +184,7 @@ rtrn=0 - case "$1" in - start) - # For consistency with stop -- [ -f @INITDIR@/cman ] && service cman start -+ [ "$PCMK_STACK" = cman ] && cman_pre_start - start - ;; - restart|reload|force-reload) -@@ -195,9 +213,9 @@ stop) - # 4. stop pacemaker - # 5. stop the rest of cman (so it doesn't end up half up/down) - # -- [ -f @INITDIR@/cman ] && cman_pre_stop -+ [ "$PCMK_STACK" = cman ] && cman_pre_stop - stop -- [ -f @INITDIR@/cman ] && service cman stop -+ [ "$PCMK_STACK" = cman ] && service cman stop - ;; - *) - echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" -diff --git a/mcp/pacemaker.service.in b/mcp/pacemaker.service.in -index fd6bebb..3b44eb7 100644 ---- a/mcp/pacemaker.service.in -+++ b/mcp/pacemaker.service.in -@@ -1,7 +1,13 @@ - [Unit] - Description=Pacemaker High Availability Cluster Manager -+ -+After=basic.target - After=network.target -+After=corosync.service -+ -+Requires=basic.target - Requires=network.target -+Requires=corosync.service - - [Install] - WantedBy=multi-user.target -diff --git a/mcp/pacemaker.sysconfig b/mcp/pacemaker.sysconfig -index d63604d..7f12111 100644 ---- a/mcp/pacemaker.sysconfig -+++ b/mcp/pacemaker.sysconfig -@@ -1,5 +1,9 @@ - # For non-systemd based systems, prefix export to each enabled line - -+# Turn on special handling for CMAN clusters in the init script -+# Without this, fenced (and by inference, cman) cannot reliably be made to shut down -+# PCMK_STACK=cman -+ - #==#==# Variables that control logging - - # Enable debug logging globally or per-subsystem -@@ -12,11 +16,16 @@ - # By default Pacemaker will inherit the logfile specified in corosync.conf - # PCMK_debugfile=/var/log/pacemaker.log - --# Specify an alternate syslog target for NOTICE (and higher) messages -+# Specify an alternate syslog target for NOTICE (and higher) messages - # Use 'none' to disable - not recommended - # The default value is 'daemon' - # PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7 - -+# Send all messages up-to-and-including the configured priority to syslog -+# A value of 'info' will be far too verbose for most installations and 'debug' is almost certain to send you blind -+# The default value is 'notice' -+# PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug -+ - # Log all messages from a comma-separated list of functions - # PCMK_trace_functions=function1,function2,function3 - -@@ -54,6 +63,13 @@ - # Mostly only useful for 
developer testing - # PCMK_schema_directory=/some/path - -+#==#==# Pacemaker Remote -+# Use a custom directory for finding the authkey. -+# PCMK_authkey_location=/etc/pacemaker/authkey -+# -+# Specify a custom port for Pacemaker Remote connections -+# PCMK_remote_port=3121 -+ - #==#==# IPC - - # Force use of a particular class of IPC connection -@@ -73,4 +89,4 @@ - # PCMK_valgrind_enabled=cib,crmd - # PCMK_callgrind_enabled=yes - # PCMK_callgrind_enabled=cib,crmd --# VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25 --log-file=/tmp/pacemaker-%p.valgrind" -+# VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all" -diff --git a/mcp/pacemaker.upstart.in b/mcp/pacemaker.upstart.in -new file mode 100644 -index 0000000..7c977dc ---- /dev/null -+++ b/mcp/pacemaker.upstart.in -@@ -0,0 +1,37 @@ -+# pacemaker - High-Availability cluster resource manager -+# -+# Starts pacemakerd -+ -+stop on runlevel [0123456] -+kill timeout 3600 -+respawn -+ -+env prog=pacemakerd -+env rpm_sysconf=@sysconfdir@/sysconfig/pacemaker -+env rpm_lockfile=@localstatedir@/lock/subsys/pacemaker -+env deb_sysconf=@sysconfdir@/default/pacemaker -+env deb_lockfile=@localstatedir@/lock/pacemaker -+ -+script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ exec $prog -+end script -+ -+post-start script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ touch $LOCK_FILE -+ pidof $prog > @localstatedir@/run/$prog.pid -+end script -+ -+post-stop script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ rm -f $LOCK_FILE -+ rm -f @localstatedir@/run/$prog.pid -+end script -diff --git a/pengine/Makefile.am b/pengine/Makefile.am -index aadbd61..7309f5b 100644 ---- a/pengine/Makefile.am -+++ b/pengine/Makefile.am -@@ -5,12 +5,12 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
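The sysconfig additions above (PCMK_authkey_location, PCMK_remote_port with its documented 3121 default) are consumed as environment variables once the init script sources the file or systemd loads it via EnvironmentFile. A minimal sketch of that resolution order — environment first, documented default second; the helper name here is invented for illustration and is not Pacemaker's actual option API:

    import os

    def pcmk_option(name, default):
        # /etc/sysconfig/pacemaker entries arrive as exported PCMK_*
        # environment variables; an unset or empty value falls back to
        # the default documented in the file above.
        value = os.environ.get(name, "").strip()
        return value if value else default

    authkey = pcmk_option("PCMK_authkey_location", "/etc/pacemaker/authkey")
    port = int(pcmk_option("PCMK_remote_port", "3121"))
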
-@@ -27,7 +27,7 @@ PE_TESTS = $(wildcard test10/*.scores) - - testdir = $(datadir)/$(PACKAGE)/tests/pengine - test_SCRIPTS = regression.sh --test_DATA = regression.core.sh ptest.supp -+test_DATA = regression.core.sh - - test10dir = $(datadir)/$(PACKAGE)/tests/pengine/test10 - test10_DATA = $(PE_TESTS) $(PE_TESTS:%.scores=%.xml) $(PE_TESTS:%.scores=%.exp) $(PE_TESTS:%.scores=%.dot) $(PE_TESTS:%.scores=%.summary) $(wildcard test10/*.stderr) -@@ -35,7 +35,7 @@ test10_DATA = $(PE_TESTS) $(PE_TESTS:%.scores=%.xml) $(PE_TESTS:%.scores=%.exp) - COMMONLIBS = \ - $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/pengine/libpe_status.la \ -- libpengine.la $(CURSESLIBS) $(CLUSTERLIBS) -+ libpengine.la $(CURSESLIBS) $(CLUSTERLIBS) - - ## libraries - lib_LTLIBRARIES = libpengine.la -@@ -59,8 +59,8 @@ endif - noinst_HEADERS = allocate.h utils.h pengine.h - #utils.h pengine.h - --libpengine_la_LDFLAGS = -version-info 4:1:0 --# -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version -+libpengine_la_LDFLAGS = -version-info 6:0:2 -+# -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version - libpengine_la_SOURCES = pengine.c allocate.c utils.c constraints.c - libpengine_la_SOURCES += native.c group.c clone.c master.c graph.c - -diff --git a/pengine/allocate.c b/pengine/allocate.c -index 9f5e370..255a599 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -239,7 +239,7 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op - did_change = TRUE; - key = generate_op_key(rsc->id, task, interval); - crm_log_xml_info(digest_data->params_restart, "params:restart"); -- crm_info("Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", -+ pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", - key, active_node->details->uname, - crm_str(digest_restart), digest_data->digest_restart_calc, - op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); -@@ -254,7 +254,7 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op - did_change = TRUE; - crm_log_xml_info(digest_data->params_all, "params:reload"); - key = generate_op_key(rsc->id, task, interval); -- crm_info("Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s", -+ pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. 
now %s (reload:%s) %s", - key, active_node->details->uname, - crm_str(digest_all), digest_data->digest_all_calc, op_version, - crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); -@@ -319,8 +319,15 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - CRM_CHECK(node != NULL, return); - - if (is_set(rsc->flags, pe_rsc_orphan)) { -- pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); -- DeleteRsc(rsc, node, FALSE, data_set); -+ resource_t *parent = uber_parent(rsc); -+ if(parent == NULL -+ || parent->variant < pe_clone -+ || is_set(parent->flags, pe_rsc_unique)) { -+ pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); -+ DeleteRsc(rsc, node, FALSE, data_set); -+ } else { -+ pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); -+ } - return; - - } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { -@@ -1265,19 +1272,12 @@ stage6(pe_working_set_t * data_set) - gboolean integrity_lost = FALSE; - action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); - action_t *done = get_pseudo_op(STONITH_DONE, data_set); -- gboolean need_stonith = FALSE; -+ gboolean need_stonith = TRUE; - GListPtr gIter = data_set->nodes; - - crm_trace("Processing fencing and shutdown cases"); - -- if (is_set(data_set->flags, pe_flag_stonith_enabled) -- && (is_set(data_set->flags, pe_flag_have_quorum) -- || data_set->no_quorum_policy == no_quorum_ignore -- || data_set->no_quorum_policy == no_quorum_suicide)) { -- need_stonith = TRUE; -- } -- -- if (need_stonith && any_managed_resouces(data_set) == FALSE) { -+ if (any_managed_resouces(data_set) == FALSE) { - crm_notice("Delaying fencing operations until there are resources to manage"); - need_stonith = FALSE; - } -@@ -1290,7 +1290,7 @@ stage6(pe_working_set_t * data_set) - } - - stonith_op = NULL; -- if (node->details->unclean && need_stonith) { -+ if (need_stonith && node->details->unclean && pe_can_fence(data_set, node)) { - pe_warn("Scheduling Node %s for STONITH", node->details->uname); - - stonith_op = pe_fence_op(node, NULL, data_set); -@@ -1578,7 +1578,7 @@ apply_remote_node_ordering(pe_working_set_t *data_set) - - /* when the container representing a remote node fails, the stop - * action for all the resources living in that container is implied -- * by the container stopping. This is similar to how fencing operations -+ * by the container stopping. This is similar to how fencing operations - * work for cluster nodes. */ - pe_set_action_bit(action, pe_action_pseudo); - custom_action_order(container, -@@ -1863,7 +1863,7 @@ pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_worki - if (interval == NULL || safe_str_eq(interval, "0")) { - pe_rsc_trace(rsc, "Skipping %s: interval", mon->uuid); - continue; -- } else if (safe_str_eq(mon->task, "cancel")) { -+ } else if (safe_str_eq(mon->task, RSC_CANCEL)) { - pe_rsc_trace(rsc, "Skipping %s: cancel", mon->uuid); - continue; - } -@@ -2342,11 +2342,14 @@ stage8(pe_working_set_t * data_set) - * But for now its the best way to detect (in CTS) when - * CIB resource updates are being lost - */ -- crm_crit("Cannot %s node '%s' because of %s:%s%s", -- action->node->details->unclean ? "fence" : "shut down", -- action->node->details->uname, action->rsc->id, -- is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked", -- is_set(action->rsc->flags, pe_rsc_failed) ? 
" failed" : ""); -+ if (is_set(data_set->flags, pe_flag_have_quorum) -+ || data_set->no_quorum_policy == no_quorum_ignore) { -+ crm_crit("Cannot %s node '%s' because of %s:%s%s", -+ action->node->details->unclean ? "fence" : "shut down", -+ action->node->details->uname, action->rsc->id, -+ is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked", -+ is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : ""); -+ } - } - - graph_element_from_action(action, data_set); -diff --git a/pengine/constraints.c b/pengine/constraints.c -index 7bd917f..056d566 100644 ---- a/pengine/constraints.c -+++ b/pengine/constraints.c -@@ -366,10 +366,10 @@ unpack_rsc_location(xmlNode * xml_obj, pe_working_set_t * data_set) - { - gboolean empty = TRUE; - rsc_to_node_t *location = NULL; -- const char *id_lh = crm_element_value(xml_obj, "rsc"); -+ const char *id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); - const char *id = crm_element_value(xml_obj, XML_ATTR_ID); - resource_t *rsc_lh = pe_find_resource(data_set->resources, id_lh); -- const char *node = crm_element_value(xml_obj, "node"); -+ const char *node = crm_element_value(xml_obj, XML_CIB_TAG_NODE); - const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); - const char *domain = crm_element_value(xml_obj, XML_CIB_TAG_DOMAIN); - const char *role = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); -@@ -1066,6 +1066,14 @@ order_rsc_sets(const char *id, xmlNode * set1, xmlNode * set2, enum pe_order_kin - action_2 = invert_action(action_2); - } - -+ if(safe_str_eq(RSC_STOP, action_1) || safe_str_eq(RSC_DEMOTE, action_1)) { -+ /* Assuming: A -> ( B || C) -> D -+ * The one-or-more logic only applies during the start/promote phase -+ * During shutdown neither B nor can shutdown until D is down, so simply turn require_all back on. -+ */ -+ require_all = TRUE; -+ } -+ - if (symmetrical == FALSE) { - flags = get_asymmetrical_flags(kind); - } else { -@@ -2356,6 +2364,11 @@ unpack_rsc_ticket(xmlNode * xml_obj, pe_working_set_t * data_set) - return FALSE; - } - -+ if (data_set->tickets == NULL) { -+ data_set->tickets = -+ g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); -+ } -+ - if (ticket_str == NULL) { - crm_config_err("Invalid constraint '%s': No ticket specified", id); - return FALSE; -diff --git a/pengine/graph.c b/pengine/graph.c -index 1282330..2e44ce5 100644 ---- a/pengine/graph.c -+++ b/pengine/graph.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -333,6 +333,26 @@ graph_update_action(action_t * first, action_t * then, node_t * node, enum pe_ac - update_action_flags(first, pe_action_print_always); /* dont care about changed */ - } - -+ if ((type & pe_order_implies_then -+ || type & pe_order_implies_first -+ || type & pe_order_restart) -+ && first->rsc -+ && safe_str_eq(first->task, RSC_STOP) -+ && is_not_set(first->rsc->flags, pe_rsc_managed) -+ && is_set(first->rsc->flags, pe_rsc_block) -+ && is_not_set(first->flags, pe_action_runnable)) { -+ -+ if (update_action_flags(then, pe_action_runnable | pe_action_clear)) { -+ changed |= pe_graph_updated_then; -+ } -+ -+ if (changed) { -+ pe_rsc_trace(then->rsc, "unmanaged left: %s then %s: changed", first->uuid, then->uuid); -+ } else { -+ crm_trace("unmanaged left: %s then %s", first->uuid, then->uuid); -+ } -+ } -+ - if (processed == FALSE) { - crm_trace("Constraint 0x%.6x not applicable", type); - } -@@ -568,7 +588,7 @@ get_router_node(action_t *action) - * This means some actions will get routed through the cluster - * node the connection rsc began on, and others are routed through - * the cluster node the connection rsc ends up on. -- * -+ * - * 1. stop, demote, migrate actions of resources living in the remote - * node _MUST_ occur _BEFORE_ the connection can move (these actions - * are all required before the remote rsc stop action can occur.) In -@@ -595,8 +615,8 @@ get_router_node(action_t *action) - return router_node; - } - --xmlNode * --action2xml(action_t * action, gboolean as_input) -+static xmlNode * -+action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) - { - gboolean needs_node_info = TRUE; - xmlNode *action_xml = NULL; -@@ -752,7 +772,14 @@ action2xml(action_t * action, gboolean as_input) - crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); - - g_hash_table_foreach(action->extra, hash2field, args_xml); -- if (action->rsc != NULL) { -+ if (action->rsc != NULL && action->node) { -+ GHashTable *p = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); -+ -+ get_rsc_attributes(p, action->rsc, action->node, data_set); -+ g_hash_table_foreach(p, hash2smartfield, args_xml); -+ -+ g_hash_table_destroy(p); -+ } else if(action->rsc) { - g_hash_table_foreach(action->rsc->parameters, hash2smartfield, args_xml); - } - -@@ -800,7 +827,7 @@ should_dump_action(action_t * action) - * probe_complete from running (presumably because it is only - * partially up) - * -- * For these reasons we tolerate such perversions -+ * For these reasons we tolerate such perversions - */ - - for (lpc = action->actions_after; lpc != NULL; lpc = lpc->next) { -@@ -940,7 +967,7 @@ should_dump_input(int last_action, action_t * action, action_wrapper_t * wrapper - if (action->rsc && safe_str_eq(action->task, RSC_MIGRATE)) { - /* Remove the orders like : - * "load_stopped_node2" -> "rscA_migrate_to node1" -- * which were created from: pengine/native.c: MigrateRsc() -+ * which were created from: pengine/native.c: MigrateRsc() - * order_actions(other, then, other_w->type); - */ - wrapper->type = pe_order_none; -@@ -1045,7 +1072,7 @@ graph_element_from_action(action_t * action, pe_working_set_t * data_set) - crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); - } - -- xml_action = action2xml(action, FALSE); -+ 
xml_action = action2xml(action, FALSE, data_set); - add_node_nocopy(set, crm_element_name(xml_action), xml_action); - - action->actions_before = g_list_sort(action->actions_before, sort_action_id); -@@ -1063,7 +1090,7 @@ graph_element_from_action(action_t * action, pe_working_set_t * data_set) - last_action = wrapper->action->id; - input = create_xml_node(in, "trigger"); - -- xml_action = action2xml(wrapper->action, TRUE); -+ xml_action = action2xml(wrapper->action, TRUE, data_set); - add_node_nocopy(input, crm_element_name(xml_action), xml_action); - } - } -diff --git a/pengine/main.c b/pengine/main.c -index 8b7b5e4..397cc61 100644 ---- a/pengine/main.c -+++ b/pengine/main.c -@@ -172,7 +172,7 @@ main(int argc, char **argv) - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_SHM, &ipc_callbacks); - if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - /* Create the mainloop and run it... */ -@@ -182,12 +182,12 @@ main(int argc, char **argv) - g_main_run(mainloop); - - crm_info("Exiting %s", crm_system_name); -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - void - pengine_shutdown(int nsig) - { - mainloop_del_ipc_server(ipcs); -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } -diff --git a/pengine/master.c b/pengine/master.c -index 78f2c93..23db809 100644 ---- a/pengine/master.c -+++ b/pengine/master.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
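The action2xml() change above now receives the working set and, when an action has both a resource and a node, serializes the resource's instance attributes as evaluated against that node (get_rsc_attributes) rather than the flat parameter table — so rule-based per-node parameters reach each serialized operation, which appears to be what the per-node-attrs regression test registered later in this patch exercises. A toy model of the merge semantics (illustrative Python, not the libpe API):

    def rsc_params(defaults, per_node_rules, node):
        # Merge semantics: static resource parameters first, then any
        # rule matching the action's node overrides them.
        params = dict(defaults)
        for rule_node, overrides in per_node_rules:
            if rule_node == node:
                params.update(overrides)
        return params

    assert rsc_params({"ip": "10.0.0.1"},
                      [("node2", {"ip": "10.0.0.2"})], "node1") == {"ip": "10.0.0.1"}
    assert rsc_params({"ip": "10.0.0.1"},
                      [("node2", {"ip": "10.0.0.2"})], "node2") == {"ip": "10.0.0.2"}
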
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -394,7 +394,7 @@ master_promotion_order(resource_t * rsc, pe_working_set_t * data_set) - } - - static gboolean --anonymous_known_on(resource_t * rsc, node_t * node) -+filter_anonymous_instance(resource_t * rsc, node_t * node) - { - GListPtr rIter = NULL; - char *key = clone_strip(rsc->id); -@@ -402,10 +402,35 @@ anonymous_known_on(resource_t * rsc, node_t * node) - - for (rIter = parent->children; rIter; rIter = rIter->next) { - resource_t *child = rIter->data; -+ resource_t *active = parent->fns->find_rsc(child, key, node, pe_find_clone|pe_find_current); - -- /* ->find_rsc() because we might be a cloned group -- * and knowing that other members of the group are -- * known here implies nothing -+ /* -+ * Look for an active instance on $node, if there is one, only it recieves the master score -+ * Use ->find_rsc() because we might be a cloned group -+ */ -+ if(rsc == active) { -+ pe_rsc_trace(rsc, "Found %s for %s active on %s: done", active->id, key, node->details->uname); -+ free(key); -+ return TRUE; -+ } else if(active) { -+ pe_rsc_trace(rsc, "Found %s for %s on %s: not %s", active->id, key, node->details->uname, rsc->id); -+ free(key); -+ return FALSE; -+ } else { -+ pe_rsc_trace(rsc, "%s on %s: not active", key, node->details->uname); -+ } -+ } -+ -+ for (rIter = parent->children; rIter; rIter = rIter->next) { -+ resource_t *child = rIter->data; -+ -+ /* -+ * We know its not running, but any score will still count if -+ * the instance has been probed on $node -+ * -+ * Again use ->find_rsc() because we might be a cloned group -+ * and knowing that other members of the group are known here -+ * implies nothing - */ - rsc = parent->fns->find_rsc(child, key, NULL, pe_find_clone); - pe_rsc_trace(rsc, "Checking %s for %s on %s", rsc->id, key, node->details->uname); -@@ -452,11 +477,11 @@ master_score(resource_t * rsc, node_t * node, int not_set_value) - node_t *match = pe_find_node_id(rsc->running_on, node->details->id); - node_t *known = pe_hash_table_lookup(rsc->known_on, node->details->id); - -- if (is_not_set(rsc->flags, pe_rsc_unique) && anonymous_known_on(rsc, node)) { -- pe_rsc_trace(rsc, "Anonymous clone %s is known on %s", rsc->id, node->details->uname); -+ if (is_not_set(rsc->flags, pe_rsc_unique) && filter_anonymous_instance(rsc, node)) { -+ pe_rsc_trace(rsc, "Anonymous clone %s is allowed on %s", rsc->id, node->details->uname); - - } else if (match == NULL && known == NULL) { -- pe_rsc_trace(rsc, "%s (aka. %s) is not known on %s - ignoring", rsc->id, -+ pe_rsc_trace(rsc, "%s (aka. %s) has been filtered on %s - ignoring", rsc->id, - rsc->clone_name, node->details->uname); - return score; - } -diff --git a/pengine/native.c b/pengine/native.c -index ac73ffa..45df696 100644 ---- a/pengine/native.c -+++ b/pengine/native.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -57,7 +57,7 @@ gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_s - - /* *INDENT-OFF* */ - enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { --/* Current State */ -+/* Current State */ - /* Next State: Unknown Stopped Started Slave Master */ - /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, - /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, -@@ -67,7 +67,7 @@ enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { - }; - - gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { --/* Current State */ -+/* Current State */ - /* Next State: Unknown Stopped Started Slave Master */ - /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, - /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, -@@ -1124,6 +1124,10 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) - pe_rsc_trace(rsc, "Recovering %s", rsc->id); - need_stop = TRUE; - -+ } else if (is_set(rsc->flags, pe_rsc_block)) { -+ pe_rsc_trace(rsc, "Block %s", rsc->id); -+ need_stop = TRUE; -+ - } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { - /* Recovery of a promoted resource */ - start = start_action(rsc, chosen, TRUE); -@@ -1148,7 +1152,8 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) - role = next_role; - } - -- while (rsc->role <= rsc->next_role && role != rsc->role) { -+ -+ while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { - next_role = rsc_state_matrix[role][rsc->role]; - pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), - rsc->id, need_stop ? 
" required" : ""); -@@ -1170,15 +1175,33 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) - role = next_role; - } - -- if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { -+ if(is_set(rsc->flags, pe_rsc_block)) { -+ pe_rsc_trace(rsc, "No monitor additional ops for blocked resource"); -+ -+ } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { -+ pe_rsc_trace(rsc, "Monitor ops for active resource"); - start = start_action(rsc, chosen, TRUE); - Recurring(rsc, start, chosen, data_set); - Recurring_Stopped(rsc, start, chosen, data_set); - } else { -+ pe_rsc_trace(rsc, "Monitor ops for in-active resource"); - Recurring_Stopped(rsc, NULL, NULL, data_set); - } - } - -+static void -+rsc_avoids_remote_nodes(resource_t *rsc) -+{ -+ GHashTableIter iter; -+ node_t *node = NULL; -+ g_hash_table_iter_init(&iter, rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -+ if (node->details->remote_rsc) { -+ node->weight = -INFINITY; -+ } -+ } -+} -+ - void - native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - { -@@ -1272,16 +1295,17 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - } - - if (rsc->is_remote_node || is_stonith) { -- GHashTableIter iter; -- node_t *node = NULL; -- g_hash_table_iter_init(&iter, rsc->allowed_nodes); -- while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -- /* don't allow remote nodes to run stonith devices -- * or remote connection resources.*/ -- if (node->details->remote_rsc) { -- node->weight = -INFINITY; -- } -- } -+ /* don't allow remote nodes to run stonith devices -+ * or remote connection resources.*/ -+ rsc_avoids_remote_nodes(rsc); -+ } -+ -+ /* If this rsc is a remote connection resource associated -+ * with a container ( which will most likely be a virtual guest ) -+ * do not allow the container to live on any remote-nodes. -+ * remote-nodes managing nested remote-nodes should not be allowed. */ -+ if (rsc->is_remote_node && rsc->container) { -+ rsc_avoids_remote_nodes(rsc->container); - } - } - -@@ -1983,11 +2007,28 @@ LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) - GListPtr gIter = NULL; - - CRM_CHECK(current != NULL,); -+ -+ key = stop_key(rsc); - for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; -+ action_t *stop_op = NULL; -+ gboolean allowed = FALSE; -+ -+ possible_matches = find_actions(rsc->actions, key, node); -+ if (possible_matches) { -+ stop_op = possible_matches->data; -+ g_list_free(possible_matches); -+ } - -- log_change("Stop %s\t(%s)", rsc->id, node->details->uname); -+ if (stop_op && (stop_op->flags & pe_action_runnable)) { -+ allowed = TRUE; -+ } -+ -+ log_change("Stop %s\t(%s%s)", rsc->id, node->details->uname, -+ allowed ? 
"" : " - blocked"); - } -+ -+ free(key); - } - - if (moving) { -@@ -2175,10 +2216,6 @@ NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * da - gboolean - DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set) - { --#if DELETE_THEN_REFRESH -- action_t *delete = NULL; -- action_t *refresh = NULL; --#endif - if (is_set(rsc->flags, pe_rsc_failed)) { - pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); - return FALSE; -@@ -2195,11 +2232,7 @@ DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * - - crm_notice("Removing %s from %s", rsc->id, node->details->uname); - --#if DELETE_THEN_REFRESH -- delete = delete_action(rsc, node, optional); --#else - delete_action(rsc, node, optional); --#endif - - new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, - optional ? pe_order_implies_then : pe_order_optional, data_set); -@@ -2207,15 +2240,6 @@ DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * - new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, - optional ? pe_order_implies_then : pe_order_optional, data_set); - --#if DELETE_THEN_REFRESH -- refresh = custom_action(NULL, strdup(CRM_OP_LRM_REFRESH), CRM_OP_LRM_REFRESH, -- node, FALSE, TRUE, data_set); -- -- add_hash_param(refresh->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); -- -- order_actions(delete, refresh, pe_order_optional); --#endif -- - return TRUE; - } - -@@ -2541,7 +2565,7 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto - * - * The extra notification here changes - * + C.healthy depends on C.notify -- * into: -+ * into: - * + C.healthy depends on C.notify' - * + C.notify' depends on STONITH' - * thus breaking the loop -@@ -2560,7 +2584,7 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto - - /* From Bug #1601, successful fencing must be an input to a failed resources stop action. - -- However given group(rA, rB) running on nodeX and B.stop has failed, -+ However given group(rA, rB) running on nodeX and B.stop has failed, - A := stop healthy resource (rA.stop) - B := stop failed resource (pseudo operation B.stop) - C := stonith nodeX -@@ -2574,12 +2598,12 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto - (marked as a pseudo op depending on the STONITH). 
- - TODO: Break the "A requires B" dependency in update_action() and re-enable this block -- -+ - } else if(is_stonith == FALSE) { - crm_info("Moving healthy resource %s" - " off %s before fencing", - rsc->id, node->details->uname); -- -+ - * stop healthy resources before the - * stonith op - * -diff --git a/pengine/pengine.c b/pengine/pengine.c -index 97d68df..99a81c6 100644 ---- a/pengine/pengine.c -+++ b/pengine/pengine.c -@@ -184,6 +184,7 @@ process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender) - } - - if (is_repoke == FALSE && series_wrap != 0) { -+ unlink(filename); - write_xml_file(xml_data, filename, HAVE_BZLIB_H); - write_last_sequence(PE_STATE_DIR, series[series_id].name, seq + 1, series_wrap); - } else { -@@ -210,29 +211,37 @@ do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * n - set_working_set_defaults(data_set); - data_set->input = xml_input; - data_set->now = now; -- if (data_set->now == NULL) { -- data_set->now = crm_time_new(NULL); -- } -+ - } else { - crm_trace("Already have status - reusing"); - } - -+ if (data_set->now == NULL) { -+ data_set->now = crm_time_new(NULL); -+ } -+ - crm_trace("Calculate cluster status"); - stage0(data_set); - -- gIter = data_set->resources; -- for (; gIter != NULL; gIter = gIter->next) { -- resource_t *rsc = (resource_t *) gIter->data; -+ if(is_not_set(data_set->flags, pe_flag_quick_location)) { -+ gIter = data_set->resources; -+ for (; gIter != NULL; gIter = gIter->next) { -+ resource_t *rsc = (resource_t *) gIter->data; - -- if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { -- continue; -+ if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { -+ continue; -+ } -+ rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level); - } -- rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level); - } - - crm_trace("Applying placement constraints"); - stage2(data_set); - -+ if(is_set(data_set->flags, pe_flag_quick_location)){ -+ return NULL; -+ } -+ - crm_trace("Create internal constraints"); - stage3(data_set); - -diff --git a/pengine/ptest.supp b/pengine/ptest.supp -deleted file mode 100644 -index f4ef2cc..0000000 ---- a/pengine/ptest.supp -+++ /dev/null -@@ -1,33 +0,0 @@ --# Valgrind suppressions for PE testing --{ -- Valgrind bug -- Memcheck:Addr8 -- fun:__strspn_sse42 -- fun:crm_get_msec --} -- --{ -- Ignore crm_system_name -- Memcheck:Leak -- fun:malloc -- fun:crm_strdup_fn -- fun:crm_log_init_worker -- fun:crm_log_init -- fun:main --} -- --{ -- libqb fixed upstream 1 -- Memcheck:Leak -- fun:realloc -- fun:_grow_bin_array -- fun:_qb_array_grow --} -- --{ -- libqb fixed upstream 2 -- Memcheck:Leak -- fun:calloc -- fun:qb_log_dcs_get -- fun:_qb_log_callsite_get --} -diff --git a/pengine/regression.core.sh.in b/pengine/regression.core.sh.in -index 685200a..03a6993 100644 ---- a/pengine/regression.core.sh.in -+++ b/pengine/regression.core.sh.in -@@ -1,15 +1,15 @@ - # Copyright (C) 2004 Andrew Beekhof -- # -+ # - # This program is free software; you can redistribute it and/or - # modify it under the terms of the GNU General Public - # License as published by the Free Software Foundation; either - # version 2.1 of the License, or (at your option) any later version. -- # -+ # - # This software is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # General Public License for more details. 
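Among the pengine.c changes above, process_pe_message() now unlinks the series file before rewriting it, so the fresh input lands in a new inode instead of being written over the old one in place. The same replace-not-overwrite pattern, sketched in isolation (an illustration of the filesystem behaviour only — the Pacemaker code itself writes XML via write_xml_file):

    import errno, os

    def replace_file(path, data):
        # Drop the old inode first: the new contents land in a fresh
        # file rather than overwriting the previous one in place.
        try:
            os.unlink(path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
        with open(path, "w") as f:
            f.write(data)
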
-- # -+ # - # You should have received a copy of the GNU General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -@@ -20,7 +20,7 @@ num_failed=0 - num_tests=0 - force_local=0 - VALGRIND_CMD="" --diff_opts="--ignore-all-space -u -N" -+diff_opts="--ignore-all-space --ignore-blank-lines -u -N" - - test_home=`dirname $0` - test_name=`basename $0` -@@ -47,19 +47,20 @@ io_dir=$test_home/test10 - test_binary=@abs_top_builddir@/tools/crm_simulate - PCMK_schema_directory=@abs_top_builddir@/xml - -+ - failed=$test_home/.regression.failed.diff - single_test= - - while true ; do - case "$1" in - -V|--verbose) verbose=1; shift;; -- -v|--valgrind) -+ -v|--valgrind) - export G_SLICE=always-malloc -- VALGRIND_CMD="valgrind -q --log-file=%q{valgrind_output} --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=$test_home/ptest.supp" -+ VALGRIND_CMD="valgrind -q --gen-suppressions=all --log-file=%q{valgrind_output} --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=@datadir@/@PACKAGE@/tests/valgrind-pcmk.suppressions" - test_binary= - shift;; -- --valgrind-dhat) -- VALGRIND_CMD="valgrind --log-file=%q{valgrind_output} --show-top-n=100 --num-callers=4 --time-stamp=yes --trace-children=no --tool=exp-dhat --suppressions=$test_home/ptest.supp" -+ --valgrind-dhat) -+ VALGRIND_CMD="valgrind --log-file=%q{valgrind_output} --show-top-n=100 --num-callers=4 --time-stamp=yes --trace-children=no --tool=exp-dhat" - test_binary= - shift;; - --valgrind-skip-output) -@@ -67,6 +68,7 @@ while true ; do - shift;; - --run) single_test="$2"; shift; shift; break;; - -b|--binary) test_binary=$2; PCMK_schema_directory=""; shift; shift;; -+ -i|--io-dir) io_dir=$2; shift; shift;; - -?|--help) echo "$0 [--binary name] [--force-local]"; shift; exit 0;; - --) shift ; break ;; - "") break;; -@@ -243,7 +245,7 @@ function do_test { - cp "$dot_output" "$dot_expected" - cp "$score_output" "$scores" - cp "$summary_output" "$summary" -- info " Created expected outputs" -+ info " Created expected outputs" - fi - - diff $diff_opts $summary $summary_output >/dev/null -@@ -262,7 +264,7 @@ function do_test { - diff $diff_opts $dot_expected $dot_output 2>/dev/null >> $failed - echo "" >> $failed - did_fail=1 -- else -+ else - rm -f $dot_output - fi - -@@ -274,7 +276,7 @@ function do_test { - echo "" >> $failed - did_fail=1 - fi -- -+ - diff $diff_opts $scores $score_output >/dev/null - rc=$? - if [ $rc != 0 ]; then -@@ -312,5 +314,6 @@ else - failed=.single - > $failed - do_test $single_test "Single shot" $* -+ cat $failed - exit $? - fi -diff --git a/pengine/regression.sh b/pengine/regression.sh -index 447b563..16ab007 100755 ---- a/pengine/regression.sh -+++ b/pengine/regression.sh -@@ -1,24 +1,24 @@ - #!/bin/bash - - # Copyright (C) 2004 Andrew Beekhof -- # -+ # - # This program is free software; you can redistribute it and/or - # modify it under the terms of the GNU General Public - # License as published by the Free Software Foundation; either - # version 2 of the License, or (at your option) any later version. -- # -+ # - # This software is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # General Public License for more details. 
-- # -+ # - # You should have received a copy of the GNU General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - # - - core=`dirname $0` --. $core/regression.core.sh -+. $core/regression.core.sh || exit 1 - - create_mode="true" - info Generating test outputs for these tests... -@@ -121,6 +121,7 @@ do_test one-or-more-4 "D cannot start because of target-role" - do_test one-or-more-5 "Start A and F even though C and D are stopped" - do_test one-or-more-6 "Leave A running even though B is stopped" - do_test one-or-more-7 "Leave A running even though C is stopped" -+do_test bug-5140-require-all-false "Allow basegrp:0 to stop" - - echo "" - do_test order1 "Order start 1 " -@@ -180,6 +181,7 @@ do_test attrs5 "string: not_exists " - do_test attrs6 "is_dc: true " - do_test attrs7 "is_dc: false " - do_test attrs8 "score_attribute " -+do_test per-node-attrs "Per node resource parameters" - - echo "" - do_test mon-rsc-1 "Schedule Monitor - start" -@@ -198,6 +200,10 @@ do_test rec-rsc-6 "Resource Recover - multiple - restart" - do_test rec-rsc-7 "Resource Recover - multiple - stop " - do_test rec-rsc-8 "Resource Recover - multiple - block " - do_test rec-rsc-9 "Resource Recover - group/group" -+do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor" -+do_test stop-failure-no-quorum "Stop failure without quorum" -+do_test stop-failure-no-fencing "Stop failure without fencing available" -+do_test stop-failure-with-fencing "Stop failure with fencing available" - - echo "" - do_test quorum-1 "No quorum - ignore" -@@ -274,8 +280,8 @@ echo "" - do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" - do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" - do_test clone-anon-failcount "Merge failcounts for anonymous clones" --do_test inc0 "Incarnation start" --do_test inc1 "Incarnation start order" -+do_test inc0 "Incarnation start" -+do_test inc1 "Incarnation start order" - do_test inc2 "Incarnation silent restart, stop, move" - do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" - do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" -@@ -285,8 +291,8 @@ do_test inc7 "Clone colocation" - do_test inc8 "Clone anti-colocation" - do_test inc9 "Non-unique clone" - do_test inc10 "Non-unique clone (stop)" --do_test inc11 "Primitive colocation with clones" --do_test inc12 "Clone shutdown" -+do_test inc11 "Primitive colocation with clones" -+do_test inc12 "Clone shutdown" - do_test cloned-group "Make sure only the correct number of cloned groups are started" - do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" - do_test clone-max-zero "Orphan processing with clone-max=0" -@@ -300,7 +306,7 @@ do_test clone-colocate-instance-1 "Colocation with a specific clone instance (ne - do_test clone-colocate-instance-2 "Colocation with a specific clone instance" - do_test clone-order-instance "Ordering with specific clone instances" - do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" --do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" -+do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" - do_test bug-lf-2544 "Balanced clone placement" - do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" - do_test bug-lf-2574 "Avoid clone shuffle" -@@ -323,7 
+329,7 @@ do_test novell-239082 "Demote/Promote ordering" - do_test novell-239087 "Stable master placement" - do_test master-12 "Promotion based solely on rsc_location constraints" - do_test master-13 "Include preferences of colocated resources when placing master" --do_test master-demote "Ordering when actions depends on demoting a slave resource" -+do_test master-demote "Ordering when actions depends on demoting a slave resource" - do_test master-ordering "Prevent resources from starting that need a master" - do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" - do_test master-group "Promotion of cloned groups" -@@ -341,7 +347,7 @@ do_test master-promotion-constraint "Mandatory master colocation constraints" - do_test unmanaged-master "Ensure role is preserved for unmanaged resources" - do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" - do_test master-demote-2 "Demote does not clear past failure" --do_test master-move "Move master based on failure of colocated group" -+do_test master-move "Move master based on failure of colocated group" - do_test master-probed-score "Observe the promotion score of probed resources" - do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint" - do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" -@@ -349,6 +355,8 @@ do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted w - do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint" - do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion." - do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive" -+do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score" -+do_test master-demote-block "Block promotion if demote fails with on-fail=block" - - echo "" - do_test history-1 "Correctly parse stateful-1 resource state" -@@ -360,6 +368,10 @@ do_test managed-2 "Not managed - up " - do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" - do_test bug-5028-detach "Ensure detach still works" - do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" -+do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged " -+do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged " -+do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged " -+do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged " - - echo "" - do_test interleave-0 "Interleave (reference)" -@@ -433,6 +445,7 @@ do_test bug-5025-2 "Make sure clear failcount action isn't set when config does - do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" - do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed." 
- do_test failcount "Ensure failcounts are correctly expired" -+do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present" - do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart" - do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop" - do_test bug-5059 "No need to restart p_stateful1:*" -@@ -567,7 +580,7 @@ do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" - do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" - do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" - do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" -- -+ - echo"" - do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" - do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" -@@ -594,8 +607,8 @@ do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" - do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" - do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" - do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" -- --echo"" -+ -+echo"" - do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" - do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" - do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" -@@ -673,6 +686,7 @@ do_test container-group-4 "Container in group - reached migration-threshold" - echo "" - do_test whitebox-fail1 "Fail whitebox container rsc." - do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection." -+do_test whitebox-fail3 "Failed containers should not run nested on remote nodes." 
- do_test whitebox-start "Start whitebox container with resources assigned to it" - do_test whitebox-stop "Stop whitebox container with resources assigned to it" - do_test whitebox-move "Move whitebox container with resources assigned to it" -diff --git a/pengine/test10/1360.summary b/pengine/test10/1360.summary -index 04ec941..946c828 100644 ---- a/pengine/test10/1360.summary -+++ b/pengine/test10/1360.summary -@@ -6,6 +6,7 @@ Online: [ ssgtest1a ssgtest1b ] - VIP (ocf::testing:VIP-RIP.sh): Started ssgtest1a - Clone Set: dolly [dollies] - Started: [ ssgtest1a ] -+ Stopped: [ ssgtest1b ] - - Transition Summary: - * Move dollies:0 (Started ssgtest1a -> ssgtest1b) -@@ -26,4 +27,5 @@ Online: [ ssgtest1a ssgtest1b ] - VIP (ocf::testing:VIP-RIP.sh): Started ssgtest1a - Clone Set: dolly [dollies] - Started: [ ssgtest1b ] -+ Stopped: [ ssgtest1a ] - -diff --git a/pengine/test10/bug-1572-1.scores b/pengine/test10/bug-1572-1.scores -index 30d1aed..1aecc90 100644 ---- a/pengine/test10/bug-1572-1.scores -+++ b/pengine/test10/bug-1572-1.scores -@@ -2,7 +2,7 @@ Allocation scores: - clone_color: ms_drbd_7788 allocation score on arc-dknightlx: 0 - clone_color: ms_drbd_7788 allocation score on arc-tkincaidlx.wsicorp.com: 0 - clone_color: rsc_drbd_7788:0 allocation score on arc-dknightlx: 1 --clone_color: rsc_drbd_7788:0 allocation score on arc-tkincaidlx.wsicorp.com: 100 -+clone_color: rsc_drbd_7788:0 allocation score on arc-tkincaidlx.wsicorp.com: 0 - clone_color: rsc_drbd_7788:1 allocation score on arc-dknightlx: 0 - clone_color: rsc_drbd_7788:1 allocation score on arc-tkincaidlx.wsicorp.com: 101 - group_color: IPaddr_147_81_84_133 allocation score on arc-dknightlx: 0 -diff --git a/pengine/test10/bug-1572-1.summary b/pengine/test10/bug-1572-1.summary -index 7446e92..4280f7b 100644 ---- a/pengine/test10/bug-1572-1.summary -+++ b/pengine/test10/bug-1572-1.summary -@@ -76,7 +76,7 @@ Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] - - Master/Slave Set: ms_drbd_7788 [rsc_drbd_7788] - Masters: [ arc-tkincaidlx.wsicorp.com ] -- Stopped: [ rsc_drbd_7788:1 ] -+ Stopped: [ arc-dknightlx ] - Resource Group: grp_pgsql_mirror - fs_mirror (ocf::heartbeat:Filesystem): Started arc-tkincaidlx.wsicorp.com - pgsql_5555 (ocf::heartbeat:pgsql): Started arc-tkincaidlx.wsicorp.com -diff --git a/pengine/test10/bug-1572-2.summary b/pengine/test10/bug-1572-2.summary -index d93372d..6174027 100644 ---- a/pengine/test10/bug-1572-2.summary -+++ b/pengine/test10/bug-1572-2.summary -@@ -52,7 +52,7 @@ Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] - - Master/Slave Set: ms_drbd_7788 [rsc_drbd_7788] - Slaves: [ arc-tkincaidlx.wsicorp.com ] -- Stopped: [ rsc_drbd_7788:1 ] -+ Stopped: [ arc-dknightlx ] - Resource Group: grp_pgsql_mirror - fs_mirror (ocf::heartbeat:Filesystem): Stopped - pgsql_5555 (ocf::heartbeat:pgsql): Stopped -diff --git a/pengine/test10/bug-1765.scores b/pengine/test10/bug-1765.scores -index af90cc6..28f19e1 100644 ---- a/pengine/test10/bug-1765.scores -+++ b/pengine/test10/bug-1765.scores -@@ -1,9 +1,9 @@ - Allocation scores: - clone_color: drbd0:0 allocation score on sles236: 76 - clone_color: drbd0:0 allocation score on sles238: 75 --clone_color: drbd0:1 allocation score on sles236: 10 -+clone_color: drbd0:1 allocation score on sles236: 0 - clone_color: drbd0:1 allocation score on sles238: 5 --clone_color: drbd1:0 allocation score on sles236: 10 -+clone_color: drbd1:0 allocation score on sles236: 0 - clone_color: drbd1:0 allocation score on sles238: 76 - clone_color: drbd1:1 allocation score on sles236: 
76 - clone_color: drbd1:1 allocation score on sles238: 0 -@@ -21,7 +21,7 @@ native_color: drbd0:0 allocation score on sles236: 76 - native_color: drbd0:0 allocation score on sles238: 75 - native_color: drbd0:1 allocation score on sles236: -INFINITY - native_color: drbd0:1 allocation score on sles238: 5 --native_color: drbd1:0 allocation score on sles236: 10 -+native_color: drbd1:0 allocation score on sles236: 0 - native_color: drbd1:0 allocation score on sles238: 76 - native_color: drbd1:1 allocation score on sles236: 76 - native_color: drbd1:1 allocation score on sles238: -INFINITY -diff --git a/pengine/test10/bug-1765.summary b/pengine/test10/bug-1765.summary -index d3b8c41..593bac3 100644 ---- a/pengine/test10/bug-1765.summary -+++ b/pengine/test10/bug-1765.summary -@@ -4,7 +4,7 @@ Online: [ sles236 sles238 ] - - Master/Slave Set: ms-drbd0 [drbd0] - Masters: [ sles236 ] -- Stopped: [ drbd0:1 ] -+ Stopped: [ sles238 ] - Master/Slave Set: ms-drbd1 [drbd1] - Masters: [ sles236 ] - Slaves: [ sles238 ] -diff --git a/pengine/test10/bug-5014-CLONE-A-start-B-start.summary b/pengine/test10/bug-5014-CLONE-A-start-B-start.summary -index 9dad260..84a4e4a 100644 ---- a/pengine/test10/bug-5014-CLONE-A-start-B-start.summary -+++ b/pengine/test10/bug-5014-CLONE-A-start-B-start.summary -@@ -3,9 +3,9 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: clone1 [ClusterIP] -- Stopped: [ ClusterIP:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: clone2 [ClusterIP2] -- Stopped: [ ClusterIP2:0 ] -+ Stopped: [ fc16-builder ] - - Transition Summary: - * Start ClusterIP:0 (fc16-builder) -diff --git a/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary b/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary -index e9c18f9..1020124 100644 ---- a/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary -+++ b/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary -@@ -20,7 +20,7 @@ Revised cluster status: - Online: [ fc16-builder ] - - Clone Set: clone1 [ClusterIP] -- Stopped: [ ClusterIP:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: clone2 [ClusterIP2] - Started: [ fc16-builder ] - -diff --git a/pengine/test10/bug-5025-1.summary b/pengine/test10/bug-5025-1.summary -index 6324f20..9f9baa2 100644 ---- a/pengine/test10/bug-5025-1.summary -+++ b/pengine/test10/bug-5025-1.summary -@@ -10,7 +10,7 @@ Transition Summary: - * Reload A (Started fc16-builder) - - Executing cluster transition: -- * Cluster action: clear_failcount on fc16-builder -+ * Cluster action: clear_failcount for A on fc16-builder - * Resource action: A reload on fc16-builder - * Resource action: A monitor=30000 on fc16-builder - -diff --git a/pengine/test10/bug-5025-3.summary b/pengine/test10/bug-5025-3.summary -index de1d654..0d843d2 100644 ---- a/pengine/test10/bug-5025-3.summary -+++ b/pengine/test10/bug-5025-3.summary -@@ -12,7 +12,7 @@ Transition Summary: - - Executing cluster transition: - * Resource action: A stop on fc16-builder -- * Cluster action: clear_failcount on fc16-builder -+ * Cluster action: clear_failcount for A on fc16-builder - * Resource action: A start on fc16-builder - * Resource action: A monitor=30000 on fc16-builder - * Pseudo action: all_stopped -diff --git a/pengine/test10/bug-5025-4.summary b/pengine/test10/bug-5025-4.summary -index daa6bea..f21a5e4 100644 ---- a/pengine/test10/bug-5025-4.summary -+++ b/pengine/test10/bug-5025-4.summary -@@ -10,7 +10,7 @@ Transition Summary: - - Executing cluster transition: - * Resource action: remote-node start on 18builder -- * Cluster action: clear_failcount on 18builder 
-+ * Cluster action: clear_failcount for remote-node on 18builder - * Resource action: remote-node monitor=30000 on 18builder - - Revised cluster status: -diff --git a/pengine/test10/bug-5028-bottom.dot b/pengine/test10/bug-5028-bottom.dot -index 93eef66..e25e097 100644 ---- a/pengine/test10/bug-5028-bottom.dot -+++ b/pengine/test10/bug-5028-bottom.dot -@@ -1,16 +1,10 @@ - digraph "g" { - "all_stopped" [ style=bold color="green" fontcolor="orange"] - "do_shutdown bl460g6a" [ style=dashed color="red" fontcolor="black"] --"dummy-g_running_0" [ style=dashed color="red" fontcolor="orange"] --"dummy-g_start_0" -> "dummy-g_running_0" [ style = dashed] --"dummy-g_start_0" [ style=bold color="green" fontcolor="orange"] - "dummy-g_stop_0" -> "dummy-g_stopped_0" [ style = dashed] - "dummy-g_stop_0" -> "dummy02_stop_0 bl460g6a" [ style = bold] - "dummy-g_stop_0" [ style=bold color="green" fontcolor="orange"] --"dummy-g_stopped_0" -> "dummy-g_start_0" [ style = dashed] - "dummy-g_stopped_0" [ style=dashed color="red" fontcolor="orange"] --"dummy02_start_0 " -> "dummy-g_running_0" [ style = dashed] --"dummy02_start_0 " [ style=dashed color="red" fontcolor="black"] - "dummy02_stop_0 bl460g6a" -> "all_stopped" [ style = bold] - "dummy02_stop_0 bl460g6a" -> "do_shutdown bl460g6a" [ style = dashed] - "dummy02_stop_0 bl460g6a" -> "dummy-g_stopped_0" [ style = dashed] -diff --git a/pengine/test10/bug-5028-bottom.exp b/pengine/test10/bug-5028-bottom.exp -index 1264da7..900c024 100644 ---- a/pengine/test10/bug-5028-bottom.exp -+++ b/pengine/test10/bug-5028-bottom.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -9,26 +9,18 @@ - - - -- -- -- -- -- -- -- -- -- -+ - - - - - - -- -+ - - - -- -+ - - - -@@ -36,7 +28,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bug-5028-bottom.summary b/pengine/test10/bug-5028-bottom.summary -index 6e3d895..479fb18 100644 ---- a/pengine/test10/bug-5028-bottom.summary -+++ b/pengine/test10/bug-5028-bottom.summary -@@ -11,7 +11,6 @@ Transition Summary: - - Executing cluster transition: - * Pseudo action: dummy-g_stop_0 -- * Pseudo action: dummy-g_start_0 - * Resource action: dummy02 stop on bl460g6a - * Pseudo action: all_stopped - -diff --git a/pengine/test10/bug-5028-detach.exp b/pengine/test10/bug-5028-detach.exp -index b91b31d..1c3374b 100644 ---- a/pengine/test10/bug-5028-detach.exp -+++ b/pengine/test10/bug-5028-detach.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bug-5028.exp b/pengine/test10/bug-5028.exp -index d7b91d1..249c666 100644 ---- a/pengine/test10/bug-5028.exp -+++ b/pengine/test10/bug-5028.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -9,7 +9,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bug-5059.scores b/pengine/test10/bug-5059.scores -index 845f70a..d8295c6 100644 ---- a/pengine/test10/bug-5059.scores -+++ b/pengine/test10/bug-5059.scores -@@ -4,7 +4,7 @@ clone_color: c_dummy allocation score on gluster02.h: 0 - clone_color: c_dummy allocation score on gluster03.h: 0 - clone_color: c_dummy allocation score on gluster04.h: 0 - clone_color: g_stateful:0 allocation score on gluster01.h: 5 --clone_color: g_stateful:0 allocation score on gluster02.h: 10 -+clone_color: g_stateful:0 allocation score on gluster02.h: 0 - clone_color: g_stateful:0 allocation score on gluster03.h: 0 - clone_color: g_stateful:0 allocation score on gluster04.h: 0 - clone_color: g_stateful:1 allocation score on gluster01.h: 0 -diff --git a/pengine/test10/bug-5059.summary b/pengine/test10/bug-5059.summary -index b854e43..b93e4e6 100644 ---- 
a/pengine/test10/bug-5059.summary -+++ b/pengine/test10/bug-5059.summary -@@ -6,13 +6,15 @@ OFFLINE: [ gluster04.h ] - - Master/Slave Set: ms_stateful [g_stateful] - Resource Group: g_stateful:0 -- p_stateful1:0 (ocf::pacemaker:Stateful): Started gluster01.h -- p_stateful2:0 (ocf::pacemaker:Stateful): Stopped -+ p_stateful1 (ocf::pacemaker:Stateful): Started gluster01.h -+ p_stateful2 (ocf::pacemaker:Stateful): Stopped - Resource Group: g_stateful:1 -- p_stateful1:1 (ocf::pacemaker:Stateful): Started gluster02.h -- p_stateful2:1 (ocf::pacemaker:Stateful): Stopped -+ p_stateful1 (ocf::pacemaker:Stateful): Started gluster02.h -+ p_stateful2 (ocf::pacemaker:Stateful): Stopped -+ Stopped: [ gluster03.h gluster04.h ] - Clone Set: c_dummy [p_dummy1] - Started: [ gluster01.h gluster02.h ] -+ Stopped: [ gluster03.h gluster04.h ] - - Transition Summary: - * Promote p_stateful1:0 (Slave -> Master gluster01.h) -@@ -70,6 +72,8 @@ OFFLINE: [ gluster04.h ] - Master/Slave Set: ms_stateful [g_stateful] - Masters: [ gluster01.h ] - Slaves: [ gluster02.h ] -+ Stopped: [ gluster03.h gluster04.h ] - Clone Set: c_dummy [p_dummy1] - Started: [ gluster01.h gluster02.h ] -+ Stopped: [ gluster03.h gluster04.h ] - -diff --git a/pengine/test10/bug-5069-op-disabled.summary b/pengine/test10/bug-5069-op-disabled.summary -index c7c47b9..6524e8e 100644 ---- a/pengine/test10/bug-5069-op-disabled.summary -+++ b/pengine/test10/bug-5069-op-disabled.summary -@@ -8,7 +8,7 @@ OFFLINE: [ fc16-builder fc16-builder3 ] - Transition Summary: - - Executing cluster transition: -- * Cluster action: clear_failcount on fc16-builder2 -+ * Cluster action: clear_failcount for A on fc16-builder2 - * Resource action: A cancel=10000 on fc16-builder2 - - Revised cluster status: -diff --git a/pengine/test10/bug-5140-require-all-false.dot b/pengine/test10/bug-5140-require-all-false.dot -new file mode 100644 -index 0000000..8e7f299 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.dot -@@ -0,0 +1,25 @@ -+digraph "g" { -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"baseclone_stop_0" -> "baseclone_stopped_0" [ style = bold] -+"baseclone_stop_0" -> "basegrp:0_stop_0" [ style = bold] -+"baseclone_stop_0" [ style=bold color="green" fontcolor="orange"] -+"baseclone_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"basegrp:0_stop_0" -> "basegrp:0_stopped_0" [ style = bold] -+"basegrp:0_stop_0" -> "clvmd_stop_0 hex-2" [ style = bold] -+"basegrp:0_stop_0" -> "dlm_stop_0 hex-2" [ style = bold] -+"basegrp:0_stop_0" -> "o2cb_stop_0 hex-2" [ style = bold] -+"basegrp:0_stop_0" [ style=bold color="green" fontcolor="orange"] -+"basegrp:0_stopped_0" -> "baseclone_stopped_0" [ style = bold] -+"basegrp:0_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"clvmd_stop_0 hex-2" -> "all_stopped" [ style = bold] -+"clvmd_stop_0 hex-2" -> "basegrp:0_stopped_0" [ style = bold] -+"clvmd_stop_0 hex-2" -> "dlm_stop_0 hex-2" [ style = bold] -+"clvmd_stop_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"dlm_stop_0 hex-2" -> "all_stopped" [ style = bold] -+"dlm_stop_0 hex-2" -> "basegrp:0_stopped_0" [ style = bold] -+"dlm_stop_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"o2cb_stop_0 hex-2" -> "all_stopped" [ style = bold] -+"o2cb_stop_0 hex-2" -> "basegrp:0_stopped_0" [ style = bold] -+"o2cb_stop_0 hex-2" -> "clvmd_stop_0 hex-2" [ style = bold] -+"o2cb_stop_0 hex-2" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/bug-5140-require-all-false.exp 
b/pengine/test10/bug-5140-require-all-false.exp -new file mode 100644 -index 0000000..a2433e1 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.exp -@@ -0,0 +1,122 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/bug-5140-require-all-false.scores b/pengine/test10/bug-5140-require-all-false.scores -new file mode 100644 -index 0000000..0bbb768 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.scores -@@ -0,0 +1,275 @@ -+Allocation scores: -+clone_color: baseclone allocation score on hex-1: 0 -+clone_color: baseclone allocation score on hex-2: 0 -+clone_color: baseclone allocation score on hex-3: 0 -+clone_color: basegrp:0 allocation score on hex-1: 0 -+clone_color: basegrp:0 allocation score on hex-2: 0 -+clone_color: basegrp:0 allocation score on hex-3: 0 -+clone_color: basegrp:1 allocation score on hex-1: 0 -+clone_color: basegrp:1 allocation score on hex-2: 0 -+clone_color: basegrp:1 allocation score on hex-3: 0 -+clone_color: basegrp:2 allocation score on hex-1: 0 -+clone_color: basegrp:2 allocation score on hex-2: 0 -+clone_color: basegrp:2 allocation score on hex-3: 0 -+clone_color: clvmd:0 allocation score on hex-1: 0 -+clone_color: clvmd:0 allocation score on hex-2: 1 -+clone_color: clvmd:0 allocation score on hex-3: 0 -+clone_color: clvmd:1 allocation score on hex-1: 0 -+clone_color: clvmd:1 allocation score on hex-2: 0 -+clone_color: clvmd:1 allocation score on hex-3: 0 -+clone_color: clvmd:2 allocation score on hex-1: 0 -+clone_color: clvmd:2 allocation score on hex-2: 0 -+clone_color: clvmd:2 allocation score on hex-3: 0 -+clone_color: dlm:0 allocation score on hex-1: 0 -+clone_color: dlm:0 allocation score on hex-2: 1 -+clone_color: dlm:0 allocation score on hex-3: 0 -+clone_color: dlm:1 allocation score on hex-1: 0 -+clone_color: dlm:1 allocation score on hex-2: 0 -+clone_color: dlm:1 allocation score on hex-3: 0 -+clone_color: dlm:2 allocation score on hex-1: 0 -+clone_color: dlm:2 allocation score on hex-2: 0 -+clone_color: dlm:2 allocation score on hex-3: 0 -+clone_color: drbd-r0:0 allocation score on hex-1: 0 -+clone_color: drbd-r0:0 allocation score on hex-2: 0 -+clone_color: drbd-r0:0 allocation score on hex-3: -INFINITY -+clone_color: drbd-r0:1 allocation score on hex-1: 0 -+clone_color: drbd-r0:1 allocation score on hex-2: 0 -+clone_color: drbd-r0:1 allocation score on hex-3: -INFINITY -+clone_color: drbd-r1:0 allocation score on hex-1: -INFINITY -+clone_color: drbd-r1:0 allocation score on hex-2: 0 -+clone_color: drbd-r1:0 allocation score on hex-3: 0 -+clone_color: drbd-r1:1 allocation score on hex-1: -INFINITY -+clone_color: drbd-r1:1 allocation score on hex-2: 0 -+clone_color: drbd-r1:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:0 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:0 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:0 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:2 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:2 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:2 allocation 
score on hex-3: 0 -+clone_color: fs-ocfs-2:0 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:0 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:0 allocation score on hex-3: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-3: 0 -+clone_color: fs2 allocation score on hex-1: 0 -+clone_color: fs2 allocation score on hex-2: 0 -+clone_color: fs2 allocation score on hex-3: 0 -+clone_color: ms-r0 allocation score on hex-1: 0 -+clone_color: ms-r0 allocation score on hex-2: 0 -+clone_color: ms-r0 allocation score on hex-3: -INFINITY -+clone_color: ms-r1 allocation score on hex-1: -INFINITY -+clone_color: ms-r1 allocation score on hex-2: 0 -+clone_color: ms-r1 allocation score on hex-3: 0 -+clone_color: o2cb:0 allocation score on hex-1: 0 -+clone_color: o2cb:0 allocation score on hex-2: 1 -+clone_color: o2cb:0 allocation score on hex-3: 0 -+clone_color: o2cb:1 allocation score on hex-1: 0 -+clone_color: o2cb:1 allocation score on hex-2: 0 -+clone_color: o2cb:1 allocation score on hex-3: 0 -+clone_color: o2cb:2 allocation score on hex-1: 0 -+clone_color: o2cb:2 allocation score on hex-2: 0 -+clone_color: o2cb:2 allocation score on hex-3: 0 -+clone_color: vg1:0 allocation score on hex-1: 0 -+clone_color: vg1:0 allocation score on hex-2: 0 -+clone_color: vg1:0 allocation score on hex-3: 0 -+clone_color: vg1:1 allocation score on hex-1: 0 -+clone_color: vg1:1 allocation score on hex-2: 0 -+clone_color: vg1:1 allocation score on hex-3: 0 -+clone_color: vg1:2 allocation score on hex-1: 0 -+clone_color: vg1:2 allocation score on hex-2: 0 -+clone_color: vg1:2 allocation score on hex-3: 0 -+drbd-r0:0 promotion score on none: 0 -+drbd-r0:1 promotion score on none: 0 -+drbd-r1:0 promotion score on none: 0 -+drbd-r1:1 promotion score on none: 0 -+group_color: basegrp:0 allocation score on hex-1: -INFINITY -+group_color: basegrp:0 allocation score on hex-2: -INFINITY -+group_color: basegrp:0 allocation score on hex-3: -INFINITY -+group_color: basegrp:1 allocation score on hex-1: -INFINITY -+group_color: basegrp:1 allocation score on hex-2: -INFINITY -+group_color: basegrp:1 allocation score on hex-3: -INFINITY -+group_color: basegrp:2 allocation score on hex-1: -INFINITY -+group_color: basegrp:2 allocation score on hex-2: -INFINITY -+group_color: basegrp:2 allocation score on hex-3: -INFINITY -+group_color: clvmd:0 allocation score on hex-1: -INFINITY -+group_color: clvmd:0 allocation score on hex-2: -INFINITY -+group_color: clvmd:0 allocation score on hex-3: -INFINITY -+group_color: clvmd:1 allocation score on hex-1: -INFINITY -+group_color: clvmd:1 allocation score on hex-2: -INFINITY -+group_color: clvmd:1 allocation score on hex-3: -INFINITY -+group_color: clvmd:2 allocation score on hex-1: -INFINITY -+group_color: clvmd:2 allocation score on hex-2: -INFINITY -+group_color: clvmd:2 allocation score on hex-3: -INFINITY -+group_color: dlm:0 allocation score on hex-1: -INFINITY -+group_color: dlm:0 allocation score on hex-2: -INFINITY -+group_color: dlm:0 allocation score on hex-3: -INFINITY -+group_color: dlm:1 allocation score on hex-1: -INFINITY -+group_color: dlm:1 allocation score on hex-2: -INFINITY -+group_color: dlm:1 allocation score on hex-3: -INFINITY -+group_color: dlm:2 allocation score on hex-1: -INFINITY 
-+group_color: dlm:2 allocation score on hex-2: -INFINITY -+group_color: dlm:2 allocation score on hex-3: -INFINITY -+group_color: dummy1 allocation score on hex-1: 0 -+group_color: dummy1 allocation score on hex-2: 0 -+group_color: dummy1 allocation score on hex-3: 0 -+group_color: dummy2 allocation score on hex-1: 0 -+group_color: dummy2 allocation score on hex-2: 0 -+group_color: dummy2 allocation score on hex-3: 0 -+group_color: fs-md0 allocation score on hex-1: 0 -+group_color: fs-md0 allocation score on hex-2: 0 -+group_color: fs-md0 allocation score on hex-3: 0 -+group_color: fs-ocfs-1:0 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:0 allocation score on hex-2: -INFINITY -+group_color: fs-ocfs-1:0 allocation score on hex-3: -INFINITY -+group_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:1 allocation score on hex-2: -INFINITY -+group_color: fs-ocfs-1:1 allocation score on hex-3: -INFINITY -+group_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY -+group_color: fs-ocfs-1:2 allocation score on hex-3: -INFINITY -+group_color: fs-r0 allocation score on hex-1: 0 -+group_color: fs-r0 allocation score on hex-2: -INFINITY -+group_color: fs-r0 allocation score on hex-3: 0 -+group_color: md0 allocation score on hex-1: 0 -+group_color: md0 allocation score on hex-2: 0 -+group_color: md0 allocation score on hex-3: 0 -+group_color: md0-group allocation score on hex-1: 0 -+group_color: md0-group allocation score on hex-2: 0 -+group_color: md0-group allocation score on hex-3: 0 -+group_color: o2cb:0 allocation score on hex-1: -INFINITY -+group_color: o2cb:0 allocation score on hex-2: -INFINITY -+group_color: o2cb:0 allocation score on hex-3: -INFINITY -+group_color: o2cb:1 allocation score on hex-1: -INFINITY -+group_color: o2cb:1 allocation score on hex-2: -INFINITY -+group_color: o2cb:1 allocation score on hex-3: -INFINITY -+group_color: o2cb:2 allocation score on hex-1: -INFINITY -+group_color: o2cb:2 allocation score on hex-2: -INFINITY -+group_color: o2cb:2 allocation score on hex-3: -INFINITY -+group_color: r0-group allocation score on hex-1: 0 -+group_color: r0-group allocation score on hex-2: 0 -+group_color: r0-group allocation score on hex-3: 0 -+group_color: vg-md0 allocation score on hex-1: 0 -+group_color: vg-md0 allocation score on hex-2: 0 -+group_color: vg-md0 allocation score on hex-3: 0 -+group_color: vg1:0 allocation score on hex-1: -INFINITY -+group_color: vg1:0 allocation score on hex-2: -INFINITY -+group_color: vg1:0 allocation score on hex-3: -INFINITY -+group_color: vg1:1 allocation score on hex-1: -INFINITY -+group_color: vg1:1 allocation score on hex-2: -INFINITY -+group_color: vg1:1 allocation score on hex-3: -INFINITY -+group_color: vg1:2 allocation score on hex-1: -INFINITY -+group_color: vg1:2 allocation score on hex-2: -INFINITY -+group_color: vg1:2 allocation score on hex-3: -INFINITY -+native_color: cluster-md0 allocation score on hex-1: 0 -+native_color: cluster-md0 allocation score on hex-2: 0 -+native_color: cluster-md0 allocation score on hex-3: 0 -+native_color: clvmd:0 allocation score on hex-1: -INFINITY -+native_color: clvmd:0 allocation score on hex-2: -INFINITY -+native_color: clvmd:0 allocation score on hex-3: -INFINITY -+native_color: clvmd:1 allocation score on hex-1: -INFINITY -+native_color: clvmd:1 allocation score on hex-2: -INFINITY -+native_color: clvmd:1 allocation score on hex-3: -INFINITY -+native_color: clvmd:2 allocation 
score on hex-1: -INFINITY -+native_color: clvmd:2 allocation score on hex-2: -INFINITY -+native_color: clvmd:2 allocation score on hex-3: -INFINITY -+native_color: dlm:0 allocation score on hex-1: -INFINITY -+native_color: dlm:0 allocation score on hex-2: -INFINITY -+native_color: dlm:0 allocation score on hex-3: -INFINITY -+native_color: dlm:1 allocation score on hex-1: -INFINITY -+native_color: dlm:1 allocation score on hex-2: -INFINITY -+native_color: dlm:1 allocation score on hex-3: -INFINITY -+native_color: dlm:2 allocation score on hex-1: -INFINITY -+native_color: dlm:2 allocation score on hex-2: -INFINITY -+native_color: dlm:2 allocation score on hex-3: -INFINITY -+native_color: drbd-r0:0 allocation score on hex-1: -INFINITY -+native_color: drbd-r0:0 allocation score on hex-2: -INFINITY -+native_color: drbd-r0:0 allocation score on hex-3: -INFINITY -+native_color: drbd-r0:1 allocation score on hex-1: -INFINITY -+native_color: drbd-r0:1 allocation score on hex-2: -INFINITY -+native_color: drbd-r0:1 allocation score on hex-3: -INFINITY -+native_color: drbd-r1:0 allocation score on hex-1: -INFINITY -+native_color: drbd-r1:0 allocation score on hex-2: -INFINITY -+native_color: drbd-r1:0 allocation score on hex-3: -INFINITY -+native_color: drbd-r1:1 allocation score on hex-1: -INFINITY -+native_color: drbd-r1:1 allocation score on hex-2: -INFINITY -+native_color: drbd-r1:1 allocation score on hex-3: -INFINITY -+native_color: dummy1 allocation score on hex-1: -INFINITY -+native_color: dummy1 allocation score on hex-2: -INFINITY -+native_color: dummy1 allocation score on hex-3: -INFINITY -+native_color: dummy2 allocation score on hex-1: -INFINITY -+native_color: dummy2 allocation score on hex-2: -INFINITY -+native_color: dummy2 allocation score on hex-3: -INFINITY -+native_color: dummy3 allocation score on hex-1: -INFINITY -+native_color: dummy3 allocation score on hex-2: -INFINITY -+native_color: dummy3 allocation score on hex-3: -INFINITY -+native_color: dummy4 allocation score on hex-1: -INFINITY -+native_color: dummy4 allocation score on hex-2: -INFINITY -+native_color: dummy4 allocation score on hex-3: -INFINITY -+native_color: dummy5 allocation score on hex-1: 0 -+native_color: dummy5 allocation score on hex-2: 0 -+native_color: dummy5 allocation score on hex-3: 0 -+native_color: dummy6 allocation score on hex-1: 0 -+native_color: dummy6 allocation score on hex-2: 0 -+native_color: dummy6 allocation score on hex-3: 0 -+native_color: fencing allocation score on hex-1: 0 -+native_color: fencing allocation score on hex-2: 0 -+native_color: fencing allocation score on hex-3: 0 -+native_color: fs-md0 allocation score on hex-1: -INFINITY -+native_color: fs-md0 allocation score on hex-2: -INFINITY -+native_color: fs-md0 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-1:0 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-1:0 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-1:0 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-1:1 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-1:1 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-1:2 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-2:0 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-2:0 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-2:0 
allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-2:1 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-2:1 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-2:1 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-2:2 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-2:2 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-2:2 allocation score on hex-3: -INFINITY -+native_color: fs-r0 allocation score on hex-1: -INFINITY -+native_color: fs-r0 allocation score on hex-2: -INFINITY -+native_color: fs-r0 allocation score on hex-3: -INFINITY -+native_color: fs-xfs-1 allocation score on hex-1: -INFINITY -+native_color: fs-xfs-1 allocation score on hex-2: -INFINITY -+native_color: fs-xfs-1 allocation score on hex-3: -INFINITY -+native_color: md0 allocation score on hex-1: 0 -+native_color: md0 allocation score on hex-2: 0 -+native_color: md0 allocation score on hex-3: 0 -+native_color: o2cb:0 allocation score on hex-1: -INFINITY -+native_color: o2cb:0 allocation score on hex-2: -INFINITY -+native_color: o2cb:0 allocation score on hex-3: -INFINITY -+native_color: o2cb:1 allocation score on hex-1: -INFINITY -+native_color: o2cb:1 allocation score on hex-2: -INFINITY -+native_color: o2cb:1 allocation score on hex-3: -INFINITY -+native_color: o2cb:2 allocation score on hex-1: -INFINITY -+native_color: o2cb:2 allocation score on hex-2: -INFINITY -+native_color: o2cb:2 allocation score on hex-3: -INFINITY -+native_color: vg-md0 allocation score on hex-1: -INFINITY -+native_color: vg-md0 allocation score on hex-2: -INFINITY -+native_color: vg-md0 allocation score on hex-3: -INFINITY -+native_color: vg1:0 allocation score on hex-1: -INFINITY -+native_color: vg1:0 allocation score on hex-2: -INFINITY -+native_color: vg1:0 allocation score on hex-3: -INFINITY -+native_color: vg1:1 allocation score on hex-1: -INFINITY -+native_color: vg1:1 allocation score on hex-2: -INFINITY -+native_color: vg1:1 allocation score on hex-3: -INFINITY -+native_color: vg1:2 allocation score on hex-1: -INFINITY -+native_color: vg1:2 allocation score on hex-2: -INFINITY -+native_color: vg1:2 allocation score on hex-3: -INFINITY -diff --git a/pengine/test10/bug-5140-require-all-false.summary b/pengine/test10/bug-5140-require-all-false.summary -new file mode 100644 -index 0000000..b9d38bb ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.summary -@@ -0,0 +1,80 @@ -+ -+Current cluster status: -+Node hex-1: standby -+Node hex-2: standby -+Node hex-3: OFFLINE (standby) -+ -+ fencing (stonith:external/sbd): Stopped -+ Clone Set: baseclone [basegrp] -+ Resource Group: basegrp:0 -+ dlm (ocf::pacemaker:controld): Started hex-2 -+ clvmd (ocf::lvm2:clvmd): Started hex-2 -+ o2cb (ocf::ocfs2:o2cb): Started hex-2 -+ vg1 (ocf::heartbeat:LVM): Stopped -+ fs-ocfs-1 (ocf::heartbeat:Filesystem): Stopped -+ Stopped: [ hex-1 hex-3 ] -+ fs-xfs-1 (ocf::heartbeat:Filesystem): Stopped -+ Clone Set: fs2 [fs-ocfs-2] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r0 [drbd-r0] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r1 [drbd-r1] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Resource Group: md0-group -+ md0 (ocf::heartbeat:Raid1): Stopped -+ vg-md0 (ocf::heartbeat:LVM): Stopped -+ fs-md0 (ocf::heartbeat:Filesystem): Stopped -+ dummy1 (ocf::heartbeat:Delay): Stopped -+ dummy3 (ocf::heartbeat:Delay): Stopped -+ dummy4 (ocf::heartbeat:Delay): Stopped -+ dummy5 (ocf::heartbeat:Delay): Stopped -+ dummy6 (ocf::heartbeat:Delay): Stopped -+ Resource Group: r0-group -+ 
fs-r0 (ocf::heartbeat:Filesystem): Stopped -+ dummy2 (ocf::heartbeat:Delay): Stopped -+ cluster-md0 (ocf::heartbeat:Raid1): Stopped -+ -+Transition Summary: -+ * Stop dlm:0 (hex-2) -+ * Stop clvmd:0 (hex-2) -+ * Stop o2cb:0 (hex-2) -+ -+Executing cluster transition: -+ * Pseudo action: baseclone_stop_0 -+ * Pseudo action: basegrp:0_stop_0 -+ * Resource action: o2cb stop on hex-2 -+ * Resource action: clvmd stop on hex-2 -+ * Resource action: dlm stop on hex-2 -+ * Pseudo action: all_stopped -+ * Pseudo action: basegrp:0_stopped_0 -+ * Pseudo action: baseclone_stopped_0 -+ -+Revised cluster status: -+Node hex-1: standby -+Node hex-2: standby -+Node hex-3: OFFLINE (standby) -+ -+ fencing (stonith:external/sbd): Stopped -+ Clone Set: baseclone [basegrp] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ fs-xfs-1 (ocf::heartbeat:Filesystem): Stopped -+ Clone Set: fs2 [fs-ocfs-2] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r0 [drbd-r0] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r1 [drbd-r1] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Resource Group: md0-group -+ md0 (ocf::heartbeat:Raid1): Stopped -+ vg-md0 (ocf::heartbeat:LVM): Stopped -+ fs-md0 (ocf::heartbeat:Filesystem): Stopped -+ dummy1 (ocf::heartbeat:Delay): Stopped -+ dummy3 (ocf::heartbeat:Delay): Stopped -+ dummy4 (ocf::heartbeat:Delay): Stopped -+ dummy5 (ocf::heartbeat:Delay): Stopped -+ dummy6 (ocf::heartbeat:Delay): Stopped -+ Resource Group: r0-group -+ fs-r0 (ocf::heartbeat:Filesystem): Stopped -+ dummy2 (ocf::heartbeat:Delay): Stopped -+ cluster-md0 (ocf::heartbeat:Raid1): Stopped -+ -diff --git a/pengine/test10/bug-5140-require-all-false.xml b/pengine/test10/bug-5140-require-all-false.xml -new file mode 100644 -index 0000000..2db4935 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.xml -@@ -0,0 +1,416 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/bug-5143-ms-shuffle.dot b/pengine/test10/bug-5143-ms-shuffle.dot -new file mode 100644 -index 0000000..c8da3a7 ---- /dev/null -+++ b/pengine/test10/bug-5143-ms-shuffle.dot -@@ -0,0 +1,33 @@ -+digraph "g" { -+"drbd-r1_monitor_29000 hex-2" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_monitor_31000 hex-3" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_post_notify_promoted_0 hex-2" -> "ms-r1_confirmed-post_notify_promoted_0" [ style 
= bold] -+"drbd-r1_post_notify_promoted_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_post_notify_promoted_0 hex-3" -> "ms-r1_confirmed-post_notify_promoted_0" [ style = bold] -+"drbd-r1_post_notify_promoted_0 hex-3" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_pre_notify_promote_0 hex-2" -> "ms-r1_confirmed-pre_notify_promote_0" [ style = bold] -+"drbd-r1_pre_notify_promote_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_pre_notify_promote_0 hex-3" -> "ms-r1_confirmed-pre_notify_promote_0" [ style = bold] -+"drbd-r1_pre_notify_promote_0 hex-3" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_promote_0 hex-3" -> "drbd-r1_monitor_31000 hex-3" [ style = bold] -+"drbd-r1_promote_0 hex-3" -> "ms-r1_promoted_0" [ style = bold] -+"drbd-r1_promote_0 hex-3" [ style=bold color="green" fontcolor="black"] -+"ms-r1_confirmed-post_notify_promoted_0" -> "drbd-r1_monitor_29000 hex-2" [ style = bold] -+"ms-r1_confirmed-post_notify_promoted_0" -> "drbd-r1_monitor_31000 hex-3" [ style = bold] -+"ms-r1_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_confirmed-pre_notify_promote_0" -> "ms-r1_post_notify_promoted_0" [ style = bold] -+"ms-r1_confirmed-pre_notify_promote_0" -> "ms-r1_promote_0" [ style = bold] -+"ms-r1_confirmed-pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_post_notify_promoted_0" -> "drbd-r1_post_notify_promoted_0 hex-2" [ style = bold] -+"ms-r1_post_notify_promoted_0" -> "drbd-r1_post_notify_promoted_0 hex-3" [ style = bold] -+"ms-r1_post_notify_promoted_0" -> "ms-r1_confirmed-post_notify_promoted_0" [ style = bold] -+"ms-r1_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_pre_notify_promote_0" -> "drbd-r1_pre_notify_promote_0 hex-2" [ style = bold] -+"ms-r1_pre_notify_promote_0" -> "drbd-r1_pre_notify_promote_0 hex-3" [ style = bold] -+"ms-r1_pre_notify_promote_0" -> "ms-r1_confirmed-pre_notify_promote_0" [ style = bold] -+"ms-r1_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_promote_0" -> "drbd-r1_promote_0 hex-3" [ style = bold] -+"ms-r1_promote_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_promoted_0" -> "ms-r1_post_notify_promoted_0" [ style = bold] -+"ms-r1_promoted_0" [ style=bold color="green" fontcolor="orange"] -+} -diff --git a/pengine/test10/bug-5143-ms-shuffle.exp b/pengine/test10/bug-5143-ms-shuffle.exp -new file mode 100644 -index 0000000..cd1bf59 ---- /dev/null -+++ b/pengine/test10/bug-5143-ms-shuffle.exp -@@ -0,0 +1,180 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/bug-5143-ms-shuffle.scores b/pengine/test10/bug-5143-ms-shuffle.scores -new file mode 100644 -index 0000000..e6bfca3 ---- /dev/null -+++ b/pengine/test10/bug-5143-ms-shuffle.scores -@@ -0,0 +1,273 @@ -+Allocation scores: -+clone_color: baseclone allocation score on hex-1: 0 -+clone_color: baseclone allocation score on hex-2: 2000 -+clone_color: baseclone allocation score on hex-3: 0 
-+clone_color: basegrp:0 allocation score on hex-1: 0 -+clone_color: basegrp:0 allocation score on hex-2: 0 -+clone_color: basegrp:0 allocation score on hex-3: 0 -+clone_color: basegrp:1 allocation score on hex-1: 0 -+clone_color: basegrp:1 allocation score on hex-2: 0 -+clone_color: basegrp:1 allocation score on hex-3: 0 -+clone_color: basegrp:2 allocation score on hex-1: 0 -+clone_color: basegrp:2 allocation score on hex-2: 0 -+clone_color: basegrp:2 allocation score on hex-3: 0 -+clone_color: clvmd:0 allocation score on hex-1: 1 -+clone_color: clvmd:0 allocation score on hex-2: 0 -+clone_color: clvmd:0 allocation score on hex-3: 0 -+clone_color: clvmd:1 allocation score on hex-1: 0 -+clone_color: clvmd:1 allocation score on hex-2: 1 -+clone_color: clvmd:1 allocation score on hex-3: 0 -+clone_color: clvmd:2 allocation score on hex-1: 0 -+clone_color: clvmd:2 allocation score on hex-2: 0 -+clone_color: clvmd:2 allocation score on hex-3: 1 -+clone_color: dlm:0 allocation score on hex-1: 1 -+clone_color: dlm:0 allocation score on hex-2: 0 -+clone_color: dlm:0 allocation score on hex-3: 0 -+clone_color: dlm:1 allocation score on hex-1: 0 -+clone_color: dlm:1 allocation score on hex-2: 1 -+clone_color: dlm:1 allocation score on hex-3: 0 -+clone_color: dlm:2 allocation score on hex-1: 0 -+clone_color: dlm:2 allocation score on hex-2: 0 -+clone_color: dlm:2 allocation score on hex-3: 1 -+clone_color: drbd-r0:0 allocation score on hex-1: 10001 -+clone_color: drbd-r0:0 allocation score on hex-2: 0 -+clone_color: drbd-r0:0 allocation score on hex-3: -INFINITY -+clone_color: drbd-r0:1 allocation score on hex-1: 0 -+clone_color: drbd-r0:1 allocation score on hex-2: 10001 -+clone_color: drbd-r0:1 allocation score on hex-3: -INFINITY -+clone_color: drbd-r1:0 allocation score on hex-1: -INFINITY -+clone_color: drbd-r1:0 allocation score on hex-2: 1 -+clone_color: drbd-r1:0 allocation score on hex-3: 0 -+clone_color: drbd-r1:1 allocation score on hex-1: -INFINITY -+clone_color: drbd-r1:1 allocation score on hex-2: 0 -+clone_color: drbd-r1:1 allocation score on hex-3: 10001 -+clone_color: fs-ocfs-1:0 allocation score on hex-1: 1 -+clone_color: fs-ocfs-1:0 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:0 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-2: 1 -+clone_color: fs-ocfs-1:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:2 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:2 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:2 allocation score on hex-3: 1 -+clone_color: fs-ocfs-2:0 allocation score on hex-1: 1 -+clone_color: fs-ocfs-2:0 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:0 allocation score on hex-3: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-2: 1 -+clone_color: fs-ocfs-2:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-3: 1 -+clone_color: fs2 allocation score on hex-1: 1000 -+clone_color: fs2 allocation score on hex-2: 0 -+clone_color: fs2 allocation score on hex-3: 0 -+clone_color: ms-r0 allocation score on hex-1: 0 -+clone_color: ms-r0 allocation score on hex-2: 0 -+clone_color: ms-r0 allocation score on hex-3: -INFINITY -+clone_color: ms-r1 allocation score on hex-1: -INFINITY -+clone_color: ms-r1 allocation score on hex-2: 0 -+clone_color: ms-r1 allocation score 
on hex-3: 0 -+clone_color: o2cb:0 allocation score on hex-1: 1 -+clone_color: o2cb:0 allocation score on hex-2: 0 -+clone_color: o2cb:0 allocation score on hex-3: 0 -+clone_color: o2cb:1 allocation score on hex-1: 0 -+clone_color: o2cb:1 allocation score on hex-2: 1 -+clone_color: o2cb:1 allocation score on hex-3: 0 -+clone_color: o2cb:2 allocation score on hex-1: 0 -+clone_color: o2cb:2 allocation score on hex-2: 0 -+clone_color: o2cb:2 allocation score on hex-3: 1 -+clone_color: vg1:0 allocation score on hex-1: 1 -+clone_color: vg1:0 allocation score on hex-2: 0 -+clone_color: vg1:0 allocation score on hex-3: 0 -+clone_color: vg1:1 allocation score on hex-1: 0 -+clone_color: vg1:1 allocation score on hex-2: 1 -+clone_color: vg1:1 allocation score on hex-3: 0 -+clone_color: vg1:2 allocation score on hex-1: 0 -+clone_color: vg1:2 allocation score on hex-2: 0 -+clone_color: vg1:2 allocation score on hex-3: 1 -+drbd-r0:0 promotion score on hex-1: 10000 -+drbd-r0:1 promotion score on hex-2: 10000 -+drbd-r0:2 promotion score on none: 0 -+drbd-r1:0 promotion score on hex-2: -1 -+drbd-r1:1 promotion score on hex-3: 10000 -+group_color: basegrp:0 allocation score on hex-1: 0 -+group_color: basegrp:0 allocation score on hex-2: 0 -+group_color: basegrp:0 allocation score on hex-3: 0 -+group_color: basegrp:1 allocation score on hex-1: -INFINITY -+group_color: basegrp:1 allocation score on hex-2: 0 -+group_color: basegrp:1 allocation score on hex-3: 0 -+group_color: basegrp:2 allocation score on hex-1: -INFINITY -+group_color: basegrp:2 allocation score on hex-2: -INFINITY -+group_color: basegrp:2 allocation score on hex-3: 0 -+group_color: clvmd:0 allocation score on hex-1: 1 -+group_color: clvmd:0 allocation score on hex-2: 0 -+group_color: clvmd:0 allocation score on hex-3: 0 -+group_color: clvmd:1 allocation score on hex-1: -INFINITY -+group_color: clvmd:1 allocation score on hex-2: 1 -+group_color: clvmd:1 allocation score on hex-3: 0 -+group_color: clvmd:2 allocation score on hex-1: -INFINITY -+group_color: clvmd:2 allocation score on hex-2: -INFINITY -+group_color: clvmd:2 allocation score on hex-3: 1 -+group_color: dlm:0 allocation score on hex-1: 1 -+group_color: dlm:0 allocation score on hex-2: 0 -+group_color: dlm:0 allocation score on hex-3: 0 -+group_color: dlm:1 allocation score on hex-1: -INFINITY -+group_color: dlm:1 allocation score on hex-2: 1 -+group_color: dlm:1 allocation score on hex-3: 0 -+group_color: dlm:2 allocation score on hex-1: -INFINITY -+group_color: dlm:2 allocation score on hex-2: -INFINITY -+group_color: dlm:2 allocation score on hex-3: 1 -+group_color: dummy1 allocation score on hex-1: 0 -+group_color: dummy1 allocation score on hex-2: 0 -+group_color: dummy1 allocation score on hex-3: 1000 -+group_color: dummy2 allocation score on hex-1: 0 -+group_color: dummy2 allocation score on hex-2: 0 -+group_color: dummy2 allocation score on hex-3: 0 -+group_color: fs-md0 allocation score on hex-1: 0 -+group_color: fs-md0 allocation score on hex-2: 0 -+group_color: fs-md0 allocation score on hex-3: 1000 -+group_color: fs-ocfs-1:0 allocation score on hex-1: 1 -+group_color: fs-ocfs-1:0 allocation score on hex-2: 0 -+group_color: fs-ocfs-1:0 allocation score on hex-3: 0 -+group_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:1 allocation score on hex-2: 1 -+group_color: fs-ocfs-1:1 allocation score on hex-3: 0 -+group_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY 
-+group_color: fs-ocfs-1:2 allocation score on hex-3: 1
-+group_color: fs-r0 allocation score on hex-1: 0
-+group_color: fs-r0 allocation score on hex-2: 0
-+group_color: fs-r0 allocation score on hex-3: 0
-+group_color: md0 allocation score on hex-1: 0
-+group_color: md0 allocation score on hex-2: 0
-+group_color: md0 allocation score on hex-3: 1000
-+group_color: md0-group allocation score on hex-1: 0
-+group_color: md0-group allocation score on hex-2: 0
-+group_color: md0-group allocation score on hex-3: 0
-+group_color: o2cb:0 allocation score on hex-1: 1
-+group_color: o2cb:0 allocation score on hex-2: 0
-+group_color: o2cb:0 allocation score on hex-3: 0
-+group_color: o2cb:1 allocation score on hex-1: -INFINITY
-+group_color: o2cb:1 allocation score on hex-2: 1
-+group_color: o2cb:1 allocation score on hex-3: 0
-+group_color: o2cb:2 allocation score on hex-1: -INFINITY
-+group_color: o2cb:2 allocation score on hex-2: -INFINITY
-+group_color: o2cb:2 allocation score on hex-3: 1
-+group_color: r0-group allocation score on hex-1: 0
-+group_color: r0-group allocation score on hex-2: 0
-+group_color: r0-group allocation score on hex-3: 0
-+group_color: vg-md0 allocation score on hex-1: 0
-+group_color: vg-md0 allocation score on hex-2: 0
-+group_color: vg-md0 allocation score on hex-3: 1000
-+group_color: vg1:0 allocation score on hex-1: 1
-+group_color: vg1:0 allocation score on hex-2: 0
-+group_color: vg1:0 allocation score on hex-3: 0
-+group_color: vg1:1 allocation score on hex-1: -INFINITY
-+group_color: vg1:1 allocation score on hex-2: 1
-+group_color: vg1:1 allocation score on hex-3: 0
-+group_color: vg1:2 allocation score on hex-1: -INFINITY
-+group_color: vg1:2 allocation score on hex-2: -INFINITY
-+group_color: vg1:2 allocation score on hex-3: 1
-+native_color: clvmd:0 allocation score on hex-1: 4
-+native_color: clvmd:0 allocation score on hex-2: -INFINITY
-+native_color: clvmd:0 allocation score on hex-3: -INFINITY
-+native_color: clvmd:1 allocation score on hex-1: -INFINITY
-+native_color: clvmd:1 allocation score on hex-2: 4
-+native_color: clvmd:1 allocation score on hex-3: -INFINITY
-+native_color: clvmd:2 allocation score on hex-1: -INFINITY
-+native_color: clvmd:2 allocation score on hex-2: -INFINITY
-+native_color: clvmd:2 allocation score on hex-3: 4
-+native_color: dlm:0 allocation score on hex-1: 5
-+native_color: dlm:0 allocation score on hex-2: 0
-+native_color: dlm:0 allocation score on hex-3: 0
-+native_color: dlm:1 allocation score on hex-1: -INFINITY
-+native_color: dlm:1 allocation score on hex-2: 5
-+native_color: dlm:1 allocation score on hex-3: 0
-+native_color: dlm:2 allocation score on hex-1: -INFINITY
-+native_color: dlm:2 allocation score on hex-2: -INFINITY
-+native_color: dlm:2 allocation score on hex-3: 5
-+native_color: drbd-r0:0 allocation score on hex-1: 10001
-+native_color: drbd-r0:0 allocation score on hex-2: 0
-+native_color: drbd-r0:0 allocation score on hex-3: -INFINITY
-+native_color: drbd-r0:1 allocation score on hex-1: -INFINITY
-+native_color: drbd-r0:1 allocation score on hex-2: 10001
-+native_color: drbd-r0:1 allocation score on hex-3: -INFINITY
-+native_color: drbd-r1:0 allocation score on hex-1: -INFINITY
-+native_color: drbd-r1:0 allocation score on hex-2: 1
-+native_color: drbd-r1:0 allocation score on hex-3: -INFINITY
-+native_color: drbd-r1:1 allocation score on hex-1: -INFINITY
-+native_color: drbd-r1:1 allocation score on hex-2: 0
-+native_color: drbd-r1:1 allocation score on hex-3: 10001
-+native_color: dummy1 allocation score on hex-1: -INFINITY
-+native_color: dummy1 allocation score on hex-2: -INFINITY
-+native_color: dummy1 allocation score on hex-3: 1000
-+native_color: dummy2 allocation score on hex-1: -INFINITY
-+native_color: dummy2 allocation score on hex-2: -INFINITY
-+native_color: dummy2 allocation score on hex-3: -INFINITY
-+native_color: dummy3 allocation score on hex-1: 1000
-+native_color: dummy3 allocation score on hex-2: -INFINITY
-+native_color: dummy3 allocation score on hex-3: 0
-+native_color: dummy4 allocation score on hex-1: -INFINITY
-+native_color: dummy4 allocation score on hex-2: 1000
-+native_color: dummy4 allocation score on hex-3: -INFINITY
-+native_color: dummy5 allocation score on hex-1: 1000
-+native_color: dummy5 allocation score on hex-2: 0
-+native_color: dummy5 allocation score on hex-3: 0
-+native_color: dummy6 allocation score on hex-1: 0
-+native_color: dummy6 allocation score on hex-2: 1000
-+native_color: dummy6 allocation score on hex-3: 0
-+native_color: fencing allocation score on hex-1: 1000
-+native_color: fencing allocation score on hex-2: 0
-+native_color: fencing allocation score on hex-3: 0
-+native_color: fs-md0 allocation score on hex-1: -INFINITY
-+native_color: fs-md0 allocation score on hex-2: -INFINITY
-+native_color: fs-md0 allocation score on hex-3: 2000
-+native_color: fs-ocfs-1:0 allocation score on hex-1: 1
-+native_color: fs-ocfs-1:0 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-1:0 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-1:1 allocation score on hex-2: 1
-+native_color: fs-ocfs-1:1 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-1:2 allocation score on hex-3: 1
-+native_color: fs-ocfs-2:0 allocation score on hex-1: 1
-+native_color: fs-ocfs-2:0 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-2:0 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-2:1 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-2:1 allocation score on hex-2: 1
-+native_color: fs-ocfs-2:1 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-2:2 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-2:2 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-2:2 allocation score on hex-3: 1
-+native_color: fs-r0 allocation score on hex-1: -INFINITY
-+native_color: fs-r0 allocation score on hex-2: -INFINITY
-+native_color: fs-r0 allocation score on hex-3: -INFINITY
-+native_color: fs-xfs-1 allocation score on hex-1: -1000
-+native_color: fs-xfs-1 allocation score on hex-2: 2000
-+native_color: fs-xfs-1 allocation score on hex-3: 0
-+native_color: md0 allocation score on hex-1: 0
-+native_color: md0 allocation score on hex-2: 0
-+native_color: md0 allocation score on hex-3: 4000
-+native_color: o2cb:0 allocation score on hex-1: 3
-+native_color: o2cb:0 allocation score on hex-2: -INFINITY
-+native_color: o2cb:0 allocation score on hex-3: -INFINITY
-+native_color: o2cb:1 allocation score on hex-1: -INFINITY
-+native_color: o2cb:1 allocation score on hex-2: 3
-+native_color: o2cb:1 allocation score on hex-3: -INFINITY
-+native_color: o2cb:2 allocation score on hex-1: -INFINITY
-+native_color: o2cb:2 allocation score on hex-2: -INFINITY
-+native_color: o2cb:2 allocation score on hex-3: 3
-+native_color: vg-md0 allocation score on hex-1: -INFINITY
-+native_color: vg-md0 allocation score on hex-2: -INFINITY
-+native_color: vg-md0 allocation score on hex-3: 3000
-+native_color: vg1:0 allocation score on hex-1: 2
-+native_color: vg1:0 allocation score on hex-2: -INFINITY
-+native_color: vg1:0 allocation score on hex-3: -INFINITY
-+native_color: vg1:1 allocation score on hex-1: -INFINITY
-+native_color: vg1:1 allocation score on hex-2: 2
-+native_color: vg1:1 allocation score on hex-3: -INFINITY
-+native_color: vg1:2 allocation score on hex-1: -INFINITY
-+native_color: vg1:2 allocation score on hex-2: -INFINITY
-+native_color: vg1:2 allocation score on hex-3: 2
-diff --git a/pengine/test10/bug-5143-ms-shuffle.summary b/pengine/test10/bug-5143-ms-shuffle.summary
-new file mode 100644
-index 0000000..7a3bcf4
---- /dev/null
-+++ b/pengine/test10/bug-5143-ms-shuffle.summary
-@@ -0,0 +1,78 @@
-+
-+Current cluster status:
-+Online: [ hex-1 hex-2 hex-3 ]
-+
-+ fencing (stonith:external/sbd): Started hex-1
-+ Clone Set: baseclone [basegrp]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ fs-xfs-1 (ocf::heartbeat:Filesystem): Started hex-2
-+ Clone Set: fs2 [fs-ocfs-2]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ Master/Slave Set: ms-r0 [drbd-r0]
-+ Masters: [ hex-1 ]
-+ Slaves: [ hex-2 ]
-+ Stopped: [ hex-3 ]
-+ Master/Slave Set: ms-r1 [drbd-r1]
-+ Slaves: [ hex-2 hex-3 ]
-+ Stopped: [ hex-1 ]
-+ Resource Group: md0-group
-+ md0 (ocf::heartbeat:Raid1): Started hex-3
-+ vg-md0 (ocf::heartbeat:LVM): Started hex-3
-+ fs-md0 (ocf::heartbeat:Filesystem): Started hex-3
-+ dummy1 (ocf::heartbeat:Delay): Started hex-3
-+ dummy3 (ocf::heartbeat:Delay): Started hex-1
-+ dummy4 (ocf::heartbeat:Delay): Started hex-2
-+ dummy5 (ocf::heartbeat:Delay): Started hex-1
-+ dummy6 (ocf::heartbeat:Delay): Started hex-2
-+ Resource Group: r0-group
-+ fs-r0 (ocf::heartbeat:Filesystem): Stopped
-+ dummy2 (ocf::heartbeat:Delay): Stopped
-+
-+Transition Summary:
-+ * Promote drbd-r1:1 (Slave -> Master hex-3)
-+
-+Executing cluster transition:
-+ * Pseudo action: ms-r1_pre_notify_promote_0
-+ * Resource action: drbd-r1 notify on hex-2
-+ * Resource action: drbd-r1 notify on hex-3
-+ * Pseudo action: ms-r1_confirmed-pre_notify_promote_0
-+ * Pseudo action: ms-r1_promote_0
-+ * Resource action: drbd-r1 promote on hex-3
-+ * Pseudo action: ms-r1_promoted_0
-+ * Pseudo action: ms-r1_post_notify_promoted_0
-+ * Resource action: drbd-r1 notify on hex-2
-+ * Resource action: drbd-r1 notify on hex-3
-+ * Pseudo action: ms-r1_confirmed-post_notify_promoted_0
-+ * Resource action: drbd-r1 monitor=29000 on hex-2
-+ * Resource action: drbd-r1 monitor=31000 on hex-3
-+
-+Revised cluster status:
-+Online: [ hex-1 hex-2 hex-3 ]
-+
-+ fencing (stonith:external/sbd): Started hex-1
-+ Clone Set: baseclone [basegrp]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ fs-xfs-1 (ocf::heartbeat:Filesystem): Started hex-2
-+ Clone Set: fs2 [fs-ocfs-2]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ Master/Slave Set: ms-r0 [drbd-r0]
-+ Masters: [ hex-1 ]
-+ Slaves: [ hex-2 ]
-+ Stopped: [ hex-3 ]
-+ Master/Slave Set: ms-r1 [drbd-r1]
-+ Masters: [ hex-3 ]
-+ Slaves: [ hex-2 ]
-+ Stopped: [ hex-1 ]
-+ Resource Group: md0-group
-+ md0 (ocf::heartbeat:Raid1): Started hex-3
-+ vg-md0 (ocf::heartbeat:LVM): Started hex-3
-+ fs-md0 (ocf::heartbeat:Filesystem): Started hex-3
-+ dummy1 (ocf::heartbeat:Delay): Started hex-3
-+ dummy3 (ocf::heartbeat:Delay): Started hex-1
-+ dummy4 (ocf::heartbeat:Delay): Started hex-2
-+ dummy5 (ocf::heartbeat:Delay): Started hex-1
-+ dummy6 (ocf::heartbeat:Delay): Started hex-2
-+ Resource Group: r0-group
-+ fs-r0 (ocf::heartbeat:Filesystem): Stopped
-+ dummy2 (ocf::heartbeat:Delay): Stopped
-+
-diff --git a/pengine/test10/bug-5143-ms-shuffle.xml b/pengine/test10/bug-5143-ms-shuffle.xml
-new file mode 100644
-index 0000000..486d706
---- /dev/null
-+++ b/pengine/test10/bug-5143-ms-shuffle.xml
-@@ -0,0 +1,477 @@
-diff --git a/pengine/test10/bug-lf-1852.summary b/pengine/test10/bug-lf-1852.summary
-index 98ffea5..337ad6a 100644
---- a/pengine/test10/bug-lf-1852.summary
-+++ b/pengine/test10/bug-lf-1852.summary
-@@ -4,7 +4,7 @@ Online: [ mysql-01 mysql-02 ]
-
- Master/Slave Set: ms-drbd0 [drbd0]
- Masters: [ mysql-02 ]
-- Stopped: [ drbd0:1 ]
-+ Stopped: [ mysql-01 ]
- Resource Group: fs_mysql_ip
- fs0 (ocf::heartbeat:Filesystem): Started mysql-02
- mysqlid (lsb:mysql): Started mysql-02
-diff --git a/pengine/test10/bug-lf-2106.scores b/pengine/test10/bug-lf-2106.scores
-index c5ca47b..4dc52b5 100644
---- a/pengine/test10/bug-lf-2106.scores
-+++ b/pengine/test10/bug-lf-2106.scores
-@@ -1,26 +1,26 @@
- Allocation scores:
- clone_color: drbd-bugtrack:0 allocation score on cl-virt-1: 1
--clone_color: drbd-bugtrack:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-bugtrack:0 allocation score on cl-virt-2: 0
- clone_color: drbd-bugtrack:1 allocation score on cl-virt-1: 0
- clone_color: drbd-bugtrack:1 allocation score on cl-virt-2: 76
- clone_color: drbd-infotos:0 allocation score on cl-virt-1: 1
--clone_color: drbd-infotos:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-infotos:0 allocation score on cl-virt-2: 0
- clone_color: drbd-infotos:1 allocation score on cl-virt-1: 0
- clone_color: drbd-infotos:1 allocation score on cl-virt-2: 76
- clone_color: drbd-itwiki:0 allocation score on cl-virt-1: 1
--clone_color: drbd-itwiki:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-itwiki:0 allocation score on cl-virt-2: 0
- clone_color: drbd-itwiki:1 allocation score on cl-virt-1: 0
- clone_color: drbd-itwiki:1 allocation score on cl-virt-2: 76
- clone_color: drbd-medomus-cvs:0 allocation score on cl-virt-1: 1
--clone_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 0
- clone_color: drbd-medomus-cvs:1 allocation score on cl-virt-1: 0
- clone_color: drbd-medomus-cvs:1 allocation score on cl-virt-2: 76
- clone_color: drbd-servsyslog:0 allocation score on cl-virt-1: 1
--clone_color: drbd-servsyslog:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-servsyslog:0 allocation score on cl-virt-2: 0
- clone_color: drbd-servsyslog:1 allocation score on cl-virt-1: 0
- clone_color: drbd-servsyslog:1 allocation score on cl-virt-2: 76
- clone_color: drbd-smsprod2:0 allocation score on cl-virt-1: 1
--clone_color: drbd-smsprod2:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-smsprod2:0 allocation score on cl-virt-2: 0
- clone_color: drbd-smsprod2:1 allocation score on cl-virt-1: 0
- clone_color: drbd-smsprod2:1 allocation score on cl-virt-2: 76
- clone_color: ms-bugtrack allocation score on cl-virt-1: 50
-@@ -66,27 +66,27 @@ native_color: apcstonith allocation score on cl-virt-2: 0
- native_color: bugtrack allocation score on cl-virt-1: -INFINITY
- native_color: bugtrack allocation score on cl-virt-2: 176
- native_color: drbd-bugtrack:0 allocation score on cl-virt-1: 1
--native_color: drbd-bugtrack:0 allocation score on cl-virt-2: 75
-+native_color: drbd-bugtrack:0 allocation score on cl-virt-2: 0
- native_color: drbd-bugtrack:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-bugtrack:1 allocation score on cl-virt-2: 76
- native_color: drbd-infotos:0 allocation score on cl-virt-1: 1
--native_color: drbd-infotos:0 allocation score on cl-virt-2: 75
-+native_color: drbd-infotos:0 allocation score on cl-virt-2: 0
- native_color: drbd-infotos:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-infotos:1 allocation score on cl-virt-2: 76
- native_color: drbd-itwiki:0 allocation score on cl-virt-1: 1
--native_color: drbd-itwiki:0 allocation score on cl-virt-2: 75
-+native_color: drbd-itwiki:0 allocation score on cl-virt-2: 0
- native_color: drbd-itwiki:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-itwiki:1 allocation score on cl-virt-2: 76
- native_color: drbd-medomus-cvs:0 allocation score on cl-virt-1: 1
--native_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 75
-+native_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 0
- native_color: drbd-medomus-cvs:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-medomus-cvs:1 allocation score on cl-virt-2: 76
- native_color: drbd-servsyslog:0 allocation score on cl-virt-1: 1
--native_color: drbd-servsyslog:0 allocation score on cl-virt-2: 75
-+native_color: drbd-servsyslog:0 allocation score on cl-virt-2: 0
- native_color: drbd-servsyslog:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-servsyslog:1 allocation score on cl-virt-2: 76
- native_color: drbd-smsprod2:0 allocation score on cl-virt-1: 1
--native_color: drbd-smsprod2:0 allocation score on cl-virt-2: 75
-+native_color: drbd-smsprod2:0 allocation score on cl-virt-2: 0
- native_color: drbd-smsprod2:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-smsprod2:1 allocation score on cl-virt-2: 76
- native_color: infotos allocation score on cl-virt-1: -INFINITY
-diff --git a/pengine/test10/bug-lf-2106.summary b/pengine/test10/bug-lf-2106.summary
-index 7be05ae..9a71125 100644
---- a/pengine/test10/bug-lf-2106.summary
-+++ b/pengine/test10/bug-lf-2106.summary
-@@ -39,8 +39,8 @@ Transition Summary:
- * Restart pingd:1 (Started cl-virt-2)
-
- Executing cluster transition:
-- * Cluster action: clear_failcount on cl-virt-1
-- * Cluster action: clear_failcount on cl-virt-2
-+ * Cluster action: clear_failcount for pingd on cl-virt-1
-+ * Cluster action: clear_failcount for pingd on cl-virt-2
- * Pseudo action: pingdclone_stop_0
- * Resource action: pingd:0 stop on cl-virt-1
- * Resource action: pingd:0 stop on cl-virt-2
-diff --git a/pengine/test10/bug-lf-2153.summary b/pengine/test10/bug-lf-2153.summary
-index 475e7df..9995475 100644
---- a/pengine/test10/bug-lf-2153.summary
-+++ b/pengine/test10/bug-lf-2153.summary
-@@ -43,10 +43,10 @@ Online: [ alice ]
-
- Master/Slave Set: ms_drbd_iscsivg01 [res_drbd_iscsivg01]
- Masters: [ alice ]
-- Stopped: [ res_drbd_iscsivg01:1 ]
-+ Stopped: [ bob ]
- Clone Set: cl_tgtd [res_tgtd]
- Started: [ alice ]
-- Stopped: [ res_tgtd:1 ]
-+ Stopped: [ bob ]
- Resource Group: rg_iscsivg01
- res_portblock_iscsivg01_block (ocf::heartbeat:portblock): Started alice
- res_lvm_iscsivg01 (ocf::heartbeat:LVM): Started alice
-diff --git a/pengine/test10/bug-lf-2160.summary b/pengine/test10/bug-lf-2160.summary
-index 77ef8f9..e09540a 100644
---- a/pengine/test10/bug-lf-2160.summary
-+++ b/pengine/test10/bug-lf-2160.summary
-@@ -5,6 +5,7 @@ Online: [ cardhu dualamd1 dualamd3 ]
- domU-test01 (ocf::heartbeat:Xen): Started dualamd1
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1-cnx1]
- Started: [ dualamd1 dualamd3 ]
-+ Stopped: [ cardhu ]
-
- Transition Summary:
-
-@@ -19,4 +20,5 @@ Online: [ cardhu dualamd1 dualamd3 ]
- domU-test01 (ocf::heartbeat:Xen): Started dualamd1
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1-cnx1]
- Started: [ dualamd1 dualamd3 ]
-+ Stopped: [ cardhu ]
-
-diff --git a/pengine/test10/bug-lf-2171.summary b/pengine/test10/bug-lf-2171.summary
-index a240116..5af3cd0 100644
---- a/pengine/test10/bug-lf-2171.summary
-+++ b/pengine/test10/bug-lf-2171.summary
-@@ -29,7 +29,7 @@ Revised cluster status:
- Online: [ xenserver1 xenserver2 ]
-
- Clone Set: cl_res_Dummy1 [res_Dummy1]
-- Stopped: [ res_Dummy1:0 res_Dummy1:1 ]
-+ Stopped: [ xenserver1 xenserver2 ]
- Resource Group: gr_Dummy
- res_Dummy2 (ocf::heartbeat:Dummy): Stopped
- res_Dummy3 (ocf::heartbeat:Dummy): Stopped
-diff --git a/pengine/test10/bug-lf-2213.summary b/pengine/test10/bug-lf-2213.summary
-index f5d06cb..a60012d 100644
---- a/pengine/test10/bug-lf-2213.summary
-+++ b/pengine/test10/bug-lf-2213.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ fs1 fs2 web1 web2 ]
-
- Clone Set: cl-test [gr-test]
-- Stopped: [ gr-test:0 gr-test:1 gr-test:2 gr-test:3 ]
-+ Stopped: [ fs1 fs2 web1 web2 ]
-
- Transition Summary:
- * Start test:0 (web1)
-@@ -24,5 +24,5 @@ Online: [ fs1 fs2 web1 web2 ]
-
- Clone Set: cl-test [gr-test]
- Started: [ web1 web2 ]
-- Stopped: [ gr-test:2 gr-test:3 ]
-+ Stopped: [ fs1 fs2 ]
-
-diff --git a/pengine/test10/bug-lf-2358.summary b/pengine/test10/bug-lf-2358.summary
-index 3b383c7..434c5ed 100644
---- a/pengine/test10/bug-lf-2358.summary
-+++ b/pengine/test10/bug-lf-2358.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ alice.demo bob.demo ]
-
- Master/Slave Set: ms_drbd_nfsexport [res_drbd_nfsexport]
-- Stopped: [ res_drbd_nfsexport:0 res_drbd_nfsexport:1 ]
-+ Stopped: [ alice.demo bob.demo ]
- Resource Group: rg_nfs
- res_fs_nfsexport (ocf::heartbeat:Filesystem): Stopped
- res_ip_nfs (ocf::heartbeat:IPaddr2): Stopped
-@@ -14,7 +14,7 @@ Online: [ alice.demo bob.demo ]
- res_mysql1 (ocf::heartbeat:mysql): Started bob.demo
- Master/Slave Set: ms_drbd_mysql1 [res_drbd_mysql1]
- Masters: [ bob.demo ]
-- Stopped: [ res_drbd_mysql1:1 ]
-+ Stopped: [ alice.demo ]
- Master/Slave Set: ms_drbd_mysql2 [res_drbd_mysql2]
- Masters: [ alice.demo ]
- Slaves: [ bob.demo ]
-@@ -42,7 +42,7 @@ Revised cluster status:
- Online: [ alice.demo bob.demo ]
-
- Master/Slave Set: ms_drbd_nfsexport [res_drbd_nfsexport]
-- Stopped: [ res_drbd_nfsexport:0 res_drbd_nfsexport:1 ]
-+ Stopped: [ alice.demo bob.demo ]
- Resource Group: rg_nfs
- res_fs_nfsexport (ocf::heartbeat:Filesystem): Stopped
- res_ip_nfs (ocf::heartbeat:IPaddr2): Stopped
-diff --git a/pengine/test10/bug-lf-2361.summary b/pengine/test10/bug-lf-2361.summary
-index fd48ba9..b81456c 100644
---- a/pengine/test10/bug-lf-2361.summary
-+++ b/pengine/test10/bug-lf-2361.summary
-@@ -4,9 +4,9 @@ Online: [ alice.demo bob.demo ]
-
- dummy1 (ocf::heartbeat:Dummy): Stopped
- Master/Slave Set: ms_stateful [stateful]
-- Stopped: [ stateful:0 stateful:1 ]
-+ Stopped: [ alice.demo bob.demo ]
- Clone Set: cl_dummy2 [dummy2]
-- Stopped: [ dummy2:0 dummy2:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start stateful:0 (alice.demo)
-@@ -38,5 +38,5 @@ Online: [ alice.demo bob.demo ]
- Master/Slave Set: ms_stateful [stateful]
- Slaves: [ alice.demo bob.demo ]
- Clone Set: cl_dummy2 [dummy2]
-- Stopped: [ dummy2:0 dummy2:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
-diff --git a/pengine/test10/bug-lf-2422.summary b/pengine/test10/bug-lf-2422.summary
-index a4d3abd..760fc2b 100644
---- a/pengine/test10/bug-lf-2422.summary
-+++ b/pengine/test10/bug-lf-2422.summary
-@@ -56,25 +56,25 @@ Online: [ qa-suse-1 qa-suse-2 qa-suse-3 qa-suse-4 ]
- sbd_stonith (stonith:external/sbd): Started qa-suse-2
- Clone Set: c-o2stage [o2stage]
- Resource Group: o2stage:0
-- dlm:0 (ocf::pacemaker:controld): Started qa-suse-1
-- clvm:0 (ocf::lvm2:clvmd): Started qa-suse-1
-- o2cb:0 (ocf::ocfs2:o2cb): Stopped
-- cmirror:0 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-1
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-1
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Resource Group: o2stage:1
-- dlm:1 (ocf::pacemaker:controld): Started qa-suse-4
-- clvm:1 (ocf::lvm2:clvmd): Started qa-suse-4
-- o2cb:1 (ocf::ocfs2:o2cb): Stopped
-- cmirror:1 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-4
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-4
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Resource Group: o2stage:2
-- dlm:2 (ocf::pacemaker:controld): Started qa-suse-3
-- clvm:2 (ocf::lvm2:clvmd): Started qa-suse-3
-- o2cb:2 (ocf::ocfs2:o2cb): Stopped
-- cmirror:2 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-3
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-3
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Resource Group: o2stage:3
-- dlm:3 (ocf::pacemaker:controld): Started qa-suse-2
-- clvm:3 (ocf::lvm2:clvmd): Started qa-suse-2
-- o2cb:3 (ocf::ocfs2:o2cb): Stopped
-- cmirror:3 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-2
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-2
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Clone Set: c-ocfs [ocfs]
-- Stopped: [ ocfs:0 ocfs:1 ocfs:2 ocfs:3 ]
-+ Stopped: [ qa-suse-1 qa-suse-2 qa-suse-3 qa-suse-4 ]
-
-diff --git a/pengine/test10/bug-lf-2453.summary b/pengine/test10/bug-lf-2453.summary
-index 8b40492..70eb79e 100644
---- a/pengine/test10/bug-lf-2453.summary
-+++ b/pengine/test10/bug-lf-2453.summary
-@@ -32,7 +32,7 @@ Online: [ domu1 domu2 ]
-
- PrimitiveResource1 (ocf::heartbeat:IPaddr2): Stopped
- Clone Set: CloneResource1 [apache]
-- Stopped: [ apache:0 apache:1 ]
-+ Stopped: [ domu1 domu2 ]
- Clone Set: CloneResource2 [DummyResource]
-- Stopped: [ DummyResource:0 DummyResource:1 ]
-+ Stopped: [ domu1 domu2 ]
-
-diff --git a/pengine/test10/bug-lf-2508.summary b/pengine/test10/bug-lf-2508.summary
-index 7032109..4435ced 100644
---- a/pengine/test10/bug-lf-2508.summary
-+++ b/pengine/test10/bug-lf-2508.summary
-@@ -11,19 +11,19 @@ Online: [ srv01 srv03 srv04 ]
- Dummy03 (ocf::heartbeat:Dummy): Started srv03
- Clone Set: clnStonith1 [grpStonith1]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ grpStonith1:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnStonith2 [grpStonith2]
- Started: [ srv01 srv03 srv04 ]
-- Stopped: [ grpStonith2:3 ]
-+ Stopped: [ srv02 ]
- Clone Set: clnStonith3 [grpStonith3]
- Resource Group: grpStonith3:1
-- prmStonith3-1:1 (stonith:external/stonith-helper): Started srv01
-- prmStonith3-3:1 (stonith:external/ssh): Stopped
-+ prmStonith3-1 (stonith:external/stonith-helper): Started srv01
-+ prmStonith3-3 (stonith:external/ssh): Stopped
- Started: [ srv02 srv04 ]
-- Stopped: [ grpStonith3:3 ]
-+ Stopped: [ srv03 ]
- Clone Set: clnStonith4 [grpStonith4]
- Started: [ srv01 srv02 srv03 ]
-- Stopped: [ grpStonith4:3 ]
-+ Stopped: [ srv04 ]
-
- Transition Summary:
- * Start Dummy01 (srv01)
-@@ -89,14 +89,14 @@ OFFLINE: [ srv02 ]
- Dummy03 (ocf::heartbeat:Dummy): Started srv03
- Clone Set: clnStonith1 [grpStonith1]
- Started: [ srv03 srv04 ]
-- Stopped: [ grpStonith1:2 grpStonith1:3 ]
-+ Stopped: [ srv01 srv02 ]
- Clone Set: clnStonith2 [grpStonith2]
- Started: [ srv01 srv03 srv04 ]
-- Stopped: [ grpStonith2:3 ]
-+ Stopped: [ srv02 ]
- Clone Set: clnStonith3 [grpStonith3]
- Started: [ srv01 srv04 ]
-- Stopped: [ grpStonith3:2 grpStonith3:3 ]
-+ Stopped: [ srv02 srv03 ]
- Clone Set: clnStonith4 [grpStonith4]
- Started: [ srv01 srv03 ]
-- Stopped: [ grpStonith4:2 grpStonith4:3 ]
-+ Stopped: [ srv02 srv04 ]
-
-diff --git a/pengine/test10/bug-lf-2551.summary b/pengine/test10/bug-lf-2551.summary
-index 0d27903..d299ee9 100644
---- a/pengine/test10/bug-lf-2551.summary
-+++ b/pengine/test10/bug-lf-2551.summary
-@@ -148,7 +148,7 @@ OFFLINE: [ hex-9 ]
- vm-00 (ocf::heartbeat:Xen): Started hex-0
- Clone Set: base-clone [base-group]
- Started: [ hex-0 hex-7 hex-8 ]
-- Stopped: [ base-group:3 ]
-+ Stopped: [ hex-9 ]
- vm-01 (ocf::heartbeat:Xen): Started hex-7
- vm-02 (ocf::heartbeat:Xen): Started hex-8
- vm-03 (ocf::heartbeat:Xen): Stopped
-diff --git a/pengine/test10/bug-lf-2574.summary b/pengine/test10/bug-lf-2574.summary
-index c8e945f..3024a73 100644
---- a/pengine/test10/bug-lf-2574.summary
-+++ b/pengine/test10/bug-lf-2574.summary
-@@ -6,7 +6,7 @@ Online: [ srv01 srv02 srv03 ]
- main_rsc2 (ocf::pacemaker:Dummy): Started srv02
- Clone Set: clnDummy1 [prmDummy1]
- Started: [ srv02 srv03 ]
-- Stopped: [ prmDummy1:2 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnPingd [prmPingd]
- Started: [ srv01 srv02 srv03 ]
-
-@@ -30,8 +30,8 @@ Online: [ srv01 srv02 srv03 ]
- main_rsc2 (ocf::pacemaker:Dummy): Started srv02
- Clone Set: clnDummy1 [prmDummy1]
- Started: [ srv02 srv03 ]
-- Stopped: [ prmDummy1:2 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnPingd [prmPingd]
- Started: [ srv02 srv03 ]
-- Stopped: [ prmPingd:2 ]
-+ Stopped: [ srv01 ]
-
-diff --git a/pengine/test10/bug-lf-2581.summary b/pengine/test10/bug-lf-2581.summary
-index 6979839..98b3763 100644
---- a/pengine/test10/bug-lf-2581.summary
-+++ b/pengine/test10/bug-lf-2581.summary
-@@ -4,7 +4,7 @@ Online: [ elvis queen ]
-
- Clone Set: AZ-clone [AZ-group]
- Started: [ elvis ]
-- Stopped: [ AZ-group:1 ]
-+ Stopped: [ queen ]
- Resource Group: BC-group-1
- B-1 (ocf::rgk:typeB): Started elvis
- C-1 (ocf::rgk:typeC): Started elvis
-@@ -13,7 +13,7 @@ Online: [ elvis queen ]
- C-2 (ocf::rgk:typeC): Started elvis
- Clone Set: stonith-l2network-set [stonith-l2network]
- Started: [ elvis ]
-- Stopped: [ stonith-l2network:1 ]
-+ Stopped: [ queen ]
-
- Transition Summary:
- * Start A:1 (queen)
-diff --git a/pengine/test10/bug-lf-2606.summary b/pengine/test10/bug-lf-2606.summary
-index 4d1dee3..535b3d9 100644
---- a/pengine/test10/bug-lf-2606.summary
-+++ b/pengine/test10/bug-lf-2606.summary
-@@ -39,5 +39,5 @@ OFFLINE: [ node2 ]
- rsc2 (ocf::pacemaker:Dummy): Started node1
- Master/Slave Set: ms3 [rsc3]
- Slaves: [ node1 ]
-- Stopped: [ rsc3:1 ]
-+ Stopped: [ node2 ]
-
-diff --git a/pengine/test10/bug-lf-2619.summary b/pengine/test10/bug-lf-2619.summary
-index e6defdc..ad94dd4 100644
---- a/pengine/test10/bug-lf-2619.summary
-+++ b/pengine/test10/bug-lf-2619.summary
-@@ -24,7 +24,7 @@ Online: [ act1 act2 act3 sby1 sby2 ]
- prmIpPostgreSQLDB3 (ocf::pacemaker:Dummy): Started act3
- prmApPostgreSQLDB3 (ocf::pacemaker:Dummy): Started act3
- Clone Set: clnPingd [prmPingd]
-- prmPingd:0 (ocf::pacemaker:ping): Started act1 FAILED
-+ prmPingd (ocf::pacemaker:ping): Started act1 FAILED
- Started: [ act2 act3 sby1 sby2 ]
-
- Transition Summary:
-@@ -95,5 +95,5 @@ Online: [ act1 act2 act3 sby1 sby2 ]
- prmApPostgreSQLDB3 (ocf::pacemaker:Dummy): Started act3
- Clone Set: clnPingd [prmPingd]
- Started: [ act2 act3 sby1 sby2 ]
-- Stopped: [ prmPingd:4 ]
-+ Stopped: [ act1 ]
-
-diff --git a/pengine/test10/bug-suse-707150.summary b/pengine/test10/bug-suse-707150.summary
-index 697c6a6..6d1fcdf 100644
---- a/pengine/test10/bug-suse-707150.summary
-+++ b/pengine/test10/bug-suse-707150.summary
-@@ -6,13 +6,13 @@ OFFLINE: [ hex-7 hex-8 ]
- vm-00 (ocf::heartbeat:Xen): Stopped
- Clone Set: base-clone [base-group]
- Resource Group: base-group:0
-- dlm:0 (ocf::pacemaker:controld): Started hex-0
-- o2cb:0 (ocf::ocfs2:o2cb): Stopped
-- clvm:0 (ocf::lvm2:clvmd): Stopped
-- cmirrord:0 (ocf::lvm2:cmirrord): Stopped
-- vg1:0 (ocf::heartbeat:LVM): Stopped
-- ocfs2-1:0 (ocf::heartbeat:Filesystem): Stopped
-- Stopped: [ base-group:1 base-group:2 base-group:3 ]
-+ dlm (ocf::pacemaker:controld): Started hex-0
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ clvm (ocf::lvm2:clvmd): Stopped
-+ cmirrord (ocf::lvm2:cmirrord): Stopped
-+ vg1 (ocf::heartbeat:LVM): Stopped
-+ ocfs2-1 (ocf::heartbeat:Filesystem): Stopped
-+ Stopped: [ hex-7 hex-8 hex-9 ]
- vm-01 (ocf::heartbeat:Xen): Stopped
- fencing-sbd (stonith:external/sbd): Started hex-9
- dummy1 (ocf::heartbeat:Dummy): Started hex-0
-@@ -52,20 +52,20 @@ OFFLINE: [ hex-7 hex-8 ]
- vm-00 (ocf::heartbeat:Xen): Stopped
- Clone Set: base-clone [base-group]
- Resource Group: base-group:0
-- dlm:0 (ocf::pacemaker:controld): Started hex-0
-- o2cb:0 (ocf::ocfs2:o2cb): Started hex-0
-- clvm:0 (ocf::lvm2:clvmd): Started hex-0
-- cmirrord:0 (ocf::lvm2:cmirrord): Started hex-0
-- vg1:0 (ocf::heartbeat:LVM): Stopped
-- ocfs2-1:0 (ocf::heartbeat:Filesystem): Stopped
-+ dlm (ocf::pacemaker:controld): Started hex-0
-+ o2cb (ocf::ocfs2:o2cb): Started hex-0
-+ clvm (ocf::lvm2:clvmd): Started hex-0
-+ cmirrord (ocf::lvm2:cmirrord): Started hex-0
-+ vg1 (ocf::heartbeat:LVM): Stopped
-+ ocfs2-1 (ocf::heartbeat:Filesystem): Stopped
- Resource Group: base-group:1
-- dlm:1 (ocf::pacemaker:controld): Started hex-9
-- o2cb:1 (ocf::ocfs2:o2cb): Started hex-9
-- clvm:1 (ocf::lvm2:clvmd): Started hex-9
-- cmirrord:1 (ocf::lvm2:cmirrord): Started hex-9
-- vg1:1 (ocf::heartbeat:LVM): Stopped
-- ocfs2-1:1 (ocf::heartbeat:Filesystem): Stopped
-- Stopped: [ base-group:2 base-group:3 ]
-+ dlm (ocf::pacemaker:controld): Started hex-9
-+ o2cb (ocf::ocfs2:o2cb): Started hex-9
-+ clvm (ocf::lvm2:clvmd): Started hex-9
-+ cmirrord (ocf::lvm2:cmirrord): Started hex-9
-+ vg1 (ocf::heartbeat:LVM): Stopped
-+ ocfs2-1 (ocf::heartbeat:Filesystem): Stopped
-+ Stopped: [ hex-7 hex-8 ]
- vm-01 (ocf::heartbeat:Xen): Stopped
- fencing-sbd (stonith:external/sbd): Started hex-9
- dummy1 (ocf::heartbeat:Dummy): Started hex-0
-diff --git a/pengine/test10/clone-anon-dup.dot b/pengine/test10/clone-anon-dup.dot
-index 342ad3a..85c7fcc 100644
---- a/pengine/test10/clone-anon-dup.dot
-+++ b/pengine/test10/clone-anon-dup.dot
-@@ -1,12 +1,6 @@
- digraph "g" {
- "all_stopped" [ style=bold color="green" fontcolor="orange" ]
--"apache2:0_delete_0 wc01" [ style=bold color="green" fontcolor="black"]
--"apache2:0_delete_0 wc02" [ style=bold color="green" fontcolor="black"]
--"apache2:0_delete_0 wc03" [ style=bold color="green" fontcolor="black"]
- "apache2:0_stop_0 wc02" -> "all_stopped" [ style = bold]
--"apache2:0_stop_0 wc02" -> "apache2:0_delete_0 wc01" [ style = bold]
--"apache2:0_stop_0 wc02" -> "apache2:0_delete_0 wc02" [ style = bold]
--"apache2:0_stop_0 wc02" -> "apache2:0_delete_0 wc03" [ style = bold]
- "apache2:0_stop_0 wc02" -> "group_webservice:2_stopped_0" [ style = bold]
- "apache2:0_stop_0 wc02" [ style=bold color="green" fontcolor="black"]
- "clone_webservice_stop_0" -> "clone_webservice_stopped_0" [ style = bold]
-diff --git a/pengine/test10/clone-anon-dup.exp b/pengine/test10/clone-anon-dup.exp
-index e06b55a..69ed601 100644
---- a/pengine/test10/clone-anon-dup.exp
-+++ b/pengine/test10/clone-anon-dup.exp
-@@ -1,20 +1,20 @@
-@@ -23,7 +23,7 @@
-@@ -32,7 +32,7 @@
-@@ -41,164 +41,125 @@
-@@ -206,7 +167,7 @@
-diff --git a/pengine/test10/clone-anon-dup.summary b/pengine/test10/clone-anon-dup.summary
-index ed127c3..bffbf15 100644
---- a/pengine/test10/clone-anon-dup.summary
-+++ b/pengine/test10/clone-anon-dup.summary
-@@ -2,12 +2,13 @@
- Current cluster status:
- Online: [ wc01 wc02 wc03 ]
-
-- stonith-1 (stonith:dummy): Stopped
-+ stonith-1 (stonith:dummy): Stopped
- Clone Set: clone_webservice [group_webservice]
- Resource Group: group_webservice:2
-- fs_www:2 (ocf::heartbeat:Filesystem): ORPHANED Stopped
-- apache2:2 (ocf::heartbeat:apache): ORPHANED Started wc02
-+ fs_www (ocf::heartbeat:Filesystem): ORPHANED Stopped
-+ apache2 (ocf::heartbeat:apache): ORPHANED Started wc02
- Started: [ wc01 wc02 ]
-+ Stopped: [ wc03 ]
-
- Transition Summary:
- * Start stonith-1 (wc01)
-@@ -22,9 +23,6 @@ Executing cluster transition:
- * Resource action: stonith-1 start on wc01
- * Pseudo action: group_webservice:2_stop_0
- * Resource action: apache2:0 stop on wc02
-- * Resource action: apache2:0 delete on wc03
-- * Resource action: apache2:0 delete on wc01
-- * Resource action: apache2:0 delete on wc02
- * Pseudo action: all_stopped
- * Pseudo action: group_webservice:2_stopped_0
- * Pseudo action: clone_webservice_stopped_0
-@@ -35,4 +33,5 @@ Online: [ wc01 wc02 wc03 ]
- stonith-1 (stonith:dummy): Started wc01
- Clone Set: clone_webservice [group_webservice]
- Started: [ wc01 wc02 ]
-+ Stopped: [ wc03 ]
-
-diff --git a/pengine/test10/clone-anon-failcount.summary b/pengine/test10/clone-anon-failcount.summary
-index 1f8c853..7e899b3 100644
---- a/pengine/test10/clone-anon-failcount.summary
-+++ b/pengine/test10/clone-anon-failcount.summary
-@@ -23,9 +23,10 @@ Online: [ srv01 srv02 srv03 srv04 ]
- prmStonithN4 (stonith:external/ssh): Started srv03
- Clone Set: clnUMgroup01 [clnUmResource]
- Resource Group: clnUmResource:0
-- clnUMdummy01:0 (ocf::pacemaker:Dummy): Started srv04 FAILED
-- clnUMdummy02:0 (ocf::pacemaker:Dummy): Started srv04
-+ clnUMdummy01 (ocf::pacemaker:Dummy): Started srv04 FAILED
-+ clnUMdummy02 (ocf::pacemaker:Dummy): Started srv04
- Started: [ srv01 ]
-+ Stopped: [ srv02 srv03 ]
- Clone Set: clnPingd [clnPrmPingd]
- Started: [ srv01 srv02 srv03 srv04 ]
- Clone Set: clnDiskd1 [clnPrmDiskd1]
-@@ -105,7 +106,7 @@ Online: [ srv01 srv02 srv03 srv04 ]
- prmStonithN4 (stonith:external/ssh): Started srv03
- Clone Set: clnUMgroup01 [clnUmResource]
- Started: [ srv04 ]
-- Stopped: [ clnUmResource:1 ]
-+ Stopped: [ srv01 srv02 srv03 ]
- Clone Set: clnPingd [clnPrmPingd]
- Started: [ srv01 srv02 srv03 srv04 ]
- Clone Set: clnDiskd1 [clnPrmDiskd1]
-diff --git a/pengine/test10/clone-anon-probe-1.summary b/pengine/test10/clone-anon-probe-1.summary
-index 7567efa..093f59b 100644
---- a/pengine/test10/clone-anon-probe-1.summary
-+++ b/pengine/test10/clone-anon-probe-1.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ mysql-01 mysql-02 ]
-
- Clone Set: ms-drbd0 [drbd0]
-- Stopped: [ drbd0:0 drbd0:1 ]
-+ Stopped: [ mysql-01 mysql-02 ]
-
- Transition Summary:
- * Start drbd0:0 (mysql-01)
-diff --git a/pengine/test10/clone-anon-probe-2.summary b/pengine/test10/clone-anon-probe-2.summary
-index ce278b1..7064e86 100644
---- a/pengine/test10/clone-anon-probe-2.summary
-+++ b/pengine/test10/clone-anon-probe-2.summary
-@@ -4,7 +4,7 @@ Online: [ mysql-01 mysql-02 ]
-
- Clone Set: ms-drbd0 [drbd0]
- Started: [ mysql-02 ]
-- Stopped: [ drbd0:1 ]
-+ Stopped: [ mysql-01 ]
-
- Transition Summary:
- * Start drbd0:1 (mysql-01)
-diff --git a/pengine/test10/clone-colocate-instance-1.summary b/pengine/test10/clone-colocate-instance-1.summary
-index 4cc23cc..d6eaa4f 100644
---- a/pengine/test10/clone-colocate-instance-1.summary
-+++ b/pengine/test10/clone-colocate-instance-1.summary
-@@ -5,7 +5,7 @@ Online: [ alice.demo bob.demo ]
- dummy1 (ocf::heartbeat:Dummy): Stopped
- dummy2 (ocf::heartbeat:Dummy): Stopped
- Clone Set: cl_dummy [dummy]
-- Stopped: [ dummy:0 dummy:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start dummy1 (alice.demo)
-diff --git a/pengine/test10/clone-colocate-instance-2.summary b/pengine/test10/clone-colocate-instance-2.summary
-index 95a64fd..10b380a 100644
---- a/pengine/test10/clone-colocate-instance-2.summary
-+++ b/pengine/test10/clone-colocate-instance-2.summary
-@@ -5,7 +5,7 @@ Online: [ alice.demo bob.demo ]
- dummy1 (ocf::heartbeat:Dummy): Stopped
- dummy2 (ocf::heartbeat:Dummy): Stopped
- Clone Set: cl_dummy [dummy]
-- Stopped: [ dummy:0 dummy:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start dummy1 (bob.demo)
-diff --git a/pengine/test10/clone-max-zero.dot b/pengine/test10/clone-max-zero.dot
-index f4d11bd..33cec0a 100644
---- a/pengine/test10/clone-max-zero.dot
-+++ b/pengine/test10/clone-max-zero.dot
-@@ -11,19 +11,11 @@ digraph "g" {
- "dlm-clone_stop_0" -> "dlm:1_stop_0 c001n12" [ style = bold]
- "dlm-clone_stop_0" [ style=bold color="green" fontcolor="orange" ]
- "dlm-clone_stopped_0" [ style=bold color="green" fontcolor="orange" ]
--"dlm:0_delete_0 c001n11" [ style=bold color="green" fontcolor="black"]
--"dlm:0_delete_0 c001n12" [ style=bold color="green" fontcolor="black"]
- "dlm:0_stop_0 c001n11" -> "all_stopped" [ style = bold]
- "dlm:0_stop_0 c001n11" -> "dlm-clone_stopped_0" [ style = bold]
--"dlm:0_stop_0 c001n11" -> "dlm:0_delete_0 c001n11" [ style = bold]
--"dlm:0_stop_0 c001n11" -> "dlm:0_delete_0 c001n12" [ style = bold]
- "dlm:0_stop_0 c001n11" [ style=bold color="green" fontcolor="black"]
--"dlm:1_delete_0 c001n11" [ style=bold color="green" fontcolor="black"]
--"dlm:1_delete_0 c001n12" [ style=bold color="green" fontcolor="black"]
- "dlm:1_stop_0 c001n12" -> "all_stopped" [ style = bold]
- "dlm:1_stop_0 c001n12" -> "dlm-clone_stopped_0" [ style = bold]
--"dlm:1_stop_0 c001n12" -> "dlm:1_delete_0 c001n11" [ style = bold]
--"dlm:1_stop_0 c001n12" -> "dlm:1_delete_0 c001n12" [ style = bold]
- "dlm:1_stop_0 c001n12" [ style=bold color="green" fontcolor="black"]
- "o2cb-clone_stop_0" -> "o2cb-clone_stopped_0" [ style = bold]
- "o2cb-clone_stop_0" -> "o2cb:0_stop_0 c001n11" [ style = bold]
-diff --git a/pengine/test10/clone-max-zero.exp b/pengine/test10/clone-max-zero.exp
-index 274c117..d071c4b 100644
---- a/pengine/test10/clone-max-zero.exp
-+++ b/pengine/test10/clone-max-zero.exp
-@@ -1,233 +1,181 @@
-@@ -235,22 +183,22 @@
-diff --git a/pengine/test10/clone-max-zero.summary b/pengine/test10/clone-max-zero.summary
-index 2a0b498..54829c8 100644
---- a/pengine/test10/clone-max-zero.summary
-+++ b/pengine/test10/clone-max-zero.summary
-@@ -4,8 +4,8 @@ Online: [ c001n11 c001n12 ]
-
- fencing (stonith:external/ssh): Started c001n11
- Clone Set: dlm-clone [dlm]
-- dlm:0 (ocf::pacemaker:controld): ORPHANED Started c001n12
-- dlm:1 (ocf::pacemaker:controld): ORPHANED Started c001n11
-+ dlm (ocf::pacemaker:controld): ORPHANED Started c001n12
-+ dlm (ocf::pacemaker:controld): ORPHANED Started c001n11
- Clone Set: o2cb-clone [o2cb]
- Started: [ c001n11 c001n12 ]
- Clone Set: clone-drbd0 [drbd0]
-@@ -32,11 +32,7 @@ Executing cluster transition:
- * Pseudo action: o2cb-clone_stopped_0
- * Pseudo action: dlm-clone_stop_0
- * Resource action: dlm:1 stop on c001n12
-- * Resource action: dlm:1 delete on c001n11
-- * Resource action: dlm:1 delete on c001n12
- * Resource action: dlm:0 stop on c001n11
-- * Resource action: dlm:0 delete on c001n11
-- * Resource action: dlm:0 delete on c001n12
- * Pseudo action: dlm-clone_stopped_0
- * Pseudo action: all_stopped
-
-@@ -45,10 +41,11 @@ Online: [ c001n11 c001n12 ]
-
- fencing (stonith:external/ssh): Started c001n11
- Clone Set: dlm-clone [dlm]
-+ Stopped: [ c001n11 c001n12 ]
- Clone Set: o2cb-clone [o2cb]
-- Stopped: [ o2cb:0 o2cb:1 ]
-+ Stopped: [ c001n11 c001n12 ]
- Clone Set: clone-drbd0 [drbd0]
- Started: [ c001n11 c001n12 ]
- Clone Set: c-ocfs2-1 [ocfs2-1]
-- Stopped: [ ocfs2-1:0 ocfs2-1:1 ]
-+ Stopped: [ c001n11 c001n12 ]
-
-diff --git a/pengine/test10/clone-no-shuffle.summary b/pengine/test10/clone-no-shuffle.summary
-index 618da52..a9f3f7a 100644
---- a/pengine/test10/clone-no-shuffle.summary
-+++ b/pengine/test10/clone-no-shuffle.summary
-@@ -5,7 +5,7 @@ Online: [ dktest1sles10 dktest2sles10 ]
- stonith-1 (stonith:dummy): Stopped
- Master/Slave Set: ms-drbd1 [drbd1]
- Masters: [ dktest2sles10 ]
-- Stopped: [ drbd1:1 ]
-+ Stopped: [ dktest1sles10 ]
- testip (ocf::heartbeat:IPaddr2): Started dktest2sles10
-
- Transition Summary:
-@@ -56,6 +56,6 @@ Online: [ dktest1sles10 dktest2sles10 ]
- stonith-1 (stonith:dummy): Started dktest1sles10
- Master/Slave Set: ms-drbd1 [drbd1]
- Slaves: [ dktest1sles10 ]
-- Stopped: [ drbd1:1 ]
-+ Stopped: [ dktest2sles10 ]
- testip (ocf::heartbeat:IPaddr2): Stopped
-
-diff --git a/pengine/test10/clone-order-instance.summary b/pengine/test10/clone-order-instance.summary
-index 8df1c50..4946342 100644
---- a/pengine/test10/clone-order-instance.summary
-+++ b/pengine/test10/clone-order-instance.summary
-@@ -4,7 +4,7 @@ Online: [ alice.demo bob.demo ]
-
- dummy1 (ocf::heartbeat:Dummy): Stopped
- Clone Set: cl_dummy [dummy]
-- Stopped: [ dummy:0 dummy:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start dummy1 (alice.demo)
-diff --git a/pengine/test10/clone-order-primitive.summary b/pengine/test10/clone-order-primitive.summary
-index aecc65b..a628ade 100644
---- a/pengine/test10/clone-order-primitive.summary
-+++ b/pengine/test10/clone-order-primitive.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk pcw2688.see.ed.ac.uk pcw2709.see.ed.ac.uk ]
-
- Clone Set: cups_clone [cups_lsb]
-- Stopped: [ cups_lsb:0 cups_lsb:1 ]
-+ Stopped: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk pcw2688.see.ed.ac.uk pcw2709.see.ed.ac.uk ]
- smb_lsb (lsb:smb): Stopped
-
- Transition Summary:
-@@ -23,5 +23,6 @@ Online: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk pcw2688.see.ed.ac.uk pcw2709
-
- Clone Set: cups_clone [cups_lsb]
- Started: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk ]
-+ Stopped: [ pcw2688.see.ed.ac.uk pcw2709.see.ed.ac.uk ]
- smb_lsb (lsb:smb): Started pcw2688.see.ed.ac.uk
-
-diff --git a/pengine/test10/cloned-group.dot b/pengine/test10/cloned-group.dot
-index 0d71821..cf6ec27 100644
---- a/pengine/test10/cloned-group.dot
-+++ b/pengine/test10/cloned-group.dot
-@@ -1,8 +1,6 @@
- digraph "g" {
- "all_stopped" [ style=bold color="green" fontcolor="orange" ]
--"apache2:0_delete_0 webcluster01" [ style=bold color="green" fontcolor="black"]
- "apache2:0_stop_0 webcluster01" -> "all_stopped" [ style = bold]
--"apache2:0_stop_0 webcluster01" -> "apache2:0_delete_0 webcluster01" [ style = bold]
- "apache2:0_stop_0 webcluster01" -> "grrr:2_stopped_0" [ style = bold]
- "apache2:0_stop_0 webcluster01" [ style=bold color="green" fontcolor="black"]
- "apache2:1_monitor_10000 webcluster01" [ style=bold color="green" fontcolor="black" ]
-@@ -43,11 +41,9 @@ digraph "g" {
- "grrr:2_stop_0" [ style=bold color="green" fontcolor="orange" ]
- "grrr:2_stopped_0" -> "apache2_clone_stopped_0" [ style = bold]
- "grrr:2_stopped_0" [ style=bold color="green" fontcolor="orange" ]
--"mysql-proxy:0_delete_0 webcluster01" [ style=bold color="green" fontcolor="black"]
- "mysql-proxy:0_stop_0 webcluster01" -> "all_stopped" [ style = bold]
- "mysql-proxy:0_stop_0 webcluster01" -> "apache2:0_stop_0 webcluster01" [ style = bold]
- "mysql-proxy:0_stop_0 webcluster01" -> "grrr:2_stopped_0" [ style = bold]
--"mysql-proxy:0_stop_0 webcluster01" -> "mysql-proxy:0_delete_0 webcluster01" [ style = bold]
- "mysql-proxy:0_stop_0 webcluster01" [ style=bold color="green" fontcolor="black"]
- "mysql-proxy:1_monitor_10000 webcluster01" [ style=bold color="green" fontcolor="black" ]
- "mysql-proxy:1_start_0 webcluster01" -> "grrr:0_running_0" [ style = bold]
-diff --git a/pengine/test10/cloned-group.exp b/pengine/test10/cloned-group.exp
-index 0a833b8..1e2ee27 100644
---- a/pengine/test10/cloned-group.exp
-+++ b/pengine/test10/cloned-group.exp
-@@ -1,96 +1,96 @@
-@@ -103,39 +103,39 @@
-@@ -148,149 +148,123 @@
- - -- -+ - - - - - -- -+ - - -- -+ - - -- -+ - - - - - -- -+ - - - - - -- -+ - - - - - -- -+ - - - - - - -- -+ - - -- -+ - - - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -+ - - - - - - -- -+ - - - -- -+ - -- -+ - - - - - -- -+ - - -- -+ - - -- -+ - - - -- -+ - -- -+ - - - - - -- -+ - -- -+ - - - - - -- -+ - - -- -+ - - - -- -+ - -- -+ - - - - - -- -+ - - - -- -+ - - - -@@ -298,16 +272,16 @@ - - - -- -+ - - -- -+ - - -- -+ - - -- -+ - - - -diff --git a/pengine/test10/cloned-group.summary b/pengine/test10/cloned-group.summary -index 1ec8e79..7d64be4 100644 ---- a/pengine/test10/cloned-group.summary -+++ b/pengine/test10/cloned-group.summary -@@ -5,10 +5,10 @@ OFFLINE: [ webcluster02 ] - - Clone Set: apache2_clone [grrr] - Resource Group: grrr:2 -- apache2:2 (ocf::heartbeat:apache): ORPHANED Started webcluster01 -- mysql-proxy:2 (lsb:mysql-proxy): ORPHANED Started webcluster01 -+ apache2 (ocf::heartbeat:apache): ORPHANED Started webcluster01 -+ mysql-proxy (lsb:mysql-proxy): ORPHANED Started webcluster01 - Started: [ webcluster01 ] -- Stopped: [ grrr:1 ] -+ Stopped: [ webcluster02 ] - - Transition Summary: - * Restart apache2:0 (Started webcluster01) -@@ -22,10 +22,8 @@ Executing cluster transition: - * Resource action: mysql-proxy:1 stop on webcluster01 - * Pseudo action: grrr:2_stop_0 - * Resource action: mysql-proxy:0 stop on webcluster01 -- * Resource action: mysql-proxy:0 delete on webcluster01 - * Resource action: apache2:1 stop on webcluster01 - * Resource action: apache2:0 stop on webcluster01 -- * Resource action: apache2:0 delete on webcluster01 - * Pseudo action: all_stopped - * Pseudo action: grrr:0_stopped_0 - * Pseudo action: grrr:2_stopped_0 -@@ -45,5 +43,5 @@ OFFLINE: [ webcluster02 ] - - Clone Set: apache2_clone [grrr] - Started: [ webcluster01 ] -- Stopped: [ grrr:1 ] -+ Stopped: [ webcluster02 ] - -diff --git a/pengine/test10/coloc-clone-stays-active.summary b/pengine/test10/coloc-clone-stays-active.summary -index a00c775..b5edc92 100644 ---- a/pengine/test10/coloc-clone-stays-active.summary -+++ b/pengine/test10/coloc-clone-stays-active.summary -@@ -29,7 +29,7 @@ Online: [ s01-0 s01-1 ] - Masters: [ s01-1 ] - Slaves: [ s01-0 ] - Clone Set: cl-o2cb [o2cb] -- Stopped: [ o2cb:0 o2cb:1 ] -+ Stopped: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-s01-service [drbd-s01-service] - Masters: [ s01-0 s01-1 ] - Clone Set: cl-s01-service-fs [s01-service-fs] -@@ -37,7 +37,7 @@ Online: [ s01-0 s01-1 ] - Clone Set: cl-ietd [ietd] - Started: [ s01-0 s01-1 ] - Clone Set: cl-dhcpd [dhcpd] -- Stopped: [ dhcpd:0 dhcpd:1 ] -+ Stopped: [ s01-0 s01-1 ] - Resource Group: http-server - vip-233 (ocf::heartbeat:IPaddr2): Started s01-0 - nginx (lsb:nginx): Stopped -@@ -73,7 +73,7 @@ Online: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-vds-tftpboot [drbd-vds-tftpboot] - Masters: [ s01-0 s01-1 ] - Clone Set: cl-vds-tftpboot-fs [vds-tftpboot-fs] -- Stopped: [ vds-tftpboot-fs:0 vds-tftpboot-fs:1 ] -+ Stopped: [ s01-0 s01-1 ] - Clone Set: cl-gfs2 [gfs2] - Started: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-vds-http [drbd-vds-http] -@@ -137,7 +137,7 @@ Online: [ s01-0 s01-1 ] - Masters: [ s01-1 ] - Slaves: [ s01-0 ] - Clone Set: cl-o2cb [o2cb] -- Stopped: [ o2cb:0 o2cb:1 ] -+ Stopped: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-s01-service [drbd-s01-service] - Masters: [ s01-0 s01-1 ] - Clone Set: cl-s01-service-fs [s01-service-fs] -@@ -145,7 +145,7 @@ Online: [ s01-0 s01-1 ] - Clone Set: cl-ietd [ietd] - Started: [ s01-0 s01-1 ] - Clone Set: cl-dhcpd [dhcpd] -- 
Stopped: [ dhcpd:0 dhcpd:1 ] -+ Stopped: [ s01-0 s01-1 ] - Resource Group: http-server - vip-233 (ocf::heartbeat:IPaddr2): Started s01-0 - nginx (lsb:nginx): Stopped -@@ -181,7 +181,7 @@ Online: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-vds-tftpboot [drbd-vds-tftpboot] - Masters: [ s01-0 s01-1 ] - Clone Set: cl-vds-tftpboot-fs [vds-tftpboot-fs] -- Stopped: [ vds-tftpboot-fs:0 vds-tftpboot-fs:1 ] -+ Stopped: [ s01-0 s01-1 ] - Clone Set: cl-gfs2 [gfs2] - Started: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-vds-http [drbd-vds-http] -diff --git a/pengine/test10/colocate-primitive-with-clone.summary b/pengine/test10/colocate-primitive-with-clone.summary -index 41f95ea..5e4c511 100644 ---- a/pengine/test10/colocate-primitive-with-clone.summary -+++ b/pengine/test10/colocate-primitive-with-clone.summary -@@ -38,19 +38,19 @@ Online: [ srv01 srv02 srv03 srv04 ] - prmStonithN4 (stonith:external/ssh): Started srv03 - Clone Set: clnUMgroup01 [clnUmResource] - Started: [ srv04 ] -- Stopped: [ clnUmResource:1 ] -+ Stopped: [ srv01 srv02 srv03 ] - Clone Set: clnPingd [clnPrmPingd] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnPrmPingd:3 ] -+ Stopped: [ srv01 ] - Clone Set: clnDiskd1 [clnPrmDiskd1] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnPrmDiskd1:3 ] -+ Stopped: [ srv01 ] - Clone Set: clnG3dummy1 [clnG3dummy01] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnG3dummy01:3 ] -+ Stopped: [ srv01 ] - Clone Set: clnG3dummy2 [clnG3dummy02] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnG3dummy02:3 ] -+ Stopped: [ srv01 ] - - Transition Summary: - * Start UmVIPcheck (srv04) -@@ -109,17 +109,17 @@ Online: [ srv01 srv02 srv03 srv04 ] - prmStonithN4 (stonith:external/ssh): Started srv03 - Clone Set: clnUMgroup01 [clnUmResource] - Started: [ srv04 ] -- Stopped: [ clnUmResource:1 ] -+ Stopped: [ srv01 srv02 srv03 ] - Clone Set: clnPingd [clnPrmPingd] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnPrmPingd:3 ] -+ Stopped: [ srv01 ] - Clone Set: clnDiskd1 [clnPrmDiskd1] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnPrmDiskd1:3 ] -+ Stopped: [ srv01 ] - Clone Set: clnG3dummy1 [clnG3dummy01] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnG3dummy01:3 ] -+ Stopped: [ srv01 ] - Clone Set: clnG3dummy2 [clnG3dummy02] - Started: [ srv02 srv03 srv04 ] -- Stopped: [ clnG3dummy02:3 ] -+ Stopped: [ srv01 ] - -diff --git a/pengine/test10/colocation_constraint_stops_master.summary b/pengine/test10/colocation_constraint_stops_master.summary -index c052861..1a80c4f 100644 ---- a/pengine/test10/colocation_constraint_stops_master.summary -+++ b/pengine/test10/colocation_constraint_stops_master.summary -@@ -4,6 +4,7 @@ Online: [ fc16-builder fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] - Masters: [ fc16-builder ] -+ Stopped: [ fc16-builder2 ] - - Transition Summary: - * Demote NATIVE_RSC_A:0 (Master -> Stopped fc16-builder) -@@ -32,5 +33,5 @@ Revised cluster status: - Online: [ fc16-builder fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] -- Stopped: [ NATIVE_RSC_A:0 ] -+ Stopped: [ fc16-builder fc16-builder2 ] - -diff --git a/pengine/test10/colocation_constraint_stops_slave.summary b/pengine/test10/colocation_constraint_stops_slave.summary -index 5528308..f928563 100644 ---- a/pengine/test10/colocation_constraint_stops_slave.summary -+++ b/pengine/test10/colocation_constraint_stops_slave.summary -@@ -5,6 +5,7 @@ OFFLINE: [ fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] - Slaves: [ fc16-builder ] -+ Stopped: [ fc16-builder2 ] - NATIVE_RSC_B 
(ocf::pacemaker:Dummy): Started fc16-builder - - Transition Summary: -@@ -28,6 +29,6 @@ Online: [ fc16-builder ] - OFFLINE: [ fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] -- Stopped: [ NATIVE_RSC_A:0 ] -+ Stopped: [ fc16-builder fc16-builder2 ] - NATIVE_RSC_B (ocf::pacemaker:Dummy): Stopped - -diff --git a/pengine/test10/date-1.exp b/pengine/test10/date-1.exp -index f3deedf..2a64721 100644 ---- a/pengine/test10/date-1.exp -+++ b/pengine/test10/date-1.exp -@@ -25,7 +25,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/failcount-block.dot b/pengine/test10/failcount-block.dot -new file mode 100644 -index 0000000..c45ebad ---- /dev/null -+++ b/pengine/test10/failcount-block.dot -@@ -0,0 +1,12 @@ -+digraph "g" { -+"rsc_pcmk-1_clear_failcount pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-1_monitor_5000 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-3_clear_failcount pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-3_monitor_5000 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-3_start_0 pcmk-1" -> "rsc_pcmk-3_monitor_5000 pcmk-1" [ style = bold] -+"rsc_pcmk-3_start_0 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-4_monitor_5000 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-4_start_0 pcmk-1" -> "rsc_pcmk-4_monitor_5000 pcmk-1" [ style = bold] -+"rsc_pcmk-4_start_0 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-5_clear_failcount pcmk-1" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/failcount-block.exp b/pengine/test10/failcount-block.exp -new file mode 100644 -index 0000000..9ac63a2 ---- /dev/null -+++ b/pengine/test10/failcount-block.exp -@@ -0,0 +1,83 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/failcount-block.scores b/pengine/test10/failcount-block.scores -new file mode 100644 -index 0000000..09896aa ---- /dev/null -+++ b/pengine/test10/failcount-block.scores -@@ -0,0 +1,11 @@ -+Allocation scores: -+native_color: rsc_pcmk-1 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-1 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-2 allocation score on pcmk-1: INFINITY -+native_color: rsc_pcmk-2 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-3 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-3 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-4 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-4 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-5 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-5 allocation score on pcmk-4: 0 -diff --git a/pengine/test10/failcount-block.summary b/pengine/test10/failcount-block.summary -new file mode 100644 -index 0000000..93a2022 ---- /dev/null -+++ b/pengine/test10/failcount-block.summary -@@ -0,0 +1,35 @@ -+ -+Current cluster status: -+Online: [ pcmk-1 ] -+OFFLINE: [ pcmk-4 ] -+ -+ rsc_pcmk-1 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-2 (ocf::heartbeat:IPaddr2): Started pcmk-1 (unmanaged) FAILED -+ rsc_pcmk-3 (ocf::heartbeat:IPaddr2): Stopped -+ rsc_pcmk-4 (ocf::heartbeat:IPaddr2): Stopped -+ rsc_pcmk-5 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ -+Transition Summary: -+ * Start rsc_pcmk-3 (pcmk-1) -+ * Start rsc_pcmk-4 (pcmk-1) -+ -+Executing cluster transition: -+ * Resource action: rsc_pcmk-1 
monitor=5000 on pcmk-1 -+ * Cluster action: clear_failcount for rsc_pcmk-1 on pcmk-1 -+ * Resource action: rsc_pcmk-3 start on pcmk-1 -+ * Cluster action: clear_failcount for rsc_pcmk-3 on pcmk-1 -+ * Resource action: rsc_pcmk-4 start on pcmk-1 -+ * Cluster action: clear_failcount for rsc_pcmk-5 on pcmk-1 -+ * Resource action: rsc_pcmk-3 monitor=5000 on pcmk-1 -+ * Resource action: rsc_pcmk-4 monitor=5000 on pcmk-1 -+ -+Revised cluster status: -+Online: [ pcmk-1 ] -+OFFLINE: [ pcmk-4 ] -+ -+ rsc_pcmk-1 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-2 (ocf::heartbeat:IPaddr2): Started pcmk-1 (unmanaged) FAILED -+ rsc_pcmk-3 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-4 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-5 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ -diff --git a/pengine/test10/failcount-block.xml b/pengine/test10/failcount-block.xml -new file mode 100644 -index 0000000..12d4937 ---- /dev/null -+++ b/pengine/test10/failcount-block.xml -@@ -0,0 +1,147 @@ [147 lines of CIB XML; element content lost in extraction] -diff --git a/pengine/test10/failcount.dot b/pengine/test10/failcount.dot -index aa7124e..41966ad 100644 ---- a/pengine/test10/failcount.dot -+++ b/pengine/test10/failcount.dot -@@ -1,22 +1,6 @@ - digraph "g" { --"re-dummy1_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy1_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy2_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy2_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy3_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy3_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy4_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy4_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv:0_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv:0_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-named-103ns1-ip_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-named-103ns1-ip_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-named-103ns2-ip_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-named-103ns2-ip_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] -+"re-named-lsb:0_monitor_10000 dresproddns02" [ style=bold color="green" fontcolor="black"] -+"re-named-lsb:1_monitor_10000 dresproddns01" [ style=bold color="green" fontcolor="black"] - "re-openfire-lsb:0_clear_failcount dresproddns01" [ style=bold color="green" fontcolor="black"] - "re-openfire-lsb:1_clear_failcount dresproddns02" [ style=bold color="green"
fontcolor="black"] - } -diff --git a/pengine/test10/failcount.exp b/pengine/test10/failcount.exp -index 396bdb1..9b7234b 100644 ---- a/pengine/test10/failcount.exp -+++ b/pengine/test10/failcount.exp -@@ -19,162 +19,18 @@ - - - -- -- -- -+ -+ -+ - - - - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -+ -+ -+ - - - -diff --git a/pengine/test10/failcount.summary b/pengine/test10/failcount.summary -index 9ee764f..2a8f6a4 100644 ---- a/pengine/test10/failcount.summary -+++ b/pengine/test10/failcount.summary -@@ -28,26 +28,10 @@ Online: [ dresproddns01 dresproddns02 ] - Transition Summary: - - Executing cluster transition: -- * Cluster action: clear_failcount on dresproddns01 -- * Cluster action: clear_failcount on dresproddns02 -- * Resource action: re-named-103ns2-ip delete on dresproddns02 -- * Resource action: re-named-103ns2-ip delete on dresproddns01 -- * Resource action: re-monitoring-lv:0 delete on dresproddns02 -- * Resource action: re-monitoring-lv:0 delete on dresproddns01 -- * Resource action: re-dummy delete on dresproddns02 -- * Resource action: re-dummy delete on dresproddns01 -- * Resource action: re-dummy3 delete on dresproddns02 -- * Resource action: re-dummy3 delete on dresproddns01 -- * Resource action: re-dummy4 delete on dresproddns02 -- * Resource action: re-dummy4 delete on dresproddns01 -- * Resource action: re-dummy1 delete on dresproddns02 -- * Resource action: re-dummy1 delete on dresproddns01 -- * Resource action: re-monitoring-lv delete on dresproddns02 -- * Resource action: re-monitoring-lv delete on dresproddns01 -- * Resource action: re-dummy2 delete on dresproddns02 -- * Resource action: re-dummy2 delete on dresproddns01 -- * Resource action: re-named-103ns1-ip delete on dresproddns02 -- * Resource action: re-named-103ns1-ip delete on dresproddns01 -+ * Cluster action: clear_failcount for re-openfire-lsb on dresproddns01 -+ * Cluster action: clear_failcount for re-openfire-lsb on dresproddns02 -+ * Resource action: re-named-lsb:1 monitor=10000 on dresproddns01 -+ * Resource action: re-named-lsb:0 monitor=10000 on dresproddns02 - - Revised cluster status: - Online: [ dresproddns01 dresproddns02 ] -diff --git a/pengine/test10/failcount.xml b/pengine/test10/failcount.xml -index 16c07f2..5ac8a2c 100644 ---- a/pengine/test10/failcount.xml -+++ b/pengine/test10/failcount.xml -@@ -565,13 +565,6 @@ - - - -- -- -- -- -- -- -- - - - -@@ -580,32 +573,17 @@ - - - -- -- -- -- -- -- - - - -- -- -- - - - - -- -- -- - - - - -- -- -- - - - -@@ -621,9 +599,6 @@ - - - -- -- -- - - - -@@ -645,9 +620,6 @@ - - - -- -- -- - - - -@@ -711,9 +683,6 @@ - - - -- -- -- - - - -@@ -743,34 +712,16 @@ - - - -- -- -- - - - - -- -- -- -- -- -- - - - -- -- -- - - - -- -- -- -- -- -- - - - -@@ -787,12 +738,6 @@ - - - -- -- -- -- -- -- - - - -diff --git a/pengine/test10/group14.summary b/pengine/test10/group14.summary -index 0f6b089..8bea277 100644 ---- a/pengine/test10/group14.summary -+++ b/pengine/test10/group14.summary -@@ -17,7 +17,7 @@ OFFLINE: [ c001n02 c001n03 c001n04 c001n05 ] - rsc_c001n06 (ocf::heartbeat:IPaddr): Stopped - rsc_c001n07 (ocf::heartbeat:IPaddr): Stopped - Clone Set: DoFencing 
[child_DoFencing] -- Stopped: [ child_DoFencing:0 child_DoFencing:1 child_DoFencing:2 child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -@@ -84,7 +84,7 @@ OFFLINE: [ c001n02 c001n03 c001n04 c001n05 ] - rsc_c001n07 (ocf::heartbeat:IPaddr): Stopped - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n06 c001n07 ] -- Stopped: [ child_DoFencing:2 child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n02 c001n03 c001n04 c001n05 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -diff --git a/pengine/test10/history-1.summary b/pengine/test10/history-1.summary -index 8add326..6ae03e2 100644 ---- a/pengine/test10/history-1.summary -+++ b/pengine/test10/history-1.summary -@@ -5,7 +5,7 @@ OFFLINE: [ pcmk-4 ] - - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - Resource Group: group-1 - r192.168.101.181 (ocf::heartbeat:IPaddr): Stopped - r192.168.101.182 (ocf::heartbeat:IPaddr): Stopped -@@ -18,10 +18,10 @@ OFFLINE: [ pcmk-4 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-1 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Slaves: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - - Transition Summary: - -@@ -33,7 +33,7 @@ OFFLINE: [ pcmk-4 ] - - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - Resource Group: group-1 - r192.168.101.181 (ocf::heartbeat:IPaddr): Stopped - r192.168.101.182 (ocf::heartbeat:IPaddr): Stopped -@@ -46,8 +46,8 @@ OFFLINE: [ pcmk-4 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-1 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Slaves: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - -diff --git a/pengine/test10/honor_stonith_rsc_order1.summary b/pengine/test10/honor_stonith_rsc_order1.summary -index c9b0f57..a51aaa1 100644 ---- a/pengine/test10/honor_stonith_rsc_order1.summary -+++ b/pengine/test10/honor_stonith_rsc_order1.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Resource Group: S_GROUP - S_B (stonith:fence_xvm): Stopped - A (ocf::pacemaker:Dummy): Stopped -diff --git a/pengine/test10/honor_stonith_rsc_order2.summary b/pengine/test10/honor_stonith_rsc_order2.summary -index 3ff043d..983ff77 100644 ---- a/pengine/test10/honor_stonith_rsc_order2.summary -+++ b/pengine/test10/honor_stonith_rsc_order2.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Resource Group: S_GROUP - S_B (stonith:fence_xvm): Stopped - S_C (stonith:fence_xvm): Stopped -diff --git a/pengine/test10/honor_stonith_rsc_order3.summary b/pengine/test10/honor_stonith_rsc_order3.summary -index 14a37d6..12a9c6b 100644 ---- a/pengine/test10/honor_stonith_rsc_order3.summary -+++ 
b/pengine/test10/honor_stonith_rsc_order3.summary -@@ -3,9 +3,9 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: S_CLONE2 [S_GROUP] -- Stopped: [ S_GROUP:0 ] -+ Stopped: [ fc16-builder ] - A (ocf::pacemaker:Dummy): Stopped - - Transition Summary: -diff --git a/pengine/test10/ignore_stonith_rsc_order3.summary b/pengine/test10/ignore_stonith_rsc_order3.summary -index b671f29..1f1a71e 100644 ---- a/pengine/test10/ignore_stonith_rsc_order3.summary -+++ b/pengine/test10/ignore_stonith_rsc_order3.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Resource Group: MIXED_GROUP - A (ocf::pacemaker:Dummy): Stopped - S_B (stonith:fence_xvm): Stopped -diff --git a/pengine/test10/ignore_stonith_rsc_order4.summary b/pengine/test10/ignore_stonith_rsc_order4.summary -index fb0fa54..06aa292 100644 ---- a/pengine/test10/ignore_stonith_rsc_order4.summary -+++ b/pengine/test10/ignore_stonith_rsc_order4.summary -@@ -3,9 +3,9 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: S_CLONE2 [MIXED_GROUP] -- Stopped: [ MIXED_GROUP:0 ] -+ Stopped: [ fc16-builder ] - - Transition Summary: - * Start S_A:0 (fc16-builder) -diff --git a/pengine/test10/inc10.summary b/pengine/test10/inc10.summary -index cb03155..77552e7 100644 ---- a/pengine/test10/inc10.summary -+++ b/pengine/test10/inc10.summary -@@ -38,8 +38,8 @@ Online: [ xen-1 xen-3 xen-4 ] - - Clone Set: DoFencing [child_DoFencing] - Started: [ xen-1 xen-3 xen-4 ] -- Stopped: [ child_DoFencing:3 ] -+ Stopped: [ xen-2 ] - Clone Set: ocfs2-clone [ocfs2] - Started: [ xen-1 xen-3 xen-4 ] -- Stopped: [ ocfs2:3 ] -+ Stopped: [ xen-2 ] - -diff --git a/pengine/test10/inc12.summary b/pengine/test10/inc12.summary -index 7171c08..af2315d 100644 ---- a/pengine/test10/inc12.summary -+++ b/pengine/test10/inc12.summary -@@ -16,7 +16,7 @@ Online: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - rsc_c001n07 (ocf::heartbeat:IPaddr): Started c001n07 - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n02 c001n04 c001n05 c001n06 c001n07 ] -- Stopped: [ child_DoFencing:5 ] -+ Stopped: [ c001n03 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -@@ -114,7 +114,7 @@ Online: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - rsc_c001n06 (ocf::heartbeat:IPaddr): Stopped - rsc_c001n07 (ocf::heartbeat:IPaddr): Stopped - Clone Set: DoFencing [child_DoFencing] -- Stopped: [ child_DoFencing:0 child_DoFencing:1 child_DoFencing:2 child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -diff --git a/pengine/test10/inc6.summary b/pengine/test10/inc6.summary -index ea5f085..cf84c1f 100644 ---- a/pengine/test10/inc6.summary -+++ b/pengine/test10/inc6.summary -@@ -4,14 +4,12 @@ Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 ] - Clone Set: rsc2 [child_rsc2] (unique) - child_rsc2:0 (heartbeat:apache): Started node1 - child_rsc2:1 (heartbeat:apache): Started node1 - child_rsc2:2 (heartbeat:apache): Stopped - 
Clone Set: rsc3 [child_rsc3] - Started: [ node1 node2 ] -- Stopped: [ child_rsc3:2 ] - Clone Set: rsc4 [child_rsc4] (unique) - child_rsc4:0 (heartbeat:apache): Started node1 - child_rsc4:1 (heartbeat:apache): Started node1 -@@ -22,14 +20,12 @@ Online: [ node1 node2 ] - child_rsc5:2 (heartbeat:apache): Stopped - Clone Set: rsc6 [child_rsc6] - Started: [ node1 node2 ] -- Stopped: [ child_rsc6:2 ] - Clone Set: rsc7 [child_rsc7] (unique) - child_rsc7:0 (heartbeat:apache): Started node2 - child_rsc7:1 (heartbeat:apache): Started node2 - child_rsc7:2 (heartbeat:apache): Stopped - Clone Set: rsc8 [child_rsc8] - Started: [ node1 node2 ] -- Stopped: [ child_rsc8:2 ] - - Transition Summary: - * Move child_rsc2:1 (Started node1 -> node2) -@@ -79,14 +75,12 @@ Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 ] - Clone Set: rsc2 [child_rsc2] (unique) - child_rsc2:0 (heartbeat:apache): Started node1 - child_rsc2:1 (heartbeat:apache): Started [ node1 node2 ] - child_rsc2:2 (heartbeat:apache): Stopped - Clone Set: rsc3 [child_rsc3] - Started: [ node1 node2 ] -- Stopped: [ child_rsc3:2 ] - Clone Set: rsc4 [child_rsc4] (unique) - child_rsc4:0 (heartbeat:apache): Started node1 - child_rsc4:1 (heartbeat:apache): Started [ node1 node2 ] -@@ -97,12 +91,10 @@ Online: [ node1 node2 ] - child_rsc5:2 (heartbeat:apache): Stopped - Clone Set: rsc6 [child_rsc6] - Started: [ node1 node2 ] -- Stopped: [ child_rsc6:2 ] - Clone Set: rsc7 [child_rsc7] (unique) - child_rsc7:0 (heartbeat:apache): Started node2 - child_rsc7:1 (heartbeat:apache): Started node1 - child_rsc7:2 (heartbeat:apache): Stopped - Clone Set: rsc8 [child_rsc8] - Started: [ node1 node2 ] -- Stopped: [ child_rsc8:2 ] - -diff --git a/pengine/test10/inc9.dot b/pengine/test10/inc9.dot -index 8e29738..f4d6649 100644 ---- a/pengine/test10/inc9.dot -+++ b/pengine/test10/inc9.dot -@@ -1,20 +1,12 @@ - digraph "g" { - "all_stopped" [ style=bold color="green" fontcolor="orange" ] --"child_rsc1:1_delete_0 node1" [ style=bold color="green" fontcolor="black"] --"child_rsc1:1_delete_0 node2" [ style=bold color="green" fontcolor="black"] - "child_rsc1:1_stop_0 node1" -> "all_stopped" [ style = bold] --"child_rsc1:1_stop_0 node1" -> "child_rsc1:1_delete_0 node1" [ style = bold] --"child_rsc1:1_stop_0 node1" -> "child_rsc1:1_delete_0 node2" [ style = bold] - "child_rsc1:1_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] - "child_rsc1:1_stop_0 node1" [ style=bold color="green" fontcolor="black"] - "child_rsc1:1_stop_0 node2" -> "all_stopped" [ style = bold] --"child_rsc1:1_stop_0 node2" -> "child_rsc1:1_delete_0 node1" [ style = bold] --"child_rsc1:1_stop_0 node2" -> "child_rsc1:1_delete_0 node2" [ style = bold] - "child_rsc1:1_stop_0 node2" -> "rsc1_stopped_0" [ style = bold] - "child_rsc1:1_stop_0 node2" [ style=bold color="green" fontcolor="black"] --"child_rsc1:2_delete_0 node1" [ style=bold color="green" fontcolor="black"] - "child_rsc1:2_stop_0 node1" -> "all_stopped" [ style = bold] --"child_rsc1:2_stop_0 node1" -> "child_rsc1:2_delete_0 node1" [ style = bold] - "child_rsc1:2_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] - "child_rsc1:2_stop_0 node1" [ style=bold color="green" fontcolor="black"] - "rsc1_stop_0" -> "child_rsc1:1_stop_0 node1" [ style = bold] -diff --git a/pengine/test10/inc9.exp b/pengine/test10/inc9.exp -index 8610824..a6d26c9 100644 ---- a/pengine/test10/inc9.exp -+++ b/pengine/test10/inc9.exp -@@ -1,138 +1,73 @@ [XML hunk; element content lost in extraction]
-@@ -140,13 +75,13 @@ [XML hunk; element content lost in extraction] -diff --git a/pengine/test10/inc9.summary b/pengine/test10/inc9.summary -index f7ae82a..5a7f123 100644 ---- a/pengine/test10/inc9.summary -+++ b/pengine/test10/inc9.summary -@@ -3,11 +3,10 @@ Current cluster status: - Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] -- child_rsc1:5 (heartbeat:apache): ORPHANED Started node1 -- child_rsc1:6 (heartbeat:apache): ORPHANED Started node1 -- child_rsc1:7 (heartbeat:apache): ORPHANED Started node2 -+ child_rsc1 (heartbeat:apache): ORPHANED Started node1 -+ child_rsc1 (heartbeat:apache): ORPHANED Started node1 -+ child_rsc1 (heartbeat:apache): ORPHANED Started node2 - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 child_rsc1:3 child_rsc1:4 ] - - Transition Summary: - * Stop child_rsc1:5 (node1) -@@ -17,13 +16,8 @@ Transition Summary: - Executing cluster transition: - * Pseudo action: rsc1_stop_0 - * Resource action: child_rsc1:1 stop on node1 -- * Resource action: child_rsc1:1 delete on node2 -- * Resource action: child_rsc1:1 delete on node1 - * Resource action: child_rsc1:2 stop on node1 -- * Resource action: child_rsc1:2 delete on node1 - * Resource action: child_rsc1:1 stop on node2 -- * Resource action: child_rsc1:1 delete on node2 -- * Resource action: child_rsc1:1 delete on node1 - * Pseudo action: rsc1_stopped_0 - * Pseudo action: all_stopped - -@@ -32,5 +26,4 @@ Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 child_rsc1:3 child_rsc1:4 ] - -diff --git a/pengine/test10/interleave-pseudo-stop.summary b/pengine/test10/interleave-pseudo-stop.summary -index 9ab0a63..7ac882d 100644 ---- a/pengine/test10/interleave-pseudo-stop.summary -+++ b/pengine/test10/interleave-pseudo-stop.summary -@@ -65,14 +65,14 @@ OFFLINE: [ node1 ] - - Clone Set: stonithcloneset [stonithclone] - Started: [ node2 ] -- Stopped: [ stonithclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmscloneset [evmsclone] - Started: [ node2 ] -- Stopped: [ evmsclone:1 ] -+ Stopped: [ node1 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node2 ] -- Stopped: [ imagestoreclone:1 ] -+ Stopped: [ node1 ] - Clone Set: configstorecloneset [configstoreclone] - Started: [ node2 ] -- Stopped: [ configstoreclone:1 ] -+ Stopped: [ node1 ] - -diff --git a/pengine/test10/interleave-restart.summary b/pengine/test10/interleave-restart.summary -index ca5a593..32ff592 100644 ---- a/pengine/test10/interleave-restart.summary -+++ b/pengine/test10/interleave-restart.summary -@@ -5,7 +5,7 @@ Online: [ node1 node2 ] - Clone Set: stonithcloneset [stonithclone] - Started: [ node1 node2 ] - Clone Set: evmscloneset [evmsclone] -- evmsclone:1 (ocf::heartbeat:EvmsSCC): Started node1 FAILED -+ evmsclone (ocf::heartbeat:EvmsSCC): Started node1 FAILED - Started: [ node2 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node1 node2 ] -diff --git a/pengine/test10/interleave-stop.summary b/pengine/test10/interleave-stop.summary -index 042e62d..14aa585 100644 ---- a/pengine/test10/interleave-stop.summary -+++ b/pengine/test10/interleave-stop.summary -@@ -60,14 +60,14 @@ Online: [ node2 ] - - Clone Set: stonithcloneset [stonithclone]
- Started: [ node2 ] -- Stopped: [ stonithclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmscloneset [evmsclone] - Started: [ node2 ] -- Stopped: [ evmsclone:1 ] -+ Stopped: [ node1 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node2 ] -- Stopped: [ imagestoreclone:1 ] -+ Stopped: [ node1 ] - Clone Set: configstorecloneset [configstoreclone] - Started: [ node2 ] -- Stopped: [ configstoreclone:1 ] -+ Stopped: [ node1 ] - -diff --git a/pengine/test10/load-stopped-loop.summary b/pengine/test10/load-stopped-loop.summary -index c14e05d..e514e82 100644 ---- a/pengine/test10/load-stopped-loop.summary -+++ b/pengine/test10/load-stopped-loop.summary -@@ -14,79 +14,69 @@ Online: [ mgmt01 v03-a v03-b ] - stonith-v03-d (stonith:fence_ipmilan): Stopped - Clone Set: cl-clvmd [clvmd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ] - Clone Set: cl-dlm [dlm] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ] - Clone Set: cl-iscsid [iscsid] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ] - Clone Set: cl-libvirtd [libvirtd] - Started: [ v03-a v03-b ] -- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-multipathd [multipathd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ] - Clone Set: cl-node-params [node-params] - Started: [ v03-a v03-b ] -- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan1-if [vlan1-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan101-if [vlan101-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan102-if [vlan102-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan103-if [vlan103-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan104-if [vlan104-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan3-if [vlan3-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan4-if [vlan4-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan5-if [vlan5-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan900-if [vlan900-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan909-if [vlan909-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-libvirt-images-fs [libvirt-images-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ 
libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ] - Clone Set: cl-libvirt-install-fs [libvirt-install-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ] - Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ] - Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ] - Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ] - Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ] - Clone Set: cl-libvirt-images-pool [libvirt-images-pool] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ] -+ Stopped: [ mgmt01 ] - git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a -@@ -109,7 +99,7 @@ Online: [ mgmt01 v03-a v03-b ] - zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-vlan200-if [vlan200-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ] -+ Stopped: [ mgmt01 ] - lenny-x32-devel-vm (ocf::vds-ok:VirtualDomain): Started v03-a - dist.express-consult.org-vm (ocf::vds-ok:VirtualDomain): Stopped - eu1.ca-pages.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -121,7 +111,7 @@ Online: [ mgmt01 v03-a v03-b ] - c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - Clone Set: cl-mcast-test-net [mcast-test-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ] -+ Stopped: [ mgmt01 ] - dist.fly-uni.org-vm (ocf::vds-ok:VirtualDomain): Stopped - ktstudio.net-vm (ocf::vds-ok:VirtualDomain): Started v03-a - cloudsrv.credo-dialogue.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b -@@ -134,7 +124,7 @@ Online: [ mgmt01 v03-a v03-b ] - lustre04-right.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-anbriz-net [mcast-anbriz-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-anbriz-net:2 mcast-anbriz-net:3 mcast-anbriz-net:4 
mcast-anbriz-net:5 mcast-anbriz-net:6 mcast-anbriz-net:7 ] -+ Stopped: [ mgmt01 ] - gw.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - license.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - terminal.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -143,13 +133,13 @@ Online: [ mgmt01 v03-a v03-b ] - test-01.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-libvirt-qpid [libvirt-qpid] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ] -+ Stopped: [ mgmt01 ] - gw.gleb.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - gw.gotin.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - terminal0.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-gleb-net [mcast-gleb-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-gleb-net:2 mcast-gleb-net:3 mcast-gleb-net:4 mcast-gleb-net:5 mcast-gleb-net:6 mcast-gleb-net:7 ] -+ Stopped: [ mgmt01 ] - - Transition Summary: - * Reload vds-ok-pool-0-iscsi:0 (Started mgmt01) -@@ -215,79 +205,69 @@ Online: [ mgmt01 v03-a v03-b ] - stonith-v03-d (stonith:fence_ipmilan): Stopped - Clone Set: cl-clvmd [clvmd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ] - Clone Set: cl-dlm [dlm] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ] - Clone Set: cl-iscsid [iscsid] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ] - Clone Set: cl-libvirtd [libvirtd] - Started: [ v03-a v03-b ] -- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-multipathd [multipathd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ] - Clone Set: cl-node-params [node-params] - Started: [ v03-a v03-b ] -- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan1-if [vlan1-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan101-if [vlan101-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan102-if [vlan102-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan103-if [vlan103-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan104-if [vlan104-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan3-if [vlan3-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan4-if [vlan4-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan5-if [vlan5-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan900-if 
[vlan900-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan909-if [vlan909-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-libvirt-images-fs [libvirt-images-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ] - Clone Set: cl-libvirt-install-fs [libvirt-install-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ] - Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ] - Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ] - Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ] - Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ] - Clone Set: cl-libvirt-images-pool [libvirt-images-pool] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ] -+ Stopped: [ mgmt01 ] - git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a -@@ -310,7 +290,7 @@ Online: [ mgmt01 v03-a v03-b ] - zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-vlan200-if [vlan200-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ] -+ Stopped: [ mgmt01 ] - lenny-x32-devel-vm (ocf::vds-ok:VirtualDomain): Started v03-a - dist.express-consult.org-vm (ocf::vds-ok:VirtualDomain): Stopped - eu1.ca-pages.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -322,7 +302,7 @@ Online: [ mgmt01 v03-a v03-b ] - c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - Clone Set: cl-mcast-test-net [mcast-test-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ] -+ Stopped: [ mgmt01 ] - dist.fly-uni.org-vm 
(ocf::vds-ok:VirtualDomain): Stopped - ktstudio.net-vm (ocf::vds-ok:VirtualDomain): Started v03-a - cloudsrv.credo-dialogue.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b -@@ -335,7 +315,7 @@ Online: [ mgmt01 v03-a v03-b ] - lustre04-right.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-anbriz-net [mcast-anbriz-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-anbriz-net:2 mcast-anbriz-net:3 mcast-anbriz-net:4 mcast-anbriz-net:5 mcast-anbriz-net:6 mcast-anbriz-net:7 ] -+ Stopped: [ mgmt01 ] - gw.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - license.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - terminal.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -344,11 +324,11 @@ Online: [ mgmt01 v03-a v03-b ] - test-01.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-libvirt-qpid [libvirt-qpid] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ] -+ Stopped: [ mgmt01 ] - gw.gleb.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - gw.gotin.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - terminal0.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - Clone Set: cl-mcast-gleb-net [mcast-gleb-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-gleb-net:2 mcast-gleb-net:3 mcast-gleb-net:4 mcast-gleb-net:5 mcast-gleb-net:6 mcast-gleb-net:7 ] -+ Stopped: [ mgmt01 ] - -diff --git a/pengine/test10/master-demote-2.summary b/pengine/test10/master-demote-2.summary -index b5fb0b9..53259a7 100644 ---- a/pengine/test10/master-demote-2.summary -+++ b/pengine/test10/master-demote-2.summary -@@ -16,7 +16,7 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] -- stateful-1:0 (ocf::pacemaker:Stateful): Slave pcmk-1 FAILED -+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-1 FAILED - Slaves: [ pcmk-2 pcmk-3 pcmk-4 ] - - Transition Summary: -diff --git a/pengine/test10/master-demote-block.dot b/pengine/test10/master-demote-block.dot -new file mode 100644 -index 0000000..9b3c48f ---- /dev/null -+++ b/pengine/test10/master-demote-block.dot -@@ -0,0 +1,3 @@ -+digraph "g" { -+"dummy:1_monitor_20000 dl380g5d" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/master-demote-block.exp b/pengine/test10/master-demote-block.exp -new file mode 100644 -index 0000000..a8a51d7 ---- /dev/null -+++ b/pengine/test10/master-demote-block.exp -@@ -0,0 +1,12 @@ [12 lines of transition-graph XML; element content lost in extraction] -diff --git a/pengine/test10/master-demote-block.scores b/pengine/test10/master-demote-block.scores -new file mode 100644 -index 0000000..e99c828 ---- /dev/null -+++ b/pengine/test10/master-demote-block.scores -@@ -0,0 +1,13 @@ -+Allocation scores: -+clone_color: dummy:0 allocation score on dl380g5c: -INFINITY -+clone_color: dummy:0 allocation score on dl380g5d: 0 -+clone_color: dummy:1 allocation score on dl380g5c: -INFINITY -+clone_color: dummy:1 allocation score on dl380g5d: INFINITY -+clone_color: stateful allocation score on dl380g5c: -INFINITY -+clone_color: stateful allocation score on dl380g5d: 0 -+dummy:0 promotion score on dl380g5c: INFINITY -+dummy:1 promotion score on dl380g5d: 5 -+native_color: dummy:0 allocation score on dl380g5c: -INFINITY -+native_color: dummy:0 allocation score on dl380g5d: -INFINITY -+native_color: dummy:1 allocation score on dl380g5c: -INFINITY -+native_color: dummy:1
allocation score on dl380g5d: INFINITY -diff --git a/pengine/test10/master-demote-block.summary b/pengine/test10/master-demote-block.summary -new file mode 100644 -index 0000000..97fb20b ---- /dev/null -+++ b/pengine/test10/master-demote-block.summary -@@ -0,0 +1,22 @@ -+ -+Current cluster status: -+Node dl380g5c (21c624bd-c426-43dc-9665-bbfb92054bcd): standby -+Online: [ dl380g5d ] -+ -+ Master/Slave Set: stateful [dummy] -+ dummy (ocf::pacemaker:Stateful): Master dl380g5c (unmanaged) FAILED -+ Slaves: [ dl380g5d ] -+ -+Transition Summary: -+ -+Executing cluster transition: -+ * Resource action: dummy:1 monitor=20000 on dl380g5d -+ -+Revised cluster status: -+Node dl380g5c (21c624bd-c426-43dc-9665-bbfb92054bcd): standby -+Online: [ dl380g5d ] -+ -+ Master/Slave Set: stateful [dummy] -+ dummy (ocf::pacemaker:Stateful): Master dl380g5c (unmanaged) FAILED -+ Slaves: [ dl380g5d ] -+ -diff --git a/pengine/test10/master-demote-block.xml b/pengine/test10/master-demote-block.xml -new file mode 100644 -index 0000000..7aedca0 ---- /dev/null -+++ b/pengine/test10/master-demote-block.xml -@@ -0,0 +1,80 @@ [80 lines of CIB XML; element content lost in extraction] -diff --git a/pengine/test10/master-demote.scores b/pengine/test10/master-demote.scores -index 49c28dc..3650eca 100644 ---- a/pengine/test10/master-demote.scores -+++ b/pengine/test10/master-demote.scores -@@ -14,8 +14,8 @@ clone_color: fence_node:1 allocation score on cxb1: 1 - clone_color: named_drbd allocation score on cxa1: 200 - clone_color: named_drbd allocation score on cxb1: 210 - clone_color: named_drbd_node:0 allocation score on cxa1: 76 --clone_color: named_drbd_node:0 allocation score on cxb1: 75 --clone_color: named_drbd_node:1 allocation score on cxa1: 75 -+clone_color: named_drbd_node:0 allocation score on cxb1: 0 -+clone_color: named_drbd_node:1 allocation score on cxa1: 0 - clone_color: named_drbd_node:1 allocation score on cxb1: 76 - clone_color: pingd_clone allocation score on cxa1: 0 - clone_color: pingd_clone allocation score on cxb1: 0 -@@ -52,7 +52,7 @@ native_color: named_daemon allocation score on cxa1: -INFINITY - native_color: named_daemon allocation score on cxb1: -INFINITY - native_color: named_drbd_node:0 allocation score on cxa1: 76 - native_color: named_drbd_node:0 allocation score on cxb1: -INFINITY --native_color: named_drbd_node:1 allocation score on cxa1: 75 -+native_color: named_drbd_node:1 allocation score on cxa1: 0 - native_color: named_drbd_node:1 allocation score on cxb1: 76 - native_color: named_filesys allocation score on cxa1: -INFINITY - native_color: named_filesys allocation score on cxb1: -INFINITY -diff --git a/pengine/test10/master-depend.summary b/pengine/test10/master-depend.summary -index d64ccbf..f802b4e 100644 ---- a/pengine/test10/master-depend.summary -+++ b/pengine/test10/master-depend.summary -@@ -4,11 +4,11 @@ Online: [ vbox4 ] - OFFLINE: [ vbox3 ] - - Master/Slave Set: drbd [drbd0] -- Stopped: [ drbd0:0 drbd0:1 ] -+ Stopped: [ vbox3 vbox4 ] - Clone Set: cman_clone [cman] -- Stopped: [ cman:0 cman:1 ] -+ Stopped: [ vbox3 vbox4 ] - Clone Set: clvmd_clone [clvmd] -- Stopped: [ clvmd:0 clvmd:1 ] -+ Stopped: [ vbox3 vbox4 ] - vmnci36 (ocf::heartbeat:vm): Stopped - vmnci37 (ocf::heartbeat:vm): Stopped - vmnci38 (ocf::heartbeat:vm): Stopped -@@ -46,12 +46,12 @@ OFFLINE: [ vbox3 ] - - Master/Slave Set: drbd
[drbd0] - Slaves: [ vbox4 ] -- Stopped: [ drbd0:1 ] -+ Stopped: [ vbox3 ] - Clone Set: cman_clone [cman] - Started: [ vbox4 ] -- Stopped: [ cman:1 ] -+ Stopped: [ vbox3 ] - Clone Set: clvmd_clone [clvmd] -- Stopped: [ clvmd:0 clvmd:1 ] -+ Stopped: [ vbox3 vbox4 ] - vmnci36 (ocf::heartbeat:vm): Stopped - vmnci37 (ocf::heartbeat:vm): Stopped - vmnci38 (ocf::heartbeat:vm): Stopped -diff --git a/pengine/test10/master-ordering.summary b/pengine/test10/master-ordering.summary -index 330bdc8..9196392 100644 ---- a/pengine/test10/master-ordering.summary -+++ b/pengine/test10/master-ordering.summary -@@ -3,23 +3,23 @@ Current cluster status: - Online: [ webcluster01 ] - OFFLINE: [ webcluster02 ] - -- mysql-server (ocf::heartbeat:mysql): Stopped -- extip_1 (ocf::heartbeat:IPaddr2): Stopped -- extip_2 (ocf::heartbeat:IPaddr2): Stopped -+ mysql-server (ocf::heartbeat:mysql): Stopped -+ extip_1 (ocf::heartbeat:IPaddr2): Stopped -+ extip_2 (ocf::heartbeat:IPaddr2): Stopped - Resource Group: group_main -- intip_0_main (ocf::heartbeat:IPaddr2): Stopped -- intip_1_master (ocf::heartbeat:IPaddr2): Stopped -- intip_2_slave (ocf::heartbeat:IPaddr2): Stopped -+ intip_0_main (ocf::heartbeat:IPaddr2): Stopped -+ intip_1_master (ocf::heartbeat:IPaddr2): Stopped -+ intip_2_slave (ocf::heartbeat:IPaddr2): Stopped - Master/Slave Set: ms_drbd_www [drbd_www] -- Stopped: [ drbd_www:0 drbd_www:1 ] -+ Stopped: [ webcluster01 webcluster02 ] - Clone Set: clone_ocfs2_www [ocfs2_www] (unique) -- ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -- ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped - Clone Set: clone_webservice [group_webservice] -- Stopped: [ group_webservice:0 group_webservice:1 ] -+ Stopped: [ webcluster01 webcluster02 ] - Master/Slave Set: ms_drbd_mysql [drbd_mysql] -- Stopped: [ drbd_mysql:0 drbd_mysql:1 ] -- fs_mysql (ocf::heartbeat:Filesystem): Stopped -+ Stopped: [ webcluster01 webcluster02 ] -+ fs_mysql (ocf::heartbeat:Filesystem): Stopped - - Transition Summary: - * Start extip_1 (webcluster01) -@@ -73,23 +73,23 @@ Revised cluster status: - Online: [ webcluster01 ] - OFFLINE: [ webcluster02 ] - -- mysql-server (ocf::heartbeat:mysql): Stopped -+ mysql-server (ocf::heartbeat:mysql): Stopped - extip_1 (ocf::heartbeat:IPaddr2): Started webcluster01 - extip_2 (ocf::heartbeat:IPaddr2): Started webcluster01 - Resource Group: group_main -- intip_0_main (ocf::heartbeat:IPaddr2): Stopped -+ intip_0_main (ocf::heartbeat:IPaddr2): Stopped - intip_1_master (ocf::heartbeat:IPaddr2): Started webcluster01 - intip_2_slave (ocf::heartbeat:IPaddr2): Started webcluster01 - Master/Slave Set: ms_drbd_www [drbd_www] - Slaves: [ webcluster01 ] -- Stopped: [ drbd_www:1 ] -+ Stopped: [ webcluster02 ] - Clone Set: clone_ocfs2_www [ocfs2_www] (unique) -- ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -- ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped - Clone Set: clone_webservice [group_webservice] -- Stopped: [ group_webservice:0 group_webservice:1 ] -+ Stopped: [ webcluster01 webcluster02 ] - Master/Slave Set: ms_drbd_mysql [drbd_mysql] - Slaves: [ webcluster01 ] -- Stopped: [ drbd_mysql:1 ] -- fs_mysql (ocf::heartbeat:Filesystem): Stopped -+ Stopped: [ webcluster02 ] -+ fs_mysql (ocf::heartbeat:Filesystem): Stopped - -diff --git a/pengine/test10/master-probed-score.summary b/pengine/test10/master-probed-score.summary 
-index faf4824..a634efd 100644 ---- a/pengine/test10/master-probed-score.summary -+++ b/pengine/test10/master-probed-score.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - - Master/Slave Set: AdminClone [AdminDrbd] -- Stopped: [ AdminDrbd:0 AdminDrbd:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - CronAmbientTemperature (ocf::heartbeat:symlink): Stopped - StonithHypatia (stonith:fence_nut): Stopped - StonithOrestes (stonith:fence_nut): Stopped -@@ -14,7 +14,7 @@ Online: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.ed - Dhcpd (lsb:dhcpd): Stopped - DhcpIP (ocf::heartbeat:IPaddr2): Stopped - Clone Set: CupsClone [CupsGroup] -- Stopped: [ CupsGroup:0 CupsGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: IPClone [IPGroup] (unique) - Resource Group: IPGroup:0 - ClusterIP:0 (ocf::heartbeat:IPaddr2): Stopped -@@ -25,13 +25,13 @@ Online: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.ed - ClusterIPLocal:1 (ocf::heartbeat:IPaddr2): Stopped - ClusterIPSandbox:1 (ocf::heartbeat:IPaddr2): Stopped - Clone Set: LibvirtdClone [LibvirtdGroup] -- Stopped: [ LibvirtdGroup:0 LibvirtdGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: TftpClone [TftpGroup] -- Stopped: [ TftpGroup:0 TftpGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: ExportsClone [ExportsGroup] -- Stopped: [ ExportsGroup:0 ExportsGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: FilesystemClone [FilesystemGroup] -- Stopped: [ FilesystemGroup:0 FilesystemGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - KVM-guest (ocf::heartbeat:VirtualDomain): Stopped - Proxy (ocf::heartbeat:VirtualDomain): Stopped - -diff --git a/pengine/test10/master-pseudo.summary b/pengine/test10/master-pseudo.summary -index 4ac7605..d480fb8 100644 ---- a/pengine/test10/master-pseudo.summary -+++ b/pengine/test10/master-pseudo.summary -@@ -6,6 +6,7 @@ Online: [ sambuca.linbit ] - ip_float_right (ocf::heartbeat:IPaddr2): Stopped - Master/Slave Set: ms_drbd_float [drbd_float] - Slaves: [ sambuca.linbit ] -+ Stopped: [ raki.linbit ] - Resource Group: nfsexport - ip_nfs (ocf::heartbeat:IPaddr2): Stopped - fs_float (ocf::heartbeat:Filesystem): Stopped -@@ -54,6 +55,7 @@ Online: [ sambuca.linbit ] - ip_float_right (ocf::heartbeat:IPaddr2): Started sambuca.linbit - Master/Slave Set: ms_drbd_float [drbd_float] - Masters: [ sambuca.linbit ] -+ Stopped: [ raki.linbit ] - Resource Group: nfsexport - ip_nfs (ocf::heartbeat:IPaddr2): Started sambuca.linbit - fs_float (ocf::heartbeat:Filesystem): Stopped -diff --git a/pengine/test10/master-reattach.summary b/pengine/test10/master-reattach.summary -index 8afe6a9..2e7f492 100644 ---- a/pengine/test10/master-reattach.summary -+++ b/pengine/test10/master-reattach.summary -@@ -3,8 +3,8 @@ Current cluster status: - Online: [ dktest1 dktest2 ] - - Master/Slave Set: ms-drbd1 [drbd1] (unmanaged) -- drbd1:0 (ocf::heartbeat:drbd): Master dktest1 (unmanaged) -- drbd1:1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged) -+ drbd1 (ocf::heartbeat:drbd): Master dktest1 (unmanaged) -+ drbd1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged) - Resource Group: apache - apache-vip 
(ocf::heartbeat:IPaddr2): Started dktest1 (unmanaged) - mount (ocf::heartbeat:Filesystem): Started dktest1 (unmanaged) -@@ -23,8 +23,8 @@ Revised cluster status: - Online: [ dktest1 dktest2 ] - - Master/Slave Set: ms-drbd1 [drbd1] (unmanaged) -- drbd1:0 (ocf::heartbeat:drbd): Master dktest1 (unmanaged) -- drbd1:1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged) -+ drbd1 (ocf::heartbeat:drbd): Master dktest1 (unmanaged) -+ drbd1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged) - Resource Group: apache - apache-vip (ocf::heartbeat:IPaddr2): Started dktest1 (unmanaged) - mount (ocf::heartbeat:Filesystem): Started dktest1 (unmanaged) -diff --git a/pengine/test10/master-unmanaged-monitor.summary b/pengine/test10/master-unmanaged-monitor.summary -index 0a5712d..369dcde 100644 ---- a/pengine/test10/master-unmanaged-monitor.summary -+++ b/pengine/test10/master-unmanaged-monitor.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - - Clone Set: Fencing [FencingChild] (unmanaged) -- Stopped: [ FencingChild:0 FencingChild:1 FencingChild:2 FencingChild:3 ] -+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - Resource Group: group-1 - r192.168.122.112 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged) - r192.168.122.113 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged) -@@ -15,15 +15,15 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-3 (unmanaged) - migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged) - Clone Set: Connectivity [ping-1] (unmanaged) -- ping-1:0 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged) -- ping-1:1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged) -- ping-1:2 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged) -- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) - Master/Slave Set: master-1 [stateful-1] (unmanaged) -- stateful-1:0 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged) -- stateful-1:1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged) -- stateful-1:2 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged) -- Stopped: [ stateful-1:3 ] -+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged) -+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged) -+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged) -+ Stopped: [ pcmk-1 ] - - Transition Summary: - -@@ -44,7 +44,7 @@ Revised cluster status: - Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - - Clone Set: Fencing [FencingChild] (unmanaged) -- Stopped: [ FencingChild:0 FencingChild:1 FencingChild:2 FencingChild:3 ] -+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - Resource Group: group-1 - r192.168.122.112 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged) - r192.168.122.113 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged) -@@ -56,13 +56,13 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-3 (unmanaged) - migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged) - Clone Set: Connectivity [ping-1] (unmanaged) -- ping-1:0 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged) -- ping-1:1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged) -- ping-1:2 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged) -- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): 
Started pcmk-2 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) - Master/Slave Set: master-1 [stateful-1] (unmanaged) -- stateful-1:0 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged) -- stateful-1:1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged) -- stateful-1:2 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged) -- Stopped: [ stateful-1:3 ] -+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged) -+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged) -+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged) -+ Stopped: [ pcmk-1 ] - -diff --git a/pengine/test10/master_monitor_restart.summary b/pengine/test10/master_monitor_restart.summary -index 2eed0a5..05b6460 100644 ---- a/pengine/test10/master_monitor_restart.summary -+++ b/pengine/test10/master_monitor_restart.summary -@@ -5,7 +5,7 @@ Online: [ node1 ] - - Master/Slave Set: MS_RSC [MS_RSC_NATIVE] - Masters: [ node1 ] -- Stopped: [ MS_RSC_NATIVE:1 ] -+ Stopped: [ node2 ] - - Transition Summary: - -@@ -18,5 +18,5 @@ Online: [ node1 ] - - Master/Slave Set: MS_RSC [MS_RSC_NATIVE] - Masters: [ node1 ] -- Stopped: [ MS_RSC_NATIVE:1 ] -+ Stopped: [ node2 ] - -diff --git a/pengine/test10/migrate-5.summary b/pengine/test10/migrate-5.summary -index 726f1f2..f9248ac 100644 ---- a/pengine/test10/migrate-5.summary -+++ b/pengine/test10/migrate-5.summary -@@ -30,5 +30,5 @@ Online: [ dom0-01 ] - domU-test01 (ocf::heartbeat:Xen): Started dom0-01 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-01 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-02 ] - -diff --git a/pengine/test10/migrate-fencing.summary b/pengine/test10/migrate-fencing.summary -index fbe5b25..920e754 100644 ---- a/pengine/test10/migrate-fencing.summary -+++ b/pengine/test10/migrate-fencing.summary -@@ -86,7 +86,7 @@ OFFLINE: [ pcmk-4 ] - - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - Resource Group: group-1 - r192.168.101.181 (ocf::heartbeat:IPaddr): Started pcmk-1 - r192.168.101.182 (ocf::heartbeat:IPaddr): Started pcmk-1 -@@ -99,9 +99,9 @@ OFFLINE: [ pcmk-4 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-3 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Masters: [ pcmk-1 ] - Slaves: [ pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - -diff --git a/pengine/test10/migrate-partial-3.summary b/pengine/test10/migrate-partial-3.summary -index 8cdb66b..cf27c9f 100644 ---- a/pengine/test10/migrate-partial-3.summary -+++ b/pengine/test10/migrate-partial-3.summary -@@ -6,7 +6,7 @@ OFFLINE: [ hex-15 ] - test-vm (ocf::heartbeat:Xen): Started hex-14 FAILED - Clone Set: c-clusterfs [dlm] - Started: [ hex-13 hex-14 ] -- Stopped: [ dlm:2 ] -+ Stopped: [ hex-15 ] - - Transition Summary: - * Recover test-vm (Started hex-14 -> hex-13) -@@ -26,5 +26,5 @@ OFFLINE: [ hex-15 ] - test-vm (ocf::heartbeat:Xen): Started hex-13 - Clone Set: c-clusterfs [dlm] - Started: [ hex-13 hex-14 ] -- Stopped: [ dlm:2 ] -+ Stopped: [ hex-15 ] - -diff --git a/pengine/test10/migrate-partial-4.summary b/pengine/test10/migrate-partial-4.summary -index 8d4fa3b..1cf119a 100644 ---- a/pengine/test10/migrate-partial-4.summary -+++ b/pengine/test10/migrate-partial-4.summary -@@ -23,27 +23,27 @@ Online: [ lustre01-left 
lustre02-left lustre03-left lustre04-left ] - ip-booth (ocf::heartbeat:IPaddr2): Started lustre02-left - boothd (ocf::pacemaker:booth-site): Started lustre02-left - Master/Slave Set: ms-drbd-mgs [drbd-mgs] -- Stopped: [ drbd-mgs:0 drbd-mgs:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-mdt0000 [drbd-testfs-mdt0000] -- Stopped: [ drbd-testfs-mdt0000:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-mdt0000-left [drbd-testfs-mdt0000-left] -- Stopped: [ drbd-testfs-mdt0000-left:0 drbd-testfs-mdt0000-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0000 [drbd-testfs-ost0000] -- Stopped: [ drbd-testfs-ost0000:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0000-left [drbd-testfs-ost0000-left] -- Stopped: [ drbd-testfs-ost0000-left:0 drbd-testfs-ost0000-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0001 [drbd-testfs-ost0001] -- Stopped: [ drbd-testfs-ost0001:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0001-left [drbd-testfs-ost0001-left] -- Stopped: [ drbd-testfs-ost0001-left:0 drbd-testfs-ost0001-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0002 [drbd-testfs-ost0002] -- Stopped: [ drbd-testfs-ost0002:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0002-left [drbd-testfs-ost0002-left] -- Stopped: [ drbd-testfs-ost0002-left:0 drbd-testfs-ost0002-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0003 [drbd-testfs-ost0003] -- Stopped: [ drbd-testfs-ost0003:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0003-left [drbd-testfs-ost0003-left] -- Stopped: [ drbd-testfs-ost0003-left:0 drbd-testfs-ost0003-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - - Transition Summary: - * Start drbd-stacked (lustre02-left) -@@ -104,24 +104,25 @@ Online: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - boothd (ocf::pacemaker:booth-site): Started lustre02-left - Master/Slave Set: ms-drbd-mgs [drbd-mgs] - Slaves: [ lustre01-left lustre02-left ] -+ Stopped: [ lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-mdt0000 [drbd-testfs-mdt0000] -- Stopped: [ drbd-testfs-mdt0000:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-mdt0000-left [drbd-testfs-mdt0000-left] -- Stopped: [ drbd-testfs-mdt0000-left:0 drbd-testfs-mdt0000-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0000 [drbd-testfs-ost0000] -- Stopped: [ drbd-testfs-ost0000:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0000-left [drbd-testfs-ost0000-left] -- Stopped: [ drbd-testfs-ost0000-left:0 drbd-testfs-ost0000-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0001 [drbd-testfs-ost0001] -- Stopped: [ drbd-testfs-ost0001:0 ] -+ Stopped: [ lustre01-left lustre02-left 
lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0001-left [drbd-testfs-ost0001-left] -- Stopped: [ drbd-testfs-ost0001-left:0 drbd-testfs-ost0001-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0002 [drbd-testfs-ost0002] -- Stopped: [ drbd-testfs-ost0002:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0002-left [drbd-testfs-ost0002-left] -- Stopped: [ drbd-testfs-ost0002-left:0 drbd-testfs-ost0002-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0003 [drbd-testfs-ost0003] -- Stopped: [ drbd-testfs-ost0003:0 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - Master/Slave Set: ms-drbd-testfs-ost0003-left [drbd-testfs-ost0003-left] -- Stopped: [ drbd-testfs-ost0003-left:0 drbd-testfs-ost0003-left:1 ] -+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ] - -diff --git a/pengine/test10/migrate-shutdown.summary b/pengine/test10/migrate-shutdown.summary -index 62eb906..e634a5c 100644 ---- a/pengine/test10/migrate-shutdown.summary -+++ b/pengine/test10/migrate-shutdown.summary -@@ -15,11 +15,11 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-1 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-4 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-3 ] - Master/Slave Set: master-1 [stateful-1] - Masters: [ pcmk-2 ] - Slaves: [ pcmk-1 pcmk-4 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-3 ] - - Transition Summary: - * Stop Fencing (pcmk-1) -@@ -85,7 +85,7 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Stopped - migrator (ocf::pacemaker:Dummy): Stopped - Clone Set: Connectivity [ping-1] -- Stopped: [ ping-1:0 ping-1:1 ping-1:2 ping-1:3 ] -+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] -- Stopped: [ stateful-1:0 stateful-1:1 stateful-1:2 stateful-1:3 ] -+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - -diff --git a/pengine/test10/migrate-start-complex.summary b/pengine/test10/migrate-start-complex.summary -index 7d3c329..9de9466 100644 ---- a/pengine/test10/migrate-start-complex.summary -+++ b/pengine/test10/migrate-start-complex.summary -@@ -6,9 +6,9 @@ Online: [ dom0-01 dom0-02 ] - domU-test01 (ocf::heartbeat:Xen): Started dom0-02 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-02 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-01 ] - Clone Set: clone-bottom [bottom] -- Stopped: [ bottom:0 bottom:1 ] -+ Stopped: [ dom0-01 dom0-02 ] - - Transition Summary: - * Move top (Started dom0-02 -> dom0-01) -diff --git a/pengine/test10/migrate-start.summary b/pengine/test10/migrate-start.summary -index 057b020..8bbe3e5 100644 ---- a/pengine/test10/migrate-start.summary -+++ b/pengine/test10/migrate-start.summary -@@ -5,7 +5,7 @@ Online: [ dom0-01 dom0-02 ] - domU-test01 (ocf::heartbeat:Xen): Started dom0-02 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-02 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-01 ] - - Transition Summary: - * Migrate domU-test01 (Started dom0-02 -> dom0-01) -diff --git a/pengine/test10/migrate-stop-complex.summary b/pengine/test10/migrate-stop-complex.summary -index 7cbc802..51cec76 100644 ---- a/pengine/test10/migrate-stop-complex.summary -+++ b/pengine/test10/migrate-stop-complex.summary -@@ -41,8 +41,8 @@ Online: [ dom0-01 ] - 
domU-test01 (ocf::heartbeat:Xen): Started dom0-01 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-01 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-02 ] - Clone Set: clone-bottom [bottom] - Started: [ dom0-01 ] -- Stopped: [ bottom:1 ] -+ Stopped: [ dom0-02 ] - -diff --git a/pengine/test10/migrate-stop-start-complex.summary b/pengine/test10/migrate-stop-start-complex.summary -index f0cf070..bb3babe 100644 ---- a/pengine/test10/migrate-stop-start-complex.summary -+++ b/pengine/test10/migrate-stop-start-complex.summary -@@ -9,7 +9,7 @@ Online: [ dom0-01 ] - Started: [ dom0-01 dom0-02 ] - Clone Set: clone-bottom [bottom] - Started: [ dom0-02 ] -- Stopped: [ bottom:1 ] -+ Stopped: [ dom0-01 ] - - Transition Summary: - * Restart top (Started dom0-01) -@@ -45,8 +45,8 @@ Online: [ dom0-01 ] - domU-test01 (ocf::heartbeat:Xen): Started dom0-01 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-01 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-02 ] - Clone Set: clone-bottom [bottom] - Started: [ dom0-01 ] -- Stopped: [ bottom:1 ] -+ Stopped: [ dom0-02 ] - -diff --git a/pengine/test10/migrate-stop.summary b/pengine/test10/migrate-stop.summary -index 726f1f2..f9248ac 100644 ---- a/pengine/test10/migrate-stop.summary -+++ b/pengine/test10/migrate-stop.summary -@@ -30,5 +30,5 @@ Online: [ dom0-01 ] - domU-test01 (ocf::heartbeat:Xen): Started dom0-01 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-01 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-02 ] - -diff --git a/pengine/test10/migrate-stop_start.summary b/pengine/test10/migrate-stop_start.summary -index cf843fd..bc4bb6a 100644 ---- a/pengine/test10/migrate-stop_start.summary -+++ b/pengine/test10/migrate-stop_start.summary -@@ -6,7 +6,7 @@ Online: [ dom0-01 ] - domU-test01 (ocf::heartbeat:Xen): Started dom0-02 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-02 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-01 ] - - Transition Summary: - * Move domU-test01 (Started dom0-02 -> dom0-01) -@@ -34,5 +34,5 @@ Online: [ dom0-01 ] - domU-test01 (ocf::heartbeat:Xen): Started dom0-01 - Clone Set: clone-dom0-iscsi1 [dom0-iscsi1] - Started: [ dom0-01 ] -- Stopped: [ dom0-iscsi1:1 ] -+ Stopped: [ dom0-02 ] - -diff --git a/pengine/test10/monitor-onfail-restart.xml b/pengine/test10/monitor-onfail-restart.xml -index c0e9b6e..beb68bc 100644 ---- a/pengine/test10/monitor-onfail-restart.xml -+++ b/pengine/test10/monitor-onfail-restart.xml -@@ -30,7 +30,7 @@ [XML hunk body not preserved in this copy] -@@ -45,4 +45,4 @@ [XML hunk body not preserved in this copy; the old version ended without a trailing newline] -diff --git a/pengine/test10/monitor-recovery.dot b/pengine/test10/monitor-recovery.dot -new file mode 100644 -index 0000000..13eab93 ---- /dev/null -+++ b/pengine/test10/monitor-recovery.dot -@@ -0,0 +1,10 @@ -+digraph "g" { -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"cl_tomcat_stop_0" -> "cl_tomcat_stopped_0" [ style = bold] -+"cl_tomcat_stop_0" -> "d_tomcat_stop_0 CSE-1" [ style = bold] -+"cl_tomcat_stop_0" [ style=bold color="green" fontcolor="orange"] -+"cl_tomcat_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"d_tomcat_stop_0 CSE-1" -> "all_stopped" [ style = bold] -+"d_tomcat_stop_0 CSE-1" -> "cl_tomcat_stopped_0" [ style = bold] -+"d_tomcat_stop_0 CSE-1" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/monitor-recovery.exp b/pengine/test10/monitor-recovery.exp -new file mode 100644 -index 0000000..546b9c6 ---- /dev/null -+++ b/pengine/test10/monitor-recovery.exp -@@ -0,0 +1,51 @@ [content of the 51 added XML lines not preserved in this copy]
-diff --git a/pengine/test10/monitor-recovery.scores b/pengine/test10/monitor-recovery.scores -new file mode 100644 -index 0000000..93a0ed4 ---- /dev/null -+++ b/pengine/test10/monitor-recovery.scores -@@ -0,0 +1,21 @@ -+Allocation scores: -+clone_color: cl_tomcat allocation score on CSE-1: -INFINITY -+clone_color: cl_tomcat allocation score on CSE-2: 0 -+clone_color: d_tomcat:0 allocation score on CSE-1: -INFINITY -+clone_color: d_tomcat:0 allocation score on CSE-2: 0 -+clone_color: d_tomcat:1 allocation score on CSE-1: -INFINITY -+clone_color: d_tomcat:1 allocation score on CSE-2: 0 -+group_color: ip_11 allocation score on CSE-1: 0 -+group_color: ip_11 allocation score on CSE-2: 0 -+group_color: ip_19 allocation score on CSE-1: 0 -+group_color: ip_19 allocation score on CSE-2: 0 -+group_color: svc-cse allocation score on CSE-1: 0 -+group_color: svc-cse allocation score on CSE-2: 0 -+native_color: d_tomcat:0 allocation score on CSE-1: -INFINITY -+native_color: d_tomcat:0 allocation score on CSE-2: -INFINITY -+native_color: d_tomcat:1 allocation score on CSE-1: -INFINITY -+native_color: d_tomcat:1 allocation score on CSE-2: -INFINITY -+native_color: ip_11 allocation score on CSE-1: -INFINITY -+native_color: ip_11 allocation score on CSE-2: -INFINITY -+native_color: ip_19 allocation score on CSE-1: -INFINITY -+native_color: ip_19 allocation score on CSE-2: -INFINITY -diff --git a/pengine/test10/monitor-recovery.summary b/pengine/test10/monitor-recovery.summary -new file mode 100644 -index 0000000..cae6d07 ---- /dev/null -+++ b/pengine/test10/monitor-recovery.summary -@@ -0,0 +1,31 @@ -+ -+Current cluster status: -+Online: [ CSE-1 ] -+OFFLINE: [ CSE-2 ] -+ -+ Resource Group: svc-cse -+ ip_19 (ocf::heartbeat:IPaddr2): Stopped -+ ip_11 (ocf::heartbeat:IPaddr2): Stopped -+ Clone Set: cl_tomcat [d_tomcat] -+ Started: [ CSE-1 ] -+ Stopped: [ CSE-2 ] -+ -+Transition Summary: -+ * Stop d_tomcat:0 (CSE-1) -+ -+Executing cluster transition: -+ * Pseudo action: cl_tomcat_stop_0 -+ * Resource action: d_tomcat stop on CSE-1 -+ * Pseudo action: cl_tomcat_stopped_0 -+ * Pseudo action: all_stopped -+ -+Revised cluster status: -+Online: [ CSE-1 ] -+OFFLINE: [ CSE-2 ] -+ -+ Resource Group: svc-cse -+ ip_19 (ocf::heartbeat:IPaddr2): Stopped -+ ip_11 (ocf::heartbeat:IPaddr2): Stopped -+ Clone Set: cl_tomcat [d_tomcat] -+ Stopped: [ CSE-1 CSE-2 ] -+ -diff --git a/pengine/test10/monitor-recovery.xml b/pengine/test10/monitor-recovery.xml -new file mode 100644 -index 0000000..bc10ed1 ---- /dev/null -+++ b/pengine/test10/monitor-recovery.xml -@@ -0,0 +1,107 @@ [content of the 107 added XML lines not preserved in this copy] -diff --git a/pengine/test10/novell-239079.summary b/pengine/test10/novell-239079.summary -index 3745d37..1298acb 100644 ---- a/pengine/test10/novell-239079.summary -+++ b/pengine/test10/novell-239079.summary -@@ -4,7 +4,7 @@ Online: [ xen-1 xen-2 ] - - fs_1 (ocf::heartbeat:Filesystem): Stopped - Master/Slave Set: ms-drbd0 [drbd0] -- Stopped: [ drbd0:0 drbd0:1 ] -+ Stopped: [ xen-1 xen-2 ] - - Transition Summary: - * Start drbd0:0 (xen-1) -diff --git a/pengine/test10/novell-239082.summary
b/pengine/test10/novell-239082.summary -index 80d2206..b2c28ca 100644 ---- a/pengine/test10/novell-239082.summary -+++ b/pengine/test10/novell-239082.summary -@@ -54,5 +54,5 @@ Online: [ xen-1 xen-2 ] - fs_1 (ocf::heartbeat:Filesystem): Started xen-2 - Master/Slave Set: ms-drbd0 [drbd0] - Masters: [ xen-2 ] -- Stopped: [ drbd0:1 ] -+ Stopped: [ xen-1 ] - -diff --git a/pengine/test10/novell-252693-2.summary b/pengine/test10/novell-252693-2.summary -index 5b314a2..73b95d7 100644 ---- a/pengine/test10/novell-252693-2.summary -+++ b/pengine/test10/novell-252693-2.summary -@@ -4,19 +4,19 @@ Online: [ node1 node2 ] - - Clone Set: stonithcloneset [stonithclone] - Started: [ node2 ] -- Stopped: [ stonithclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmsdcloneset [evmsdclone] - Started: [ node2 ] -- Stopped: [ evmsdclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmscloneset [evmsclone] - Started: [ node2 ] -- Stopped: [ evmsclone:1 ] -+ Stopped: [ node1 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node2 ] -- Stopped: [ imagestoreclone:1 ] -+ Stopped: [ node1 ] - Clone Set: configstorecloneset [configstoreclone] - Started: [ node2 ] -- Stopped: [ configstoreclone:1 ] -+ Stopped: [ node1 ] - sles10 (ocf::heartbeat:Xen): Started node2 - - Transition Summary: -diff --git a/pengine/test10/novell-252693-3.summary b/pengine/test10/novell-252693-3.summary -index d8ddcd7..9d42229 100644 ---- a/pengine/test10/novell-252693-3.summary -+++ b/pengine/test10/novell-252693-3.summary -@@ -4,19 +4,19 @@ Online: [ node1 node2 ] - - Clone Set: stonithcloneset [stonithclone] - Started: [ node2 ] -- Stopped: [ stonithclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmsdcloneset [evmsdclone] - Started: [ node2 ] -- Stopped: [ evmsdclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmscloneset [evmsclone] - Started: [ node2 ] -- Stopped: [ evmsclone:1 ] -+ Stopped: [ node1 ] - Clone Set: imagestorecloneset [imagestoreclone] -- imagestoreclone:0 (ocf::heartbeat:Filesystem): Started node2 FAILED -- Stopped: [ imagestoreclone:1 ] -+ imagestoreclone (ocf::heartbeat:Filesystem): Started node2 FAILED -+ Stopped: [ node1 ] - Clone Set: configstorecloneset [configstoreclone] - Started: [ node2 ] -- Stopped: [ configstoreclone:1 ] -+ Stopped: [ node1 ] - sles10 (ocf::heartbeat:Xen): Started node2 - - Transition Summary: -diff --git a/pengine/test10/novell-252693.summary b/pengine/test10/novell-252693.summary -index 47087bb..8207d41 100644 ---- a/pengine/test10/novell-252693.summary -+++ b/pengine/test10/novell-252693.summary -@@ -72,18 +72,18 @@ Online: [ node1 node2 ] - - Clone Set: stonithcloneset [stonithclone] - Started: [ node2 ] -- Stopped: [ stonithclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmsdcloneset [evmsdclone] - Started: [ node2 ] -- Stopped: [ evmsdclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmscloneset [evmsclone] - Started: [ node2 ] -- Stopped: [ evmsclone:1 ] -+ Stopped: [ node1 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node2 ] -- Stopped: [ imagestoreclone:1 ] -+ Stopped: [ node1 ] - Clone Set: configstorecloneset [configstoreclone] - Started: [ node2 ] -- Stopped: [ configstoreclone:1 ] -+ Stopped: [ node1 ] - sles10 (ocf::heartbeat:Xen): Started node2 - -diff --git a/pengine/test10/obsolete-lrm-resource.summary b/pengine/test10/obsolete-lrm-resource.summary -index f45bdd5..237c3b8 100644 ---- a/pengine/test10/obsolete-lrm-resource.summary -+++ b/pengine/test10/obsolete-lrm-resource.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ yingying.site ] - - Clone Set: 
rsc1 [rsc1_child] -- Stopped: [ rsc1_child:0 ] -+ Stopped: [ yingying.site ] - - Transition Summary: - * Start rsc1_child:0 (yingying.site) -diff --git a/pengine/test10/one-or-more-5.exp b/pengine/test10/one-or-more-5.exp -index ed11c12..67d1231 100644 ---- a/pengine/test10/one-or-more-5.exp -+++ b/pengine/test10/one-or-more-5.exp [eleven hunks, at source lines 1, 11, 26, 48, 57, 66, 91, 101, 116, 161, and 173: the XML hunk bodies were not preserved in this copy]
-diff --git a/pengine/test10/order-clone.summary b/pengine/test10/order-clone.summary -index 4af7b7e..cb61fb0 100644 ---- a/pengine/test10/order-clone.summary -+++ b/pengine/test10/order-clone.summary -@@ -4,17 +4,17 @@ Online: [ hex-0 hex-7 hex-8 hex-9 ] - - fencing-sbd (stonith:external/sbd): Stopped - Clone Set: o2cb-clone [o2cb] -- Stopped: [ o2cb:0 o2cb:1 o2cb:2 o2cb:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: vg1-clone [vg1] -- Stopped: [ vg1:0 vg1:1 vg1:2 vg1:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: fs2-clone [ocfs2-2] -- Stopped: [ ocfs2-2:0 ocfs2-2:1 ocfs2-2:2 ocfs2-2:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: fs1-clone [ocfs2-1] -- Stopped: [ ocfs2-1:0 ocfs2-1:1 ocfs2-1:2 ocfs2-1:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: dlm-clone [dlm] -- Stopped: [ dlm:0 dlm:1 dlm:2 dlm:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: clvm-clone [clvm] -- Stopped: [ clvm:0 clvm:1 clvm:2 clvm:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - - Transition Summary: - * Start fencing-sbd (hex-0) -@@ -27,15 +27,15 @@ Online: [ hex-0 hex-7 hex-8 hex-9 ] - - fencing-sbd (stonith:external/sbd): Started hex-0 - Clone Set: o2cb-clone [o2cb] -- Stopped: [ o2cb:0 o2cb:1 o2cb:2 o2cb:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: vg1-clone [vg1] -- Stopped: [ vg1:0 vg1:1 vg1:2 vg1:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: fs2-clone [ocfs2-2] -- Stopped: [ ocfs2-2:0 ocfs2-2:1 ocfs2-2:2 ocfs2-2:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: fs1-clone [ocfs2-1] -- Stopped: [ ocfs2-1:0 ocfs2-1:1 ocfs2-1:2 ocfs2-1:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: dlm-clone [dlm] -- Stopped: [ dlm:0 dlm:1 dlm:2 dlm:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - Clone Set: clvm-clone [clvm] -- Stopped: [ clvm:0 clvm:1 clvm:2 clvm:3 ] -+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ] - -diff --git a/pengine/test10/order7.exp b/pengine/test10/order7.exp -index f8594a9..cc7cf86 100644 ---- a/pengine/test10/order7.exp -+++ b/pengine/test10/order7.exp -@@ -45,7 +45,7 @@ [XML hunk body not preserved in this copy] -diff --git a/pengine/test10/order_constraint_stops_master.summary b/pengine/test10/order_constraint_stops_master.summary -index cbbe157..8170d30 100644 ---- a/pengine/test10/order_constraint_stops_master.summary -+++ b/pengine/test10/order_constraint_stops_master.summary -@@ -4,6 +4,7 @@ Online: [ fc16-builder fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] - Masters: [ fc16-builder ] -+ Stopped: [ fc16-builder2 ] - NATIVE_RSC_B (ocf::pacemaker:Dummy): Started fc16-builder2 - - Transition Summary: -@@ -35,6 +36,6 @@ Revised cluster status: - Online: [ fc16-builder fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] -- Stopped: [ NATIVE_RSC_A:0 ] -+ Stopped: [ fc16-builder fc16-builder2 ] - NATIVE_RSC_B (ocf::pacemaker:Dummy): Stopped - -diff --git a/pengine/test10/order_constraint_stops_slave.summary b/pengine/test10/order_constraint_stops_slave.summary -index 14478b0..5a67aee 100644 ---- a/pengine/test10/order_constraint_stops_slave.summary -+++ b/pengine/test10/order_constraint_stops_slave.summary -@@ -5,6 +5,7 @@ OFFLINE: [ fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] - Slaves: [ fc16-builder ] -+ Stopped: [ fc16-builder2 ] - NATIVE_RSC_B
(ocf::pacemaker:Dummy): Started fc16-builder - - Transition Summary: -@@ -28,6 +29,6 @@ Online: [ fc16-builder ] - OFFLINE: [ fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] -- Stopped: [ NATIVE_RSC_A:0 ] -+ Stopped: [ fc16-builder fc16-builder2 ] - NATIVE_RSC_B (ocf::pacemaker:Dummy): Stopped - -diff --git a/pengine/test10/orphan-2.summary b/pengine/test10/orphan-2.summary -index e20bb18..dbbdcd0 100644 ---- a/pengine/test10/orphan-2.summary -+++ b/pengine/test10/orphan-2.summary -@@ -27,8 +27,8 @@ Executing cluster transition: - * Resource action: rsc_c001n01 monitor on c001n08 - * Resource action: rsc_c001n01 monitor on c001n03 - * Resource action: rsc_c001n01 monitor on c001n02 -- * Cluster action: clear_failcount on c001n08 -- * Cluster action: clear_failcount on c001n02 -+ * Cluster action: clear_failcount for rsc_c001n08 on c001n08 -+ * Cluster action: clear_failcount for rsc_c001n08 on c001n02 - * Pseudo action: probe_complete - * Resource action: rsc_c001n08 stop on c001n08 - * Resource action: rsc_c001n08 delete on c001n08 -diff --git a/pengine/test10/params-6.summary b/pengine/test10/params-6.summary -index 05367dd..78f98c0 100644 ---- a/pengine/test10/params-6.summary -+++ b/pengine/test10/params-6.summary -@@ -14,83 +14,73 @@ Online: [ mgmt01 v03-a v03-b ] - stonith-v03-d (stonith:fence_ipmilan): Stopped - Clone Set: cl-clvmd [clvmd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ] - Clone Set: cl-dlm [dlm] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ] - Clone Set: cl-iscsid [iscsid] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ] - Clone Set: cl-libvirtd [libvirtd] - Started: [ v03-a v03-b ] -- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-multipathd [multipathd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ] - Clone Set: cl-node-params [node-params] - Started: [ v03-a v03-b ] -- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan1-if [vlan1-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan101-if [vlan101-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan102-if [vlan102-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan103-if [vlan103-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan104-if [vlan104-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan3-if [vlan3-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan4-if [vlan4-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan5-if [vlan5-if] - Started: [ 
v03-a v03-b ] -- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan900-if [vlan900-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan909-if [vlan909-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-libvirt-images-fs [libvirt-images-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ] - Clone Set: cl-libvirt-install-fs [libvirt-install-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ] - Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ] - Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ] - Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ] - Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ] - Clone Set: cl-libvirt-images-pool [libvirt-images-pool] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ] -+ Stopped: [ mgmt01 ] - git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-libvirt-qpid [libvirt-qpid] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ] -+ Stopped: [ mgmt01 ] - vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - vd01-c.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a -@@ -112,7 +102,7 @@ Online: [ mgmt01 v03-a v03-b ] - zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-vlan200-if [vlan200-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ] -+ Stopped: [ mgmt01 ] - anbriz-gw-vm (ocf::vds-ok:VirtualDomain): Stopped - anbriz-work-vm (ocf::vds-ok:VirtualDomain): Stopped - lenny-x32-devel-vm 
(ocf::vds-ok:VirtualDomain): Started v03-a -@@ -187,7 +177,7 @@ Online: [ mgmt01 v03-a v03-b ] - c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-test-net [mcast-test-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ] -+ Stopped: [ mgmt01 ] - dist.fly-uni.org-vm (ocf::vds-ok:VirtualDomain): Stopped - - Transition Summary: -@@ -217,83 +207,73 @@ Online: [ mgmt01 v03-a v03-b ] - stonith-v03-d (stonith:fence_ipmilan): Stopped - Clone Set: cl-clvmd [clvmd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ] - Clone Set: cl-dlm [dlm] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ] - Clone Set: cl-iscsid [iscsid] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ] - Clone Set: cl-libvirtd [libvirtd] - Started: [ v03-a v03-b ] -- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-multipathd [multipathd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ] - Clone Set: cl-node-params [node-params] - Started: [ v03-a v03-b ] -- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan1-if [vlan1-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan101-if [vlan101-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan102-if [vlan102-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan103-if [vlan103-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan104-if [vlan104-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan3-if [vlan3-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan4-if [vlan4-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan5-if [vlan5-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan900-if [vlan900-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan909-if [vlan909-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-libvirt-images-fs [libvirt-images-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ] - Clone Set: cl-libvirt-install-fs [libvirt-install-fs] - Started: [ mgmt01 v03-a 
v03-b ] -- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ] - Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ] - Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ] - Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ] - Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ] - Clone Set: cl-libvirt-images-pool [libvirt-images-pool] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ] -+ Stopped: [ mgmt01 ] - git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-libvirt-qpid [libvirt-qpid] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ] -+ Stopped: [ mgmt01 ] - vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - vd01-c.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a -@@ -315,7 +295,7 @@ Online: [ mgmt01 v03-a v03-b ] - zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-vlan200-if [vlan200-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ] -+ Stopped: [ mgmt01 ] - anbriz-gw-vm (ocf::vds-ok:VirtualDomain): Stopped - anbriz-work-vm (ocf::vds-ok:VirtualDomain): Stopped - lenny-x32-devel-vm (ocf::vds-ok:VirtualDomain): Started v03-a -@@ -390,6 +370,6 @@ Online: [ mgmt01 v03-a v03-b ] - c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-test-net [mcast-test-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ] -+ Stopped: [ mgmt01 ] - dist.fly-uni.org-vm (ocf::vds-ok:VirtualDomain): Stopped - -diff --git a/pengine/test10/per-node-attrs.dot b/pengine/test10/per-node-attrs.dot -new file mode 100644 -index 0000000..6dca5e0 ---- /dev/null -+++ b/pengine/test10/per-node-attrs.dot -@@ -0,0 +1,17 @@ -+digraph "g" { -+"dummy_monitor_0 pcmk-1" -> "probe_complete pcmk-1" [ style = bold] -+"dummy_monitor_0 pcmk-1" [ style=bold color="green" 
fontcolor="black"] -+"dummy_monitor_0 pcmk-2" -> "probe_complete pcmk-2" [ style = bold] -+"dummy_monitor_0 pcmk-2" [ style=bold color="green" fontcolor="black"] -+"dummy_monitor_0 pcmk-3" -> "probe_complete pcmk-3" [ style = bold] -+"dummy_monitor_0 pcmk-3" [ style=bold color="green" fontcolor="black"] -+"dummy_start_0 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"probe_complete pcmk-1" -> "probe_complete" [ style = bold] -+"probe_complete pcmk-1" [ style=bold color="green" fontcolor="black"] -+"probe_complete pcmk-2" -> "probe_complete" [ style = bold] -+"probe_complete pcmk-2" [ style=bold color="green" fontcolor="black"] -+"probe_complete pcmk-3" -> "probe_complete" [ style = bold] -+"probe_complete pcmk-3" [ style=bold color="green" fontcolor="black"] -+"probe_complete" -> "dummy_start_0 pcmk-1" [ style = bold] -+"probe_complete" [ style=bold color="green" fontcolor="orange"] -+} -diff --git a/pengine/test10/per-node-attrs.exp b/pengine/test10/per-node-attrs.exp -new file mode 100644 -index 0000000..1e38557 ---- /dev/null -+++ b/pengine/test10/per-node-attrs.exp -@@ -0,0 +1,97 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/per-node-attrs.scores b/pengine/test10/per-node-attrs.scores -new file mode 100644 -index 0000000..14a57ff ---- /dev/null -+++ b/pengine/test10/per-node-attrs.scores -@@ -0,0 +1,4 @@ -+Allocation scores: -+native_color: dummy allocation score on pcmk-1: 0 -+native_color: dummy allocation score on pcmk-2: 0 -+native_color: dummy allocation score on pcmk-3: 0 -diff --git a/pengine/test10/per-node-attrs.summary b/pengine/test10/per-node-attrs.summary -new file mode 100644 -index 0000000..420f2d2 ---- /dev/null -+++ b/pengine/test10/per-node-attrs.summary -@@ -0,0 +1,21 @@ -+ -+Current cluster status: -+Online: [ pcmk-1 pcmk-2 pcmk-3 ] -+ -+ dummy (ocf::heartbeat:IPaddr2): Stopped -+ -+Transition Summary: -+ * Start dummy (pcmk-1) -+ -+Executing cluster transition: -+ * Resource action: dummy monitor on pcmk-3 -+ * Resource action: dummy monitor on pcmk-2 -+ * Resource action: dummy monitor on pcmk-1 -+ * Pseudo action: probe_complete -+ * Resource action: dummy start on pcmk-1 -+ -+Revised cluster status: -+Online: [ pcmk-1 pcmk-2 pcmk-3 ] -+ -+ dummy (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ -diff --git a/pengine/test10/per-node-attrs.xml b/pengine/test10/per-node-attrs.xml -new file mode 100644 -index 0000000..928debe ---- /dev/null -+++ b/pengine/test10/per-node-attrs.xml -@@ -0,0 +1,54 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/probe-0.summary b/pengine/test10/probe-0.summary -index 3df06f6..d1340c5 100644 ---- a/pengine/test10/probe-0.summary -+++ b/pengine/test10/probe-0.summary -@@ -7,7 +7,7 @@ Online: [ x32c47 x32c48 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ x32c47 x32c48 ] - Clone Set: configstorecloneset [configstoreclone] -- Stopped: [ configstoreclone:0 configstoreclone:1 ] -+ Stopped: [ x32c47 x32c48 ] - - Transition Summary: - * Start configstoreclone:0 (x32c47) -diff --git a/pengine/test10/probe-2.summary b/pengine/test10/probe-2.summary -index 39fb48c..e8a2269 100644 ---- 
a/pengine/test10/probe-2.summary -+++ b/pengine/test10/probe-2.summary -@@ -136,23 +136,23 @@ Online: [ wc01 ] - intip_nfs (ocf::heartbeat:IPaddr2): Started wc01 - Master/Slave Set: ms_drbd_mysql [drbd_mysql] - Masters: [ wc01 ] -- Stopped: [ drbd_mysql:1 ] -+ Stopped: [ wc02 ] - Resource Group: group_mysql - fs_mysql (ocf::heartbeat:Filesystem): Started wc01 - intip_sql (ocf::heartbeat:IPaddr2): Started wc01 - mysql-server (ocf::heartbeat:mysql): Started wc01 - Master/Slave Set: ms_drbd_www [drbd_www] - Masters: [ wc01 ] -- Stopped: [ drbd_www:1 ] -+ Stopped: [ wc02 ] - Clone Set: clone_nfs-common [group_nfs-common] - Started: [ wc01 ] -- Stopped: [ group_nfs-common:1 ] -+ Stopped: [ wc02 ] - Clone Set: clone_mysql-proxy [group_mysql-proxy] - Started: [ wc01 ] -- Stopped: [ group_mysql-proxy:1 ] -+ Stopped: [ wc02 ] - Clone Set: clone_webservice [group_webservice] - Started: [ wc01 ] -- Stopped: [ group_webservice:1 ] -+ Stopped: [ wc02 ] - Resource Group: group_ftpd - extip_ftp (ocf::heartbeat:IPaddr2): Started wc01 - pure-ftpd (ocf::heartbeat:Pure-FTPd): Started wc01 -diff --git a/pengine/test10/probe-3.scores b/pengine/test10/probe-3.scores -index 794ed4a..277670c 100644 ---- a/pengine/test10/probe-3.scores -+++ b/pengine/test10/probe-3.scores -@@ -44,15 +44,15 @@ clone_color: ping-1:3 allocation score on pcmk-2: 0 - clone_color: ping-1:3 allocation score on pcmk-3: 0 - clone_color: ping-1:3 allocation score on pcmk-4: 0 - clone_color: stateful-1:0 allocation score on pcmk-1: 11 --clone_color: stateful-1:0 allocation score on pcmk-2: 5 --clone_color: stateful-1:0 allocation score on pcmk-3: 5 -+clone_color: stateful-1:0 allocation score on pcmk-2: 0 -+clone_color: stateful-1:0 allocation score on pcmk-3: 0 - clone_color: stateful-1:0 allocation score on pcmk-4: 0 --clone_color: stateful-1:1 allocation score on pcmk-1: 10 -+clone_color: stateful-1:1 allocation score on pcmk-1: 0 - clone_color: stateful-1:1 allocation score on pcmk-2: 6 --clone_color: stateful-1:1 allocation score on pcmk-3: 5 -+clone_color: stateful-1:1 allocation score on pcmk-3: 0 - clone_color: stateful-1:1 allocation score on pcmk-4: 0 --clone_color: stateful-1:2 allocation score on pcmk-1: 10 --clone_color: stateful-1:2 allocation score on pcmk-2: 5 -+clone_color: stateful-1:2 allocation score on pcmk-1: 0 -+clone_color: stateful-1:2 allocation score on pcmk-2: 0 - clone_color: stateful-1:2 allocation score on pcmk-3: 6 - clone_color: stateful-1:2 allocation score on pcmk-4: 0 - clone_color: stateful-1:3 allocation score on pcmk-1: 0 -@@ -147,11 +147,11 @@ native_color: stateful-1:0 allocation score on pcmk-1: 11 - native_color: stateful-1:0 allocation score on pcmk-2: -INFINITY - native_color: stateful-1:0 allocation score on pcmk-3: -INFINITY - native_color: stateful-1:0 allocation score on pcmk-4: -INFINITY --native_color: stateful-1:1 allocation score on pcmk-1: 10 -+native_color: stateful-1:1 allocation score on pcmk-1: 0 - native_color: stateful-1:1 allocation score on pcmk-2: 6 --native_color: stateful-1:1 allocation score on pcmk-3: 5 -+native_color: stateful-1:1 allocation score on pcmk-3: 0 - native_color: stateful-1:1 allocation score on pcmk-4: -INFINITY --native_color: stateful-1:2 allocation score on pcmk-1: 10 -+native_color: stateful-1:2 allocation score on pcmk-1: 0 - native_color: stateful-1:2 allocation score on pcmk-2: -INFINITY - native_color: stateful-1:2 allocation score on pcmk-3: 6 - native_color: stateful-1:2 allocation score on pcmk-4: -INFINITY -diff --git 
a/pengine/test10/probe-3.summary b/pengine/test10/probe-3.summary -index 8ab28ef..c11a5ba 100644 ---- a/pengine/test10/probe-3.summary -+++ b/pengine/test10/probe-3.summary -@@ -15,14 +15,14 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-3 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Masters: [ pcmk-1 ] - Slaves: [ pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - - Transition Summary: - -@@ -44,12 +44,12 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-3 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Masters: [ pcmk-1 ] - Slaves: [ pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - -diff --git a/pengine/test10/probe-4.scores b/pengine/test10/probe-4.scores -index 080ab8b..b1615c0 100644 ---- a/pengine/test10/probe-4.scores -+++ b/pengine/test10/probe-4.scores -@@ -46,15 +46,15 @@ clone_color: ping-1:3 allocation score on pcmk-2: 0 - clone_color: ping-1:3 allocation score on pcmk-3: 0 - clone_color: ping-1:3 allocation score on pcmk-4: 0 - clone_color: stateful-1:0 allocation score on pcmk-1: 11 --clone_color: stateful-1:0 allocation score on pcmk-2: 5 --clone_color: stateful-1:0 allocation score on pcmk-3: 5 -+clone_color: stateful-1:0 allocation score on pcmk-2: 0 -+clone_color: stateful-1:0 allocation score on pcmk-3: 0 - clone_color: stateful-1:0 allocation score on pcmk-4: 0 --clone_color: stateful-1:1 allocation score on pcmk-1: 10 -+clone_color: stateful-1:1 allocation score on pcmk-1: 0 - clone_color: stateful-1:1 allocation score on pcmk-2: 6 --clone_color: stateful-1:1 allocation score on pcmk-3: 5 -+clone_color: stateful-1:1 allocation score on pcmk-3: 0 - clone_color: stateful-1:1 allocation score on pcmk-4: 0 --clone_color: stateful-1:2 allocation score on pcmk-1: 10 --clone_color: stateful-1:2 allocation score on pcmk-2: 5 -+clone_color: stateful-1:2 allocation score on pcmk-1: 0 -+clone_color: stateful-1:2 allocation score on pcmk-2: 0 - clone_color: stateful-1:2 allocation score on pcmk-3: 6 - clone_color: stateful-1:2 allocation score on pcmk-4: 0 - clone_color: stateful-1:3 allocation score on pcmk-1: 0 -@@ -149,11 +149,11 @@ native_color: stateful-1:0 allocation score on pcmk-1: 11 - native_color: stateful-1:0 allocation score on pcmk-2: -INFINITY - native_color: stateful-1:0 allocation score on pcmk-3: -INFINITY - native_color: stateful-1:0 allocation score on pcmk-4: -INFINITY --native_color: stateful-1:1 allocation score on pcmk-1: 10 -+native_color: stateful-1:1 allocation score on pcmk-1: 0 - native_color: stateful-1:1 allocation score on pcmk-2: 6 --native_color: stateful-1:1 allocation score on pcmk-3: 5 -+native_color: stateful-1:1 allocation score on pcmk-3: 0 - native_color: stateful-1:1 allocation score on pcmk-4: -INFINITY --native_color: stateful-1:2 allocation score on pcmk-1: 10 -+native_color: stateful-1:2 allocation score on pcmk-1: 0 - native_color: stateful-1:2 allocation score on pcmk-2: -INFINITY - native_color: stateful-1:2 allocation score on pcmk-3: 6 - 
native_color: stateful-1:2 allocation score on pcmk-4: -INFINITY -diff --git a/pengine/test10/probe-4.summary b/pengine/test10/probe-4.summary -index d666e5d..e2eb8af 100644 ---- a/pengine/test10/probe-4.summary -+++ b/pengine/test10/probe-4.summary -@@ -15,14 +15,14 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ] - migrator (ocf::pacemaker:Dummy): Stopped - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Masters: [ pcmk-1 ] - Slaves: [ pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - - Transition Summary: - * Start migrator (pcmk-3) -@@ -47,12 +47,12 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ] - migrator (ocf::pacemaker:Dummy): Stopped - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Masters: [ pcmk-1 ] - Slaves: [ pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - -diff --git a/pengine/test10/rec-node-13.summary b/pengine/test10/rec-node-13.summary -index e425beb..2833d0b 100644 ---- a/pengine/test10/rec-node-13.summary -+++ b/pengine/test10/rec-node-13.summary -@@ -6,7 +6,7 @@ OFFLINE: [ c001n03 c001n05 ] - - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n02 c001n06 c001n07 ] -- Stopped: [ child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n03 c001n04 c001n05 ] - DcIPaddr (ocf::heartbeat:IPaddr): Stopped - Resource Group: group-1 - ocf_192.168.100.181 (ocf::heartbeat:IPaddr): Started c001n02 -@@ -50,7 +50,7 @@ OFFLINE: [ c001n03 c001n04 c001n05 ] - - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n02 c001n06 c001n07 ] -- Stopped: [ child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n03 c001n04 c001n05 ] - DcIPaddr (ocf::heartbeat:IPaddr): Stopped - Resource Group: group-1 - ocf_192.168.100.181 (ocf::heartbeat:IPaddr): Started c001n02 -diff --git a/pengine/test10/rsc-sets-clone-1.summary b/pengine/test10/rsc-sets-clone-1.summary -index d840454..4e2ced1 100644 ---- a/pengine/test10/rsc-sets-clone-1.summary -+++ b/pengine/test10/rsc-sets-clone-1.summary -@@ -10,9 +10,9 @@ Online: [ sys2 sys3 ] - stonithsys3 (stonith:external/ipmi): Started sys2 - Clone Set: baseclone [basegrp] - Started: [ sys2 ] -- Stopped: [ basegrp:1 ] -+ Stopped: [ sys3 ] - Clone Set: fs1 [nfs1] -- Stopped: [ nfs1:0 nfs1:1 ] -+ Stopped: [ sys2 sys3 ] - - Transition Summary: - * Restart stonithsys3 (Started sys2) -@@ -80,5 +80,5 @@ Online: [ sys2 sys3 ] - Clone Set: baseclone [basegrp] - Started: [ sys2 sys3 ] - Clone Set: fs1 [nfs1] -- Stopped: [ nfs1:0 nfs1:1 ] -+ Stopped: [ sys2 sys3 ] - -diff --git a/pengine/test10/rsc-sets-clone.summary b/pengine/test10/rsc-sets-clone.summary -index 697f94a..7ee23a2 100644 ---- a/pengine/test10/rsc-sets-clone.summary -+++ b/pengine/test10/rsc-sets-clone.summary -@@ -33,5 +33,5 @@ Online: [ node2 ] - rsc3 (ocf::pacemaker:Dummy): Started node2 - Clone Set: clone-rsc [rsc] - Started: [ node2 ] -- Stopped: [ rsc:1 ] -+ Stopped: [ node1 ] - -diff --git a/pengine/test10/rsc-sets-master.summary b/pengine/test10/rsc-sets-master.summary -index fafb423..5415cda 100644 ---- a/pengine/test10/rsc-sets-master.summary -+++ 
b/pengine/test10/rsc-sets-master.summary -@@ -41,7 +41,7 @@ Online: [ node2 ] - - Master/Slave Set: ms-rsc [rsc] - Masters: [ node2 ] -- Stopped: [ rsc:1 ] -+ Stopped: [ node1 ] - rsc1 (ocf::pacemaker:Dummy): Started node2 - rsc2 (ocf::pacemaker:Dummy): Started node2 - rsc3 (ocf::pacemaker:Dummy): Started node2 -diff --git a/pengine/test10/stonith-0.summary b/pengine/test10/stonith-0.summary -index e4253c7..a91a06c 100644 ---- a/pengine/test10/stonith-0.summary -+++ b/pengine/test10/stonith-0.summary -@@ -19,7 +19,7 @@ Online: [ c001n02 c001n04 c001n06 c001n07 c001n08 ] - rsc_c001n08 (ocf::heartbeat:IPaddr): Started c001n08 - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n02 c001n04 c001n06 c001n07 c001n08 ] -- Stopped: [ child_DoFencing:5 child_DoFencing:6 ] -+ Stopped: [ c001n03 c001n05 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Master c001n02 - ocf_msdummy:1 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Started c001n02 -@@ -90,7 +90,7 @@ OFFLINE: [ c001n03 c001n05 ] - rsc_c001n08 (ocf::heartbeat:IPaddr): Started c001n08 - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n02 c001n04 c001n06 c001n07 c001n08 ] -- Stopped: [ child_DoFencing:5 child_DoFencing:6 ] -+ Stopped: [ c001n03 c001n05 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Master c001n02 - ocf_msdummy:1 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Started c001n02 -diff --git a/pengine/test10/stonith-1.summary b/pengine/test10/stonith-1.summary -index b2c46d9..d3e81db 100644 ---- a/pengine/test10/stonith-1.summary -+++ b/pengine/test10/stonith-1.summary -@@ -15,7 +15,7 @@ Online: [ sles-1 sles-2 sles-4 ] - rsc_sles-4 (ocf::heartbeat:IPaddr): Started sles-4 - Clone Set: DoFencing [child_DoFencing] - Started: [ sles-1 sles-2 sles-3 ] -- Stopped: [ child_DoFencing:3 ] -+ Stopped: [ sles-4 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -@@ -98,7 +98,7 @@ OFFLINE: [ sles-3 ] - rsc_sles-4 (ocf::heartbeat:IPaddr): Started sles-4 - Clone Set: DoFencing [child_DoFencing] - Started: [ sles-1 sles-2 sles-4 ] -- Stopped: [ child_DoFencing:3 ] -+ Stopped: [ sles-3 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Started sles-4 - ocf_msdummy:1 (ocf::heartbeat:Stateful): Started sles-1 -diff --git a/pengine/test10/stonith-2.summary b/pengine/test10/stonith-2.summary -index 59e5fc4..f02dd74 100644 ---- a/pengine/test10/stonith-2.summary -+++ b/pengine/test10/stonith-2.summary -@@ -17,7 +17,7 @@ Online: [ sles-1 sles-2 sles-3 sles-4 sles-6 ] - rsc_sles-6 (ocf::heartbeat:IPaddr): Started sles-6 - Clone Set: DoFencing [child_DoFencing] - Started: [ sles-1 sles-2 sles-3 sles-4 sles-6 ] -- Stopped: [ child_DoFencing:5 ] -+ Stopped: [ sles-5 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Started sles-3 - ocf_msdummy:1 (ocf::heartbeat:Stateful): Started sles-4 -@@ -60,7 +60,7 @@ OFFLINE: [ sles-5 ] - rsc_sles-6 (ocf::heartbeat:IPaddr): Started sles-6 - Clone Set: DoFencing [child_DoFencing] - Started: [ sles-1 sles-2 sles-3 sles-4 sles-6 ] -- Stopped: [ child_DoFencing:5 ] -+ Stopped: [ sles-5 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Started sles-3 - ocf_msdummy:1 
(ocf::heartbeat:Stateful): Started sles-4 -diff --git a/pengine/test10/stonith-3.summary b/pengine/test10/stonith-3.summary -index 651974c..b5b6d8a 100644 ---- a/pengine/test10/stonith-3.summary -+++ b/pengine/test10/stonith-3.summary -@@ -5,7 +5,7 @@ Online: [ rh5node2 ] - - prmIpPostgreSQLDB (ocf::heartbeat:IPaddr): Stopped - Clone Set: clnStonith [grpStonith] -- Stopped: [ grpStonith:0 grpStonith:1 ] -+ Stopped: [ rh5node1 rh5node2 ] - - Transition Summary: - * Start prmIpPostgreSQLDB (rh5node2) -@@ -33,5 +33,5 @@ OFFLINE: [ rh5node1 ] - prmIpPostgreSQLDB (ocf::heartbeat:IPaddr): Started rh5node2 - Clone Set: clnStonith [grpStonith] - Started: [ rh5node2 ] -- Stopped: [ grpStonith:1 ] -+ Stopped: [ rh5node1 ] - -diff --git a/pengine/test10/stop-failure-no-fencing.dot b/pengine/test10/stop-failure-no-fencing.dot -new file mode 100644 -index 0000000..3769c1d ---- /dev/null -+++ b/pengine/test10/stop-failure-no-fencing.dot -@@ -0,0 +1,6 @@ -+digraph "g" { -+"clvm-clone_stop_0" -> "clvm-clone_stopped_0" [ style = dashed] -+"clvm-clone_stop_0" [ style=dashed color="red" fontcolor="orange"] -+"clvm-clone_stopped_0" [ style=dashed color="red" fontcolor="orange"] -+"probe_complete pcmk-2" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/stop-failure-no-fencing.exp b/pengine/test10/stop-failure-no-fencing.exp -new file mode 100644 -index 0000000..e9d94b8 ---- /dev/null -+++ b/pengine/test10/stop-failure-no-fencing.exp -@@ -0,0 +1,10 @@ [content of the 10 added XML lines not preserved in this copy] -diff --git a/pengine/test10/stop-failure-no-fencing.scores b/pengine/test10/stop-failure-no-fencing.scores -new file mode 100644 -index 0000000..93b2031 ---- /dev/null -+++ b/pengine/test10/stop-failure-no-fencing.scores -@@ -0,0 +1,77 @@ -+Allocation scores: -+clone_color: clvm-clone allocation score on pcmk-1: 0 -+clone_color: clvm-clone allocation score on pcmk-2: 0 -+clone_color: clvm-clone allocation score on pcmk-3: -INFINITY -+clone_color: clvm-clone allocation score on pcmk-4: 0 -+clone_color: clvm:0 allocation score on pcmk-1: 0 -+clone_color: clvm:0 allocation score on pcmk-2: 0 -+clone_color: clvm:0 allocation score on pcmk-3: -INFINITY -+clone_color: clvm:0 allocation score on pcmk-4: 0 -+clone_color: clvm:1 allocation score on pcmk-1: 0 -+clone_color: clvm:1 allocation score on pcmk-2: 0 -+clone_color: clvm:1 allocation score on pcmk-3: -INFINITY -+clone_color: clvm:1 allocation score on pcmk-4: 0 -+clone_color: clvm:2 allocation score on pcmk-1: 0 -+clone_color: clvm:2 allocation score on pcmk-2: 0 -+clone_color: clvm:2 allocation score on pcmk-3: -INFINITY -+clone_color: clvm:2 allocation score on pcmk-4: 0 -+clone_color: clvm:3 allocation score on pcmk-1: 0 -+clone_color: clvm:3 allocation score on pcmk-2: 0 -+clone_color: clvm:3 allocation score on pcmk-3: -INFINITY -+clone_color: clvm:3 allocation score on pcmk-4: 0 -+clone_color: dlm-clone allocation score on pcmk-1: 0 -+clone_color: dlm-clone allocation score on pcmk-2: 0 -+clone_color: dlm-clone allocation score on pcmk-3: 0 -+clone_color: dlm-clone allocation score on pcmk-4: 0 -+clone_color: dlm:0 allocation score on pcmk-1: 0 -+clone_color: dlm:0 allocation score on pcmk-2: 0 -+clone_color: dlm:0 allocation score on pcmk-3: 0 -+clone_color: dlm:0 allocation score on pcmk-4: 0 -+clone_color: dlm:1 allocation score on pcmk-1: 0 -+clone_color: dlm:1 allocation score on pcmk-2: 0 -+clone_color: dlm:1 allocation score on pcmk-3: 0 -+clone_color: dlm:1 allocation score on pcmk-4: 0 -+clone_color: dlm:2 allocation score on pcmk-1: 0
-+clone_color: dlm:2 allocation score on pcmk-2: 0
-+clone_color: dlm:2 allocation score on pcmk-3: 0
-+clone_color: dlm:2 allocation score on pcmk-4: 0
-+clone_color: dlm:3 allocation score on pcmk-1: 0
-+clone_color: dlm:3 allocation score on pcmk-2: 0
-+clone_color: dlm:3 allocation score on pcmk-3: 0
-+clone_color: dlm:3 allocation score on pcmk-4: 0
-+native_color: ClusterIP allocation score on pcmk-1: -INFINITY
-+native_color: ClusterIP allocation score on pcmk-2: -INFINITY
-+native_color: ClusterIP allocation score on pcmk-3: -INFINITY
-+native_color: ClusterIP allocation score on pcmk-4: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-1: 0
-+native_color: dlm:0 allocation score on pcmk-2: 0
-+native_color: dlm:0 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-1: 0
-+native_color: dlm:1 allocation score on pcmk-2: 0
-+native_color: dlm:1 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-1: 0
-+native_color: dlm:2 allocation score on pcmk-2: 0
-+native_color: dlm:2 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-1: 0
-+native_color: dlm:3 allocation score on pcmk-2: 0
-+native_color: dlm:3 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-4: -INFINITY
-diff --git a/pengine/test10/stop-failure-no-fencing.summary b/pengine/test10/stop-failure-no-fencing.summary
-new file mode 100644
-index 0000000..3b6b40d
---- /dev/null
-+++ b/pengine/test10/stop-failure-no-fencing.summary
-@@ -0,0 +1,29 @@
-+
-+Current cluster status:
-+Node pcmk-3 (103): UNCLEAN (offline)
-+Node pcmk-4 (104): UNCLEAN (offline)
-+Online: [ pcmk-1 pcmk-2 ]
-+
-+ Clone Set: dlm-clone [dlm]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ Clone Set: clvm-clone [clvm]
-+ clvm (lsb:clvmd): Started pcmk-3 (unmanaged) FAILED
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ ClusterIP (ocf::heartbeat:IPaddr2): Stopped
-+
-+Transition Summary:
-+
-+Executing cluster transition:
-+
-+Revised cluster status:
-+Node pcmk-3 (103): UNCLEAN (offline)
-+Node pcmk-4 (104): UNCLEAN (offline)
-+Online: [ pcmk-1 pcmk-2 ]
-+
-+ Clone Set: dlm-clone [dlm]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ Clone Set: clvm-clone [clvm]
-+ clvm (lsb:clvmd): Started pcmk-3 (unmanaged) FAILED
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ ClusterIP (ocf::heartbeat:IPaddr2): Stopped
-+
-diff --git a/pengine/test10/stop-failure-no-fencing.xml b/pengine/test10/stop-failure-no-fencing.xml
-new file mode 100644
-index 0000000..06c6d38
---- /dev/null
-+++ b/pengine/test10/stop-failure-no-fencing.xml
-@@ -0,0 +1,114 @@
-diff --git a/pengine/test10/stop-failure-no-quorum.dot b/pengine/test10/stop-failure-no-quorum.dot
-new file mode 100644
-index 0000000..55faa45
---- /dev/null
-+++ b/pengine/test10/stop-failure-no-quorum.dot
-@@ -0,0 +1,19 @@
-+digraph "g" {
-+"ClusterIP_monitor_60000 pcmk-1" [ style=dashed color="red" fontcolor="black"]
-+"all_stopped" [ style=bold color="green" fontcolor="orange"]
-+"clvm-clone_stop_0" -> "clvm-clone_stopped_0" [ style = bold]
-+"clvm-clone_stop_0" -> "clvm_stop_0 pcmk-2" [ style = bold]
-+"clvm-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
-+"clvm-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
-+"clvm:2_monitor_60000 pcmk-1" [ style=dashed color="red" fontcolor="black"]
-+"clvm_stop_0 pcmk-2" -> "all_stopped" [ style = bold]
-+"clvm_stop_0 pcmk-2" -> "clvm-clone_stopped_0" [ style = bold]
-+"clvm_stop_0 pcmk-2" [ style=bold color="green" fontcolor="orange"]
-+"dlm_monitor_60000 pcmk-1" [ style=dashed color="red" fontcolor="black"]
-+"stonith pcmk-2" -> "clvm-clone_stop_0" [ style = bold]
-+"stonith pcmk-2" -> "clvm_stop_0 pcmk-2" [ style = bold]
-+"stonith pcmk-2" -> "stonith_complete" [ style = bold]
-+"stonith pcmk-2" [ style=bold color="green" fontcolor="black"]
-+"stonith_complete" -> "all_stopped" [ style = bold]
-+"stonith_complete" [ style=bold color="green" fontcolor="orange"]
-+}
-diff --git a/pengine/test10/stop-failure-no-quorum.exp b/pengine/test10/stop-failure-no-quorum.exp
-new file mode 100644
-index 0000000..b2260c7
---- /dev/null
-+++ b/pengine/test10/stop-failure-no-quorum.exp
-@@ -0,0 +1,79 @@
-diff --git a/pengine/test10/stop-failure-no-quorum.scores b/pengine/test10/stop-failure-no-quorum.scores
-new file mode 100644
-index 0000000..df30423
---- /dev/null
-+++ b/pengine/test10/stop-failure-no-quorum.scores
-@@ -0,0 +1,81 @@
-+Allocation scores:
-+clone_color: clvm-clone allocation score on pcmk-1: 0
-+clone_color: clvm-clone allocation score on pcmk-2: -INFINITY
-+clone_color: clvm-clone allocation score on pcmk-3: -INFINITY
-+clone_color: clvm-clone allocation score on pcmk-4: 0
-+clone_color: clvm:0 allocation score on pcmk-1: 0
-+clone_color: clvm:0 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:0 allocation score on pcmk-3: -INFINITY
-+clone_color: clvm:0 allocation score on pcmk-4: 0
-+clone_color: clvm:1 allocation score on pcmk-1: 0
-+clone_color: clvm:1 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:1 allocation score on pcmk-3: -INFINITY
-+clone_color: clvm:1 allocation score on pcmk-4: 0
-+clone_color: clvm:2 allocation score on pcmk-1: 0
-+clone_color: clvm:2 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:2 allocation score on pcmk-3: -INFINITY
-+clone_color: clvm:2 allocation score on pcmk-4: 0
-+clone_color: clvm:3 allocation score on pcmk-1: 0
-+clone_color: clvm:3 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:3 allocation score on pcmk-3: -INFINITY
-+clone_color: clvm:3 allocation score on pcmk-4: 0
-+clone_color: dlm-clone allocation score on pcmk-1: 0
-+clone_color: dlm-clone allocation score on pcmk-2: 0
-+clone_color: dlm-clone allocation score on pcmk-3: 0
-+clone_color: dlm-clone allocation score on pcmk-4: 0
-+clone_color: dlm:0 allocation score on pcmk-1: 0
-+clone_color: dlm:0 allocation score on pcmk-2: 0
-+clone_color: dlm:0 allocation score on pcmk-3: 0
-+clone_color: dlm:0 allocation score on pcmk-4: 0
-+clone_color: dlm:1 allocation score on pcmk-1: 0
-+clone_color: dlm:1 allocation score on pcmk-2: 0
-+clone_color: dlm:1 allocation score on pcmk-3: 0
-+clone_color: dlm:1 allocation score on pcmk-4: 0
-+clone_color: dlm:2 allocation score on pcmk-1: 0
-+clone_color: dlm:2 allocation score on pcmk-2: 0
-+clone_color: dlm:2 allocation score on pcmk-3: 0
-+clone_color: dlm:2 allocation score on pcmk-4: 0
-+clone_color: dlm:3 allocation score on pcmk-1: 0
-+clone_color: dlm:3 allocation score on pcmk-2: 0
-+clone_color: dlm:3 allocation score on pcmk-3: 0
-+clone_color: dlm:3 allocation score on pcmk-4: 0
-+native_color: ClusterIP allocation score on pcmk-1: 0
-+native_color: ClusterIP allocation score on pcmk-2: -INFINITY
-+native_color: ClusterIP allocation score on pcmk-3: -INFINITY
-+native_color: ClusterIP allocation score on pcmk-4: -INFINITY
-+native_color: Fencing allocation score on pcmk-1: 0
-+native_color: Fencing allocation score on pcmk-2: 0
-+native_color: Fencing allocation score on pcmk-3: 0
-+native_color: Fencing allocation score on pcmk-4: 0
-+native_color: clvm:0 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-1: 0
-+native_color: clvm:2 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-1: 0
-+native_color: dlm:0 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-1: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-1: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-1: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-4: -INFINITY
-diff --git a/pengine/test10/stop-failure-no-quorum.summary b/pengine/test10/stop-failure-no-quorum.summary
-new file mode 100644
-index 0000000..1cab287
---- /dev/null
-+++ b/pengine/test10/stop-failure-no-quorum.summary
-@@ -0,0 +1,45 @@
-+
-+Current cluster status:
-+Node pcmk-2 (102): UNCLEAN (online)
-+Node pcmk-3 (103): UNCLEAN (offline)
-+Node pcmk-4 (104): UNCLEAN (offline)
-+Online: [ pcmk-1 ]
-+
-+ Clone Set: dlm-clone [dlm]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ Clone Set: clvm-clone [clvm]
-+ clvm (lsb:clvmd): Started pcmk-2 FAILED
-+ clvm (lsb:clvmd): Started pcmk-3 (unmanaged) FAILED
-+ Stopped: [ pcmk-1 pcmk-3 pcmk-4 ]
-+ ClusterIP (ocf::heartbeat:IPaddr2): Stopped
-+ Fencing (stonith:fence_xvm): Stopped
-+
-+Transition Summary:
-+ * Start dlm:0 (pcmk-1 - blocked)
-+ * Stop clvm:0 (pcmk-2)
-+ * Start clvm:2 (pcmk-1 - blocked)
-+ * Start ClusterIP (pcmk-1 - blocked)
-+ * Start Fencing (pcmk-1 - blocked)
-+
-+Executing cluster transition:
-+ * Fencing pcmk-2
-+ * Pseudo action: stonith_complete
-+ * Pseudo action: clvm-clone_stop_0
-+ * Pseudo action: clvm_stop_0
-+ * Pseudo action: clvm-clone_stopped_0
-+ * Pseudo action: all_stopped
-+
-+Revised cluster status:
-+Node pcmk-3 (103): UNCLEAN (offline)
-+Node pcmk-4 (104): UNCLEAN (offline)
-+Online: [ pcmk-1 ]
-+OFFLINE: [ pcmk-2 ]
-+
-+ Clone Set: dlm-clone [dlm]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ Clone Set: clvm-clone [clvm]
-+ clvm (lsb:clvmd): Started pcmk-3 (unmanaged) FAILED
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ ClusterIP (ocf::heartbeat:IPaddr2): Stopped
-+ Fencing (stonith:fence_xvm): Stopped
-+
-diff --git a/pengine/test10/stop-failure-no-quorum.xml b/pengine/test10/stop-failure-no-quorum.xml
-new file mode 100644
-index 0000000..3a2466a
---- /dev/null
-+++ b/pengine/test10/stop-failure-no-quorum.xml
-@@ -0,0 +1,131 @@
-diff --git a/pengine/test10/stop-failure-with-fencing.dot b/pengine/test10/stop-failure-with-fencing.dot
-new file mode 100644
-index 0000000..a4dd5f8
---- /dev/null
-+++ b/pengine/test10/stop-failure-with-fencing.dot
-@@ -0,0 +1,24 @@
-+digraph "g" {
-+"ClusterIP_monitor_60000 pcmk-1" [ style=dashed color="red" fontcolor="black"]
-+"Fencing_monitor_0 pcmk-1" -> "probe_complete pcmk-1" [ style = bold]
-+"Fencing_monitor_0 pcmk-1" [ style=bold color="green" fontcolor="black"]
-+"all_stopped" [ style=bold color="green" fontcolor="orange"]
-+"clvm-clone_stop_0" -> "clvm-clone_stopped_0" [ style = bold]
-+"clvm-clone_stop_0" -> "clvm_stop_0 pcmk-2" [ style = bold]
-+"clvm-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
-+"clvm-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
-+"clvm:1_monitor_60000 pcmk-1" [ style=dashed color="red" fontcolor="black"]
-+"clvm_stop_0 pcmk-2" -> "all_stopped" [ style = bold]
-+"clvm_stop_0 pcmk-2" -> "clvm-clone_stopped_0" [ style = bold]
-+"clvm_stop_0 pcmk-2" [ style=bold color="green" fontcolor="orange"]
-+"dlm_monitor_60000 pcmk-1" [ style=dashed color="red" fontcolor="black"]
-+"probe_complete pcmk-1" -> "probe_complete" [ style = bold]
-+"probe_complete pcmk-1" [ style=bold color="green" fontcolor="black"]
-+"probe_complete" [ style=bold color="green" fontcolor="orange"]
-+"stonith pcmk-2" -> "clvm-clone_stop_0" [ style = bold]
-+"stonith pcmk-2" -> "clvm_stop_0 pcmk-2" [ style = bold]
-+"stonith pcmk-2" -> "stonith_complete" [ style = bold]
-+"stonith pcmk-2" [ style=bold color="green" fontcolor="black"]
-+"stonith_complete" -> "all_stopped" [ style = bold]
-+"stonith_complete" [ style=bold color="green" fontcolor="orange"]
-+}
-diff --git a/pengine/test10/stop-failure-with-fencing.exp b/pengine/test10/stop-failure-with-fencing.exp
-new file mode 100644
-index 0000000..1c22e38
---- /dev/null
-+++ b/pengine/test10/stop-failure-with-fencing.exp
-@@ -0,0 +1,112 @@
-diff --git a/pengine/test10/stop-failure-with-fencing.scores b/pengine/test10/stop-failure-with-fencing.scores
-new file mode 100644
-index 0000000..cf62806
---- /dev/null
-+++ b/pengine/test10/stop-failure-with-fencing.scores
-@@ -0,0 +1,81 @@
-+Allocation scores:
-+clone_color: clvm-clone allocation score on pcmk-1: 0
-+clone_color: clvm-clone allocation score on pcmk-2: -INFINITY
-+clone_color: clvm-clone allocation score on pcmk-3: 0
-+clone_color: clvm-clone allocation score on pcmk-4: 0
-+clone_color: clvm:0 allocation score on pcmk-1: 0
-+clone_color: clvm:0 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:0 allocation score on pcmk-3: 0
-+clone_color: clvm:0 allocation score on pcmk-4: 0
-+clone_color: clvm:1 allocation score on pcmk-1: 0
-+clone_color: clvm:1 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:1 allocation score on pcmk-3: 0
-+clone_color: clvm:1 allocation score on pcmk-4: 0
-+clone_color: clvm:2 allocation score on pcmk-1: 0
-+clone_color: clvm:2 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:2 allocation score on pcmk-3: 0
-+clone_color: clvm:2 allocation score on pcmk-4: 0
-+clone_color: clvm:3 allocation score on pcmk-1: 0
-+clone_color: clvm:3 allocation score on pcmk-2: -INFINITY
-+clone_color: clvm:3 allocation score on pcmk-3: 0
-+clone_color: clvm:3 allocation score on pcmk-4: 0
-+clone_color: dlm-clone allocation score on pcmk-1: 0
-+clone_color: dlm-clone allocation score on pcmk-2: 0
-+clone_color: dlm-clone allocation score on pcmk-3: 0
-+clone_color: dlm-clone allocation score on pcmk-4: 0
-+clone_color: dlm:0 allocation score on pcmk-1: 0
-+clone_color: dlm:0 allocation score on pcmk-2: 0
-+clone_color: dlm:0 allocation score on pcmk-3: 0
-+clone_color: dlm:0 allocation score on pcmk-4: 0
-+clone_color: dlm:1 allocation score on pcmk-1: 0
-+clone_color: dlm:1 allocation score on pcmk-2: 0
-+clone_color: dlm:1 allocation score on pcmk-3: 0
-+clone_color: dlm:1 allocation score on pcmk-4: 0
-+clone_color: dlm:2 allocation score on pcmk-1: 0
-+clone_color: dlm:2 allocation score on pcmk-2: 0
-+clone_color: dlm:2 allocation score on pcmk-3: 0
-+clone_color: dlm:2 allocation score on pcmk-4: 0
-+clone_color: dlm:3 allocation score on pcmk-1: 0
-+clone_color: dlm:3 allocation score on pcmk-2: 0
-+clone_color: dlm:3 allocation score on pcmk-3: 0
-+clone_color: dlm:3 allocation score on pcmk-4: 0
-+native_color: ClusterIP allocation score on pcmk-1: 0
-+native_color: ClusterIP allocation score on pcmk-2: -INFINITY
-+native_color: ClusterIP allocation score on pcmk-3: -INFINITY
-+native_color: ClusterIP allocation score on pcmk-4: -INFINITY
-+native_color: Fencing allocation score on pcmk-1: 0
-+native_color: Fencing allocation score on pcmk-2: 0
-+native_color: Fencing allocation score on pcmk-3: 0
-+native_color: Fencing allocation score on pcmk-4: 0
-+native_color: clvm:0 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:0 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-1: 0
-+native_color: clvm:1 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:1 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:2 allocation score on pcmk-4: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-1: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-2: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-3: -INFINITY
-+native_color: clvm:3 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-1: 0
-+native_color: dlm:0 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:0 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-1: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:1 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-1: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:2 allocation score on pcmk-4: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-1: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-2: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-3: -INFINITY
-+native_color: dlm:3 allocation score on pcmk-4: -INFINITY
-diff --git a/pengine/test10/stop-failure-with-fencing.summary b/pengine/test10/stop-failure-with-fencing.summary
-new file mode 100644
-index 0000000..06c517e
---- /dev/null
-+++ b/pengine/test10/stop-failure-with-fencing.summary
-@@ -0,0 +1,45 @@
-+
-+Current cluster status:
-+Node pcmk-2 (102): UNCLEAN (online)
-+Node pcmk-3 (103): UNCLEAN (offline)
-+Node pcmk-4 (104): UNCLEAN (offline)
-+Online: [ pcmk-1 ]
-+
-+ Clone Set: dlm-clone [dlm]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ Clone Set: clvm-clone [clvm]
-+ clvm (lsb:clvmd): Started pcmk-2 FAILED
-+ Stopped: [ pcmk-1 pcmk-3 pcmk-4 ]
-+ ClusterIP (ocf::heartbeat:IPaddr2): Stopped
-+ Fencing (stonith:fence_xvm): Stopped
-+
-+Transition Summary:
-+ * Start dlm:0 (pcmk-1 - blocked)
-+ * Stop clvm:0 (pcmk-2)
-+ * Start clvm:1 (pcmk-1 - blocked)
-+ * Start ClusterIP (pcmk-1 - blocked)
-+ * Start Fencing (pcmk-1 - blocked)
-+
-+Executing cluster transition:
-+ * Resource action: Fencing monitor on pcmk-1
-+ * Fencing pcmk-2
-+ * Pseudo action: stonith_complete
-+ * Pseudo action: probe_complete
-+ * Pseudo action: clvm-clone_stop_0
-+ * Pseudo action: clvm_stop_0
-+ * Pseudo action: clvm-clone_stopped_0
-+ * Pseudo action: all_stopped
-+
-+Revised cluster status:
-+Node pcmk-3 (103): UNCLEAN (offline)
-+Node pcmk-4 (104): UNCLEAN (offline)
-+Online: [ pcmk-1 ]
-+OFFLINE: [ pcmk-2 ]
-+
-+ Clone Set: dlm-clone [dlm]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ Clone Set: clvm-clone [clvm]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-+ ClusterIP (ocf::heartbeat:IPaddr2): Stopped
-+ Fencing (stonith:fence_xvm): Stopped
-+
-diff --git a/pengine/test10/stop-failure-with-fencing.xml b/pengine/test10/stop-failure-with-fencing.xml
-new file mode 100644
-index 0000000..2be00d5
---- /dev/null
-+++ b/pengine/test10/stop-failure-with-fencing.xml
-@@ -0,0 +1,103 @@
-diff --git a/pengine/test10/target-1.summary b/pengine/test10/target-1.summary
-index 95ab900..3dd4852 100644
---- a/pengine/test10/target-1.summary
-+++ b/pengine/test10/target-1.summary
-@@ -7,6 +7,7 @@ Online: [ c001n01 c001n02 c001n03 c001n08 ]
- rsc_c001n02 (ocf::heartbeat:IPaddr): Started c001n02
- Master/Slave Set: promoteme [rsc_c001n03]
- Slaves: [ c001n03 ]
-+ Stopped: [ c001n01 c001n02 c001n08 ]
- rsc_c001n01 (ocf::heartbeat:IPaddr): Started c001n01
-
- Transition Summary:
-
-@@ -37,5 +38,6 @@ Online: [ c001n01 c001n02 c001n03 c001n08 ]
- rsc_c001n02 (ocf::heartbeat:IPaddr): Started c001n02
- Master/Slave Set: promoteme [rsc_c001n03]
- Slaves: [ c001n03 ]
-+ Stopped: [ c001n01 c001n02 c001n08 ]
- rsc_c001n01 (ocf::heartbeat:IPaddr): Started c001n01
-
-diff --git a/pengine/test10/template-clone-group.summary b/pengine/test10/template-clone-group.summary
-index f1386fb..930758c 100644
---- a/pengine/test10/template-clone-group.summary
-+++ b/pengine/test10/template-clone-group.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ node1 node2 ]
-
- Clone Set: clone1 [group1]
-- Stopped: [ group1:0 group1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node1)
-diff --git a/pengine/test10/template-clone-primitive.summary b/pengine/test10/template-clone-primitive.summary
-index 20fb1e0..ba41149 100644
---- a/pengine/test10/template-clone-primitive.summary
-+++ b/pengine/test10/template-clone-primitive.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ node1 node2 ]
-
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node1)
-diff --git a/pengine/test10/ticket-clone-1.summary b/pengine/test10/ticket-clone-1.summary
-index c6087ad..7bc4d43 100644
---- a/pengine/test10/ticket-clone-1.summary
-+++ b/pengine/test10/ticket-clone-1.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-10.summary b/pengine/test10/ticket-clone-10.summary
-index c6087ad..7bc4d43 100644
---- a/pengine/test10/ticket-clone-10.summary
-+++ b/pengine/test10/ticket-clone-10.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-11.summary b/pengine/test10/ticket-clone-11.summary
-index 6a38240..3188894 100644
---- a/pengine/test10/ticket-clone-11.summary
-+++ b/pengine/test10/ticket-clone-11.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-clone-13.summary b/pengine/test10/ticket-clone-13.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-13.summary
-+++ b/pengine/test10/ticket-clone-13.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-14.summary b/pengine/test10/ticket-clone-14.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-14.summary
-+++ b/pengine/test10/ticket-clone-14.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-15.summary b/pengine/test10/ticket-clone-15.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-15.summary
-+++ b/pengine/test10/ticket-clone-15.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-16.summary b/pengine/test10/ticket-clone-16.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-16.summary
-+++ b/pengine/test10/ticket-clone-16.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-17.summary b/pengine/test10/ticket-clone-17.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-17.summary
-+++ b/pengine/test10/ticket-clone-17.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-18.summary b/pengine/test10/ticket-clone-18.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-18.summary
-+++ b/pengine/test10/ticket-clone-18.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-19.summary b/pengine/test10/ticket-clone-19.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-19.summary
-+++ b/pengine/test10/ticket-clone-19.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-2.summary b/pengine/test10/ticket-clone-2.summary
-index 6a38240..3188894 100644
---- a/pengine/test10/ticket-clone-2.summary
-+++ b/pengine/test10/ticket-clone-2.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-clone-20.summary b/pengine/test10/ticket-clone-20.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-20.summary
-+++ b/pengine/test10/ticket-clone-20.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-21.summary b/pengine/test10/ticket-clone-21.summary
-index f254cb2..7973159 100644
---- a/pengine/test10/ticket-clone-21.summary
-+++ b/pengine/test10/ticket-clone-21.summary
-@@ -27,5 +27,5 @@ OFFLINE: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Stopped
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-22.summary b/pengine/test10/ticket-clone-22.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-22.summary
-+++ b/pengine/test10/ticket-clone-22.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-23.summary b/pengine/test10/ticket-clone-23.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-23.summary
-+++ b/pengine/test10/ticket-clone-23.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-3.summary b/pengine/test10/ticket-clone-3.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-3.summary
-+++ b/pengine/test10/ticket-clone-3.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-4.summary b/pengine/test10/ticket-clone-4.summary
-index c6087ad..7bc4d43 100644
---- a/pengine/test10/ticket-clone-4.summary
-+++ b/pengine/test10/ticket-clone-4.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-5.summary b/pengine/test10/ticket-clone-5.summary
-index 6a38240..3188894 100644
---- a/pengine/test10/ticket-clone-5.summary
-+++ b/pengine/test10/ticket-clone-5.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-clone-6.summary b/pengine/test10/ticket-clone-6.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-6.summary
-+++ b/pengine/test10/ticket-clone-6.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-7.summary b/pengine/test10/ticket-clone-7.summary
-index c6087ad..7bc4d43 100644
---- a/pengine/test10/ticket-clone-7.summary
-+++ b/pengine/test10/ticket-clone-7.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-8.summary b/pengine/test10/ticket-clone-8.summary
-index 6a38240..3188894 100644
---- a/pengine/test10/ticket-clone-8.summary
-+++ b/pengine/test10/ticket-clone-8.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-clone-9.summary b/pengine/test10/ticket-clone-9.summary
-index f254cb2..7973159 100644
---- a/pengine/test10/ticket-clone-9.summary
-+++ b/pengine/test10/ticket-clone-9.summary
-@@ -27,5 +27,5 @@ OFFLINE: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Stopped
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-master-1.summary b/pengine/test10/ticket-master-1.summary
-index a28786f..41ba380 100644
---- a/pengine/test10/ticket-master-1.summary
-+++ b/pengine/test10/ticket-master-1.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-master-10.summary b/pengine/test10/ticket-master-10.summary
-index 9538cf4..2b6f2c6 100644
---- a/pengine/test10/ticket-master-10.summary
-+++ b/pengine/test10/ticket-master-10.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-master-13.summary b/pengine/test10/ticket-master-13.summary
-index 1f201d3..5f5d0d1 100644
---- a/pengine/test10/ticket-master-13.summary
-+++ b/pengine/test10/ticket-master-13.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-master-14.summary b/pengine/test10/ticket-master-14.summary
-index 86b5ec4..9f7b89d 100644
---- a/pengine/test10/ticket-master-14.summary
-+++ b/pengine/test10/ticket-master-14.summary
-@@ -26,5 +26,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-master-15.summary b/pengine/test10/ticket-master-15.summary
-index 86b5ec4..9f7b89d 100644
---- a/pengine/test10/ticket-master-15.summary
-+++ b/pengine/test10/ticket-master-15.summary
-@@ -26,5 +26,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-master-2.summary b/pengine/test10/ticket-master-2.summary
-index 3ba0728..96a797e 100644
---- a/pengine/test10/ticket-master-2.summary
-+++ b/pengine/test10/ticket-master-2.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-master-21.summary b/pengine/test10/ticket-master-21.summary
-index 2229553..ca5bf84 100644
---- a/pengine/test10/ticket-master-21.summary
-+++ b/pengine/test10/ticket-master-21.summary
-@@ -31,5 +31,5 @@ OFFLINE: [ node1 ]
- rsc_stonith (stonith:null): Started node2
- Master/Slave Set: ms1 [rsc1]
- Slaves: [ node2 ]
-- Stopped: [ rsc1:1 ]
-+ Stopped: [ node1 ]
-
-diff --git a/pengine/test10/ticket-master-3.summary b/pengine/test10/ticket-master-3.summary
-index 86b5ec4..9f7b89d 100644
---- a/pengine/test10/ticket-master-3.summary
-+++ b/pengine/test10/ticket-master-3.summary
-@@ -26,5 +26,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-master-4.summary b/pengine/test10/ticket-master-4.summary
-index 9538cf4..2b6f2c6 100644
---- a/pengine/test10/ticket-master-4.summary
-+++ b/pengine/test10/ticket-master-4.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-master-7.summary b/pengine/test10/ticket-master-7.summary
-index 9538cf4..2b6f2c6 100644
---- a/pengine/test10/ticket-master-7.summary
-+++ b/pengine/test10/ticket-master-7.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Master/Slave Set: ms1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-master-9.summary b/pengine/test10/ticket-master-9.summary
-index 2229553..ca5bf84 100644
---- a/pengine/test10/ticket-master-9.summary
-+++ b/pengine/test10/ticket-master-9.summary
-@@ -31,5 +31,5 @@ OFFLINE: [ node1 ]
- rsc_stonith (stonith:null): Started node2
- Master/Slave Set: ms1 [rsc1]
- Slaves: [ node2 ]
-- Stopped: [ rsc1:1 ]
-+ Stopped: [ node1 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-1.summary b/pengine/test10/ticket-rsc-sets-1.summary
-index 5d1c19f..b893af6 100644
---- a/pengine/test10/ticket-rsc-sets-1.summary
-+++ b/pengine/test10/ticket-rsc-sets-1.summary
-@@ -8,9 +8,9 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
-- Stopped: [ rsc5:0 rsc5:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc5:0 (node2)
-@@ -42,7 +42,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-10.summary b/pengine/test10/ticket-rsc-sets-10.summary
-index e4e3ed5..66f807a 100644
---- a/pengine/test10/ticket-rsc-sets-10.summary
-+++ b/pengine/test10/ticket-rsc-sets-10.summary
-@@ -45,7 +45,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-11.summary b/pengine/test10/ticket-rsc-sets-11.summary
-index d44934f..47d3923 100644
---- a/pengine/test10/ticket-rsc-sets-11.summary
-+++ b/pengine/test10/ticket-rsc-sets-11.summary
-@@ -8,7 +8,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-@@ -25,7 +25,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-12.summary b/pengine/test10/ticket-rsc-sets-12.summary
-index b5c4da0..6801c64 100644
---- a/pengine/test10/ticket-rsc-sets-12.summary
-+++ b/pengine/test10/ticket-rsc-sets-12.summary
-@@ -8,7 +8,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Started node1
- rsc3 (ocf::pacemaker:Dummy): Started node1
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-@@ -34,7 +34,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-13.summary b/pengine/test10/ticket-rsc-sets-13.summary
-index e4e3ed5..66f807a 100644
---- a/pengine/test10/ticket-rsc-sets-13.summary
-+++ b/pengine/test10/ticket-rsc-sets-13.summary
-@@ -45,7 +45,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-14.summary b/pengine/test10/ticket-rsc-sets-14.summary
-index e4e3ed5..66f807a 100644
---- a/pengine/test10/ticket-rsc-sets-14.summary
-+++ b/pengine/test10/ticket-rsc-sets-14.summary
-@@ -45,7 +45,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-2.summary b/pengine/test10/ticket-rsc-sets-2.summary
-index 21357a1..e17dfdb 100644
---- a/pengine/test10/ticket-rsc-sets-2.summary
-+++ b/pengine/test10/ticket-rsc-sets-2.summary
-@@ -8,7 +8,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-3.summary b/pengine/test10/ticket-rsc-sets-3.summary
-index e4e3ed5..66f807a 100644
---- a/pengine/test10/ticket-rsc-sets-3.summary
-+++ b/pengine/test10/ticket-rsc-sets-3.summary
-@@ -45,7 +45,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-4.summary b/pengine/test10/ticket-rsc-sets-4.summary
-index 5d1c19f..b893af6 100644
---- a/pengine/test10/ticket-rsc-sets-4.summary
-+++ b/pengine/test10/ticket-rsc-sets-4.summary
-@@ -8,9 +8,9 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
-- Stopped: [ rsc5:0 rsc5:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc5:0 (node2)
-@@ -42,7 +42,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-5.summary b/pengine/test10/ticket-rsc-sets-5.summary
-index ceb25af..2982a43 100644
---- a/pengine/test10/ticket-rsc-sets-5.summary
-+++ b/pengine/test10/ticket-rsc-sets-5.summary
-@@ -8,7 +8,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-@@ -36,7 +36,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Started node1
- rsc3 (ocf::pacemaker:Dummy): Started node1
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-6.summary b/pengine/test10/ticket-rsc-sets-6.summary
-index 74a6550..7bb1686 100644
---- a/pengine/test10/ticket-rsc-sets-6.summary
-+++ b/pengine/test10/ticket-rsc-sets-6.summary
-@@ -8,7 +8,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Started node1
- rsc3 (ocf::pacemaker:Dummy): Started node1
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-7.summary b/pengine/test10/ticket-rsc-sets-7.summary
-index e4e3ed5..66f807a 100644
---- a/pengine/test10/ticket-rsc-sets-7.summary
-+++ b/pengine/test10/ticket-rsc-sets-7.summary
-@@ -45,7 +45,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-8.summary b/pengine/test10/ticket-rsc-sets-8.summary
-index d44934f..47d3923 100644
---- a/pengine/test10/ticket-rsc-sets-8.summary
-+++ b/pengine/test10/ticket-rsc-sets-8.summary
-@@ -8,7 +8,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-@@ -25,7 +25,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-rsc-sets-9.summary b/pengine/test10/ticket-rsc-sets-9.summary
-index e4e3ed5..66f807a 100644
---- a/pengine/test10/ticket-rsc-sets-9.summary
-+++ b/pengine/test10/ticket-rsc-sets-9.summary
-@@ -45,7 +45,7 @@ Online: [ node1 node2 ]
- rsc2 (ocf::pacemaker:Dummy): Stopped
- rsc3 (ocf::pacemaker:Dummy): Stopped
- Clone Set: clone4 [rsc4]
-- Stopped: [ rsc4:0 rsc4:1 ]
-+ Stopped: [ node1 node2 ]
- Master/Slave Set: ms5 [rsc5]
- Slaves: [ node1 node2 ]
-
-diff --git a/pengine/test10/unmanaged-master.summary b/pengine/test10/unmanaged-master.summary
-index 3dded53..066f139 100644
---- a/pengine/test10/unmanaged-master.summary
-+++ b/pengine/test10/unmanaged-master.summary
-@@ -4,9 +4,9 @@ Online: [ pcmk-1 pcmk-2 ]
- OFFLINE: [ pcmk-3 pcmk-4 ]
-
- Clone Set: Fencing [FencingChild] (unmanaged)
-- FencingChild:2 (stonith:fence_xvm): Started pcmk-2 (unmanaged)
-- FencingChild:3 (stonith:fence_xvm): Started pcmk-1 (unmanaged)
-- Stopped: [ FencingChild:0 FencingChild:1 ]
-+ FencingChild (stonith:fence_xvm): Started pcmk-2 (unmanaged)
-+ FencingChild (stonith:fence_xvm): Started pcmk-1 (unmanaged)
-+ Stopped: [ pcmk-3 pcmk-4 ]
- Resource Group: group-1
- r192.168.122.126 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged)
- r192.168.122.127 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged)
-@@ -18,13 +18,13 @@ OFFLINE: [ pcmk-3 pcmk-4 ]
- lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-2 (unmanaged)
- migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged)
- Clone Set: Connectivity [ping-1] (unmanaged)
-- ping-1:2 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
-- Stopped: [ ping-1:0 ping-1:1 ]
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
-+ Stopped: [ pcmk-3 pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1] (unmanaged)
-- stateful-1:2 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged)
-- stateful-1:3 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged)
-- Stopped: [ stateful-1:0 stateful-1:1 ]
-+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged)
-+ stateful-1 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged)
-+ Stopped: [ pcmk-3 pcmk-4 ]
-
- Transition Summary:
-
-@@ -37,9 +37,9 @@ Online: [ pcmk-1 pcmk-2 ]
- OFFLINE: [ pcmk-3 pcmk-4 ]
-
- Clone Set: Fencing [FencingChild] (unmanaged)
-- FencingChild:2 (stonith:fence_xvm): Started pcmk-2 (unmanaged)
-- FencingChild:3 (stonith:fence_xvm): Started pcmk-1 (unmanaged)
-- Stopped: [ FencingChild:0 FencingChild:1 ]
-+ FencingChild (stonith:fence_xvm): Started pcmk-2 (unmanaged)
-+ FencingChild (stonith:fence_xvm): Started pcmk-1 (unmanaged)
-+ Stopped: [ pcmk-3 pcmk-4 ]
- Resource Group: group-1
- r192.168.122.126 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged)
- r192.168.122.127 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged)
-@@ -51,11 +51,11 @@ OFFLINE: [ pcmk-3 pcmk-4 ]
- lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-2 (unmanaged)
- migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged)
- Clone Set: Connectivity [ping-1] (unmanaged)
-- ping-1:2 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
-- Stopped: [ ping-1:0 ping-1:1 ]
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
-+ Stopped: [ pcmk-3 pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1] (unmanaged)
-- stateful-1:2 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged)
-- stateful-1:3 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged)
-- Stopped: [ stateful-1:0 stateful-1:1 ]
-+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged)
-+ stateful-1 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged)
-+ Stopped: [ pcmk-3 pcmk-4 ]
-
-diff --git a/pengine/test10/unmanaged-stop-1.dot b/pengine/test10/unmanaged-stop-1.dot
-new file mode 100644
-index 0000000..e36de8b
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-1.dot
-@@ -0,0 +1,8 @@
-+digraph "g" {
-+"all_stopped" [ style=dashed color="red" fontcolor="orange"]
-+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"]
-+"rsc1_start_0 " [ style=dashed color="red" fontcolor="black"]
-+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed]
-+"rsc1_stop_0 yingying.site" -> "rsc1_start_0 " [ style = dashed]
-+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"]
-+}
-diff --git a/pengine/test10/unmanaged-stop-1.exp b/pengine/test10/unmanaged-stop-1.exp
-new file mode 100644
-index 0000000..7845919
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-1.exp
-@@ -0,0 +1,11 @@
-diff --git a/pengine/test10/unmanaged-stop-1.scores b/pengine/test10/unmanaged-stop-1.scores
-new file mode 100644
-index 0000000..4cb1c8f
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-1.scores
-@@ -0,0 +1,3 @@
-+Allocation scores:
-+native_color: rsc1 allocation score on yingying.site: -INFINITY
-+native_color: rsc2 allocation score on yingying.site: -INFINITY
-diff --git a/pengine/test10/unmanaged-stop-1.summary b/pengine/test10/unmanaged-stop-1.summary
-new file mode 100644
-index 0000000..7a0f680
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-1.summary
-@@ -0,0 +1,18 @@
-+
-+Current cluster status:
-+Online: [ yingying.site ]
-+
-+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site
-+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED
-+
-+Transition Summary:
-+ * Stop rsc1 (yingying.site - blocked)
-+
-+Executing cluster transition:
-+
-+Revised cluster status:
-+Online: [ yingying.site ]
-+
-+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site
-+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED
-+
-diff --git a/pengine/test10/unmanaged-stop-1.xml b/pengine/test10/unmanaged-stop-1.xml
-new file mode 100644
-index 0000000..93a114f
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-1.xml
-@@ -0,0 +1,56 @@
-diff --git a/pengine/test10/unmanaged-stop-2.dot b/pengine/test10/unmanaged-stop-2.dot
-new file mode 100644
-index 0000000..e36de8b
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-2.dot
-@@ -0,0 +1,8 @@
-+digraph "g" {
-+"all_stopped" [ style=dashed color="red" fontcolor="orange"]
-+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"]
-+"rsc1_start_0 " [ style=dashed color="red" fontcolor="black"]
-+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed]
-+"rsc1_stop_0 yingying.site" -> "rsc1_start_0 " [ style = dashed]
-+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"]
-+}
-diff --git a/pengine/test10/unmanaged-stop-2.exp b/pengine/test10/unmanaged-stop-2.exp
-new file mode 100644
-index 0000000..7845919
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-2.exp
-@@ -0,0 +1,11 @@
-diff --git a/pengine/test10/unmanaged-stop-2.scores b/pengine/test10/unmanaged-stop-2.scores
-new file mode 100644
-index 0000000..4cb1c8f
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-2.scores
-@@ -0,0 +1,3 @@
-+Allocation scores:
-+native_color: rsc1 allocation score on yingying.site: -INFINITY
-+native_color: rsc2 allocation score on yingying.site: -INFINITY
-diff --git a/pengine/test10/unmanaged-stop-2.summary b/pengine/test10/unmanaged-stop-2.summary
-new file mode 100644
-index 0000000..7a0f680
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-2.summary
-@@ -0,0 +1,18 @@
-+
-+Current cluster status:
-+Online: [ yingying.site ]
-+
-+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site
-+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED
-+
-+Transition Summary:
-+ * Stop rsc1 (yingying.site - blocked)
-+
-+Executing cluster transition:
-+
-+Revised cluster status:
-+Online: [ yingying.site ]
-+
-+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site
-+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED
-+
-diff --git a/pengine/test10/unmanaged-stop-2.xml b/pengine/test10/unmanaged-stop-2.xml
-new file mode 100644
-index 0000000..9ed61cd
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-2.xml
-@@ -0,0 +1,56 @@
-diff --git a/pengine/test10/unmanaged-stop-3.dot b/pengine/test10/unmanaged-stop-3.dot
-new file mode 100644
-index 0000000..02d8d88
---- /dev/null
-+++ b/pengine/test10/unmanaged-stop-3.dot
-@@ -0,0 +1,11 @@
-+digraph "g" {
-+"all_stopped" [ style=dashed color="red" fontcolor="orange"]
-+"group1_stop_0" -> "group1_stopped_0" [ style = dashed]
-+"group1_stop_0" -> "rsc1_stop_0 yingying.site" [ style = dashed]
-+"group1_stop_0" [ style=bold color="green" fontcolor="orange"]
-+"group1_stopped_0" [ style=dashed color="red" fontcolor="orange"]
-+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"]
-+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed]
-+"rsc1_stop_0 yingying.site" -> "group1_stopped_0" [ style = dashed]
-+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"] -+} -diff --git a/pengine/test10/unmanaged-stop-3.exp b/pengine/test10/unmanaged-stop-3.exp -new file mode 100644 -index 0000000..2cb2435 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.exp -@@ -0,0 +1,19 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-3.scores b/pengine/test10/unmanaged-stop-3.scores -new file mode 100644 -index 0000000..8106031 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.scores -@@ -0,0 +1,6 @@ -+Allocation scores: -+group_color: group1 allocation score on yingying.site: 0 -+group_color: rsc1 allocation score on yingying.site: 0 -+group_color: rsc2 allocation score on yingying.site: -INFINITY -+native_color: rsc1 allocation score on yingying.site: -INFINITY -+native_color: rsc2 allocation score on yingying.site: -INFINITY -diff --git a/pengine/test10/unmanaged-stop-3.summary b/pengine/test10/unmanaged-stop-3.summary -new file mode 100644 -index 0000000..9edcfd5 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.summary -@@ -0,0 +1,21 @@ -+ -+Current cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -+Transition Summary: -+ * Stop rsc1 (yingying.site - blocked) -+ -+Executing cluster transition: -+ * Pseudo action: group1_stop_0 -+ -+Revised cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -diff --git a/pengine/test10/unmanaged-stop-3.xml b/pengine/test10/unmanaged-stop-3.xml -new file mode 100644 -index 0000000..36ff29f ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.xml -@@ -0,0 +1,56 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-4.dot b/pengine/test10/unmanaged-stop-4.dot -new file mode 100644 -index 0000000..02d8d88 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.dot -@@ -0,0 +1,11 @@ -+digraph "g" { -+"all_stopped" [ style=dashed color="red" fontcolor="orange"] -+"group1_stop_0" -> "group1_stopped_0" [ style = dashed] -+"group1_stop_0" -> "rsc1_stop_0 yingying.site" [ style = dashed] -+"group1_stop_0" [ style=bold color="green" fontcolor="orange"] -+"group1_stopped_0" [ style=dashed color="red" fontcolor="orange"] -+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"] -+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed] -+"rsc1_stop_0 yingying.site" -> "group1_stopped_0" [ style = dashed] -+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"] -+} -diff --git a/pengine/test10/unmanaged-stop-4.exp b/pengine/test10/unmanaged-stop-4.exp -new file mode 100644 -index 0000000..2cb2435 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.exp -@@ -0,0 +1,19 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-4.scores b/pengine/test10/unmanaged-stop-4.scores -new file mode 100644 -index 0000000..8811025 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.scores -@@ -0,0 +1,8 @@ -+Allocation scores: -+group_color: group1 allocation score on yingying.site: 0 -+group_color: rsc1 allocation score on yingying.site: 0 -+group_color: rsc2 
allocation score on yingying.site: -INFINITY -+group_color: rsc3 allocation score on yingying.site: 0 -+native_color: rsc1 allocation score on yingying.site: -INFINITY -+native_color: rsc2 allocation score on yingying.site: -INFINITY -+native_color: rsc3 allocation score on yingying.site: -INFINITY -diff --git a/pengine/test10/unmanaged-stop-4.summary b/pengine/test10/unmanaged-stop-4.summary -new file mode 100644 -index 0000000..96996c3 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.summary -@@ -0,0 +1,23 @@ -+ -+Current cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ rsc3 (ocf::heartbeat:Dummy): Stopped -+ -+Transition Summary: -+ * Stop rsc1 (yingying.site - blocked) -+ -+Executing cluster transition: -+ * Pseudo action: group1_stop_0 -+ -+Revised cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ rsc3 (ocf::heartbeat:Dummy): Stopped -+ -diff --git a/pengine/test10/unmanaged-stop-4.xml b/pengine/test10/unmanaged-stop-4.xml -new file mode 100644 -index 0000000..5a793ca ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.xml -@@ -0,0 +1,65 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unrunnable-1.summary b/pengine/test10/unrunnable-1.summary -index ec05391..e9597f3 100644 ---- a/pengine/test10/unrunnable-1.summary -+++ b/pengine/test10/unrunnable-1.summary -@@ -27,7 +27,7 @@ Transition Summary: - * Start rsc_c001n02 (c001n03 - blocked) - * Start rsc_c001n03 (c001n03 - blocked) - * Start rsc_c001n01 (c001n03 - blocked) -- * Stop child_DoFencing:1 (c001n02) -+ * Stop child_DoFencing:1 (c001n02 - blocked) - - Executing cluster transition: - * Resource action: DcIPaddr monitor on c001n03 -diff --git a/pengine/test10/use-after-free-merge.summary b/pengine/test10/use-after-free-merge.summary -index cc17523..c4e9ac6 100644 ---- a/pengine/test10/use-after-free-merge.summary -+++ b/pengine/test10/use-after-free-merge.summary -@@ -7,7 +7,7 @@ Online: [ hex-13 hex-14 ] - d0 (ocf::heartbeat:Dummy): Stopped - d1 (ocf::heartbeat:Dummy): Stopped - Master/Slave Set: ms0 [s0] -- Stopped: [ s0:0 s0:1 ] -+ Stopped: [ hex-13 hex-14 ] - - Transition Summary: - * Start fencing-sbd (hex-14) -diff --git a/pengine/test10/utilization-order2.summary b/pengine/test10/utilization-order2.summary -index 6a6d845..7871579 100644 ---- a/pengine/test10/utilization-order2.summary -+++ b/pengine/test10/utilization-order2.summary -@@ -33,6 +33,6 @@ Online: [ node1 node2 ] - rsc3 (ocf::pacemaker:Dummy): Started node2 - Clone Set: clone-rsc2 [rsc2] - Started: [ node2 ] -- Stopped: [ rsc2:1 ] -+ Stopped: [ node1 ] - rsc1 (ocf::pacemaker:Dummy): Stopped - -diff --git a/pengine/test10/utilization-order4.summary b/pengine/test10/utilization-order4.summary -index 22a9610..20fe903 100644 ---- a/pengine/test10/utilization-order4.summary -+++ b/pengine/test10/utilization-order4.summary -@@ -53,8 +53,8 @@ Online: [ deglxen001 ] - stonith_sbd (stonith:external/sbd): Started deglxen001 - Clone Set: clone-nfs [grp-nfs] - Started: [ deglxen001 ] -- Stopped: [ grp-nfs:1 ] -+ Stopped: [ deglxen002 ] - Clone Set: clone-ping [prim-ping] - Started: [ 
deglxen001 ] -- Stopped: [ prim-ping:1 ] -+ Stopped: [ deglxen002 ] - -diff --git a/pengine/test10/whitebox-fail1.exp b/pengine/test10/whitebox-fail1.exp -index cc46c36..5741955 100644 ---- a/pengine/test10/whitebox-fail1.exp -+++ b/pengine/test10/whitebox-fail1.exp -@@ -173,7 +173,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-fail1.summary b/pengine/test10/whitebox-fail1.summary -index 4df3c74..8bf6d52 100644 ---- a/pengine/test10/whitebox-fail1.summary -+++ b/pengine/test10/whitebox-fail1.summary -@@ -1,14 +1,13 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 FAILED - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc1 FAILED - C (ocf::pacemaker:Dummy): Started lxc2 -@@ -39,7 +38,7 @@ Executing cluster transition: - * Resource action: B monitor=10000 on lxc1 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-fail2.exp b/pengine/test10/whitebox-fail2.exp -index b8e4d9a..5741955 100644 ---- a/pengine/test10/whitebox-fail2.exp -+++ b/pengine/test10/whitebox-fail2.exp -@@ -8,13 +8,13 @@ - - - -- -+ - - - - - -- -+ - - - -@@ -68,7 +68,7 @@ - - - -- -+ - - - -@@ -165,7 +165,7 @@ - - - -- -+ - - - -@@ -173,7 +173,7 @@ - - - -- -+ - - - -@@ -196,7 +196,7 @@ - - - -- -+ - - - -@@ -215,7 +215,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-fail2.summary b/pengine/test10/whitebox-fail2.summary -index d185251..81407d3 100644 ---- a/pengine/test10/whitebox-fail2.summary -+++ b/pengine/test10/whitebox-fail2.summary -@@ -1,14 +1,13 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 FAILED - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc1 FAILED - C (ocf::pacemaker:Dummy): Started lxc2 -@@ -39,7 +38,7 @@ Executing cluster transition: - * Resource action: B monitor=10000 on lxc1 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-fail2.xml b/pengine/test10/whitebox-fail2.xml -index 496189d..2244c48 100644 ---- a/pengine/test10/whitebox-fail2.xml -+++ b/pengine/test10/whitebox-fail2.xml -@@ -146,13 +146,13 @@ - - - -- -+ - - - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-fail3.dot b/pengine/test10/whitebox-fail3.dot -new file mode 100644 -index 0000000..278d0d2 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.dot -@@ -0,0 +1,40 @@ 
-+digraph "g" { -+"18builder_monitor_0 dvossel-laptop2" -> "probe_complete dvossel-laptop2" [ style = bold] -+"18builder_monitor_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"18builder_monitor_30000 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"18builder_start_0 dvossel-laptop2" -> "18builder_monitor_30000 dvossel-laptop2" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "FAKE_start_0 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "W:1_monitor_10000 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "W:1_start_0 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "X:1_monitor_10000 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "X:1_start_0 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"FAKE_start_0 18builder" [ style=bold color="green" fontcolor="black"] -+"FAKE_stop_0 dvossel-laptop2" -> "FAKE_start_0 18builder" [ style = bold] -+"FAKE_stop_0 dvossel-laptop2" -> "all_stopped" [ style = bold] -+"FAKE_stop_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"W-master_running_0" [ style=bold color="green" fontcolor="orange"] -+"W-master_start_0" -> "W-master_running_0" [ style = bold] -+"W-master_start_0" -> "W:1_start_0 18builder" [ style = bold] -+"W-master_start_0" [ style=bold color="green" fontcolor="orange"] -+"W:1_monitor_10000 18builder" [ style=bold color="green" fontcolor="black"] -+"W:1_start_0 18builder" -> "W-master_running_0" [ style = bold] -+"W:1_start_0 18builder" -> "W:1_monitor_10000 18builder" [ style = bold] -+"W:1_start_0 18builder" [ style=bold color="green" fontcolor="black"] -+"X-master_running_0" [ style=bold color="green" fontcolor="orange"] -+"X-master_start_0" -> "X-master_running_0" [ style = bold] -+"X-master_start_0" -> "X:1_start_0 18builder" [ style = bold] -+"X-master_start_0" [ style=bold color="green" fontcolor="orange"] -+"X:1_monitor_10000 18builder" [ style=bold color="green" fontcolor="black"] -+"X:1_start_0 18builder" -> "X-master_running_0" [ style = bold] -+"X:1_start_0 18builder" -> "X:1_monitor_10000 18builder" [ style = bold] -+"X:1_start_0 18builder" [ style=bold color="green" fontcolor="black"] -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"probe_complete dvossel-laptop2" -> "probe_complete" [ style = bold] -+"probe_complete dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"probe_complete" -> "18builder_start_0 dvossel-laptop2" [ style = bold] -+"probe_complete" -> "FAKE_stop_0 dvossel-laptop2" [ style = bold] -+"probe_complete" [ style=bold color="green" fontcolor="orange"] -+"vm_start_0 dvossel-laptop2" -> "18builder_start_0 dvossel-laptop2" [ style = bold] -+"vm_start_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/whitebox-fail3.exp b/pengine/test10/whitebox-fail3.exp -new file mode 100644 -index 0000000..1b8d144 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.exp -@@ -0,0 +1,225 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/whitebox-fail3.scores b/pengine/test10/whitebox-fail3.scores -new file mode 100644 -index 0000000..6e09dd5 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.scores -@@ -0,0 +1,64 @@ -+Allocation scores: -+W:0 promotion score on dvossel-laptop2: 10 -+W:1 promotion score on 18builder: -1 -+W:2 promotion score on none: 0 -+X:0 promotion score on dvossel-laptop2: 10 -+X:1 promotion score on 18builder: -1 -+X:2 promotion score on none: 0 -+clone_color: W-master allocation score on 18builder: 0 -+clone_color: W-master allocation score on 18node1: 0 -+clone_color: W-master allocation score on dvossel-laptop2: 0 -+clone_color: W:0 allocation score on 18builder: 0 -+clone_color: W:0 allocation score on 18node1: 0 -+clone_color: W:0 allocation score on dvossel-laptop2: 11 -+clone_color: W:1 allocation score on 18builder: 0 -+clone_color: W:1 allocation score on 18node1: 0 -+clone_color: W:1 allocation score on dvossel-laptop2: 0 -+clone_color: W:2 allocation score on 18builder: 0 -+clone_color: W:2 allocation score on 18node1: 0 -+clone_color: W:2 allocation score on dvossel-laptop2: 0 -+clone_color: X-master allocation score on 18builder: 0 -+clone_color: X-master allocation score on 18node1: 0 -+clone_color: X-master allocation score on dvossel-laptop2: 0 -+clone_color: X:0 allocation score on 18builder: 0 -+clone_color: X:0 allocation score on 18node1: 0 -+clone_color: X:0 allocation score on dvossel-laptop2: 11 -+clone_color: X:1 allocation score on 18builder: 0 -+clone_color: X:1 allocation score on 18node1: 0 -+clone_color: X:1 allocation score on dvossel-laptop2: 0 -+clone_color: X:2 allocation score on 18builder: 0 -+clone_color: X:2 allocation score on 18node1: 0 -+clone_color: X:2 allocation score on dvossel-laptop2: 0 -+native_color: 18builder allocation score on 18builder: -INFINITY -+native_color: 18builder allocation score on 18node1: -INFINITY -+native_color: 18builder allocation score on dvossel-laptop2: 0 -+native_color: 18node1 allocation score on 18builder: -INFINITY -+native_color: 18node1 allocation score on 18node1: -INFINITY -+native_color: 18node1 allocation score on dvossel-laptop2: -INFINITY -+native_color: FAKE allocation score on 18builder: 0 -+native_color: FAKE allocation score on 18node1: 0 -+native_color: FAKE allocation score on dvossel-laptop2: 0 -+native_color: W:0 allocation score on 18builder: 0 -+native_color: W:0 allocation score on 18node1: -INFINITY -+native_color: W:0 allocation score on dvossel-laptop2: 11 -+native_color: W:1 allocation score on 18builder: 0 -+native_color: W:1 allocation score on 18node1: -INFINITY -+native_color: W:1 allocation score on dvossel-laptop2: -INFINITY -+native_color: W:2 allocation score on 18builder: -INFINITY -+native_color: W:2 allocation score on 18node1: -INFINITY -+native_color: W:2 allocation score on dvossel-laptop2: -INFINITY -+native_color: X:0 allocation score on 18builder: 0 -+native_color: X:0 allocation score on 18node1: -INFINITY -+native_color: X:0 allocation score on dvossel-laptop2: 11 -+native_color: X:1 allocation score on 18builder: 0 -+native_color: X:1 allocation score on 18node1: -INFINITY -+native_color: X:1 allocation score on dvossel-laptop2: -INFINITY -+native_color: X:2 allocation score on 18builder: -INFINITY -+native_color: X:2 allocation score on 18node1: -INFINITY 
-+native_color: X:2 allocation score on dvossel-laptop2: -INFINITY -+native_color: vm allocation score on 18builder: -INFINITY -+native_color: vm allocation score on 18node1: -INFINITY -+native_color: vm allocation score on dvossel-laptop2: 0 -+native_color: vm2 allocation score on 18builder: -INFINITY -+native_color: vm2 allocation score on 18node1: -INFINITY -+native_color: vm2 allocation score on dvossel-laptop2: -INFINITY -diff --git a/pengine/test10/whitebox-fail3.summary b/pengine/test10/whitebox-fail3.summary -new file mode 100644 -index 0000000..1d25724 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.summary -@@ -0,0 +1,56 @@ -+ -+Current cluster status: -+Online: [ dvossel-laptop2 ] -+OFFLINE: [ 18builder:vm 18node1:vm2 ] -+ -+ vm (ocf::heartbeat:VirtualDomain): Stopped -+ vm2 (ocf::heartbeat:VirtualDomain): Stopped -+ FAKE (ocf::pacemaker:Dummy): Started dvossel-laptop2 -+ Master/Slave Set: W-master [W] -+ Masters: [ dvossel-laptop2 ] -+ Stopped: [ 18builder 18node1 ] -+ Master/Slave Set: X-master [X] -+ Masters: [ dvossel-laptop2 ] -+ Stopped: [ 18builder 18node1 ] -+ -+Transition Summary: -+ * Start vm (dvossel-laptop2) -+ * Move FAKE (Started dvossel-laptop2 -> 18builder) -+ * Start W:1 (18builder) -+ * Start X:1 (18builder) -+ * Start 18builder (dvossel-laptop2) -+ -+Executing cluster transition: -+ * Resource action: vm start on dvossel-laptop2 -+ * Pseudo action: W-master_start_0 -+ * Pseudo action: X-master_start_0 -+ * Resource action: 18builder monitor on dvossel-laptop2 -+ * Pseudo action: probe_complete -+ * Resource action: FAKE stop on dvossel-laptop2 -+ * Resource action: 18builder start on dvossel-laptop2 -+ * Pseudo action: all_stopped -+ * Resource action: FAKE start on 18builder -+ * Resource action: W start on 18builder -+ * Pseudo action: W-master_running_0 -+ * Resource action: X start on 18builder -+ * Pseudo action: X-master_running_0 -+ * Resource action: 18builder monitor=30000 on dvossel-laptop2 -+ * Resource action: W monitor=10000 on 18builder -+ * Resource action: X monitor=10000 on 18builder -+ -+Revised cluster status: -+Online: [ 18builder:vm dvossel-laptop2 ] -+OFFLINE: [ 18node1:vm2 ] -+ -+ vm (ocf::heartbeat:VirtualDomain): Started dvossel-laptop2 -+ vm2 (ocf::heartbeat:VirtualDomain): Stopped -+ FAKE (ocf::pacemaker:Dummy): Started 18builder -+ Master/Slave Set: W-master [W] -+ Masters: [ dvossel-laptop2 ] -+ Slaves: [ 18builder ] -+ Stopped: [ 18node1 ] -+ Master/Slave Set: X-master [X] -+ Masters: [ dvossel-laptop2 ] -+ Slaves: [ 18builder ] -+ Stopped: [ 18node1 ] -+ -diff --git a/pengine/test10/whitebox-fail3.xml b/pengine/test10/whitebox-fail3.xml -new file mode 100644 -index 0000000..081708d ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.xml -@@ -0,0 +1,104 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/whitebox-move.exp b/pengine/test10/whitebox-move.exp -index 1a3d89b..8dbdda0 100644 ---- a/pengine/test10/whitebox-move.exp -+++ b/pengine/test10/whitebox-move.exp -@@ -181,7 +181,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-move.summary b/pengine/test10/whitebox-move.summary -index eacefdf..6dc2f6f 100644 ---- a/pengine/test10/whitebox-move.summary -+++ b/pengine/test10/whitebox-move.summary -@@ -1,6 +1,6 @@ - - Current 
cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node1 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -@@ -34,7 +34,7 @@ Executing cluster transition: - * Resource action: lxc1 monitor=30000 on 18node2 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-start.exp b/pengine/test10/whitebox-start.exp -index fc54e18..4dcfdc4 100644 ---- a/pengine/test10/whitebox-start.exp -+++ b/pengine/test10/whitebox-start.exp -@@ -80,7 +80,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-start.summary b/pengine/test10/whitebox-start.summary -index a3dd39c..e5d654b 100644 ---- a/pengine/test10/whitebox-start.summary -+++ b/pengine/test10/whitebox-start.summary -@@ -1,14 +1,14 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Stopped - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] -+ Stopped: [ lxc1 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc2 - C (ocf::pacemaker:Dummy): Started lxc2 -@@ -29,7 +29,7 @@ Executing cluster transition: - * Resource action: M monitor=10000 on lxc1 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node1 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-stop.summary b/pengine/test10/whitebox-stop.summary -index c1e5f96..4116571 100644 ---- a/pengine/test10/whitebox-stop.summary -+++ b/pengine/test10/whitebox-stop.summary -@@ -1,6 +1,6 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -@@ -30,15 +30,15 @@ Executing cluster transition: - * Pseudo action: all_stopped - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Stopped - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] -+ Stopped: [ lxc1 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc2 - C (ocf::pacemaker:Dummy): Started lxc2 -diff --git a/tools/Makefile.am b/tools/Makefile.am -index 0e7b1a9..ad469d2 100644 ---- a/tools/Makefile.am -+++ b/tools/Makefile.am -@@ -5,17 +5,17 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. 
--# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - # --MAINTAINERCLEANFILES = Makefile.in -+MAINTAINERCLEANFILES = Makefile.in - - INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl -@@ -23,15 +23,18 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - COMMONLIBS = \ - $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cib/libcib.la \ -- $(CURSESLIBS) $(CLUSTERLIBS) -+ $(CURSESLIBS) $(CLUSTERLIBS) - - headerdir = $(pkgincludedir)/crm --header_HEADERS = -+header_HEADERS = - - pcmkdir = $(datadir)/$(PACKAGE) - pcmk_DATA = report.common report.collector - - sbin_SCRIPTS = crm_report crm_standby crm_master crm_failcount -+if BUILD_CIBSECRETS -+sbin_SCRIPTS += cibsecret -+endif - EXTRA_DIST = $(sbin_SCRIPTS) - - halibdir = $(CRM_DAEMON_DIR) -@@ -40,9 +43,9 @@ halib_PROGRAMS = attrd - sbin_PROGRAMS = crm_simulate crmadmin cibadmin crm_node crm_attribute crm_resource crm_verify \ - crm_shadow attrd_updater crm_diff crm_mon iso8601 crm_ticket crm_error - --clidir = $(datadir)/$(PACKAGE)/tests/cli --cli_SCRIPTS = regression.sh --cli_DATA = regression.exp cli.supp -+testdir = $(datadir)/$(PACKAGE)/tests/cli -+test_SCRIPTS = regression.sh -+test_DATA = regression.exp - - if BUILD_HEARTBEAT_SUPPORT - sbin_PROGRAMS += crm_uuid -@@ -60,7 +63,7 @@ endif - - ## SOURCES - --noinst_HEADERS = -+noinst_HEADERS = - - crmadmin_SOURCES = crmadmin.c - crmadmin_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ -@@ -73,10 +76,10 @@ crm_uuid_SOURCES = crm_uuid.c - crm_uuid_LDADD = $(COMMONLIBS) $(top_builddir)/lib/cluster/libcrmcluster.la - - cibadmin_SOURCES = cibadmin.c --cibadmin_LDADD = $(COMMONLIBS) -+cibadmin_LDADD = $(COMMONLIBS) - - crm_shadow_SOURCES = cib_shadow.c --crm_shadow_LDADD = $(COMMONLIBS) -+crm_shadow_LDADD = $(COMMONLIBS) - - crm_node_SOURCES = crm_node.c - crm_node_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ -@@ -108,7 +111,7 @@ crm_verify_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ - $(COMMONLIBS) - - crm_attribute_SOURCES = crm_attribute.c --crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(COMMONLIBS) -+crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(COMMONLIBS) - - crm_resource_SOURCES = crm_resource.c - crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ -@@ -119,7 +122,7 @@ crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ - $(COMMONLIBS) - - iso8601_SOURCES = test.iso8601.c --iso8601_LDADD = $(COMMONLIBS) -+iso8601_LDADD = $(COMMONLIBS) - - attrd_SOURCES = attrd.c - attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(COMMONLIBS) -@@ -144,13 +147,14 @@ endif - - if BUILD_OPENIPMI_SERVICELOG - ipmiservicelogd_SOURCES = ipmiservicelogd.c --ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) -+ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) - ipmiservicelogd_LDFLAGS = $(top_builddir)/lib/common/libcrmcommon.la $(OPENIPMI_SERVICELOG_LIBS) $(SERVICELOG_LIBS) - endif - - %.8: % crm_attribute - echo Creating $@ - chmod a+x 
$(top_builddir)/tools/$< -+ $(top_builddir)/tools/$< --help - PATH=$(top_builddir)/tools:$$PATH $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/tools/$< - - clean-generic: -diff --git a/tools/attrd.c b/tools/attrd.c -index 571708a..1e834ea 100644 ---- a/tools/attrd.c -+++ b/tools/attrd.c -@@ -179,7 +179,7 @@ attrd_shutdown(int nsig) - if (mainloop != NULL && g_main_is_running(mainloop)) { - g_main_quit(mainloop); - } else { -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - } - -@@ -296,7 +296,7 @@ attrd_ha_connection_destroy(gpointer user_data) - g_main_quit(mainloop); - return; - } -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - - static void -@@ -377,7 +377,7 @@ attrd_ais_destroy(gpointer unused) - g_main_quit(mainloop); - return; - } -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - } - #endif - -@@ -394,7 +394,7 @@ attrd_cib_connection_destroy(gpointer user_data) - } else { - /* eventually this will trigger a reconnect, not a shutdown */ - crm_err("Connection to the CIB terminated..."); -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - return; -@@ -483,7 +483,7 @@ cib_connect(void *user_data) - - if (was_err) { - crm_err("Aborting startup"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - cib_conn = local_conn; -@@ -565,11 +565,7 @@ main(int argc, char **argv) - crm_info("Cluster connection active"); - - if (was_err == FALSE) { -- ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, &ipc_callbacks); -- if (ipcs == NULL) { -- crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -- } -+ attrd_ipc_server_init(&ipcs, &ipc_callbacks); - } - - crm_info("Accepting attribute updates"); -@@ -618,9 +614,8 @@ main(int argc, char **argv) - - g_hash_table_destroy(attr_hash); - free(attrd_uuid); -- empty_uuid_cache(); - -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - struct attrd_callback_s { -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index 10d9c8a..5c8944d 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -140,13 +140,14 @@ main(int argc, char **argv) - fprintf(stderr, "-Q,--query is not yet implemented, use -D to delete existing values\n\n"); - crm_help('?', EX_USAGE); - -- } else -- if (FALSE == -- attrd_update_delegate(NULL, command, NULL, attr_name, attr_value, attr_section, -- attr_set, attr_dampen, NULL)) { -- fprintf(stderr, "Could not update %s=%s\n", attr_name, attr_value); -- crm_exit(1); -+ } else { -+ int rc = attrd_update_delegate(NULL, command, NULL, attr_name, attr_value, attr_section, -+ attr_set, attr_dampen, NULL); -+ if (rc != pcmk_ok) { -+ fprintf(stderr, "Could not update %s=%s: %s (%d)\n", attr_name, attr_value, pcmk_strerror(rc), rc); -+ } -+ crm_exit(rc); - } - -- crm_exit(0); -+ crm_exit(pcmk_ok); - } -diff --git a/tools/cib_shadow.c b/tools/cib_shadow.c -index d33be20..ebb17d3 100644 ---- a/tools/cib_shadow.c -+++ b/tools/cib_shadow.c -@@ -1,17 +1,17 @@ - --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -148,7 +148,7 @@ static struct crm_option long_options[] = { - {"help", 0, 0, '?', "\t\tThis text"}, - {"version", 0, 0, '$', "\t\tVersion information" }, - {"verbose", 0, 0, 'V', "\t\tIncrease debug output"}, -- -+ - {"-spacer-", 1, 0, '-', "\nQueries:"}, - {"which", no_argument, NULL, 'w', "\t\tIndicate the active shadow copy"}, - {"display", no_argument, NULL, 'p', "\t\tDisplay the contents of the active shadow copy"}, -@@ -163,7 +163,7 @@ static struct crm_option long_options[] = { - {"delete", required_argument, NULL, 'D', "\tDelete the contents of the named shadow copy"}, - {"reset", required_argument, NULL, 'r', "\tRecreate the named shadow copy from the active cluster configuration"}, - {"switch", required_argument, NULL, 's', "\t(Advanced) Switch to the named shadow copy"}, -- -+ - {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, - {"force", no_argument, NULL, 'f', "\t\t(Advanced) Force the action to be performed"}, - {"batch", no_argument, NULL, 'b', "\t\t(Advanced) Don't spawn a new shell" }, -@@ -180,7 +180,7 @@ static struct crm_option long_options[] = { - {"-spacer-", 1, 0, '-', " crm_shadow --delete myShadow", pcmk_option_example}, - {"-spacer-", 1, 0, '-', "Upload the current shadow configuration (named myShadow) to the running cluster:", pcmk_option_paragraph}, - {"-spacer-", 1, 0, '-', " crm_shadow --commit myShadow", pcmk_option_example}, -- -+ - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -226,7 +226,16 @@ main(int argc, char **argv) - case 'F': - command = flag; - free(shadow); -- shadow = strdup(getenv("CIB_shadow")); -+ shadow = NULL; -+ { -+ const char *env = getenv("CIB_shadow"); -+ if(env) { -+ shadow = strdup(env); -+ } else { -+ fprintf(stderr, "No active shadow configuration defined\n"); -+ crm_exit(ENOENT); -+ } -+ } - break; - case 'e': - case 'c': -diff --git a/tools/cibadmin.c b/tools/cibadmin.c -index 886fd9c..0fef594 100644 ---- a/tools/cibadmin.c -+++ b/tools/cibadmin.c -@@ -71,6 +71,8 @@ int request_id = 0; - int operation_status = 0; - cib_t *the_cib = NULL; - gboolean force_flag = FALSE; -+gboolean quiet = FALSE; -+int bump_log_num = 0; - - /* *INDENT-OFF* */ - static struct crm_option long_options[] = { -@@ -227,7 +229,7 @@ main(int argc, char **argv) - - int option_index = 0; - -- crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, FALSE); -+ crm_system_name = strdup("cibadmin"); - crm_set_options(NULL, "command [options] [data]", long_options, - "Provides direct access to the cluster configuration." - "\n\nAllows the configuration, or sections of it, to be queried, modified, replaced and deleted." 
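(The cibadmin.c hunks here and just below rearrange startup so that logging is configured only after option parsing: each -V is counted into bump_log_num, --query sets quiet, and only then is crm_log_init() called, followed by one crm_bump_log_level() per counted -V. A minimal self-contained sketch of that deferred-init pattern — log_init() and log_bump() are stand-ins invented for illustration, not the real crm_log_init()/crm_bump_log_level():

    /* Sketch: defer logger setup until the flags that control it are known. */
    #include <stdio.h>
    #include <string.h>

    static int log_level = 0;                      /* LOG_CRIT equivalent */
    static void log_init(int quiet) { log_level = quiet ? -1 : 0; }
    static void log_bump(void)      { log_level++; }

    int main(int argc, char **argv)
    {
        int bump_log_num = 0;                      /* one per -V, as in cibadmin.c */
        int quiet = 0;

        /* First pass: only record what the flags ask for. */
        for (int i = 1; i < argc; i++) {
            if (strcmp(argv[i], "-V") == 0) {
                bump_log_num++;                    /* each -V requests one level bump */
            } else if (strcmp(argv[i], "-Q") == 0) {
                quiet = 1;                         /* queries default to quiet output */
            }
        }

        /* Only now is enough known to configure logging. */
        if (bump_log_num > 0) {
            quiet = 0;                             /* any -V overrides quiet mode */
        }
        log_init(quiet);
        while (bump_log_num-- > 0) {
            log_bump();
        }
        printf("log level %d, quiet %d\n", log_level, quiet);
        return 0;
    }

The two-pass structure exists because -V must be able to override -Q, which cannot be decided until the whole command line has been scanned.)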
-@@ -266,6 +268,7 @@ main(int argc, char **argv) - break; - case 'Q': - cib_action = CIB_OP_QUERY; -+ quiet = TRUE; - break; - case 'P': - cib_action = CIB_OP_APPLY_DIFF; -@@ -316,7 +319,7 @@ main(int argc, char **argv) - break; - case 'V': - command_options = command_options | cib_verbose; -- crm_bump_log_level(argc, argv); -+ bump_log_num++; - break; - case '?': - case '$': -@@ -384,6 +387,15 @@ main(int argc, char **argv) - break; - } - } -+ -+ if (bump_log_num > 0) { -+ quiet = FALSE; -+ } -+ crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, quiet); -+ while (bump_log_num > 0) { -+ crm_bump_log_level(argc, argv); -+ bump_log_num--; -+ } - - if (optind < argc) { - printf("non-option ARGV-elements: "); -@@ -469,7 +481,7 @@ main(int argc, char **argv) - if (exit_code != pcmk_ok) { - crm_err("Init failed, could not perform requested operations"); - fprintf(stderr, "Init failed, could not perform requested operations\n"); -- return -exit_code; -+ return crm_exit(-exit_code); - } - - exit_code = do_work(input, command_options, &output); -@@ -523,7 +535,7 @@ main(int argc, char **argv) - the_cib->cmds->signoff(the_cib); - cib_delete(the_cib); - bail: -- return crm_exit(-exit_code); -+ return crm_exit(exit_code); - } - - int -diff --git a/tools/cibsecret.in b/tools/cibsecret.in -new file mode 100644 -index 0000000..157feee ---- /dev/null -+++ b/tools/cibsecret.in -@@ -0,0 +1,380 @@ -+#!/bin/sh -+ -+# Copyright (C) 2011 Dejan Muhamedagic -+# -+# This program is free software; you can redistribute it and/or -+# modify it under the terms of the GNU General Public -+# License as published by the Free Software Foundation; either -+# version 2.1 of the License, or (at your option) any later version. -+# -+# This software is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public -+# License along with this library; if not, write to the Free Software -+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+# -+ -+# WARNING: -+# -+# The CIB secrets interface and implementation is still being -+# discussed, it may change -+ -+# -+# cibsecret: manage the secrets directory /var/lib/heartbeat/lrm/secrets -+# -+# secrets are ascii files, holding just one value per file: -+# /var/lib/heartbeat/lrm/secrets// -+# -+# NB: this program depends on utillib.sh -+# -+ -+. @OCF_ROOT_DIR@/lib/heartbeat/ocf-shellfuncs -+ -+LRM_CIBSECRETS=@LRM_CIBSECRETS_DIR@ -+LRM_LEGACY_CIBSECRETS=@LRM_LEGACY_CIBSECRETS_DIR@ -+ -+PROG=`basename $0` -+SSH_OPTS="-o StrictHostKeyChecking=no" -+ -+usage() { -+ echo "cibsecret - A tool for managing cib secrets"; -+ echo ""; -+ echo "usage: $PROG [-C] "; -+ echo ""; -+ echo "-C: don't read/write the CIB" -+ echo "" -+ echo "command: set | delete | stash | unstash | get | check | sync" -+ echo "" -+ echo " set " -+ echo "" -+ echo " get " -+ echo "" -+ echo " check " -+ echo "" -+ echo " stash (if not -C)" -+ echo "" -+ echo " unstash (if not -C)" -+ echo "" -+ echo " delete " -+ echo "" -+ echo " sync" -+ echo "" -+ echo "stash/unstash: move the parameter from/to the CIB (if you already" -+ echo "have the parameter set in the CIB)." -+ echo "" -+ echo "set/delete: add/remove a parameter from the local file." -+ echo "" -+ echo "get: display the parameter from the local file." 
-+ echo "" -+ echo "check: verify MD5 hash of the parameter from the local file and the CIB." -+ echo "" -+ echo "sync: copy $LRM_CIBSECRETS to other nodes." -+ echo "" -+ echo "Examples:" -+ echo "" -+ echo " $PROG set ipmi_node1 passwd SecreT_PASS" -+ echo "" -+ echo " $PROG stash ipmi_node1 passwd" -+ echo "" -+ echo " $PROG get ipmi_node1 passwd" -+ echo "" -+ echo " $PROG check ipmi_node1 passwd" -+ echo "" -+ echo " $PROG sync" -+ -+ exit $1 -+} -+fatal() { -+ echo "ERROR: $*" -+ exit 1 -+} -+warn() { -+ echo "WARNING: $*" -+} -+info() { -+ echo "INFO: $*" -+} -+ -+check_env() { -+ which md5sum >/dev/null 2>&1 || -+ fatal "please install md5sum to run $PROG" -+ if which pssh >/dev/null 2>&1; then -+ rsh=pssh_fun -+ rcp=pscp_fun -+ elif which pdsh >/dev/null 2>&1; then -+ rsh=pdsh_fun -+ rcp=pdcp_fun -+ elif which ssh >/dev/null 2>&1; then -+ rsh=ssh_fun -+ rcp=scp_fun -+ else -+ fatal "please install pssh, pdsh, or ssh to run $PROG" -+ fi -+ ps -ef | grep '[c]rmd' >/dev/null || -+ fatal "pacemaker not running? $PROG needs pacemaker" -+} -+ -+get_other_nodes() { -+ crm_node -l | awk '{print $2}' | grep -v `uname -n` -+} -+ -+get_live_nodes() { -+ if [ `id -u` = 0 ] && which fping >/dev/null 2>&1; then -+ fping -a $@ 2>/dev/null -+ else -+ local h -+ for h; do ping -c 2 -q $h >/dev/null 2>&1 && echo $h; done -+ fi -+} -+ -+check_down_nodes() { -+ local n down_nodes -+ down_nodes=`(for n; do echo $n; done) | sort | uniq -u` -+ if [ -n "$down_nodes" ]; then -+ if [ `echo $down_nodes | wc -w` = 1 ]; then -+ warn "node $down_nodes is down" -+ warn "you'll need to update it using $PROG sync later" -+ else -+ warn "nodes `echo $down_nodes` are down" -+ warn "you'll need to update them using $PROG sync later" -+ fi -+ fi -+} -+ -+pssh_fun() { -+ pssh -qi -H "$nodes" -x "$SSH_OPTS" $* -+} -+pscp_fun() { -+ pscp -q -H "$nodes" -x "-pr" -x "$SSH_OPTS" $* -+} -+pdsh_fun() { -+ local pdsh_nodes=`echo $nodes | tr ' ' ','` -+ export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" -+ pdsh -w $pdsh_nodes $* -+} -+pdcp_fun() { -+ local pdsh_nodes=`echo $nodes | tr ' ' ','` -+ export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" -+ pdcp -pr -w $pdsh_nodes $* -+} -+ssh_fun() { -+ local h -+ for h in $nodes; do -+ ssh $SSH_OPTS $h $* || return -+ done -+} -+scp_fun() { -+ local h src="$1" dest=$2 -+ for h in $nodes; do -+ scp -pr -q $SSH_OPTS $src $h:$dest || return -+ done -+} -+# TODO: this procedure should be replaced with csync2 -+# provided that csync2 has already been configured -+sync_files() { -+ local crm_nodes=`get_other_nodes` -+ local nodes=`get_live_nodes $crm_nodes` -+ check_down_nodes $nodes $crm_nodes -+ [ "$nodes" = "" ] && { -+ info "no other nodes live" -+ return -+ } -+ info "syncing $LRM_CIBSECRETS to `echo $nodes` ..." -+ $rsh rm -rf $LRM_CIBSECRETS && -+ $rsh mkdir -p `dirname $LRM_CIBSECRETS` && -+ $rcp $LRM_CIBSECRETS `dirname $LRM_CIBSECRETS` -+} -+sync_one() { -+ local f=$1 f_all="$1 $1.sign" -+ local crm_nodes=`get_other_nodes` -+ local nodes=`get_live_nodes $crm_nodes` -+ check_down_nodes $nodes $crm_nodes -+ [ "$nodes" = "" ] && { -+ info "no other nodes live" -+ return -+ } -+ info "syncing $f to `echo $nodes` ..." 
-+ $rsh mkdir -p `dirname $f` && -+ if [ -f "$f" ]; then -+ $rcp "$f_all" `dirname $f` -+ else -+ $rsh rm -f $f_all -+ fi -+} -+ -+is_secret() { -+ # assume that the secret is in the CIB if we cannot talk to -+ # cib -+ [ "$NO_CRM" ] || -+ test "$1" = "$MAGIC" -+} -+check_cib_rsc() { -+ local rsc=$1 output -+ output=`$NO_CRM crm_resource -r $rsc -W >/dev/null 2>&1` || -+ fatal "resource $rsc doesn't exist: $output" -+} -+get_cib_param() { -+ local rsc=$1 param=$2 -+ check_cib_rsc $rsc -+ $NO_CRM crm_resource -r $rsc -g $param 2>/dev/null -+} -+set_cib_param() { -+ local rsc=$1 param=$2 value=$3 -+ check_cib_rsc $rsc -+ $NO_CRM crm_resource -r $rsc -p $param -v "$value" 2>/dev/null -+} -+remove_cib_param() { -+ local rsc=$1 param=$2 -+ check_cib_rsc $rsc -+ $NO_CRM crm_resource -r $rsc -d $param 2>/dev/null -+} -+ -+localfiles() { -+ local cmd=$1 -+ local rsc=$2 param=$3 value=$4 -+ local local_file=$LRM_CIBSECRETS/$rsc/$param -+ local local_legacy_file=$LRM_LEGACY_CIBSECRETS/$rsc/$param -+ case $cmd in -+ "get") -+ cat $local_file 2>/dev/null || -+ cat $local_legacy_file 2>/dev/null -+ true -+ ;; -+ "getsum") -+ cat $local_file.sign 2>/dev/null || -+ cat $local_legacy_file.sign 2>/dev/null -+ true -+ ;; -+ "set") -+ local md5sum -+ md5sum=`printf $value | md5sum` || -+ fatal "md5sum failed to produce hash for resource $rsc parameter $param" -+ md5sum=`echo $md5sum | awk '{print $1}'` -+ mkdir -p `dirname $local_file` && -+ echo $value > $local_file && -+ echo $md5sum > $local_file.sign && ( -+ sync_one $local_file -+ rm -f $local_legacy_file -+ rm -f $local_legacy_file.sign -+ sync_one $local_legacy_file) -+ ;; -+ "remove") -+ rm -f $local_legacy_file -+ rm -f $local_legacy_file.sign -+ sync_one $local_legacy_file -+ -+ rm -f $local_file -+ rm -f $local_file.sign -+ sync_one $local_file -+ ;; -+ *) -+ # not reached, this is local interface -+ ;; -+ esac -+} -+get_local_param() { -+ local rsc=$1 param=$2 -+ localfiles get $rsc $param -+} -+set_local_param() { -+ local rsc=$1 param=$2 value=$3 -+ localfiles set $rsc $param $value -+} -+remove_local_param() { -+ local rsc=$1 param=$2 -+ localfiles remove $rsc $param -+} -+ -+cibsecret_set() { -+ local value=$1 -+ -+ if [ -z "$NO_CRM" ]; then -+ [ "$current" -a "$current" != "$MAGIC" -a "$current" != "$value" ] && -+ fatal "CIB value <$current> different for $rsc parameter $param; please delete it first" -+ fi -+ set_local_param $rsc $param $value && -+ set_cib_param $rsc $param "$MAGIC" -+} -+ -+cibsecret_check() { -+ local md5sum local_md5sum -+ is_secret "$current" || -+ fatal "resource $rsc parameter $param not set as secret, nothing to check" -+ local_md5sum=`localfiles getsum $rsc $param` -+ [ "$local_md5sum" ] || -+ fatal "no MD5 hash for resource $rsc parameter $param" -+ md5sum=`printf "$current_local" | md5sum | awk '{print $1}'` -+ [ "$md5sum" = "$local_md5sum" ] || -+ fatal "MD5 hash mismatch for resource $rsc parameter $param" -+} -+ -+cibsecret_get() { -+ cibsecret_check -+ echo "$current_local" -+} -+ -+cibsecret_delete() { -+ remove_local_param $rsc $param && -+ remove_cib_param $rsc $param -+} -+ -+cibsecret_stash() { -+ [ "$NO_CRM" ] && -+ fatal "no access to Pacemaker, stash not supported" -+ [ "$current" = "" ] && -+ fatal "nothing to stash for resource $rsc parameter $param" -+ is_secret "$current" && -+ fatal "resource $rsc parameter $param already set as secret, nothing to stash" -+ cibsecret_set "$current" -+} -+ -+cibsecret_unstash() { -+ [ "$NO_CRM" ] && -+ fatal "no access to Pacemaker, unstash not supported" -+ [ 
"$current_local" = "" ] && -+ fatal "nothing to unstash for resource $rsc parameter $param" -+ is_secret "$current" || -+ warn "resource $rsc parameter $param not set as secret, but we have local value so proceeding anyway" -+ remove_local_param $rsc $param && -+ set_cib_param $rsc $param $current_local -+} -+ -+cibsecret_sync() { -+ sync_files -+} -+ -+check_env -+ -+MAGIC="lrm://" -+umask 0077 -+ -+if [ "$1" = "-C" ]; then -+ NO_CRM=':' -+ shift 1 -+fi -+ -+cmd=$1 -+rsc=$2 -+param=$3 -+value=$4 -+ -+case "$cmd" in -+ set) [ $# -ne 4 ] && usage 1;; -+ get) [ $# -ne 3 ] && usage 1;; -+ check) [ $# -ne 3 ] && usage 1;; -+ stash) [ $# -ne 3 ] && usage 1;; -+ unstash) [ $# -ne 3 ] && usage 1;; -+ delete) [ $# -ne 3 ] && usage 1;; -+ sync) [ $# -ne 1 ] && usage 1;; -+ --help) usage 0;; -+ *) usage 1; -+esac -+ -+# we'll need these two often -+current=`get_cib_param $rsc $param` -+current_local=`get_local_param $rsc $param` -+ -+cibsecret_$cmd $value -diff --git a/tools/cli.supp b/tools/cli.supp -deleted file mode 100644 -index 19470e6..0000000 ---- a/tools/cli.supp -+++ /dev/null -@@ -1,7 +0,0 @@ --# Valgrind suppressions file for CLI tools --{ -- Valgrind bug -- Memcheck:Addr8 -- fun:__strspn_sse42 -- fun:crm_get_msec --} -diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c -index 4b00f24..26df264 100644 ---- a/tools/crm_attribute.c -+++ b/tools/crm_attribute.c -@@ -122,6 +122,7 @@ main(int argc, char **argv) - int flag; - - int option_index = 0; -+ int is_remote_node = 0; - - crm_log_cli_init("crm_attribute"); - crm_set_options(NULL, "command -n attribute [options]", long_options, -@@ -232,16 +233,26 @@ main(int argc, char **argv) - - } else if (safe_str_neq(type, XML_CIB_TAG_TICKETS)) { - if (dest_uname == NULL) { -- dest_uname = get_local_node_name(); -+ dest_uname = get_node_name(0); - } -- if (pcmk_ok != query_node_uuid(the_cib, dest_uname, &dest_node)) { -+ if (pcmk_ok != query_node_uuid(the_cib, dest_uname, &dest_node, &is_remote_node)) { - fprintf(stderr, "Could not map name=%s to a UUID\n", dest_uname); - } - } - -+ if (is_remote_node && safe_str_neq(type, XML_CIB_TAG_STATUS)) { -+ /* Only the status section can exists for remote_nodes */ -+ type = XML_CIB_TAG_STATUS; -+ if (command == 'v') { -+ fprintf(stderr, "Remote-nodes do not maintain permanent attributes, '%s=%s' will be removed after %s reboots.\n", -+ attr_name, attr_value, dest_uname); -+ } -+ } -+ - if ((command == 'v' || command == 'D') -+ && is_remote_node == FALSE /* always send remote node attr directly to cib */ - && safe_str_eq(type, XML_CIB_TAG_STATUS) -- && attrd_update_delegate(NULL, command, dest_uname, attr_name, attr_value, type, set_name, -+ && pcmk_ok == attrd_update_delegate(NULL, command, dest_uname, attr_name, attr_value, type, set_name, - NULL, NULL)) { - crm_info("Update %s=%s sent via attrd", attr_name, command == 'D' ? "" : attr_value); - -diff --git a/tools/crm_error.c b/tools/crm_error.c -index 5fb0c7b..bd75a8f 100644 ---- a/tools/crm_error.c -+++ b/tools/crm_error.c -@@ -27,6 +27,11 @@ static struct crm_option long_options[] = { - {"version", 0, 0, '$', "\tVersion information" }, - {"verbose", 0, 0, 'V', "\tIncrease debug output"}, - -+ {"name", 0, 0, 'n', "\tShow the error's name rather than the description." 
-+ "\n\t\t\tUseful for looking for sources of the error in source code"}, -+ -+ {"list", 0, 0, 'l', "\tShow all known errors."}, -+ - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -39,9 +44,12 @@ main(int argc, char **argv) - int flag = 0; - int option_index = 0; - -+ bool do_list = FALSE; -+ bool with_name = FALSE; -+ - crm_log_cli_init("crm_error"); - crm_set_options(NULL, "[options] -- rc", long_options, -- "Tool for displaying the textual description of a reported error code"); -+ "Tool for displaying the textual name or description of a reported error code"); - - while (flag >= 0) { - flag = crm_get_option(argc, argv, &option_index); -@@ -55,15 +63,40 @@ main(int argc, char **argv) - case '?': - crm_help(flag, EX_OK); - break; -+ case 'n': -+ with_name = TRUE; -+ break; -+ case 'l': -+ do_list = TRUE; -+ break; - default: - crm_help(flag, EX_OK); - break; - } - } - -+ if(do_list) { -+ for (rc = 0; rc < 256; rc++) { -+ const char *name = pcmk_errorname(rc); -+ const char *desc = pcmk_strerror(rc); -+ if(name == NULL || strcmp("Unknown", name) == 0) { -+ /* Unknown */ -+ } else if(with_name) { -+ printf("%.3d: %-25s %s\n", rc, name, desc); -+ } else { -+ printf("%.3d: %s\n", rc, desc); -+ } -+ } -+ return 0; -+ } -+ - for (lpc = optind; lpc < argc; lpc++) { - rc = crm_atoi(argv[lpc], NULL); -- printf("%s\n", pcmk_strerror(rc)); -+ if(with_name) { -+ printf("%s - %s\n", pcmk_errorname(rc), pcmk_strerror(rc)); -+ } else { -+ printf("%s\n", pcmk_strerror(rc)); -+ } - } - return 0; - } -diff --git a/tools/crm_mon.c b/tools/crm_mon.c -index b646a83..f203caf 100644 ---- a/tools/crm_mon.c -+++ b/tools/crm_mon.c -@@ -1,17 +1,17 @@ - --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -318,7 +318,7 @@ static struct crm_option long_options[] = { - {"snmp-traps", 1, 0, 'S', "Send SNMP traps to this station", !ENABLE_SNMP}, - {"snmp-community", 1, 0, 'C', "Specify community for SNMP traps(default is NULL)", !ENABLE_SNMP}, - {"mail-to", 1, 0, 'T', "Send Mail alerts to this user. 
See also --mail-from, --mail-host, --mail-prefix", !ENABLE_ESMTP}, -- -+ - {"-spacer-", 1, 0, '-', "\nDisplay Options:"}, - {"group-by-node", 0, 0, 'n', "\tGroup resources by node" }, - {"inactive", 0, 0, 'r', "\tDisplay inactive resources" }, -@@ -341,7 +341,7 @@ static struct crm_option long_options[] = { - {"external-agent", 1, 0, 'E', "A program to run when resource operations take place."}, - {"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."}, - -- -+ - {"xml-file", 1, 0, 'x', NULL, 1}, - - {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, -@@ -359,11 +359,98 @@ static struct crm_option long_options[] = { - {"-spacer-", 1, 0, '-', " crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com", pcmk_option_example|!ENABLE_ESMTP}, - {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send SNMP alerts:", pcmk_option_paragraph|!ENABLE_SNMP}, - {"-spacer-", 1, 0, '-', " crm_mon --daemonize --snmp-traps snmptrapd.example.com", pcmk_option_example|!ENABLE_SNMP}, -- -+ - {NULL, 0, 0, 0} - }; - /* *INDENT-ON* */ - -+#if CURSES_ENABLED -+static const char * -+get_option_desc(char c) -+{ -+ int lpc; -+ -+ for (lpc = 0; long_options[lpc].name != NULL; lpc++) { -+ -+ if (long_options[lpc].name[0] == '-') -+ continue; -+ -+ if (long_options[lpc].val == c) { -+ const char * tab = NULL; -+ tab = strrchr(long_options[lpc].desc, '\t'); -+ return tab ? ++tab : long_options[lpc].desc; -+ } -+ } -+ -+ return NULL; -+} -+ -+static gboolean -+detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused) -+{ -+ char c; -+ gboolean config_mode = FALSE; -+ -+ while (1) { -+ -+ /* Get user input */ -+ c = getchar(); -+ -+ switch (c) { -+ case 'c': -+ print_tickets = ! print_tickets; -+ break; -+ case 'f': -+ print_failcount = ! print_failcount; -+ break; -+ case 'n': -+ group_by_node = ! group_by_node; -+ break; -+ case 'o': -+ print_operations = ! print_operations; -+ break; -+ case 'r': -+ inactive_resources = ! inactive_resources; -+ break; -+ case 't': -+ print_timing = ! print_timing; -+ if (print_timing) -+ print_operations = TRUE; -+ break; -+ case 'A': -+ print_nodes_attr = ! print_nodes_attr; -+ break; -+ case '?': -+ config_mode = TRUE; -+ break; -+ default: -+ goto refresh; -+ } -+ -+ if (!config_mode) -+ goto refresh; -+ -+ blank_screen(); -+ -+ print_as("Display option change mode\n"); -+ print_as("\n"); -+ print_as("%c c: \t%s\n", print_tickets ? '*': ' ', get_option_desc('c')); -+ print_as("%c f: \t%s\n", print_failcount ? '*': ' ', get_option_desc('f')); -+ print_as("%c n: \t%s\n", group_by_node ? '*': ' ', get_option_desc('n')); -+ print_as("%c o: \t%s\n", print_operations ? '*': ' ', get_option_desc('o')); -+ print_as("%c r: \t%s\n", inactive_resources ? '*': ' ', get_option_desc('r')); -+ print_as("%c t: \t%s\n", print_timing ? '*': ' ', get_option_desc('t')); -+ print_as("%c A: \t%s\n", print_nodes_attr ? 
'*': ' ', get_option_desc('A')); -+ print_as("\n"); -+ print_as("Toggle fields via field letter, type any other key to return"); -+ } -+ -+refresh: -+ mon_refresh_display(NULL); -+ return TRUE; -+} -+#endif -+ - int - main(int argc, char **argv) - { -@@ -584,6 +671,7 @@ main(int argc, char **argv) - if (ncurses_winch_handler == SIG_DFL || - ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) - ncurses_winch_handler = NULL; -+ g_io_add_watch(g_io_channel_unix_new(STDIN_FILENO), G_IO_IN, detect_user_input, NULL); - } - #endif - refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); -@@ -696,9 +784,6 @@ print_simple_status(pe_working_set_t * data_set) - return 0; - } - --extern int get_failcount(node_t * node, resource_t * rsc, int *last_failure, -- pe_working_set_t * data_set); -- - static void - print_date(time_t time) - { -@@ -714,23 +799,18 @@ print_date(time_t time) - print_as("'%s'", date_str); - } - -+#include - static void - print_rsc_summary(pe_working_set_t * data_set, node_t * node, resource_t * rsc, gboolean all) - { - gboolean printed = FALSE; -- time_t last_failure = 0; -- -- char *fail_attr = crm_concat("fail-count", rsc->id, '-'); -- const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); -- -- int failcount = char2score(value); /* Get the true value, not the effective one from get_failcount() */ - -- get_failcount(node, rsc, (int *)&last_failure, data_set); -- free(fail_attr); -+ time_t last_failure = 0; -+ int failcount = get_failcount_full(node, rsc, &last_failure, FALSE, data_set); - - if (all || failcount || last_failure > 0) { - printed = TRUE; -- print_as(" %s: migration-threshold=%d", rsc->id, rsc->migration_threshold); -+ print_as(" %s: migration-threshold=%d", rsc_printable_id(rsc), rsc->migration_threshold); - } - - if (failcount > 0) { -@@ -805,31 +885,35 @@ print_rsc_history(pe_working_set_t * data_set, node_t * node, xmlNode * rsc_entr - - if (print_timing) { - int int_value; -- const char *attr = "last-rc-change"; -+ const char *attr = XML_RSC_OP_LAST_CHANGE; - - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); -- print_as(" %s=", attr); -- print_date(int_value); -+ if (int_value > 0) { -+ print_as(" %s=", attr); -+ print_date(int_value); -+ } - } - -- attr = "last-run"; -+ attr = XML_RSC_OP_LAST_RUN; - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); -- print_as(" %s=", attr); -- print_date(int_value); -+ if (int_value > 0) { -+ print_as(" %s=", attr); -+ print_date(int_value); -+ } - } - -- attr = "exec-time"; -+ attr = XML_RSC_OP_T_EXEC; - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); - print_as(" %s=%dms", attr, int_value); - } - -- attr = "queue-time"; -+ attr = XML_RSC_OP_T_QUEUE; - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); -@@ -1106,6 +1190,13 @@ print_status(pe_working_set_t * data_set) - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; - const char *node_mode = NULL; -+ char *node_name = NULL; -+ -+ if(node->details->remote_rsc) { -+ node_name = g_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); -+ } else { -+ node_name = g_strdup_printf("%s", node->details->uname); -+ } - - if (node->details->unclean) { - if (node->details->online && node->details->unclean) { -@@ -1134,22 +1225,26 @@ 
print_status(pe_working_set_t * data_set) - } else if (node->details->online) { - node_mode = "online"; - if (group_by_node == FALSE) { -- online_nodes = add_list_element(online_nodes, node->details->uname); -+ online_nodes = add_list_element(online_nodes, node_name); - continue; - } - - } else { - node_mode = "OFFLINE"; - if (group_by_node == FALSE) { -- offline_nodes = add_list_element(offline_nodes, node->details->uname); -+ offline_nodes = add_list_element(offline_nodes, node_name); - continue; - } - } - -+ if(node->details->remote_rsc) { -+ online_nodes = add_list_element(online_nodes, node->details->remote_rsc->id); -+ } -+ - if (safe_str_eq(node->details->uname, node->details->id)) { -- print_as("Node %s: %s\n", node->details->uname, node_mode); -+ print_as("Node %s: %s\n", node_name, node_mode); - } else { -- print_as("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); -+ print_as("Node %s (%s): %s\n", node_name, node->details->id, node_mode); - } - - if (group_by_node) { -@@ -1161,6 +1256,7 @@ print_status(pe_working_set_t * data_set) - rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout); - } - } -+ free(node_name); - } - - if (online_nodes) { -@@ -1229,7 +1325,7 @@ print_status(pe_working_set_t * data_set) - int val = 0; - const char *id = ID(xml_op); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); -- const char *last = crm_element_value(xml_op, "last_run"); -+ const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); - const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); - const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); - const char *rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); -@@ -1242,10 +1338,10 @@ print_status(pe_working_set_t * data_set) - if (last) { - time_t run_at = crm_parse_int(last, "0"); - -- print_as(", last-run=%s, queued=%sms, exec=%sms\n", -+ print_as(", last-rc-change=%s, queued=%sms, exec=%sms\n", - ctime(&run_at), -- crm_element_value(xml_op, "exec_time"), -- crm_element_value(xml_op, "queue_time")); -+ crm_element_value(xml_op, XML_RSC_OP_T_EXEC), -+ crm_element_value(xml_op, XML_RSC_OP_T_QUEUE)); - } - - val = crm_parse_int(rc, "0"); -@@ -2127,7 +2223,7 @@ crm_diff_update(const char *event, xmlNode * msg) - { - int rc = -1; - long now = time(NULL); -- const char *op = NULL; -+ static bool stale = FALSE; - - print_dot(); - -@@ -2140,14 +2236,13 @@ crm_diff_update(const char *event, xmlNode * msg) - free_xml(cib_last); - - switch (rc) { -- case pcmk_err_diff_resync: -- case pcmk_err_diff_failed: -- crm_warn("[%s] %s Patch aborted: %s (%d)", event, op, pcmk_strerror(rc), rc); -+ case -pcmk_err_diff_resync: -+ case -pcmk_err_diff_failed: -+ crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); - case pcmk_ok: - break; - default: -- crm_warn("[%s] %s ABORTED: %s (%d)", event, op, pcmk_strerror(rc), rc); -- return; -+ crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); - } - } - -@@ -2160,20 +2255,25 @@ crm_diff_update(const char *event, xmlNode * msg) - xmlXPathObject *xpathObj = xpath_search(msg, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED - "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ int lpc = 0, max = numXpathResults(xpathObj); - -- for (lpc = 0; lpc < max; lpc++) { -- xmlNode *rsc_op = getXpathResult(xpathObj, lpc); -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *rsc_op = getXpathResult(xpathObj, lpc); - -- 
handle_rsc_op(rsc_op); -- } -+ handle_rsc_op(rsc_op); - } -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ freeXpathObject(xpathObj); -+ } -+ -+ if (current_cib == NULL) { -+ if(!stale) { -+ print_as("--- Stale data ---"); - } -+ stale = TRUE; -+ return; - } - -+ stale = FALSE; - if ((now - last_refresh) > (reconnect_msec / 1000)) { - /* Force a refresh */ - mon_refresh_display(NULL); -diff --git a/tools/crm_node.c b/tools/crm_node.c -index eac2494..a25b3b4 100644 ---- a/tools/crm_node.c -+++ b/tools/crm_node.c -@@ -220,7 +220,7 @@ read_local_hb_uuid(void) - fseek(input, 0L, start); - if (start != ftell(input)) { - fprintf(stderr, "fseek not behaving: %ld vs. %ld\n", start, ftell(input)); -- crm_exit(2); -+ crm_exit(pcmk_err_generic); - } - - buffer = malloc(50); -@@ -229,7 +229,7 @@ read_local_hb_uuid(void) - - if (read_len != UUID_LEN) { - fprintf(stderr, "Expected and read bytes differ: %d vs. %ld\n", UUID_LEN, read_len); -- crm_exit(3); -+ crm_exit(pcmk_err_generic); - - } else if (buffer != NULL) { - cl_uuid_unparse(&uuid, buffer); -@@ -238,7 +238,7 @@ read_local_hb_uuid(void) - - } else { - fprintf(stderr, "No buffer to unparse\n"); -- crm_exit(4); -+ crm_exit(ENODATA); - } - - free(buffer); -@@ -294,7 +294,7 @@ ccm_age_callback(oc_ed_t event, void *cookie, size_t size, const void *data) - fprintf(stdout, "\n"); - } - fflush(stdout); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - static gboolean -@@ -353,15 +353,15 @@ try_heartbeat(int command, enum cluster_type_e stack) - - if (command == 'i') { - if (read_local_hb_uuid()) { -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - } else if (command == 'R') { - if (crmd_remove_node_cache(target_uname)) { - crm_err("Failed to connect to crmd to remove node id %s", target_uname); -- crm_exit(-pcmk_err_generic); -+ crm_exit(pcmk_err_generic); - } -- crm_exit(0); -+ crm_exit(pcmk_ok); - - } else if (ccm_age_connect(&ccm_fd)) { - int rc = 0; -@@ -463,11 +463,11 @@ try_cman(int command, enum cluster_type_e stack) - crm_help('?', EX_USAGE); - } - cman_finish(cman_handle); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - cman_bail: - cman_finish(cman_handle); -- return crm_exit(EX_USAGE); -+ return crm_exit(EINVAL); - } - #endif - -@@ -477,7 +477,7 @@ ais_membership_destroy(gpointer user_data) - { - crm_err("AIS connection terminated"); - ais_fd_sync = -1; -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - static gint -@@ -548,7 +548,7 @@ ais_membership_dispatch(int kind, const char *from, const char *data) - fprintf(stdout, "\n"); - } - -- crm_exit(0); -+ crm_exit(pcmk_ok); - - return TRUE; - } -@@ -586,7 +586,7 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) - fprintf(stdout, "\n"); - } - -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - return 0; -@@ -595,7 +595,7 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) - static void - node_mcp_destroy(gpointer user_data) - { -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - static gboolean -@@ -626,7 +626,7 @@ try_corosync(int command, enum cluster_type_e stack) - case 'e': - /* Age makes no sense (yet) in an AIS cluster */ - fprintf(stdout, "1\n"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'q': - /* Go direct to the Quorum API */ -@@ -648,7 +648,7 @@ try_corosync(int command, enum cluster_type_e stack) - fprintf(stdout, "0\n"); - } - quorum_finalize(q_handle); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'i': - /* Go direct to the CPG API */ -@@ -666,7 +666,7 @@ try_corosync(int command, enum cluster_type_e stack) - - fprintf(stdout, "%u\n", nodeid); - 
cpg_finalize(c_handle); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'l': - case 'p': -@@ -704,13 +704,13 @@ try_openais(int command, enum cluster_type_e stack) - switch (command) { - case 'R': - send_ais_text(crm_class_rmpeer, target_uname, TRUE, NULL, crm_msg_ais); -- cib_remove_node(atoi(target_uname), NULL); -- crm_exit(0); -+ cib_remove_node(0, target_uname); -+ crm_exit(pcmk_ok); - - case 'e': - /* Age makes no sense (yet) in an AIS cluster */ - fprintf(stdout, "1\n"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'q': - send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); -@@ -724,7 +724,7 @@ try_openais(int command, enum cluster_type_e stack) - - case 'i': - printf("%u\n", cluster.nodeid); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - default: - fprintf(stderr, "Unknown option '%c'\n", command); -@@ -819,11 +819,11 @@ main(int argc, char **argv) - - if (command == 'n') { - fprintf(stdout, "%s\n", get_local_node_name()); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - } else if (command == 'N') { - fprintf(stdout, "%s\n", get_node_name(nodeid)); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - if (dangerous_cmd && force_flag == FALSE) { -@@ -831,7 +831,7 @@ main(int argc, char **argv) - " To prevent accidental destruction of the cluster," - " the --force flag is required in order to proceed.\n"); - fflush(stderr); -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - } - - try_stack = get_cluster_type(); -diff --git a/tools/crm_report.in b/tools/crm_report.in -index f1d0f14..3af1b3f 100755 ---- a/tools/crm_report.in -+++ b/tools/crm_report.in -@@ -1,17 +1,17 @@ - #!/bin/sh - - # Copyright (C) 2010 Andrew Beekhof -- # -+ # - # This program is free software; you can redistribute it and/or - # modify it under the terms of the GNU General Public - # License as published by the Free Software Foundation; either - # version 2.1 of the License, or (at your option) any later version. -- # -+ # - # This software is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # General Public License for more details. -- # -+ # - # You should have received a copy of the GNU General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -@@ -20,8 +20,8 @@ - - # Note the quotes around `$TEMP': they are essential! 
- TEMP=`getopt \ -- -o hv?xl:f:t:n:T:Lp:c:dSACHu:MVs \ -- --long help,cts:,cts-log:,dest:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,version,features \ -+ -o hv?xl:f:t:n:T:Lp:c:dSACHu:D:MVs \ -+ --long help,cts:,cts-log:,dest:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features \ - -n 'crm_report' -- "$@"` - eval set -- "$TEMP" - -@@ -44,7 +44,7 @@ usage() { - - cat< - #include - -+bool scope_master = FALSE; - gboolean do_force = FALSE; - gboolean BE_QUIET = FALSE; - const char *attr_set_type = XML_TAG_ATTR_SETS; -@@ -112,7 +113,7 @@ resource_ipc_callback(const char *buffer, ssize_t length, gpointer userdata) - if (crmd_replies_needed == 0) { - fprintf(stderr, " OK\n"); - crm_debug("Got all the replies we expected"); -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - free_xml(msg); -@@ -668,14 +669,14 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - } - } - -- key = generate_transition_key(0, getpid(), 0, __FILE__); -+ key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); - - msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); - crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); - free(key); - - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); -- if (safe_str_eq(router_node, host_uname)) { -+ if (safe_str_neq(router_node, host_uname)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - -@@ -730,7 +731,7 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - } - - static int --delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, -+delete_lrm_rsc(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_uname, - resource_t * rsc, pe_working_set_t * data_set) - { - int rc = pcmk_ok; -@@ -744,7 +745,7 @@ delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { - resource_t *child = (resource_t *) lpc->data; - -- delete_lrm_rsc(crmd_channel, host_uname, child, data_set); -+ delete_lrm_rsc(cib_conn, crmd_channel, host_uname, child, data_set); - } - return pcmk_ok; - -@@ -755,7 +756,7 @@ delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - node_t *node = (node_t *) lpc->data; - - if (node->details->online) { -- delete_lrm_rsc(crmd_channel, node->details->uname, rsc, data_set); -+ delete_lrm_rsc(cib_conn, crmd_channel, node->details->uname, rsc, data_set); - } - } - -@@ -767,14 +768,22 @@ delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - if (rc == pcmk_ok) { - char *attr_name = NULL; - const char *id = rsc->id; -+ node_t *node = pe_find_node(data_set->nodes, host_uname); - - if (rsc->clone_name) { - id = rsc->clone_name; - } - - attr_name = crm_concat("fail-count", id, '-'); -- attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, -+ if (node && node->details->remote_rsc) { -+ /* TODO talk directly to cib for remote nodes until we can re-write -+ * attrd to handle remote-nodes */ -+ rc = delete_attr_delegate(cib_conn, cib_sync_call, XML_CIB_TAG_STATUS, node->details->id, NULL, NULL, -+ NULL, attr_name, NULL, FALSE, NULL); -+ } else { -+ rc = attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, - NULL, NULL); -+ } - free(attr_name); - } - return rc; -@@ -788,150 +797,162 @@ fail_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); - } - --static int 
--refresh_lrm(crm_ipc_t * crmd_channel, const char *host_uname) -+static char * -+parse_cli_lifetime(const char *input) - { -- xmlNode *cmd = NULL; -- int rc = -ECOMM; -+ char *later_s = NULL; -+ crm_time_t *now = NULL; -+ crm_time_t *later = NULL; -+ crm_time_t *duration = NULL; - -- cmd = create_request(CRM_OP_LRM_REFRESH, NULL, host_uname, -- CRM_SYSTEM_CRMD, crm_system_name, our_pid); -+ if (input == NULL) { -+ return NULL; -+ } - -- if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { -- rc = 0; -+ duration = crm_time_parse_duration(move_lifetime); -+ if (duration == NULL) { -+ CMD_ERR("Invalid duration specified: %s\n", move_lifetime); -+ CMD_ERR("Please refer to" -+ " http://en.wikipedia.org/wiki/ISO_8601#Duration" -+ " for examples of valid durations\n"); -+ return NULL; - } -- free_xml(cmd); -- return rc; -+ -+ now = crm_time_new(NULL); -+ later = crm_time_add(now, duration); -+ crm_time_log(LOG_INFO, "now ", now, -+ crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -+ crm_time_log(LOG_INFO, "later ", later, -+ crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -+ crm_time_log(LOG_INFO, "duration", duration, crm_time_log_date | crm_time_log_timeofday); -+ later_s = crm_time_as_string(later, crm_time_log_date | crm_time_log_timeofday); -+ printf("Migration will take effect until: %s\n", later_s); -+ -+ crm_time_free(duration); -+ crm_time_free(later); -+ crm_time_free(now); -+ return later_s; - } - - static int --move_resource(const char *rsc_id, -- const char *existing_node, const char *preferred_node, cib_t * cib_conn) -+ban_resource(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn) - { -- char *later_s = NULL; -+ char *later_s = parse_cli_lifetime(move_lifetime); - int rc = pcmk_ok; - char *id = NULL; -- xmlNode *rule = NULL; -- xmlNode *expr = NULL; -- xmlNode *constraints = NULL; - xmlNode *fragment = NULL; -+ xmlNode *location = NULL; - -- xmlNode *can_run = NULL; -- xmlNode *dont_run = NULL; -+ if(host == NULL) { -+ GListPtr n = allnodes; -+ for(; n && rc == pcmk_ok; n = n->next) { -+ node_t *target = n->data; -+ -+ rc = ban_resource(rsc_id, target->details->uname, NULL, cib_conn); -+ } -+ return rc; -+ } -+ -+ later_s = parse_cli_lifetime(move_lifetime); -+ if(move_lifetime && later_s == NULL) { -+ return -EINVAL; -+ } - - fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); -- constraints = fragment; - -- id = crm_concat("cli-prefer", rsc_id, '-'); -- can_run = create_xml_node(NULL, XML_CONS_TAG_RSC_LOCATION); -- crm_xml_add(can_run, XML_ATTR_ID, id); -+ id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, host); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); - free(id); - -- id = crm_concat("cli-standby", rsc_id, '-'); -- dont_run = create_xml_node(NULL, XML_CONS_TAG_RSC_LOCATION); -- crm_xml_add(dont_run, XML_ATTR_ID, id); -- free(id); -+ if (BE_QUIET == FALSE) { -+ CMD_ERR("WARNING: Creating rsc_location constraint '%s'" -+ " with a score of -INFINITY for resource %s" -+ " on %s.\n", ID(location), rsc_id, host); -+ CMD_ERR("\tThis will prevent %s from running" -+ " on %s until the constraint is removed using" -+ " the 'crm_resource --clear' command or manually" -+ " with cibadmin\n", rsc_id, host); -+ CMD_ERR("\tThis will be the case even if %s is" -+ " the last node in the cluster\n", host); -+ CMD_ERR("\tThis message can be disabled with --quiet\n"); -+ } - -- if (move_lifetime) { -- crm_time_t *now = NULL; -- crm_time_t *later = 
NULL; -- crm_time_t *duration = crm_time_parse_duration(move_lifetime); -- -- if (duration == NULL) { -- CMD_ERR("Invalid duration specified: %s\n", move_lifetime); -- CMD_ERR("Please refer to" -- " http://en.wikipedia.org/wiki/ISO_8601#Duration" -- " for examples of valid durations\n"); -- return -EINVAL; -- } -- now = crm_time_new(NULL); -- later = crm_time_add(now, duration); -- crm_time_log(LOG_INFO, "now ", now, -- crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -- crm_time_log(LOG_INFO, "later ", later, -- crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -- crm_time_log(LOG_INFO, "duration", duration, crm_time_log_date | crm_time_log_timeofday); -- later_s = crm_time_as_string(later, crm_time_log_date | crm_time_log_timeofday); -- printf("Migration will take effect until: %s\n", later_s); -- -- crm_time_free(duration); -- crm_time_free(later); -- crm_time_free(now); -- } -- -- if (existing_node == NULL) { -- crm_log_xml_notice(can_run, "Deleting"); -- rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, dont_run, cib_options); -- if (rc == -ENXIO) { -- rc = pcmk_ok; -- -- } else if (rc != pcmk_ok) { -- goto bail; -- } -+ crm_xml_add(location, XML_COLOC_ATTR_SOURCE, rsc_id); -+ -+ if (later_s == NULL) { -+ /* Short form */ -+ crm_xml_add(location, XML_CIB_TAG_NODE, host); -+ crm_xml_add(location, XML_RULE_ATTR_SCORE, MINUS_INFINITY_S); - - } else { -- if (BE_QUIET == FALSE) { -- fprintf(stderr, -- "WARNING: Creating rsc_location constraint '%s'" -- " with a score of -INFINITY for resource %s" -- " on %s.\n", ID(dont_run), rsc_id, existing_node); -- CMD_ERR("\tThis will prevent %s from running" -- " on %s until the constraint is removed using" -- " the 'crm_resource -U' command or manually" -- " with cibadmin\n", rsc_id, existing_node); -- CMD_ERR("\tThis will be the case even if %s is" -- " the last node in the cluster\n", existing_node); -- CMD_ERR("\tThis message can be disabled with -Q\n"); -- } -- -- crm_xml_add(dont_run, "rsc", rsc_id); -- -- rule = create_xml_node(dont_run, XML_TAG_RULE); -- expr = create_xml_node(rule, XML_TAG_EXPRESSION); -- id = crm_concat("cli-standby-rule", rsc_id, '-'); -+ xmlNode *rule = create_xml_node(location, XML_TAG_RULE); -+ xmlNode *expr = create_xml_node(rule, XML_TAG_EXPRESSION); -+ -+ id = g_strdup_printf("cli-ban-%s-on-%s-rule", rsc_id, host); - crm_xml_add(rule, XML_ATTR_ID, id); - free(id); - - crm_xml_add(rule, XML_RULE_ATTR_SCORE, MINUS_INFINITY_S); - crm_xml_add(rule, XML_RULE_ATTR_BOOLEAN_OP, "and"); - -- id = crm_concat("cli-standby-expr", rsc_id, '-'); -+ id = g_strdup_printf("cli-ban-%s-on-%s-expr", rsc_id, host); - crm_xml_add(expr, XML_ATTR_ID, id); - free(id); - - crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); - crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); -- crm_xml_add(expr, XML_EXPR_ATTR_VALUE, existing_node); -+ crm_xml_add(expr, XML_EXPR_ATTR_VALUE, host); - crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); - -- if (later_s) { -- expr = create_xml_node(rule, "date_expression"); -- id = crm_concat("cli-standby-lifetime-end", rsc_id, '-'); -- crm_xml_add(expr, XML_ATTR_ID, id); -- free(id); -+ expr = create_xml_node(rule, "date_expression"); -+ id = g_strdup_printf("cli-ban-%s-on-%s-lifetime", rsc_id, host); -+ crm_xml_add(expr, XML_ATTR_ID, id); -+ free(id); - -- crm_xml_add(expr, "operation", "lt"); -- crm_xml_add(expr, "end", later_s); -- } -+ crm_xml_add(expr, "operation", "lt"); -+ crm_xml_add(expr, "end", later_s); -+ } -+ -+ 
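For orientation, ban_resource() above assembles a rsc_location constraint whose id follows the new cli-ban-<rsc>-on-<host> scheme, carrying either a direct -INFINITY score (the short form) or a rule plus date_expression when --lifetime was given. A rough standalone sketch of the short form in plain libxml2 follows; the element and attribute names (rsc_location, id, rsc, node, score) are assumed expansions of the XML_* constants used above, so treat this as an illustration rather than captured tool output:

    /* Sketch: build a "cli-ban" location-constraint fragment shaped like
     * the short form ban_resource() produces. Compile with:
     *   gcc sketch.c $(xml2-config --cflags --libs) */
    #include <libxml/tree.h>
    #include <stdio.h>

    int main(void)
    {
        const char *rsc = "myrsc", *host = "node1"; /* example names */
        char id[128];
        xmlNodePtr cons = xmlNewNode(NULL, BAD_CAST "constraints");
        xmlNodePtr loc = xmlNewChild(cons, NULL, BAD_CAST "rsc_location", NULL);

        snprintf(id, sizeof(id), "cli-ban-%s-on-%s", rsc, host);
        xmlNewProp(loc, BAD_CAST "id", BAD_CAST id);
        xmlNewProp(loc, BAD_CAST "rsc", BAD_CAST rsc);

        /* short form: no lifetime, so node and score sit on the element */
        xmlNewProp(loc, BAD_CAST "node", BAD_CAST host);
        xmlNewProp(loc, BAD_CAST "score", BAD_CAST "-INFINITY");

        xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
        xmlDocSetRootElement(doc, cons);
        xmlDocFormatDump(stdout, doc, 1);

        xmlFreeDoc(doc);
        return 0;
    }

With a lifetime, the code above instead nests a rule containing an "#uname eq host" expression and a date_expression whose end attribute is the parsed ISO 8601 deadline.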
crm_log_xml_notice(fragment, "Modify"); -+ rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); - -- add_node_copy(constraints, dont_run); -+ free_xml(fragment); -+ free(later_s); -+ return rc; -+} -+ -+static int -+prefer_resource(const char *rsc_id, const char *host, cib_t * cib_conn) -+{ -+ char *later_s = parse_cli_lifetime(move_lifetime); -+ int rc = pcmk_ok; -+ char *id = NULL; -+ xmlNode *location = NULL; -+ xmlNode *fragment = NULL; -+ -+ if(move_lifetime && later_s == NULL) { -+ return -EINVAL; - } - -- if (preferred_node == NULL) { -- crm_log_xml_notice(can_run, "Deleting"); -- rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, can_run, cib_options); -- if (rc == -ENXIO) { -- rc = pcmk_ok; -+ fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); - -- } else if (rc != pcmk_ok) { -- goto bail; -- } -+ id = g_strdup_printf("cli-prefer-%s", rsc_id); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ free(id); -+ -+ crm_xml_add(location, XML_COLOC_ATTR_SOURCE, rsc_id); -+ -+ if (later_s == NULL) { -+ /* Short form */ -+ crm_xml_add(location, XML_CIB_TAG_NODE, host); -+ crm_xml_add(location, XML_RULE_ATTR_SCORE, INFINITY_S); - - } else { -- crm_xml_add(can_run, "rsc", rsc_id); -+ xmlNode *rule = create_xml_node(location, XML_TAG_RULE); -+ xmlNode *expr = create_xml_node(rule, XML_TAG_EXPRESSION); - -- rule = create_xml_node(can_run, XML_TAG_RULE); -- expr = create_xml_node(rule, XML_TAG_EXPRESSION); - id = crm_concat("cli-prefer-rule", rsc_id, '-'); - crm_xml_add(rule, XML_ATTR_ID, id); - free(id); -@@ -945,32 +966,73 @@ move_resource(const char *rsc_id, - - crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); - crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); -- crm_xml_add(expr, XML_EXPR_ATTR_VALUE, preferred_node); -+ crm_xml_add(expr, XML_EXPR_ATTR_VALUE, host); - crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); - -- if (later_s) { -- expr = create_xml_node(rule, "date_expression"); -- id = crm_concat("cli-prefer-lifetime-end", rsc_id, '-'); -- crm_xml_add(expr, XML_ATTR_ID, id); -- free(id); -+ expr = create_xml_node(rule, "date_expression"); -+ id = crm_concat("cli-prefer-lifetime-end", rsc_id, '-'); -+ crm_xml_add(expr, XML_ATTR_ID, id); -+ free(id); -+ -+ crm_xml_add(expr, "operation", "lt"); -+ crm_xml_add(expr, "end", later_s); -+ } -+ -+ crm_log_xml_info(fragment, "Modify"); -+ rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); -+ -+ free_xml(fragment); -+ free(later_s); -+ return rc; -+} -+ -+static int -+clear_resource(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn) -+{ -+ char *id = NULL; -+ int rc = pcmk_ok; -+ xmlNode *fragment = NULL; -+ xmlNode *location = NULL; -+ -+ fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); - -- crm_xml_add(expr, "operation", "lt"); -- crm_xml_add(expr, "end", later_s); -+ if(host) { -+ id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, host); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ free(id); -+ -+ } else { -+ GListPtr n = allnodes; -+ for(; n; n = n->next) { -+ node_t *target = n->data; -+ -+ id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, target->details->uname); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ free(id); - } -+ } - -- add_node_copy(constraints, can_run); -+ id = g_strdup_printf("cli-prefer-%s", rsc_id); 
-+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ if(host && do_force == FALSE) { -+ crm_xml_add(location, XML_CIB_TAG_NODE, host); - } -+ free(id); -+ -+ crm_log_xml_info(fragment, "Delete"); -+ rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); -+ if (rc == -ENXIO) { -+ rc = pcmk_ok; - -- if (preferred_node != NULL || existing_node != NULL) { -- crm_log_xml_notice(fragment, "CLI Update"); -- rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); -+ } else if (rc != pcmk_ok) { -+ goto bail; - } - - bail: - free_xml(fragment); -- free_xml(dont_run); -- free_xml(can_run); -- free(later_s); - return rc; - } - -@@ -987,13 +1049,17 @@ list_resource_operations(const char *rsc_id, const char *host_uname, gboolean ac - xmlNode *xml_op = (xmlNode *) lpc->data; - - const char *op_rsc = crm_element_value(xml_op, "resource"); -- const char *last = crm_element_value(xml_op, "last_run"); -+ const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); - const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); - int status = crm_parse_int(status_s, "0"); - - rsc = pe_find_resource(data_set->resources, op_rsc); -- rsc->fns->print(rsc, "", opts, stdout); -+ if(rsc) { -+ rsc->fns->print(rsc, "", opts, stdout); -+ } else { -+ fprintf(stdout, "Unknown resource %s", op_rsc); -+ } - - fprintf(stdout, ": %s (node=%s, call=%s, rc=%s", - op_key ? op_key : ID(xml_op), -@@ -1003,8 +1069,8 @@ list_resource_operations(const char *rsc_id, const char *host_uname, gboolean ac - if (last) { - time_t run_at = crm_parse_int(last, "0"); - -- fprintf(stdout, ", last-run=%s, exec=%sms\n", -- ctime(&run_at), crm_element_value(xml_op, "exec_time")); -+ fprintf(stdout, ", last-rc-change=%s, exec=%sms\n", -+ ctime(&run_at), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); - } - fprintf(stdout, "): %s\n", services_lrm_status_str(status)); - } -@@ -1183,32 +1249,53 @@ static struct crm_option long_options[] = { - {"constraints",0, 0, 'a', "\tDisplay the (co)location constraints that apply to a resource"}, - - {"-spacer-", 1, 0, '-', "\nCommands:"}, -+ {"cleanup", 0, 0, 'C', "\t\tDelete the resource history and re-check the current state. Optional: --resource"}, - {"set-parameter", 1, 0, 'p', "Set the named parameter for a resource. See also -m, --meta"}, - {"get-parameter", 1, 0, 'g', "Display the named parameter for a resource. See also -m, --meta"}, - {"delete-parameter",1, 0, 'd', "Delete the named parameter for a resource. 
See also -m, --meta"}, - {"get-property", 1, 0, 'G', "Display the 'class', 'type' or 'provider' of a resource", 1}, - {"set-property", 1, 0, 'S', "(Advanced) Set the class, type or provider of a resource", 1}, -- {"move", 0, 0, 'M', -- "\t\tMove a resource from its current location, optionally specifying a destination (-N) and/or a period for which it should take effect (-u)" -- "\n\t\t\t\tIf -N is not specified, the cluster will force the resource to move by creating a rule for the current location and a score of -INFINITY" -- "\n\t\t\t\tNOTE: This will prevent the resource from running on this node until the constraint is removed with -U"}, -- {"un-move", 0, 0, 'U', "\t\tRemove all constraints created by a move command"}, -+ -+ {"-spacer-", 1, 0, '-', "\nResource location:"}, -+ { -+ "move", 0, 0, 'M', -+ "\t\tMove a resource from its current location to the named destination.\n " -+ "\t\t\t\tRequires: --host. Optional: --lifetime, --master\n\n" -+ "\t\t\t\tNOTE: This may prevent the resource from running on the previous location node until the implicit constraints expire or are removed with --unban\n" -+ }, -+ { -+ "ban", 0, 0, 'B', -+ "\t\tPrevent the named resource from running on the named --host. \n" -+ "\t\t\t\tRequires: --resource. Optional: --host, --lifetime, --master\n\n" -+ "\t\t\t\tIf --host is not specified, it defaults to:\n" -+ "\t\t\t\t * the curent location for primitives and groups, or\n\n" -+ "\t\t\t\t * the curent location of the master for m/s resources with master-max=1\n\n" -+ "\t\t\t\tAll other situations result in an error as there is no sane default.\n\n" -+ "\t\t\t\tNOTE: This will prevent the resource from running on this node until the constraint expires or is removed with --clear\n" -+ }, -+ { -+ "clear", 0, 0, 'U', "\t\tRemove all constraints created by the --ban and/or --move commands. \n" -+ "\t\t\t\tRequires: --resource. Optional: --host, --master\n\n" -+ "\t\t\t\tIf --host is not specified, all constraints created by --ban and --move will be removed for the named resource.\n" -+ }, -+ {"lifetime", 1, 0, 'u', "\tLifespan of constraints created by the --ban and --move commands"}, -+ { -+ "master", 0, 0, 0, -+ "\t\tLimit the scope of the --ban, --move and --clear commands to the Master role.\n" -+ "\t\t\t\tFor --ban and --move, the previous master can still remain active in the Slave role." -+ }, - - {"-spacer-", 1, 0, '-', "\nAdvanced Commands:"}, - {"delete", 0, 0, 'D', "\t\t(Advanced) Delete a resource from the CIB"}, - {"fail", 0, 0, 'F', "\t\t(Advanced) Tell the cluster this resource has failed"}, -- {"refresh", 0, 0, 'R', "\t\t(Advanced) Refresh the CIB from the LRM"}, -- {"cleanup", 0, 0, 'C', "\t\t(Advanced) Delete a resource from the LRM"}, -- {"reprobe", 0, 0, 'P', "\t\t(Advanced) Re-check for resources started outside of the CRM\n"}, -- {"force-stop", 0, 0, 0, "\t(Advanced) Bypass the cluster and stop a resource on the local node"}, -- {"force-start",0, 0, 0, "\t(Advanced) Bypass the cluster and start a resource on the local node"}, -- {"force-check",0, 0, 0, "\t(Advanced) Bypass the cluster and check the state of a resource on the local node\n"}, -+ {"force-stop", 0, 0, 0, "\t(Advanced) Bypass the cluster and stop a resource on the local node. Additional detail with -V"}, -+ {"force-start",0, 0, 0, "\t(Advanced) Bypass the cluster and start a resource on the local node. Additional detail with -V"}, -+ {"force-check",0, 0, 0, "\t(Advanced) Bypass the cluster and check the state of a resource on the local node. 
Additional detail with -V\n"}, - - {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, - {"node", 1, 0, 'N', "\tHost uname"}, - {"resource-type", 1, 0, 't', "Resource type (primitive, clone, group, ...)"}, - {"parameter-value", 1, 0, 'v', "Value to use with -p, -g or -d"}, -- {"lifetime", 1, 0, 'u', "\tLifespan of migration constraints\n"}, - {"meta", 0, 0, 'm', "\t\tModify a resource's configuration option rather than one which is passed to the resource agent script. For use with -p, -g, -d"}, - {"utilization", 0, 0, 'z', "\tModify a resource's utilization attribute. For use with -p, -g, -d"}, - {"set-name", 1, 0, 's', "\t(Advanced) ID of the instance_attributes object to change"}, -@@ -1224,6 +1311,10 @@ static struct crm_option long_options[] = { - {"host-uname", 1, 0, 'H', NULL, 1}, - {"migrate", 0, 0, 'M', NULL, 1}, - {"un-migrate", 0, 0, 'U', NULL, 1}, -+ {"un-move", 0, 0, 'U', NULL, 1}, -+ -+ {"refresh", 0, 0, 'R', NULL, 1}, -+ {"reprobe", 0, 0, 'P', NULL, 1}, - - {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, - {"-spacer-", 1, 0, '-', "List the configured resources:", pcmk_option_paragraph}, -@@ -1263,10 +1354,10 @@ main(int argc, char **argv) - const char *longname = NULL; - pe_working_set_t data_set; - xmlNode *cib_xml_copy = NULL; -- - cib_t *cib_conn = NULL; -- int rc = pcmk_ok; -+ bool do_trace = FALSE; - -+ int rc = pcmk_ok; - int option_index = 0; - int argerr = 0; - int flag; -@@ -1286,7 +1377,10 @@ main(int argc, char **argv) - - switch (flag) { - case 0: -- if (safe_str_eq("force-stop", longname) -+ if (safe_str_eq("master", longname)) { -+ scope_master = TRUE; -+ -+ } else if (safe_str_eq("force-stop", longname) - || safe_str_eq("force-start", longname) - || safe_str_eq("force-check", longname)) { - rsc_cmd = flag; -@@ -1396,6 +1490,7 @@ main(int argc, char **argv) - } - break; - case 'V': -+ do_trace = TRUE; - crm_bump_log_level(argc, argv); - break; - case '$': -@@ -1435,9 +1530,10 @@ main(int argc, char **argv) - case 't': - rsc_type = optarg; - break; -+ case 'C': - case 'R': - case 'P': -- rsc_cmd = flag; -+ rsc_cmd = 'C'; - break; - case 'L': - case 'c': -@@ -1446,10 +1542,10 @@ main(int argc, char **argv) - case 'w': - case 'D': - case 'F': -- case 'C': - case 'W': - case 'M': - case 'U': -+ case 'B': - case 'O': - case 'o': - case 'A': -@@ -1507,7 +1603,7 @@ main(int argc, char **argv) - } - - set_working_set_defaults(&data_set); -- if (rsc_cmd != 'P') { -+ if (rsc_cmd != 'P' || rsc_id) { - resource_t *rsc = NULL; - - cib_conn = cib_new(); -@@ -1536,7 +1632,7 @@ main(int argc, char **argv) - if (rsc_id) { - rsc = find_rsc_or_clone(rsc_id, &data_set); - } -- if (rsc == NULL) { -+ if (rsc == NULL && rsc_cmd != 'C') { - rc = -ENXIO; - } - } -@@ -1606,11 +1702,26 @@ main(int argc, char **argv) - rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); - rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); -- params = generate_resource_params(rsc, &data_set); - -+ if(safe_str_eq(rclass, "stonith")){ -+ CMD_ERR("Sorry, --%s doesn't support %s resources yet\n", rsc_long_cmd, rclass); -+ crm_exit(EOPNOTSUPP); -+ } -+ -+ params = generate_resource_params(rsc, &data_set); - op = resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params); - -- if (services_action_sync(op)) { -+ if(do_trace) { -+ setenv("OCF_TRACE_RA", "1", 1); -+ } -+ -+ if(op == NULL) { -+ /* Re-run but with stderr enabled so we can display a sane error message */ -+ crm_enable_stderr(TRUE); -+ 
resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params); -+ return crm_exit(EINVAL); -+ -+ } else if (services_action_sync(op)) { - int more, lpc, last; - char *local_copy = NULL; - -@@ -1700,14 +1811,6 @@ main(int argc, char **argv) - } - print_cts_constraints(&data_set); - -- } else if (rsc_cmd == 'C') { -- resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -- -- rc = delete_lrm_rsc(crmd_channel, host_uname, rsc, &data_set); -- if (rc == pcmk_ok) { -- start_mainloop(); -- } -- - } else if (rsc_cmd == 'F') { - rc = fail_lrm_rsc(crmd_channel, host_uname, rsc_id, &data_set); - if (rc == pcmk_ok) { -@@ -1721,7 +1824,7 @@ main(int argc, char **argv) - rc = list_resource_operations(rsc_id, host_uname, FALSE, &data_set); - - } else if (rc == -ENXIO) { -- CMD_ERR("Resource %s not found: %s\n", crm_str(rsc_id), pcmk_strerror(rc)); -+ CMD_ERR("Resource '%s' not found: %s\n", crm_str(rsc_id), pcmk_strerror(rc)); - - } else if (rsc_cmd == 'W') { - if (rsc_id == NULL) { -@@ -1748,57 +1851,141 @@ main(int argc, char **argv) - rc = dump_resource(rsc_id, &data_set, FALSE); - - } else if (rsc_cmd == 'U') { -+ node_t *dest = NULL; -+ - if (rsc_id == NULL) { -- CMD_ERR("Must supply a resource id with -r\n"); -+ CMD_ERR("No value specified for --resource\n"); - rc = -ENXIO; - goto bail; - } -- /* coverity[var_deref_model] False positive */ -- rc = move_resource(rsc_id, NULL, NULL, cib_conn); - -- } else if (rsc_cmd == 'M') { -- node_t *dest = NULL; -- node_t *current = NULL; -- const char *current_uname = NULL; -+ if (host_uname) { -+ dest = pe_find_node(data_set.nodes, host_uname); -+ if (dest == NULL) { -+ CMD_ERR("Unknown node: %s\n", host_uname); -+ rc = -ENXIO; -+ goto bail; -+ } -+ rc = clear_resource(rsc_id, dest->details->uname, NULL, cib_conn); -+ -+ } else { -+ rc = clear_resource(rsc_id, NULL, data_set.nodes, cib_conn); -+ } -+ -+ } else if (rsc_cmd == 'M' && host_uname) { - resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ node_t *dest = pe_find_node(data_set.nodes, host_uname); -+ -+ rc = -EINVAL; -+ -+ if (rsc == NULL) { -+ CMD_ERR("Resource '%s' not moved: not found\n", rsc_id); -+ rc = -ENXIO; -+ goto bail; -+ -+ } else if(rsc->variant == pe_clone) { -+ CMD_ERR("Resource '%s' not moved: moving a clone makes no sense\n", rsc_id); -+ goto bail; -+ -+ } else if (rsc->variant < pe_clone && g_list_length(rsc->running_on) > 1) { -+ CMD_ERR("Resource '%s' not moved: active on multiple nodes\n", rsc_id); -+ goto bail; -+ } - -- if (rsc != NULL && rsc->running_on != NULL) { -- current = rsc->running_on->data; -- if (current != NULL) { -- current_uname = current->details->uname; -+ if(dest == NULL) { -+ CMD_ERR("Error performing operation: node '%s' is unknown\n", host_uname); -+ rc = -ENXIO; -+ goto bail; -+ } -+ -+ if(g_list_length(rsc->running_on) == 1) { -+ node_t *current = rsc->running_on->data; -+ -+ if (safe_str_eq(current->details->uname, dest->details->uname)) { -+ CMD_ERR("Error performing operation: %s is already active on %s\n", rsc_id, dest->details->uname); -+ goto bail; - } -+ /* } else if (rsc->variant == pe_master) { Find the master and ban it */ - } - -- if (host_uname != NULL) { -- dest = pe_find_node(data_set.nodes, host_uname); -+ /* Clear any previous constraints for 'dest' */ -+ clear_resource(rsc_id, dest->details->uname, data_set.nodes, cib_conn); -+ -+ /* Record an explicit preference for 'dest' */ -+ rc = prefer_resource(rsc_id, dest->details->uname, cib_conn); -+ -+ if(do_force && g_list_length(rsc->running_on) == 1) { -+ 
node_t *current = rsc->running_on->data; -+ -+ /* Ban the original location */ -+ ban_resource(rsc_id, current->details->uname, NULL, cib_conn); - } - -- if (rsc == NULL) { -- CMD_ERR("Resource %s not moved: not found\n", rsc_id); -+ } else if (rsc_cmd == 'B' && host_uname) { -+ resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ node_t *dest = pe_find_node(data_set.nodes, host_uname); - -- } else if (rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) { -- CMD_ERR("Resource %s not moved: active on multiple nodes\n", rsc_id); -+ rc = -ENXIO; -+ if (rsc_id == NULL) { -+ CMD_ERR("No value specified for --resource\n"); -+ goto bail; -+ } else if(rsc == NULL) { -+ CMD_ERR("Resource '%s' not moved: unknown\n", rsc_id); - -- } else if (host_uname != NULL && dest == NULL) { -- CMD_ERR("Error performing operation: %s is not a known node\n", host_uname); -- rc = -ENXIO; -+ } else if (dest == NULL) { -+ CMD_ERR("Error performing operation: node '%s' is unknown\n", host_uname); -+ goto bail; -+ } -+ rc = ban_resource(rsc_id, dest->details->uname, NULL, cib_conn); -+ -+ } else if (rsc_cmd == 'B' || rsc_cmd == 'M') { -+ resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ -+ rc = -ENXIO; -+ if (rsc_id == NULL) { -+ CMD_ERR("No value specified for --resource\n"); -+ goto bail; -+ } - -- } else if (host_uname != NULL && safe_str_eq(current_uname, host_uname)) { -- CMD_ERR("Error performing operation: " -- "%s is already active on %s\n", rsc_id, host_uname); -+ rc = -EINVAL; -+ if(rsc == NULL) { -+ CMD_ERR("Resource '%s' not moved: unknown\n", rsc_id); -+ -+ } else if(g_list_length(rsc->running_on) == 1) { -+ node_t *current = rsc->running_on->data; -+ rc = ban_resource(rsc_id, current->details->uname, NULL, cib_conn); -+ -+ } else if(scope_master && rsc->variant == pe_master) { -+ int count = 0; -+ GListPtr iter = NULL; -+ node_t *current = NULL; -+ -+ for(iter = rsc->children; iter; iter = iter->next) { -+ resource_t *child = (resource_t *)iter->data; -+ if(child->role == RSC_ROLE_MASTER) { -+ count++; -+ current = child->running_on->data; -+ } -+ } - -- } else if (current_uname != NULL && (do_force || host_uname == NULL)) { -- /* coverity[var_deref_model] False positive */ -- rc = move_resource(rsc_id, current_uname, host_uname, cib_conn); -+ if(count == 1 && current) { -+ rc = ban_resource(rsc_id, current->details->uname, NULL, cib_conn); - -- } else if (host_uname != NULL) { -- /* coverity[var_deref_model] False positive */ -- rc = move_resource(rsc_id, NULL, host_uname, cib_conn); -+ } else { -+ CMD_ERR("Resource '%s' not moved: currently promoted in %d locations.\n", rsc_id, count); -+ CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" -+ " --ban --master --host <name>\n", rsc_id); -+ } - - } else { -- CMD_ERR("Resource %s not moved: not-active and no preferred location specified.\n", -- rsc_id); -- rc = -EINVAL; -+ CMD_ERR("Resource '%s' not moved: active in %d locations.\n", rsc_id, g_list_length(rsc->running_on)); -+ CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --host <name>\n", rsc_id); -+ -+ if(rsc->variant == pe_master && g_list_length(rsc->running_on) > 0) { -+ CMD_ERR("You can prevent '%s' from being promoted at its current location with: --ban --master\n", rsc_id); -+ CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" -+ " --ban --master --host <name>\n", rsc_id); -+ } - } - - } else if (rsc_cmd == 'G') { -@@ -1870,22 +2057,31 @@ main(int argc, char **argv) - /* 
coverity[var_deref_model] False positive */ - rc = delete_resource_attr(rsc_id, prop_set, prop_id, prop_name, cib_conn, &data_set); - -- } else if (rsc_cmd == 'P') { -+ } else if (rsc_cmd == 'C' && rsc_id) { -+ resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ -+ crm_debug("Re-checking the state of %s on %s", rsc_id, host_uname); -+ if(rsc) { -+ rc = delete_lrm_rsc(cib_conn, crmd_channel, host_uname, rsc, &data_set); -+ } else { -+ rc = -ENODEV; -+ } -+ -+ if (rc == pcmk_ok) { -+ start_mainloop(); -+ } -+ -+ } else if (rsc_cmd == 'C') { - xmlNode *cmd = create_request(CRM_OP_REPROBE, NULL, host_uname, - CRM_SYSTEM_CRMD, crm_system_name, our_pid); - -+ crm_debug("Re-checking the state of all resources on %s", host_uname); - if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { - start_mainloop(); - } - - free_xml(cmd); - -- } else if (rsc_cmd == 'R') { -- rc = refresh_lrm(crmd_channel, host_uname); -- if (rc == pcmk_ok) { -- start_mainloop(); -- } -- - } else if (rsc_cmd == 'D') { - xmlNode *msg_data = NULL; - -diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c -index 38ebe26..6b9835b 100644 ---- a/tools/crm_simulate.c -+++ b/tools/crm_simulate.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2009 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -46,8 +46,6 @@ gboolean bringing_nodes_online = FALSE; - #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" - /* #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" */ - --#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -- - #define quiet_log(fmt, args...) 
do { \ - if(quiet == FALSE) { \ - printf(fmt , ##args); \ -@@ -135,14 +133,14 @@ inject_node_state(cib_t * cib_conn, const char *node, const char *uuid) - if (cib_object && ID(cib_object) == NULL) { - crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath); - crm_log_xml_warn(cib_object, "Duplicates"); -- crm_exit(1); -+ crm_exit(ENOTUNIQ); - } - - if (rc == -ENXIO) { - char *found_uuid = NULL; - - if (uuid == NULL) { -- query_node_uuid(cib_conn, node, &found_uuid); -+ query_node_uuid(cib_conn, node, &found_uuid, NULL); - } else { - found_uuid = strdup(uuid); - } -@@ -243,6 +241,9 @@ inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, co - } else if (safe_str_neq(rclass, "ocf") - && safe_str_neq(rclass, "stonith") - && safe_str_neq(rclass, "heartbeat") -+ && safe_str_neq(rclass, "service") -+ && safe_str_neq(rclass, "upstart") -+ && safe_str_neq(rclass, "systemd") - && safe_str_neq(rclass, "lsb")) { - fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass); - return NULL; -@@ -294,6 +295,8 @@ create_op(xmlNode * cib_resource, const char *task, int interval, int outcome) - op->rc = outcome; - op->op_status = 0; - op->params = NULL; /* TODO: Fill me in */ -+ op->t_run = time(NULL); -+ op->t_rcchange = op->t_run; - - op->call_id = 0; - for (xop = __xml_first_child(cib_resource); xop != NULL; xop = __xml_next(xop)) { -@@ -362,7 +365,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - GListPtr gIter = NULL; - lrmd_event_data_t *op = NULL; - int target_outcome = 0; -- gboolean uname_is_uuid = TRUE; -+ gboolean uname_is_uuid = FALSE; - - const char *rtype = NULL; - const char *rclass = NULL; -@@ -376,6 +379,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); - - char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); -+ char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID); - const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); - - if (safe_str_eq(operation, "probe_complete")) { -@@ -385,7 +389,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - - if (action_rsc == NULL) { - crm_log_xml_err(action->xml, "Bad"); -- free(node); -+ free(node); free(uuid); - return FALSE; - } - -@@ -422,7 +426,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - uname_is_uuid = TRUE; - } - -- cib_node = inject_node_state(global_cib, node, uname_is_uuid ? node : NULL); -+ cib_node = inject_node_state(global_cib, node, uname_is_uuid ? 
node : uuid); - CRM_ASSERT(cib_node != NULL); - - cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); -@@ -464,7 +468,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - CRM_ASSERT(rc == pcmk_ok); - - done: -- free(node); -+ free(node); free(uuid); - free_xml(cib_node); - action->confirmed = TRUE; - update_graph(graph, action); -@@ -476,10 +480,15 @@ exec_crmd_action(crm_graph_t * graph, crm_action_t * action) - { - const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); - const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); -+ xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); - - action->confirmed = TRUE; - -- quiet_log(" * Cluster action: %s on %s\n", task, node); -+ if(rsc) { -+ quiet_log(" * Cluster action: %s for %s on %s\n", task, ID(rsc), node); -+ } else { -+ quiet_log(" * Cluster action: %s on %s\n", task, node); -+ } - update_graph(graph, action); - return TRUE; - } -@@ -528,6 +537,13 @@ print_cluster_status(pe_working_set_t * data_set) - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; - const char *node_mode = NULL; -+ char *node_name = NULL; -+ -+ if(node->details->remote_rsc) { -+ node_name = g_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); -+ } else { -+ node_name = g_strdup_printf("%s", node->details->uname); -+ } - - if (node->details->unclean) { - if (node->details->online && node->details->unclean) { -@@ -555,20 +571,23 @@ print_cluster_status(pe_working_set_t * data_set) - - } else if (node->details->online) { - node_mode = "online"; -- online_nodes = add_list_element(online_nodes, node->details->uname); -+ online_nodes = add_list_element(online_nodes, node_name); -+ free(node_name); - continue; - - } else { - node_mode = "OFFLINE"; -- offline_nodes = add_list_element(offline_nodes, node->details->uname); -+ offline_nodes = add_list_element(offline_nodes, node_name); -+ free(node_name); - continue; - } - -- if (safe_str_eq(node->details->uname, node->details->id)) { -- printf("Node %s: %s\n", node->details->uname, node_mode); -+ if (safe_str_eq(node_name, node->details->id)) { -+ printf("Node %s: %s\n", node_name, node_mode); - } else { -- printf("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); -+ printf("Node %s (%s): %s\n", node_name, node->details->id, node_mode); - } -+ free(node_name); - } - - if (online_nodes) { -@@ -925,6 +944,7 @@ modify_configuration(pe_working_set_t * data_set, - rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, - cib_sync_call | cib_scope_local); - CRM_ASSERT(rc == pcmk_ok); -+ free_xml(cib_node); - } - - for (gIter = node_down; gIter != NULL; gIter = gIter->next) { -@@ -937,6 +957,7 @@ modify_configuration(pe_working_set_t * data_set, - rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, - cib_sync_call | cib_scope_local); - CRM_ASSERT(rc == pcmk_ok); -+ free_xml(cib_node); - } - - for (gIter = node_fail; gIter != NULL; gIter = gIter->next) { -@@ -950,6 +971,7 @@ modify_configuration(pe_working_set_t * data_set, - rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, - cib_sync_call | cib_scope_local); - CRM_ASSERT(rc == pcmk_ok); -+ free_xml(cib_node); - } - - for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) { -@@ -1077,7 +1099,7 @@ setup_input(const char *input, const char *output) - - if (cib_object == NULL) { - fprintf(stderr, "Live CIB query 
failed: empty result\n"); -- crm_exit(3); -+ crm_exit(ENOTCONN); - } - - } else if (safe_str_eq(input, "-")) { -@@ -1093,12 +1115,12 @@ setup_input(const char *input, const char *output) - - if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { - free_xml(cib_object); -- crm_exit(-ENOKEY); -+ crm_exit(ENOKEY); - } - - if (validate_xml(cib_object, NULL, FALSE) != TRUE) { - free_xml(cib_object); -- crm_exit(-pcmk_err_dtd_validation); -+ crm_exit(pcmk_err_dtd_validation); - } - - if (output == NULL) { -@@ -1158,13 +1180,13 @@ static struct crm_option long_options[] = { - {"ticket-activate", 1, 0, 'e', "Activate a ticket"}, - - {"-spacer-", 0, 0, '-', "\nOutput Options:"}, -- -+ - {"save-input", 1, 0, 'I', "\tSave the input configuration to the named file"}, - {"save-output", 1, 0, 'O', "Save the output configuration to the named file"}, - {"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"}, - {"save-dotfile", 1, 0, 'D', "Save the transition graph (DOT format) to the named file"}, - {"all-actions", 0, 0, 'a', "\tDisplay all possible actions in the DOT graph - even ones not part of the transition"}, -- -+ - {"-spacer-", 0, 0, '-', "\nData Source:"}, - {"live-check", 0, 0, 'L', "\tConnect to the CIB and use the current contents as input"}, - {"xml-file", 1, 0, 'x', "\tRetrieve XML from the named file"}, -@@ -1175,7 +1197,7 @@ static struct crm_option long_options[] = { - {"-spacer-", 0, 0, '-', " crm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 --op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml", pcmk_option_example}, - {"-spacer-", 0, 0, '-', "Now see what the reaction to the stop failure would be", pcmk_option_paragraph}, - {"-spacer-", 0, 0, '-', " crm_simulate -S --xml-file /tmp/memcached-test.xml", pcmk_option_example}, -- -+ - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -1495,20 +1517,7 @@ main(int argc, char **argv) - input = NULL; /* Don't try and free it twice */ - - if (graph_file != NULL) { -- char *msg_buffer = dump_xml_formatted(data_set.graph); -- FILE *graph_strm = fopen(graph_file, "w"); -- -- if (graph_strm == NULL) { -- crm_perror(LOG_ERR, "Could not open %s for writing", graph_file); -- -- } else { -- if (fprintf(graph_strm, "%s\n", msg_buffer) < 0) { -- crm_perror(LOG_ERR, "Write to %s failed", graph_file); -- } -- fflush(graph_strm); -- fclose(graph_strm); -- } -- free(msg_buffer); -+ write_xml_file(data_set.graph, graph_file, FALSE); - } - - if (dot_file != NULL) { -diff --git a/tools/crmadmin.c b/tools/crmadmin.c -index ca0a318..c7ac30f 100644 ---- a/tools/crmadmin.c -+++ b/tools/crmadmin.c -@@ -391,7 +391,7 @@ crmadmin_ipc_connection_destroy(gpointer user_data) - if (mainloop) { - g_main_quit(mainloop); - } else { -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - } - -@@ -503,7 +503,7 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) - if (BE_SILENT && dc != NULL) { - fprintf(stderr, "%s\n", dc); - } -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - } - -@@ -512,7 +512,7 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) - if (received_responses >= expected_responses) { - crm_trace("Received expected number (%d) of messages from Heartbeat." 
- " Exiting normally.", expected_responses); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); -diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c -index 07acad5..b24c7bc 100644 ---- a/tools/ipmiservicelogd.c -+++ b/tools/ipmiservicelogd.c -@@ -511,7 +511,7 @@ entity_change(enum ipmi_update_e op, ipmi_domain_t * domain, ipmi_entity_t * ent - rv = ipmi_entity_add_sensor_update_handler(entity, sensor_change, entity); - if (rv) { - crm_err("ipmi_entity_set_sensor_update_handler: 0x%x", rv); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - } - } -@@ -548,7 +548,7 @@ main(int argc, char *argv[]) - os_hnd = ipmi_posix_setup_os_handler(); - if (!os_hnd) { - crm_err("ipmi_smi_setup_con: Unable to allocate os handler"); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - - /* Initialize the OpenIPMI library. */ -@@ -559,7 +559,7 @@ main(int argc, char *argv[]) - if (rv) { - crm_err("Error parsing command arguments, argument %d: %s", curr_arg, strerror(rv)); - usage(argv[0]); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - #endif - -@@ -572,7 +572,7 @@ main(int argc, char *argv[]) - if (rv) { - crm_err("ipmi_ip_setup_con: %s", strerror(rv)); - crm_err("Error: Is IPMI configured correctly?"); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - #else - /* If all you need is an SMI connection, this is all the code you -@@ -587,14 +587,14 @@ main(int argc, char *argv[]) - if (rv) { - crm_err("ipmi_smi_setup_con: %s", strerror(rv)); - crm_err("Error: Is IPMI configured correctly?"); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - #endif - - rv = ipmi_open_domain("", &con, 1, setup_done, NULL, NULL, NULL, NULL, 0, NULL); - if (rv) { - crm_err("ipmi_init_domain: %s", strerror(rv)); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - - /* This is the main loop of the event-driven program. 
-diff --git a/tools/regression.exp b/tools/regression.exp -index 1e9bd44..ef27353 100755 ---- a/tools/regression.exp -+++ b/tools/regression.exp -@@ -1,103 +1,244 @@ -+=#=#=#= Begin test: 2006-W01-7 =#=#=#= - Date: 2006-01-08 00:00:00Z --Date: 2006-W01-7 00:00:00Z -+=#=#=#= End test: 2006-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2006-W01-7 -+=#=#=#= Begin test: 2006-W01-7 - round-trip =#=#=#= -+Date: 2006-W01-7 00:00:00Z -+=#=#=#= End test: 2006-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2006-W01-7 - round-trip -+=#=#=#= Begin test: 2006-W01-1 =#=#=#= - Date: 2006-01-02 00:00:00Z --Date: 2006-W01-1 00:00:00Z -+=#=#=#= End test: 2006-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2006-W01-1 -+=#=#=#= Begin test: 2006-W01-1 - round-trip =#=#=#= -+Date: 2006-W01-1 00:00:00Z -+=#=#=#= End test: 2006-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2006-W01-1 - round-trip -+=#=#=#= Begin test: 2007-W01-7 =#=#=#= - Date: 2007-01-07 00:00:00Z --Date: 2007-W01-7 00:00:00Z -+=#=#=#= End test: 2007-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2007-W01-7 -+=#=#=#= Begin test: 2007-W01-7 - round-trip =#=#=#= -+Date: 2007-W01-7 00:00:00Z -+=#=#=#= End test: 2007-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2007-W01-7 - round-trip -+=#=#=#= Begin test: 2007-W01-1 =#=#=#= - Date: 2007-01-01 00:00:00Z --Date: 2007-W01-1 00:00:00Z -+=#=#=#= End test: 2007-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2007-W01-1 -+=#=#=#= Begin test: 2007-W01-1 - round-trip =#=#=#= -+Date: 2007-W01-1 00:00:00Z -+=#=#=#= End test: 2007-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2007-W01-1 - round-trip -+=#=#=#= Begin test: 2008-W01-7 =#=#=#= - Date: 2008-01-06 00:00:00Z --Date: 2008-W01-7 00:00:00Z -+=#=#=#= End test: 2008-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2008-W01-7 -+=#=#=#= Begin test: 2008-W01-7 - round-trip =#=#=#= -+Date: 2008-W01-7 00:00:00Z -+=#=#=#= End test: 2008-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2008-W01-7 - round-trip -+=#=#=#= Begin test: 2008-W01-1 =#=#=#= - Date: 2007-12-31 00:00:00Z --Date: 2008-W01-1 00:00:00Z -+=#=#=#= End test: 2008-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2008-W01-1 -+=#=#=#= Begin test: 2008-W01-1 - round-trip =#=#=#= -+Date: 2008-W01-1 00:00:00Z -+=#=#=#= End test: 2008-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2008-W01-1 - round-trip -+=#=#=#= Begin test: 2009-W01-7 =#=#=#= - Date: 2009-01-04 00:00:00Z --Date: 2009-W01-7 00:00:00Z -+=#=#=#= End test: 2009-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2009-W01-7 -+=#=#=#= Begin test: 2009-W01-7 - round-trip =#=#=#= -+Date: 2009-W01-7 00:00:00Z -+=#=#=#= End test: 2009-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2009-W01-7 - round-trip -+=#=#=#= Begin test: 2009-W01-1 =#=#=#= - Date: 2008-12-29 00:00:00Z --Date: 2009-W01-1 00:00:00Z -+=#=#=#= End test: 2009-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2009-W01-1 -+=#=#=#= Begin test: 2009-W01-1 - round-trip =#=#=#= -+Date: 2009-W01-1 00:00:00Z -+=#=#=#= End test: 2009-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2009-W01-1 - round-trip -+=#=#=#= Begin test: 2010-W01-7 =#=#=#= - Date: 2010-01-10 00:00:00Z --Date: 2010-W01-7 00:00:00Z -+=#=#=#= End test: 2010-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2010-W01-7 -+=#=#=#= Begin test: 2010-W01-7 - round-trip =#=#=#= -+Date: 2010-W01-7 00:00:00Z -+=#=#=#= End test: 2010-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2010-W01-7 - round-trip -+=#=#=#= Begin test: 2010-W01-1 =#=#=#= - Date: 
2010-01-04 00:00:00Z --Date: 2010-W01-1 00:00:00Z -+=#=#=#= End test: 2010-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2010-W01-1 -+=#=#=#= Begin test: 2010-W01-1 - round-trip =#=#=#= -+Date: 2010-W01-1 00:00:00Z -+=#=#=#= End test: 2010-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2010-W01-1 - round-trip -+=#=#=#= Begin test: 2011-W01-7 =#=#=#= - Date: 2011-01-09 00:00:00Z --Date: 2011-W01-7 00:00:00Z -+=#=#=#= End test: 2011-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2011-W01-7 -+=#=#=#= Begin test: 2011-W01-7 - round-trip =#=#=#= -+Date: 2011-W01-7 00:00:00Z -+=#=#=#= End test: 2011-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2011-W01-7 - round-trip -+=#=#=#= Begin test: 2011-W01-1 =#=#=#= - Date: 2011-01-03 00:00:00Z --Date: 2011-W01-1 00:00:00Z -+=#=#=#= End test: 2011-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2011-W01-1 -+=#=#=#= Begin test: 2011-W01-1 - round-trip =#=#=#= -+Date: 2011-W01-1 00:00:00Z -+=#=#=#= End test: 2011-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2011-W01-1 - round-trip -+=#=#=#= Begin test: 2012-W01-7 =#=#=#= - Date: 2012-01-08 00:00:00Z --Date: 2012-W01-7 00:00:00Z -+=#=#=#= End test: 2012-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2012-W01-7 -+=#=#=#= Begin test: 2012-W01-7 - round-trip =#=#=#= -+Date: 2012-W01-7 00:00:00Z -+=#=#=#= End test: 2012-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2012-W01-7 - round-trip -+=#=#=#= Begin test: 2012-W01-1 =#=#=#= - Date: 2012-01-02 00:00:00Z --Date: 2012-W01-1 00:00:00Z -+=#=#=#= End test: 2012-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2012-W01-1 -+=#=#=#= Begin test: 2012-W01-1 - round-trip =#=#=#= -+Date: 2012-W01-1 00:00:00Z -+=#=#=#= End test: 2012-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2012-W01-1 - round-trip -+=#=#=#= Begin test: 2013-W01-7 =#=#=#= - Date: 2013-01-06 00:00:00Z --Date: 2013-W01-7 00:00:00Z -+=#=#=#= End test: 2013-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2013-W01-7 -+=#=#=#= Begin test: 2013-W01-7 - round-trip =#=#=#= -+Date: 2013-W01-7 00:00:00Z -+=#=#=#= End test: 2013-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2013-W01-7 - round-trip -+=#=#=#= Begin test: 2013-W01-1 =#=#=#= - Date: 2012-12-31 00:00:00Z --Date: 2013-W01-1 00:00:00Z -+=#=#=#= End test: 2013-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2013-W01-1 -+=#=#=#= Begin test: 2013-W01-1 - round-trip =#=#=#= -+Date: 2013-W01-1 00:00:00Z -+=#=#=#= End test: 2013-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2013-W01-1 - round-trip -+=#=#=#= Begin test: 2014-W01-7 =#=#=#= - Date: 2014-01-05 00:00:00Z --Date: 2014-W01-7 00:00:00Z -+=#=#=#= End test: 2014-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2014-W01-7 -+=#=#=#= Begin test: 2014-W01-7 - round-trip =#=#=#= -+Date: 2014-W01-7 00:00:00Z -+=#=#=#= End test: 2014-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2014-W01-7 - round-trip -+=#=#=#= Begin test: 2014-W01-1 =#=#=#= - Date: 2013-12-30 00:00:00Z --Date: 2014-W01-1 00:00:00Z -+=#=#=#= End test: 2014-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2014-W01-1 -+=#=#=#= Begin test: 2014-W01-1 - round-trip =#=#=#= -+Date: 2014-W01-1 00:00:00Z -+=#=#=#= End test: 2014-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2014-W01-1 - round-trip -+=#=#=#= Begin test: 2015-W01-7 =#=#=#= - Date: 2015-01-04 00:00:00Z --Date: 2015-W01-7 00:00:00Z -+=#=#=#= End test: 2015-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2015-W01-7 -+=#=#=#= Begin test: 2015-W01-7 - round-trip =#=#=#= -+Date: 2015-W01-7 00:00:00Z -+=#=#=#= 
End test: 2015-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2015-W01-7 - round-trip -+=#=#=#= Begin test: 2015-W01-1 =#=#=#= - Date: 2014-12-29 00:00:00Z --Date: 2015-W01-1 00:00:00Z -+=#=#=#= End test: 2015-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2015-W01-1 -+=#=#=#= Begin test: 2015-W01-1 - round-trip =#=#=#= -+Date: 2015-W01-1 00:00:00Z -+=#=#=#= End test: 2015-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2015-W01-1 - round-trip -+=#=#=#= Begin test: 2016-W01-7 =#=#=#= - Date: 2016-01-10 00:00:00Z --Date: 2016-W01-7 00:00:00Z -+=#=#=#= End test: 2016-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2016-W01-7 -+=#=#=#= Begin test: 2016-W01-7 - round-trip =#=#=#= -+Date: 2016-W01-7 00:00:00Z -+=#=#=#= End test: 2016-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2016-W01-7 - round-trip -+=#=#=#= Begin test: 2016-W01-1 =#=#=#= - Date: 2016-01-04 00:00:00Z --Date: 2016-W01-1 00:00:00Z -+=#=#=#= End test: 2016-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2016-W01-1 -+=#=#=#= Begin test: 2016-W01-1 - round-trip =#=#=#= -+Date: 2016-W01-1 00:00:00Z -+=#=#=#= End test: 2016-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2016-W01-1 - round-trip -+=#=#=#= Begin test: 2017-W01-7 =#=#=#= - Date: 2017-01-08 00:00:00Z --Date: 2017-W01-7 00:00:00Z -+=#=#=#= End test: 2017-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2017-W01-7 -+=#=#=#= Begin test: 2017-W01-7 - round-trip =#=#=#= -+Date: 2017-W01-7 00:00:00Z -+=#=#=#= End test: 2017-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2017-W01-7 - round-trip -+=#=#=#= Begin test: 2017-W01-1 =#=#=#= - Date: 2017-01-02 00:00:00Z --Date: 2017-W01-1 00:00:00Z -+=#=#=#= End test: 2017-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2017-W01-1 -+=#=#=#= Begin test: 2017-W01-1 - round-trip =#=#=#= -+Date: 2017-W01-1 00:00:00Z -+=#=#=#= End test: 2017-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2017-W01-1 - round-trip -+=#=#=#= Begin test: 2018-W01-7 =#=#=#= - Date: 2018-01-07 00:00:00Z --Date: 2018-W01-7 00:00:00Z -+=#=#=#= End test: 2018-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2018-W01-7 -+=#=#=#= Begin test: 2018-W01-7 - round-trip =#=#=#= -+Date: 2018-W01-7 00:00:00Z -+=#=#=#= End test: 2018-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2018-W01-7 - round-trip -+=#=#=#= Begin test: 2018-W01-1 =#=#=#= - Date: 2018-01-01 00:00:00Z --Date: 2018-W01-1 00:00:00Z -+=#=#=#= End test: 2018-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2018-W01-1 -+=#=#=#= Begin test: 2018-W01-1 - round-trip =#=#=#= -+Date: 2018-W01-1 00:00:00Z -+=#=#=#= End test: 2018-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2018-W01-1 - round-trip -+=#=#=#= Begin test: 2009-W53-07 =#=#=#= - Date: 2009-W53-7 00:00:00Z -+=#=#=#= End test: 2009-W53-07 - OK (0) =#=#=#= - * Passed: iso8601 - 2009-W53-07 -+=#=#=#= Begin test: 2009-01-31 + 1 Month =#=#=#= - Date: 2009-01-31 00:00:00Z - Duration: 0000-01-00 00:00:00Z - Duration ends at: 2009-02-28 00:00:00Z -+=#=#=#= End test: 2009-01-31 + 1 Month - OK (0) =#=#=#= - * Passed: iso8601 - 2009-01-31 + 1 Month -+=#=#=#= Begin test: 2009-01-31 + 2 Months =#=#=#= - Date: 2009-01-31 00:00:00Z - Duration: 0000-02-00 00:00:00Z - Duration ends at: 2009-03-31 00:00:00Z -+=#=#=#= End test: 2009-01-31 + 2 Months - OK (0) =#=#=#= - * Passed: iso8601 - 2009-01-31 + 2 Months -+=#=#=#= Begin test: 2009-01-31 + 3 Months =#=#=#= - Date: 2009-01-31 00:00:00Z - Duration: 0000-03-00 00:00:00Z - Duration ends at: 2009-04-30 00:00:00Z -+=#=#=#= End test: 2009-01-31 + 3 Months - 
OK (0) =#=#=#= - * Passed: iso8601 - 2009-01-31 + 3 Months -+=#=#=#= Begin test: 2009-03-31 - 1 Month =#=#=#= - Date: 2009-03-31 00:00:00Z - Duration: 0000--01-00 00:00:00Z - Duration ends at: 2009-02-28 00:00:00Z -+=#=#=#= End test: 2009-03-31 - 1 Month - OK (0) =#=#=#= - * Passed: iso8601 - 2009-03-31 - 1 Month - Setting up shadow instance - A new shadow instance was created. To begin using it paste the following into your shell: - CIB_shadow=tools-regression ; export CIB_shadow -- -+=#=#=#= Begin test: Validate CIB =#=#=#= -+ - - - -@@ -106,8 +247,20 @@ A new shadow instance was created. To begin using it paste the following into y - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Validate CIB - OK (0) =#=#=#= -+* Passed: cibadmin - Validate CIB -+=#=#=#= Begin test: Require --force for CIB erasure =#=#=#= - The supplied command is considered dangerous. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. -- -+ - - - -@@ -116,8 +269,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Require --force for CIB erasure - Invalid argument (22) =#=#=#= - * Passed: cibadmin - Require --force for CIB erasure -- -+=#=#=#= Begin test: Allow CIB erasure with --force =#=#=#= -+ - - - -@@ -126,8 +281,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Allow CIB erasure with --force - OK (0) =#=#=#= - * Passed: cibadmin - Allow CIB erasure with --force -- -+=#=#=#= Begin test: Query CIB =#=#=#= -+ - - - -@@ -136,8 +293,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Query CIB - OK (0) =#=#=#= - * Passed: cibadmin - Query CIB -- -+=#=#=#= Begin test: Set cluster option =#=#=#= -+ - - - -@@ -150,9 +309,11 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Set cluster option - OK (0) =#=#=#= - * Passed: crm_attribute - Set cluster option -+=#=#=#= Begin test: Query new cluster option =#=#=#= - -- -+ - - - -@@ -165,8 +326,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Query new cluster option - OK (0) =#=#=#= - * Passed: cibadmin - Query new cluster option -- -+=#=#=#= Begin test: Query cluster options =#=#=#= -+ - - - -@@ -179,11 +342,32 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Query cluster options - OK (0) =#=#=#= - * Passed: cibadmin - Query cluster options -- -+=#=#=#= Begin test: Set no-quorum policy =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Set no-quorum policy - OK (0) =#=#=#= -+* Passed: crm_attribute - Set no-quorum policy -+=#=#=#= Begin test: Delete nvpair =#=#=#= -+ - - -- -+ -+ -+ - - - -@@ -191,7 +375,9 @@ The supplied command is considered dangerous. 
To prevent accidental destruction - - - -+=#=#=#= End test: Delete nvpair - OK (0) =#=#=#= - * Passed: cibadmin - Delete nvpair -+=#=#=#= Begin test: Create operaton should fail =#=#=#= - Call failed: Name not unique on network - - -@@ -200,10 +386,12 @@ Call failed: Name not unique on network - - - -- -+ - - -- -+ -+ -+ - - - -@@ -211,11 +399,14 @@ Call failed: Name not unique on network - - - --* Passed: cibadmin - Create operaton should fail with: -76, The object already exists -- -+=#=#=#= End test: Create operaton should fail - Name not unique on network (76) =#=#=#= -+* Passed: cibadmin - Create operaton should fail -+=#=#=#= Begin test: Modify cluster options section =#=#=#= -+ - - - -+ - - - -@@ -225,12 +416,15 @@ Call failed: Name not unique on network - - - -+=#=#=#= End test: Modify cluster options section - OK (0) =#=#=#= - * Passed: cibadmin - Modify cluster options section -+=#=#=#= Begin test: Query updated cluster option =#=#=#= - -- -+ - - - -+ - - - -@@ -240,11 +434,14 @@ Call failed: Name not unique on network - - - -+=#=#=#= End test: Query updated cluster option - OK (0) =#=#=#= - * Passed: cibadmin - Query updated cluster option -- -+=#=#=#= Begin test: Set duplicate cluster option =#=#=#= -+ - - - -+ - - - -@@ -257,12 +454,18 @@ Call failed: Name not unique on network - - - -+=#=#=#= End test: Set duplicate cluster option - OK (0) =#=#=#= - * Passed: crm_attribute - Set duplicate cluster option --Please choose from one of the matches above and suppy the 'id' with --attr-id -- -+=#=#=#= Begin test: Setting multiply defined cluster option should fail =#=#=#= -+Error performing operation: Name not unique on network -+Multiple attributes match name=cluster-delay -+ Value: 60s (id=cib-bootstrap-options-cluster-delay) -+ Value: 40s (id=duplicate-cluster-delay) -+ - - - -+ - - - -@@ -275,11 +478,14 @@ Please choose from one of the matches above and suppy the 'id' with --attr-id - - - --* Passed: crm_attribute - Setting multiply defined cluster option should fail with -216, Could not set cluster option -- -+=#=#=#= End test: Setting multiply defined cluster option should fail - Name not unique on network (76) =#=#=#= -+* Passed: crm_attribute - Setting multiply defined cluster option should fail -+=#=#=#= Begin test: Set cluster option with -s =#=#=#= -+ - - - -+ - - - -@@ -292,13 +498,17 @@ Please choose from one of the matches above and suppy the 'id' with --attr-id - - - -+=#=#=#= End test: Set cluster option with -s - OK (0) =#=#=#= - * Passed: crm_attribute - Set cluster option with -s -+=#=#=#= Begin test: Delete cluster option with -i =#=#=#= - Deleted crm_config option: id=(null) name=cluster-delay - -- -+ - - -- -+ -+ -+ - - - -@@ -309,55 +519,61 @@ Deleted crm_config option: id=(null) name=cluster-delay - - - -+=#=#=#= End test: Delete cluster option with -i - OK (0) =#=#=#= - * Passed: crm_attribute - Delete cluster option with -i -- -+=#=#=#= Begin test: Create node1 and bring it online =#=#=#= -+ -+Current cluster status: -+ -+ -+Performing requested modifications -+ + Bringing node node1 online -+ -+Transition Summary: -+ -+Executing cluster transition: -+ -+Revised cluster status: -+Online: [ node1 ] -+ -+ -+ - - -- -- -- -+ -+ - -- -- -- -- -- -- -- -- -- --* Passed: cibadmin - Create node entry -- -- -- -- - - - - - -- -+ - - - - - -- -+ - - --* Passed: cibadmin - Create node status entry -- -+=#=#=#= End test: Create node1 and bring it online - OK (0) =#=#=#= -+* Passed: crm_simulate - Create node1 and bring it online -+=#=#=#= Begin test: Create node 
attribute =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -365,23 +581,27 @@ Deleted crm_config option: id=(null) name=cluster-delay - - - -- -+ - - -+=#=#=#= End test: Create node attribute - OK (0) =#=#=#= - * Passed: crm_attribute - Create node attribute -- -- -+=#=#=#= Begin test: Query new node attribute =#=#=#= -+ -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -389,22 +609,26 @@ Deleted crm_config option: id=(null) name=cluster-delay - - - -- -+ - - -+=#=#=#= End test: Query new node attribute - OK (0) =#=#=#= - * Passed: cibadmin - Query new node attribute --Digest: -+=#=#=#= Begin test: Digest calculation =#=#=#= -+Digest: - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -412,23 +636,27 @@ Digest: -+ - - -+=#=#=#= End test: Digest calculation - OK (0) =#=#=#= - * Passed: cibadmin - Digest calculation -+=#=#=#= Begin test: Replace operation should fail =#=#=#= - Call failed: Update was older than existing configuration -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -436,24 +664,28 @@ Call failed: Update was older than existing configuration - - - -- -+ - - --* Passed: cibadmin - Replace operation should fail with: -45, Update was older than existing configuration -+=#=#=#= End test: Replace operation should fail - Update was older than existing configuration (205) =#=#=#= -+* Passed: cibadmin - Replace operation should fail -+=#=#=#= Begin test: Default standby value =#=#=#= - Error performing operation: No such device or address - scope=status name=standby value=off -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -461,23 +693,27 @@ scope=status name=standby value=off - - - -- -+ - - -+=#=#=#= End test: Default standby value - OK (0) =#=#=#= - * Passed: crm_standby - Default standby value -- -+=#=#=#= Begin test: Set standby status =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -- -+ -+ -+ -+ - - - -@@ -485,24 +721,28 @@ scope=status name=standby value=off - - - -- -+ - - -+=#=#=#= End test: Set standby status - OK (0) =#=#=#= - * Passed: crm_standby - Set standby status -+=#=#=#= Begin test: Query standby value =#=#=#= - scope=nodes name=standby value=true -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -- -+ -+ -+ -+ - - - -@@ -510,29 +750,33 @@ scope=nodes name=standby value=true - - - -- -+ - - -+=#=#=#= End test: Query standby value - OK (0) =#=#=#= - * Passed: crm_standby - Query standby value --Deleted nodes attribute: id=nodes-clusterNode-UUID-standby name=standby -+=#=#=#= Begin test: Delete standby value =#=#=#= -+Deleted nodes attribute: id=nodes-node1-standby name=standby - - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -540,22 +784,26 @@ Could not establish attrd connection: Connection refused (111) - - - -- -+ - - -+=#=#=#= End test: Delete standby value - OK (0) =#=#=#= - * Passed: crm_standby - Delete standby value -- -+=#=#=#= Begin test: Create a resource =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -565,22 +813,26 @@ Could not establish attrd connection: Connection refused (111) - - - -- -+ - - -+=#=#=#= End test: Create a resource - OK (0) =#=#=#= - * Passed: cibadmin - Create a resource -- -+=#=#=#= Begin test: Create a resource meta attribute =#=#=#= -+ - - -- 
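The expected output in this stretch tracks a resource meta-attribute round-trip against the shadow CIB; the commands under test, as defined further down in regression.sh, boil down to the following (a minimal sketch, shadow-CIB setup omitted):

    crm_resource -r dummy --meta -p is-managed -v false   # create the meta attribute
    crm_resource -r dummy --meta -g is-managed            # query it; prints "false"
    crm_resource -r dummy --meta -d is-managed            # delete it again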
-+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -594,23 +846,27 @@ Could not establish attrd connection: Connection refused (111) - - - -- -+ - - -+=#=#=#= End test: Create a resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Create a resource meta attribute -+=#=#=#= Begin test: Query a resource meta attribute =#=#=#= - false -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -624,23 +880,27 @@ false - - - -- -+ - - -+=#=#=#= End test: Query a resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Query a resource meta attribute -+=#=#=#= Begin test: Remove a resource meta attribute =#=#=#= - Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -652,22 +912,26 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed - - - -- -+ - - -+=#=#=#= End test: Remove a resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Remove a resource meta attribute -- -+=#=#=#= Begin test: Create a resource attribute =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -682,23 +946,27 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed - - - -- -+ - - -+=#=#=#= End test: Create a resource attribute - OK (0) =#=#=#= - * Passed: crm_resource - Create a resource attribute -+=#=#=#= Begin test: List the configured resources =#=#=#= - dummy (ocf::pacemaker:Dummy): Stopped -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -713,27 +981,31 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed - - - -- -+ - - -+=#=#=#= End test: List the configured resources - OK (0) =#=#=#= - * Passed: crm_resource - List the configured resources -+=#=#=#= Begin test: Set a resource's fail-count =#=#=#= - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -748,30 +1020,35 @@ Could not establish attrd connection: Connection refused (111) - - - -- -- -- -- -+ -+ -+ -+ - - - - - --* Passed: crm_resource - Set a resource's fail-count --Resource dummy not moved: not-active and no preferred location specified. -+=#=#=#= End test: Set a resource's fail-count - OK (0) =#=#=#= -+* Passed: crm_failcount - Set a resource's fail-count -+=#=#=#= Begin test: Require a destination when migrating a resource that is stopped =#=#=#= -+Resource 'dummy' not moved: active in 0 locations. 
-+You can prevent 'dummy' from running on a specific location with: --ban --host - Error performing operation: Invalid argument -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -786,30 +1063,34 @@ Error performing operation: Invalid argument - - - -- -- -- -- -+ -+ -+ -+ - - - - - -+=#=#=#= End test: Require a destination when migrating a resource that is stopped - Invalid argument (22) =#=#=#= - * Passed: crm_resource - Require a destination when migrating a resource that is stopped --Error performing operation: i.dont.exist is not a known node -+=#=#=#= Begin test: Don't support migration to non-existant locations =#=#=#= -+Error performing operation: node 'i.dont.exist' is unknown - Error performing operation: No such device or address -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -824,28 +1105,205 @@ Error performing operation: No such device or address - - - -- -- -- -- -+ -+ -+ -+ - - - - - -+=#=#=#= End test: Don't support migration to non-existant locations - No such device or address (6) =#=#=#= - * Passed: crm_resource - Don't support migration to non-existant locations -- -+=#=#=#= Begin test: Create a fencing resource =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Create a fencing resource - OK (0) =#=#=#= -+* Passed: cibadmin - Create a fencing resource -+=#=#=#= Begin test: Bring resources online =#=#=#= -+ -+Current cluster status: -+Online: [ node1 ] -+ -+ dummy (ocf::pacemaker:Dummy): Stopped -+ Fence (stonith:fence_true): Stopped -+ -+Transition Summary: -+ * Start dummy (node1) -+ * Start Fence (node1) -+ -+Executing cluster transition: -+ * Resource action: dummy monitor on node1 -+ * Resource action: Fence monitor on node1 -+ * Pseudo action: probe_complete -+ * Resource action: dummy start on node1 -+ * Resource action: Fence start on node1 -+ -+Revised cluster status: -+Online: [ node1 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node1 -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Bring resources online - OK (0) =#=#=#= -+* Passed: crm_simulate - Bring resources online -+=#=#=#= Begin test: Try to move a resource to its existing location =#=#=#= -+Error performing operation: dummy is already active on node1 -+Error performing operation: Invalid argument -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Try to move a resource to its existing location - Invalid argument (22) =#=#=#= -+* Passed: crm_resource - Try to move a resource to its existing location -+=#=#=#= Begin test: Move a resource from its existing location =#=#=#= -+WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. 
-+ This will prevent dummy from running on node1 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin -+ This will be the case even if node1 is the last node in the cluster -+ This message can be disabled with --quiet -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -856,38 +1314,49 @@ Error performing operation: No such device or address - - - -+ - - -- -- -- -- -- -+ - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - --* Passed: crm_resource - Migrate a resource -- -+=#=#=#= End test: Move a resource from its existing location - OK (0) =#=#=#= -+* Passed: crm_resource - Move a resource from its existing location -+=#=#=#= Begin test: Clear out constraints generated by --move =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -898,33 +1367,48 @@ Error performing operation: No such device or address - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - --* Passed: crm_resource - Un-migrate a resource -+=#=#=#= End test: Clear out constraints generated by --move - OK (0) =#=#=#= -+* Passed: crm_resource - Clear out constraints generated by --move -+=#=#=#= Begin test: Default ticket granted state =#=#=#= - false -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -935,32 +1419,47 @@ false - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -+=#=#=#= End test: Default ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Default ticket granted state -- -+=#=#=#= Begin test: Set ticket granted state =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -971,36 +1470,51 @@ false - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Set ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Set ticket granted state -+=#=#=#= Begin test: Query ticket granted state =#=#=#= - false -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1011,36 +1525,51 @@ false - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Query ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Query ticket granted state -+=#=#=#= Begin test: Delete ticket granted state =#=#=#= - Deleted ticketA state attribute: name=granted -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1051,35 +1580,50 @@ Deleted ticketA state attribute: name=granted - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Delete ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Delete ticket granted state -- -+=#=#=#= Begin test: Make a ticket standby =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1090,36 +1634,51 @@ Deleted ticketA state attribute: name=granted - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Make a ticket standby - OK (0) =#=#=#= - * Passed: crm_ticket - Make a ticket standby -+=#=#=#= Begin test: Query ticket standby state =#=#=#= - true -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1130,35 +1689,50 @@ true - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Query ticket standby state - OK (0) =#=#=#= - * Passed: crm_ticket - Query ticket standby state -- -+=#=#=#= Begin test: Activate a ticket =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1169,36 +1743,51 @@ true - - - -+ - - - - -- -- -- -- -+ 
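The ticket tests running through this stretch exercise the full crm_ticket attribute lifecycle; stripped of the harness banners and CIB dumps, the underlying command sequence is approximately:

    crm_ticket -t ticketA -G granted -d false   # query granted state, defaulting to false
    crm_ticket -t ticketA -r --force            # grant the ticket
    crm_ticket -t ticketA -G granted            # query the granted state again
    crm_ticket -t ticketA -D granted --force    # delete the granted state
    crm_ticket -t ticketA -s                    # put the ticket in standby
    crm_ticket -t ticketA -G standby            # confirm standby; prints "true"
    crm_ticket -t ticketA -a                    # activate the ticket again
    crm_ticket -t ticketA -D standby            # delete the standby state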
-+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Activate a ticket - OK (0) =#=#=#= - * Passed: crm_ticket - Activate a ticket -+=#=#=#= Begin test: Delete ticket standby state =#=#=#= - Deleted ticketA state attribute: name=standby -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1209,20 +1798,640 @@ Deleted ticketA state attribute: name=standby - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Delete ticket standby state - OK (0) =#=#=#= - * Passed: crm_ticket - Delete ticket standby state -+=#=#=#= Begin test: Ban a resource on unknown node =#=#=#= -+Error performing operation: node 'host1' is unknown -+Error performing operation: No such device or address -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Ban a resource on unknown node - No such device or address (6) =#=#=#= -+* Passed: crm_resource - Ban a resource on unknown node -+=#=#=#= Begin test: Create two more nodes and bring them online =#=#=#= -+ -+Current cluster status: -+Online: [ node1 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node1 -+ -+Performing requested modifications -+ + Bringing node node2 online -+ + Bringing node node3 online -+ -+Transition Summary: -+ * Move Fence (Started node1 -> node2) -+ -+Executing cluster transition: -+ * Resource action: dummy monitor on node3 -+ * Resource action: dummy monitor on node2 -+ * Resource action: Fence monitor on node3 -+ * Resource action: Fence monitor on node2 -+ * Pseudo action: probe_complete -+ * Resource action: Fence stop on node1 -+ * Pseudo action: all_stopped -+ * Resource action: Fence start on node2 -+ -+Revised cluster status: -+Online: [ node1 node2 node3 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node2 -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Create two more nodes and bring them online - OK (0) =#=#=#= -+* Passed: crm_simulate - Create two more nodes and bring them online -+=#=#=#= Begin test: Ban dummy from node1 =#=#=#= -+WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. -+ This will prevent dummy from running on node1 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin -+ This will be the case even if node1 is the last node in the cluster -+ This message can be disabled with --quiet -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Ban dummy from node1 - OK (0) =#=#=#= -+* Passed: crm_resource - Ban dummy from node1 -+=#=#=#= Begin test: Ban dummy from node2 =#=#=#= -+WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node2' with a score of -INFINITY for resource dummy on node2. 
-+ This will prevent dummy from running on node2 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin -+ This will be the case even if node2 is the last node in the cluster -+ This message can be disabled with --quiet -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Ban dummy from node2 - OK (0) =#=#=#= -+* Passed: crm_resource - Ban dummy from node2 -+=#=#=#= Begin test: Relocate resources due to ban =#=#=#= -+ -+Current cluster status: -+Online: [ node1 node2 node3 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node2 -+ -+Transition Summary: -+ * Move dummy (Started node1 -> node3) -+ -+Executing cluster transition: -+ * Resource action: dummy stop on node1 -+ * Pseudo action: all_stopped -+ * Resource action: dummy start on node3 -+ -+Revised cluster status: -+Online: [ node1 node2 node3 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node3 -+ Fence (stonith:fence_true): Started node2 -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Relocate resources due to ban - OK (0) =#=#=#= -+* Passed: crm_simulate - Relocate resources due to ban -+=#=#=#= Begin test: Move dummy to node1 =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Move dummy to node1 - OK (0) =#=#=#= -+* Passed: crm_resource - Move dummy to node1 -+=#=#=#= Begin test: Clear implicit constraints for dummy on node2 =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Clear implicit constraints for dummy on node2 - OK (0) =#=#=#= -+* Passed: crm_resource - Clear implicit constraints for dummy on node2 -diff --git a/tools/regression.sh b/tools/regression.sh -index 3b8d3d4..12e2bcf 100755 ---- a/tools/regression.sh -+++ b/tools/regression.sh -@@ -6,28 +6,30 @@ num_errors=0 - num_passed=0 - GREP_OPTIONS= - --function assert() { -- rc=$1; shift -+function test_assert() { - target=$1; shift -- app=$1; shift -- msg=$1; shift - cib=$1; shift -+ app=`echo "$cmd" | sed 's/\ .*//'` -+ printf "* Running: $app - $desc\n" 1>&2 - -- if [ x$cib = x0 ]; then -- : nothing -- else -+ printf "=#=#=#= Begin test: $desc =#=#=#=\n" -+ eval $VALGRIND_CMD $cmd 2>&1 -+ rc=$? 
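The rewritten helper stores each test as a string and evals it, so pipes and redirections embedded in $cmd (for example cibadmin -Q > /tmp/$$.opt.xml) are interpreted by the shell rather than passed as literal arguments; the core of that pattern in isolation (a minimal sketch, the echo pipeline is illustrative):

    VALGRIND_CMD=                     # optionally "valgrind -q ..." as set up above
    cmd="echo hello | tr a-z A-Z"
    eval $VALGRIND_CMD $cmd 2>&1      # re-parsed by the shell, so the pipe works
    rc=$?                             # exit status of the eval'd pipeline
    printf 'rc=%d\n' "$rc"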
-+ -+ if [ x$cib != x0 ]; then - cibadmin -Q - fi - -+ printf "=#=#=#= End test: $desc - `crm_error $rc` ($rc) =#=#=#=\n" -+ - if [ $rc -ne $target ]; then - num_errors=`expr $num_errors + 1` -- printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$msg" -- printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$msg" 1>&2 -+ printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc" -+ printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc" 1>&2 - return - exit 1 - else -- printf "* Passed: %-14s - %s\n" $app "$msg" -- printf "* Passed: %-14s - %s\n" $app "$msg" 1>&2 -+ printf "* Passed: %-14s - %s\n" $app "$desc" - - num_passed=`expr $num_passed + 1` - fi -@@ -44,9 +46,9 @@ VALGRIND_CMD= - while test "$done" = "0"; do - case "$1" in - -V|--verbose) verbose=1; shift;; -- -v|--valgrind) -+ -v|--valgrind) - export G_SLICE=always-malloc -- VALGRIND_CMD="valgrind -q --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=$test_home/cli.supp" -+ VALGRIND_CMD="valgrind -q --gen-suppressions=all --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions" - shift;; - -x) set -x; shift;; - -s) do_save=1; shift;; -@@ -66,164 +68,263 @@ function test_tools() { - export CIB_shadow_dir=$test_home - $VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow 2>&1 - export CIB_shadow=$shadow -- $VALGRIND_CMD cibadmin -Q 2>&1 -- -- $VALGRIND_CMD cibadmin -E 2>&1 -- assert $? 22 cibadmin "Require --force for CIB erasure" -- -- $VALGRIND_CMD cibadmin -E --force -- assert $? 0 cibadmin "Allow CIB erasure with --force" -- -- $VALGRIND_CMD cibadmin -Q > /tmp/$$.existing.xml -- assert $? 0 cibadmin "Query CIB" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 60s -- assert $? 0 crm_attribute "Set cluster option" -- -- $VALGRIND_CMD cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay -- assert $? 0 cibadmin "Query new cluster option" -- -- $VALGRIND_CMD cibadmin -Q -o crm_config > /tmp/$$.opt.xml -- assert $? 0 cibadmin "Query cluster options" -- -- $VALGRIND_CMD cibadmin -D -o crm_config --xml-text '' -- assert $? 0 cibadmin "Delete nvpair" -- -- $VALGRIND_CMD cibadmin -C -o crm_config --xml-file /tmp/$$.opt.xml 2>&1 -- assert $? 76 cibadmin "Create operaton should fail with: -76, The object already exists" -- -- $VALGRIND_CMD cibadmin -M -o crm_config --xml-file /tmp/$$.opt.xml -- assert $? 0 cibadmin "Modify cluster options section" -- -- $VALGRIND_CMD cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay -- assert $? 0 cibadmin "Query updated cluster option" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 40s -s duplicate -- assert $? 0 crm_attribute "Set duplicate cluster option" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 30s -- assert $? 234 crm_attribute "Setting multiply defined cluster option should fail with -216, Could not set cluster option" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 30s -s duplicate -- assert $? 0 crm_attribute "Set cluster option with -s" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay -- assert $? 0 crm_attribute "Delete cluster option with -i" -- -- $VALGRIND_CMD cibadmin -C -o nodes --xml-text '' -- assert $? 0 cibadmin "Create node entry" -- -- $VALGRIND_CMD cibadmin -C -o status --xml-text '' -- assert $? 
0 cibadmin "Create node status entry" -- -- $VALGRIND_CMD crm_attribute -n ram -v 1024M -U clusterNode-UNAME -t nodes -- assert $? 0 crm_attribute "Create node attribute" -- -- $VALGRIND_CMD cibadmin -Q -o nodes | grep clusterNode-UUID-ram -- assert $? 0 cibadmin "Query new node attribute" -- -- $VALGRIND_CMD cibadmin -Q | cibadmin -5 -p 2>&1 > /dev/null -- assert $? 0 cibadmin "Digest calculation" -- -- # This update will fail because it has version numbers -- $VALGRIND_CMD cibadmin -R --xml-file /tmp/$$.existing.xml 2>&1 -- assert $? 237 cibadmin "Replace operation should fail with: -45, Update was older than existing configuration" - -- crm_standby -N clusterNode-UNAME -G -- assert $? 0 crm_standby "Default standby value" -+ desc="Validate CIB" -+ cmd="cibadmin -Q" -+ test_assert 0 -+ -+ desc="Require --force for CIB erasure" -+ cmd="cibadmin -E" -+ test_assert 22 -+ -+ desc="Allow CIB erasure with --force" -+ cmd="cibadmin -E --force" -+ test_assert 0 - -- crm_standby -N clusterNode-UNAME -v true -- assert $? 0 crm_standby "Set standby status" -+ desc="Query CIB" -+ cmd="cibadmin -Q > /tmp/$$.existing.xml" -+ test_assert 0 - -- crm_standby -N clusterNode-UNAME -G -- assert $? 0 crm_standby "Query standby value" -- -- crm_standby -N clusterNode-UNAME -D 2>&1 -- assert $? 0 crm_standby "Delete standby value" -- -- $VALGRIND_CMD cibadmin -C -o resources --xml-text '' -- assert $? 0 cibadmin "Create a resource" -+ desc="Set cluster option" -+ cmd="crm_attribute -n cluster-delay -v 60s" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy --meta -p is-managed -v false -- assert $? 0 crm_resource "Create a resource meta attribute" -+ desc="Query new cluster option" -+ cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy --meta -g is-managed -- assert $? 0 crm_resource "Query a resource meta attribute" -+ desc="Query cluster options" -+ cmd="cibadmin -Q -o crm_config > /tmp/$$.opt.xml" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy --meta -d is-managed -- assert $? 0 crm_resource "Remove a resource meta attribute" -+ desc="Set no-quorum policy" -+ cmd="crm_attribute -n no-quorum-policy -v ignore" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -p delay -v 10s -- assert $? 0 crm_resource "Create a resource attribute" -+ desc="Delete nvpair" -+ cmd="cibadmin -D -o crm_config --xml-text ''" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -L -- assert $? 0 crm_resource "List the configured resources" -+ desc="Create operaton should fail" -+ cmd="cibadmin -C -o crm_config --xml-file /tmp/$$.opt.xml" -+ test_assert 76 - -- crm_failcount -r dummy -v 10 -N clusterNode-UNAME 2>&1 -- assert $? 0 crm_resource "Set a resource's fail-count" -+ desc="Modify cluster options section" -+ cmd="cibadmin -M -o crm_config --xml-file /tmp/$$.opt.xml" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -M 2>&1 -- assert $? 234 crm_resource "Require a destination when migrating a resource that is stopped" -+ desc="Query updated cluster option" -+ cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -M -N i.dont.exist 2>&1 -- assert $? 250 crm_resource "Don't support migration to non-existant locations" -+ desc="Set duplicate cluster option" -+ cmd="crm_attribute -n cluster-delay -v 40s -s duplicate" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -M -N clusterNode-UNAME -- assert $? 
0 crm_resource "Migrate a resource" -+ desc="Setting multiply defined cluster option should fail" -+ cmd="crm_attribute -n cluster-delay -v 30s" -+ test_assert 76 - -- $VALGRIND_CMD crm_resource -r dummy -U -- assert $? 0 crm_resource "Un-migrate a resource" -+ desc="Set cluster option with -s" -+ cmd="crm_attribute -n cluster-delay -v 30s -s duplicate" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -G granted -d false -- assert $? 0 crm_ticket "Default ticket granted state" -+ desc="Delete cluster option with -i" -+ cmd="crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -r --force -- assert $? 0 crm_ticket "Set ticket granted state" -+ desc="Create node1 and bring it online" -+ cmd="crm_simulate --live-check --in-place --node-up=node1" -+ test_assert 0 -+ -+ desc="Create node attribute" -+ cmd="crm_attribute -n ram -v 1024M -U node1 -t nodes" -+ test_assert 0 -+ -+ desc="Query new node attribute" -+ cmd="cibadmin -Q -o nodes | grep node1-ram" -+ test_assert 0 -+ -+ desc="Digest calculation" -+ cmd="cibadmin -Q | cibadmin -5 -p 2>&1 > /dev/null" -+ test_assert 0 -+ -+ # This update will fail because it has version numbers -+ desc="Replace operation should fail" -+ cmd="cibadmin -R --xml-file /tmp/$$.existing.xml" -+ test_assert 205 -+ -+ desc="Default standby value" -+ cmd="crm_standby -N node1 -G" -+ test_assert 0 -+ -+ desc="Set standby status" -+ cmd="crm_standby -N node1 -v true" -+ test_assert 0 -+ -+ desc="Query standby value" -+ cmd="crm_standby -N node1 -G" -+ test_assert 0 -+ -+ desc="Delete standby value" -+ cmd="crm_standby -N node1 -D" -+ test_assert 0 -+ -+ desc="Create a resource" -+ cmd="cibadmin -C -o resources --xml-text ''" -+ test_assert 0 -+ -+ desc="Create a resource meta attribute" -+ cmd="crm_resource -r dummy --meta -p is-managed -v false" -+ test_assert 0 -+ -+ desc="Query a resource meta attribute" -+ cmd="crm_resource -r dummy --meta -g is-managed" -+ test_assert 0 -+ -+ desc="Remove a resource meta attribute" -+ cmd="crm_resource -r dummy --meta -d is-managed" -+ test_assert 0 -+ -+ desc="Create a resource attribute" -+ cmd="crm_resource -r dummy -p delay -v 10s" -+ test_assert 0 -+ -+ desc="List the configured resources" -+ cmd="crm_resource -L" -+ test_assert 0 -+ -+ desc="Set a resource's fail-count" -+ cmd="crm_failcount -r dummy -v 10 -N node1" -+ test_assert 0 -+ -+ desc="Require a destination when migrating a resource that is stopped" -+ cmd="crm_resource -r dummy -M" -+ test_assert 22 -+ -+ desc="Don't support migration to non-existant locations" -+ cmd="crm_resource -r dummy -M -N i.dont.exist" -+ test_assert 6 -+ -+ desc="Create a fencing resource" -+ cmd="cibadmin -C -o resources --xml-text ''" -+ test_assert 0 -+ -+ desc="Bring resources online" -+ cmd="crm_simulate --live-check --in-place -S" -+ test_assert 0 -+ -+ desc="Try to move a resource to its existing location" -+ cmd="crm_resource -r dummy --move --host node1" -+ test_assert 22 -+ -+ desc="Move a resource from its existing location" -+ cmd="crm_resource -r dummy --move" -+ test_assert 0 -+ -+ desc="Clear out constraints generated by --move" -+ cmd="crm_resource -r dummy --clear" -+ test_assert 0 -+ -+ desc="Default ticket granted state" -+ cmd="crm_ticket -t ticketA -G granted -d false" -+ test_assert 0 -+ -+ desc="Set ticket granted state" -+ cmd="crm_ticket -t ticketA -r --force" -+ test_assert 0 -+ -+ desc="Query ticket granted state" -+ cmd="crm_ticket -t ticketA -G granted" -+ test_assert 0 -+ 
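With the harness in place, every test is just a desc/cmd pair followed by test_assert with the expected exit code, which keeps new cases cheap to add; a hypothetical example in the same style (ticketB is illustrative, not part of the suite):

    desc="Grant a second ticket"
    cmd="crm_ticket -t ticketB -r --force"
    test_assert 0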
-+ desc="Delete ticket granted state" -+ cmd="crm_ticket -t ticketA -D granted --force" -+ test_assert 0 -+ -+ desc="Make a ticket standby" -+ cmd="crm_ticket -t ticketA -s" -+ test_assert 0 -+ -+ desc="Query ticket standby state" -+ cmd="crm_ticket -t ticketA -G standby" -+ test_assert 0 -+ -+ desc="Activate a ticket" -+ cmd="crm_ticket -t ticketA -a" -+ test_assert 0 -+ -+ desc="Delete ticket standby state" -+ cmd="crm_ticket -t ticketA -D standby" -+ test_assert 0 -+ -+ desc="Ban a resource on unknown node" -+ cmd="crm_resource -r dummy -B -N host1" -+ test_assert 6 -+ -+ desc="Create two more nodes and bring them online" -+ cmd="crm_simulate --live-check --in-place --node-up=node2 --node-up=node3" -+ test_assert 0 -+ -+ desc="Ban dummy from node1" -+ cmd="crm_resource -r dummy -B -N node1" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -G granted -- assert $? 0 crm_ticket "Query ticket granted state" -- -- $VALGRIND_CMD crm_ticket -t ticketA -D granted --force -- assert $? 0 crm_ticket "Delete ticket granted state" -+ desc="Ban dummy from node2" -+ cmd="crm_resource -r dummy -B -N node2" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -s -- assert $? 0 crm_ticket "Make a ticket standby" -+ desc="Relocate resources due to ban" -+ cmd="crm_simulate --live-check --in-place -S" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -G standby -- assert $? 0 crm_ticket "Query ticket standby state" -- -- $VALGRIND_CMD crm_ticket -t ticketA -a -- assert $? 0 crm_ticket "Activate a ticket" -+ desc="Move dummy to node1" -+ cmd="crm_resource -r dummy -M -N node1" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -D standby -- assert $? 0 crm_ticket "Delete ticket standby state" -+ desc="Clear implicit constraints for dummy on node2" -+ cmd="crm_resource -r dummy -U -N node2" -+ test_assert 0 - } - - function test_date() { --# $VALGRIND_CMD cibadmin -Q - for y in 06 07 08 09 10 11 12 13 14 15 16 17 18; do -- $VALGRIND_CMD iso8601 -d "20$y-W01-7 00Z" -- $VALGRIND_CMD iso8601 -d "20$y-W01-7 00Z" -W -E "20$y-W01-7 00:00:00Z" -- assert $? 0 iso8601 "20$y-W01-7" 0 -- $VALGRIND_CMD iso8601 -d "20$y-W01-1 00Z" -- $VALGRIND_CMD iso8601 -d "20$y-W01-1 00Z" -W -E "20$y-W01-1 00:00:00Z" -- assert $? 0 iso8601 "20$y-W01-1" 0 -+ desc="20$y-W01-7" -+ cmd="iso8601 -d '20$y-W01-7 00Z'" -+ test_assert 0 0 -+ -+ desc="20$y-W01-7 - round-trip" -+ cmd="iso8601 -d '20$y-W01-7 00Z' -W -E '20$y-W01-7 00:00:00Z'" -+ test_assert 0 0 -+ -+ desc="20$y-W01-1" -+ cmd="iso8601 -d '20$y-W01-1 00Z'" -+ test_assert 0 0 -+ -+ desc="20$y-W01-1 - round-trip" -+ cmd="iso8601 -d '20$y-W01-1 00Z' -W -E '20$y-W01-1 00:00:00Z'" -+ test_assert 0 0 - done - -- $VALGRIND_CMD iso8601 -d "2009-W53-7 00:00:00Z" -W -E "2009-W53-7 00:00:00Z" -- assert $? 0 iso8601 "2009-W53-07" 0 -+ desc="2009-W53-07" -+ cmd="iso8601 -d '2009-W53-7 00:00:00Z' -W -E '2009-W53-7 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-01-31 00:00:00Z" -D "P1M" -E "2009-02-28 00:00:00Z" -- assert $? 0 iso8601 "2009-01-31 + 1 Month" 0 -+ desc="2009-01-31 + 1 Month" -+ cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P1M -E '2009-02-28 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-01-31 00:00:00Z" -D "P2M" -E "2009-03-31 00:00:00Z" -- assert $? 0 iso8601 "2009-01-31 + 2 Months" 0 -+ desc="2009-01-31 + 2 Months" -+ cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P2M -E '2009-03-31 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-01-31 00:00:00Z" -D "P3M" -E "2009-04-30 00:00:00Z" -- assert $? 
0 iso8601 "2009-01-31 + 3 Months" 0 -+ desc="2009-01-31 + 3 Months" -+ cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P3M -E '2009-04-30 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-03-31 00:00:00Z" -D "P-1M" -E "2009-02-28 00:00:00Z" -- assert $? 0 iso8601 "2009-03-31 - 1 Month" 0 -+ desc="2009-03-31 - 1 Month" -+ cmd="iso8601 -d '2009-03-31 00:00:00Z' -D P-1M -E '2009-02-28 00:00:00Z'" -+ test_assert 0 0 - } - - echo "Testing dates" -@@ -231,24 +332,30 @@ test_date > $test_home/regression.out - echo "Testing tools" - test_tools >> $test_home/regression.out - sed -i.sed 's/cib-last-written.*>/>/' $test_home/regression.out -+sed -i.sed 's/ last-run=\"[0-9]*\"//' $test_home/regression.out -+sed -i.sed 's/ last-rc-change=\"[0-9]*\"//' $test_home/regression.out - - if [ $do_save = 1 ]; then - cp $test_home/regression.out $test_home/regression.exp - fi - -+failed=0 -+ -+echo -e "\n\nResults" -+diff -wu $test_home/regression.exp $test_home/regression.out -+if [ $? != 0 ]; then -+ failed=1 -+fi -+ -+echo -e "\n\nSummary" - grep -e "^*" $test_home/regression.out - - if [ $num_errors != 0 ]; then - echo $num_errors tests failed -- diff -u $test_home/regression.exp $test_home/regression.out - exit 1 --fi -- --diff -u $test_home/regression.exp $test_home/regression.out --if [ $? != 0 ]; then -+elif [ $failed = 1 ]; then - echo $num_passed tests passed but diff failed - exit 2 -- - else - echo $num_passed tests passed - exit 0 -diff --git a/tools/report.collector b/tools/report.collector -index e4d1013..41ff00a 100644 ---- a/tools/report.collector -+++ b/tools/report.collector -@@ -352,7 +352,7 @@ pkg_ver() { - # for Linux .deb based systems - case $pkg_mgr in - deb) -- dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W -+ dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W | sort - for pkg in $*; do - if dpkg-query -W $pkg 2>/dev/null ; then - debug "Verifying installation of: $pkg" -@@ -362,7 +362,7 @@ pkg_ver() { - done - ;; - rpm) -- rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' -+ rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort - for pkg in $*; do - if rpm -q $pkg >/dev/null 2>&1 ; then - debug "Verifying installation of: $pkg" -@@ -738,7 +738,12 @@ for l in $logfiles $EXTRA_LOGS; do - fi - done - --if [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then -+if [ -e $REPORT_HOME/.env ]; then -+ debug "Localhost: $REPORT_MASTER $REPORT_TARGET" -+ # Need to send something back or tar on the caller will complain -+ (cd $REPORT_HOME && tar cf - .env) -+ -+elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then - debug "Streaming report back to $REPORT_MASTER" - (cd $REPORT_HOME && tar cf - $REPORT_TARGET) - if [ "$REMOVE" = "1" ]; then -diff --git a/tools/report.common b/tools/report.common -deleted file mode 100644 -index 0e3b945..0000000 ---- a/tools/report.common -+++ /dev/null -@@ -1,742 +0,0 @@ -- # Copyright (C) 2007 Dejan Muhamedagic -- # Almost everything as part of hb_report -- # Copyright (C) 2010 Andrew Beekhof -- # Cleanups, refactoring, extensions -- # -- # -- # This program is free software; you can redistribute it and/or -- # modify it under the terms of the GNU General Public -- # License as published by the Free Software Foundation; either -- # version 2.1 of the License, or (at your option) any later version. 
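Because the suite now diffs its full output against a golden file, attributes that legitimately change on every run must be scrubbed first; the normalization and comparison step above amounts to (paths relative to $test_home):

    sed -i.sed 's/cib-last-written.*>/>/'      regression.out
    sed -i.sed 's/ last-run="[0-9]*"//'        regression.out
    sed -i.sed 's/ last-rc-change="[0-9]*"//'  regression.out
    diff -wu regression.exp regression.out || failed=1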
-- # -- # This software is distributed in the hope that it will be useful, -- # but WITHOUT ANY WARRANTY; without even the implied warranty of -- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- # General Public License for more details. -- # -- # You should have received a copy of the GNU General Public -- # License along with this library; if not, write to the Free Software -- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- # -- --host=`uname -n` --shorthost=`echo $host | sed s:\\\\..*::` --if [ -z $verbose ]; then -- verbose=0 --fi -- --# Target Files --EVENTS_F=events.txt --ANALYSIS_F=analysis.txt --DESCRIPTION_F=description.txt --HALOG_F=cluster-log.txt --BT_F=backtraces.txt --SYSINFO_F=sysinfo.txt --SYSSTATS_F=sysstats.txt --DLM_DUMP_F=dlm_dump.txt --CRM_MON_F=crm_mon.txt --MEMBERSHIP_F=members.txt --HB_UUID_F=hb_uuid.txt --HOSTCACHE=hostcache --CRM_VERIFY_F=crm_verify.txt --PERMISSIONS_F=permissions.txt --CIB_F=cib.xml --CIB_TXT_F=cib.txt -- --EVENT_PATTERNS=" --state do_state_transition --membership pcmk_peer_update.*(lost|memb): --quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir) --pause Process.pause.detected --resources lrmd.*rsc:(start|stop) --stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) --start_stop sutdown.decision|Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete --" -- --PACKAGES="pacemaker pacemaker-libs libpacemaker3 --pacemaker-pygui pacemaker-pymgmt pymgmt-client --openais libopenais2 libopenais3 corosync libcorosync4 --resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord --heartbeat heartbeat-common heartbeat-resources libheartbeat2 --ocfs2-tools ocfs2-tools-o2cb ocfs2console --ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace --drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace --drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen --lvm2 lvm2-clvm cmirrord --libdlm libdlm2 libdlm3 --hawk ruby lighttpd --kernel-default kernel-pae kernel-xen --glibc --" -- --# --# keep the user posted --# -- --log() { -- printf "%-10s $*\n" "$shorthost:" 1>&2 --} -- --debug() { -- if [ $verbose -gt 0 ]; then -- log "Debug: $*" -- fi --} -- --info() { -- log "$*" --} -- --warning() { -- log "WARN: $*" --} -- --fatal() { -- log "ERROR: $*" -- exit 1 --} -- --detect_host() { -- -- depth="-maxdepth 5" -- local_state_dir=/var -- -- if [ -d $local_state_dir/run ]; then -- CRM_STATE_DIR=$local_state_dir/run/crm -- else -- info "Searching for where Pacemaker keeps runtime data... this may take a while" -- for d in `find / $depth -type d -name run`; do -- local_state_dir=`dirname $d` -- CRM_STATE_DIR=$d/crm -- break -- done -- info "Found: $CRM_STATE_DIR" -- fi -- debug "Machine runtime directory: $local_state_dir" -- debug "Pacemaker runtime data located in: $CRM_STATE_DIR" -- -- CRM_DAEMON_DIR= -- for p in /usr /usr/local /opt/local; do -- for d in libexec lib64 lib; do -- if [ -e $p/$d/pacemaker/pengine ]; then -- CRM_DAEMON_DIR=$p/$d/pacemaker -- break -- elif [ -e $p/$d/heartbeat/pengine ]; then -- CRM_DAEMON_DIR=$p/$d/heartbeat -- break -- fi -- done -- done -- -- if [ ! -d $CRM_DAEMON_DIR ]; then -- info "Searching for where Pacemaker daemons live... 
this may take a while" -- for f in `find / $depth -type f -name pengine`; do -- CRM_DAEMON_DIR=`dirname $f` -- break -- done -- info "Found: $CRM_DAEMON_DIR" -- fi -- -- if [ -z $CRM_DAEMON_DIR ]; then -- fatal "Non-standard Pacemaker installation: daemons not found" -- else -- debug "Pacemaker daemons located under: $CRM_DAEMON_DIR" -- fi -- -- CRM_CONFIG_DIR= -- for d in pacemaker/cib heartbeat/crm; do -- if [ -f $local_state_dir/lib/$d/cib.xml ]; then -- CRM_CONFIG_DIR=$local_state_dir/lib/$d -- break -- fi -- done -- -- if [ ! -d $CRM_CONFIG_DIR ]; then -- info "Detecting where Pacemaker keeps config information... this may take a while" -- for f in `find / $depth -type f -name cib.xml`; do -- CRM_CONFIG_DIR=`dirname $f` -- break -- done -- info "Found: $CRM_CONFIG_DIR" -- fi -- if [ -z $CRM_CONFIG_DIR ]; then -- warning "Non-standard Pacemaker installation: config not found" -- else -- debug "Pacemaker config files located in: $CRM_CONFIG_DIR" -- fi -- -- # Assume new layout -- # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) -- config_root=`dirname $CRM_CONFIG_DIR` -- -- # Older versions had none -- BLACKBOX_DIR=$config_root/blackbox -- debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" -- -- PE_STATE_DIR=$config_root/pengine -- if [ ! -d $PE_STATE_DIR ]; then -- info "Detecting where Pacemaker keeps Policy Engine inputs... this may take a while" -- for d in `find / $depth -type d -name pengine`; do -- PE_STATE_DIR=$d -- break -- done -- info "Found: $PE_STATE_DIR" -- fi -- if [ -z $PE_STATE_DIR ]; then -- fatal "Non-standard Pacemaker installation: Policy Engine directory not found" -- else -- debug "PE files located in: $PE_STATE_DIR" -- fi -- -- HA_STATE_DIR=$local_state_dir/lib/heartbeat -- debug "Assuming Heartbeat state files, if any, are located in: $HA_STATE_DIR" -- -- CRM_CORE_DIRS="" -- for d in $config_root/cores $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do -- if [ -d $d ]; then -- CRM_CORE_DIRS="$CRM_CORE_DIRS $d" -- fi -- done -- debug "Core files located under: $CRM_CORE_DIRS" --} -- --time2str() { -- perl -e "use POSIX; print strftime('%x %X',localtime($1));" --} -- --get_time() { -- perl -e "\$time=\"$*\";" -e ' -- eval "use Date::Parse"; -- if (index($time, ":") < 0) { -- -- } elsif (!$@) { -- print str2time($time); -- } else { -- eval "use Date::Manip"; -- if (!$@) { -- print UnixDate(ParseDateString($time), "%s"); -- } -- } -- ' --} -- --get_time_() { -- warning "Unknown time format used by: $*" --} -- --get_time_syslog() { -- awk '{print $1,$2,$3}' --} -- --get_time_legacy() { -- awk '{print $2}' | sed 's/_/ /' --} -- --get_time_format_for_string() { -- l="$*" -- t=$(get_time `echo $l | get_time_syslog`) -- if [ "x$t" != x ]; then -- echo syslog -- return -- fi -- -- t=$(get_time `echo $l | get_time_legacy`) -- if [ "x$t" != x ]; then -- echo legacy -- return -- fi --} -- --get_time_format() { -- t=0 l="" func="" -- trycnt=10 -- while [ $trycnt -gt 0 ] && read l; do -- func=$(get_time_format_for_string $l) -- if [ "x$func" != x ]; then -- break -- fi -- trycnt=$(($trycnt-1)) -- done -- #debug "Logfile uses the $func time format" -- echo $func --} -- --get_first_time() { -- l="" -- format=$1 -- while read l; do -- t=$(echo $l | get_time_$format) -- ts=$(get_time $t) -- if [ "x$ts" != x ]; then -- echo "$ts" -- return -- fi -- done --} -- --get_last_time() { -- l="" -- best=`date +%s` # Now -- format=$1 -- while read l; do -- t=$(echo $l | get_time_$format) -- ts=$(get_time $t) -- if [ "x$ts" != x ]; 
then -- best=$ts -- fi -- done -- echo $best --} -- --linetime() { -- l=`tail -n +$2 $1 | grep ":[0-5][0-9]:" | head -1` -- format=`get_time_format_for_string $l` -- t=`echo $l | get_time_$format` -- get_time "$t" --} -- --# Find pattern in a logfile somewhere --# Return $max ordered results by age (newest first) --findmsg() { -- max=$1 -- pattern=$2 -- logfiles="" -- syslogdirs="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster" -- -- for d in $syslogdirs; do -- if [ -d $d ]; then -- logfiles="$logfiles `grep -l -e "$pattern" $d/*`" -- fi -- done 2>/dev/null -- -- if [ "x$logfiles" != "x" ]; then -- list=`ls -t $logfiles | head -n $max | tr '\n' ' '` -- echo $list -- debug "Pattern \'$pattern\' found in: [ $list ]" -- else -- debug "Pattern \'$pattern\' not found anywhere" -- fi --} -- --node_events() { -- if [ -e $1 ]; then -- Epatt=`echo "$EVENT_PATTERNS" | -- while read title p; do [ -n "$p" ] && echo -n "|$p"; done | -- sed 's/.//' -- ` -- grep -E "$Epatt" $1 -- fi --} -- --pickfirst() { -- for x; do -- which $x >/dev/null 2>&1 && { -- echo $x -- return 0 -- } -- done -- return 1 --} -- --shrink() { -- olddir=$PWD -- dir=`dirname $1` -- base=`basename $1` -- -- target=$1.tar -- tar_options="cf" -- -- variant=`pickfirst bzip2 gzip false` -- case $variant in -- bz*) -- tar_options="jcf" -- target="$target.bz2" -- ;; -- gz*) -- tar_options="zcf" -- target="$target.gz" -- ;; -- *) -- warning "Could not find a compression program, the resulting tarball may be huge" -- ;; -- esac -- -- if [ -e $target ]; then -- fatal "Destination $target already exists, specify an alternate name with --dest" -- fi -- -- cd $dir >/dev/null 2>&1 -- tar $tar_options $target $base >/dev/null 2>&1 -- cd $olddir >/dev/null 2>&1 -- -- echo $target --} -- --findln_by_time() { -- local logf=$1 -- local tm=$2 -- local first=1 -- local last=`wc -l < $logf` -- while [ $first -le $last ]; do -- mid=$((($last+$first)/2)) -- trycnt=10 -- while [ $trycnt -gt 0 ]; do -- tmid=`linetime $logf $mid` -- [ "$tmid" ] && break -- warning "cannot extract time: $logf:$mid; will try the next one" -- trycnt=$(($trycnt-1)) -- # shift the whole first-last segment -- first=$(($first-1)) -- last=$(($last-1)) -- mid=$((($last+$first)/2)) -- done -- if [ -z "$tmid" ]; then -- warning "giving up on log..." -- return -- fi -- if [ $tmid -gt $tm ]; then -- last=$(($mid-1)) -- elif [ $tmid -lt $tm ]; then -- first=$(($mid+1)) -- else -- break -- fi -- done -- echo $mid --} -- --dumplog() { -- local logf=$1 -- local from_line=$2 -- local to_line=$3 -- [ "$from_line" ] || -- return -- tail -n +$from_line $logf | -- if [ "$to_line" ]; then -- head -$(($to_line-$from_line+1)) -- else -- cat -- fi --} -- --# --# find log/set of logs which are interesting for us --# --# --# find log slices --# -- --find_decompressor() { -- if echo $1 | grep -qs 'bz2$'; then -- echo "bzip2 -dc" -- elif echo $1 | grep -qs 'gz$'; then -- echo "gzip -dc" -- else -- echo "cat" -- fi --} --# --# check if the log contains a piece of our segment --# --is_our_log() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- local cat=`find_decompressor $logf` -- local format=`$cat $logf | get_time_format` -- local first_time=`$cat $logf | head -10 | get_first_time $format` -- local last_time=`$cat $logf | tail -10 | get_last_time $format` -- -- if [ x = "x$first_time" -o x = "x$last_time" ]; then -- warning "Skipping bad logfile '$1': Could not determine log dates" -- return 0 # skip (empty log?) 
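is_our_log reports its verdict through the exit status rather than on stdout: 0 means skip the file, 1 means include it and keep scanning older logs, 2 means stop because we are already past the requested window, and 3 means include it as the last relevant file and stop. The arch_logs loop below dispatches on exactly that protocol, along these lines:

    is_our_log $next_log $from_time $to_time
    case $? in
        0) ;;                        # outside the window; keep looking
        1) echo $next_log ;;         # inside the window; continue to older logs
        2) break ;;                  # already past the window; stop
        3) echo $next_log; break ;;  # oldest log that still overlaps; stop
    esac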
-- fi -- if [ $from_time -gt $last_time ]; then -- # we shouldn't get here anyway if the logs are in order -- return 2 # we're past good logs; exit -- fi -- if [ $from_time -ge $first_time ]; then -- return 3 # this is the last good log -- fi -- # have to go further back -- if [ x = "x$to_time" -o $to_time -ge $first_time ]; then -- return 1 # include this log -- else -- return 0 # don't include this log -- fi --} --# --# go through archived logs (timewise backwards) and see if there --# are lines belonging to us --# (we rely on untouched log files, i.e. that modify time --# hasn't been changed) --# --arch_logs() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- # look for files such as: ha-log-20090308 or -- # ha-log-20090308.gz (.bz2) or ha-log.0, etc -- ls -t $logf $logf*[0-9z] 2>/dev/null | -- while read next_log; do -- is_our_log $next_log $from_time $to_time -- case $? in -- 0) ;; # noop, continue -- 1) echo $next_log # include log and continue -- debug "Found log $next_log" -- ;; -- 2) break;; # don't go through older logs! -- 3) echo $next_log # include log and continue -- debug "Found log $next_log" -- break -- ;; # don't go through older logs! -- esac -- done --} -- --# --# print part of the log --# --drop_tmp_file() { -- [ -z "$tmp" ] || rm -f "$tmp" --} -- --print_logseg() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- # uncompress to a temp file (if necessary) -- local cat=`find_decompressor $logf` -- if [ "$cat" != "cat" ]; then -- tmp=`mktemp` -- $cat $logf > $tmp -- trap drop_tmp_file 0 -- sourcef=$tmp -- else -- sourcef=$logf -- tmp="" -- fi -- -- if [ "$from_time" = 0 ]; then -- FROM_LINE=1 -- else -- FROM_LINE=`findln_by_time $sourcef $from_time` -- fi -- if [ -z "$FROM_LINE" ]; then -- warning "couldn't find line for time $from_time; corrupt log file?" -- return -- fi -- -- TO_LINE="" -- if [ "$to_time" != 0 ]; then -- TO_LINE=`findln_by_time $sourcef $to_time` -- if [ -z "$TO_LINE" ]; then -- warning "couldn't find line for time $to_time; corrupt log file?" 
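print_logseg always works from an uncompressed copy so that findln_by_time can address the file by line number; reduced to its essentials, the flow is (a sketch, assuming FROM_LINE and TO_LINE have already been located):

    cat=`find_decompressor $logf`    # "bzip2 -dc", "gzip -dc", or plain "cat"
    tmp=`mktemp`
    $cat $logf > $tmp                # uncompress (or copy) once
    tail -n +$FROM_LINE $tmp |       # drop everything before the window
        head -$((TO_LINE - FROM_LINE + 1))
    rm -f $tmp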
-- return -- fi -- if [ $FROM_LINE -lt $TO_LINE ]; then -- dumplog $sourcef $FROM_LINE $TO_LINE -- log "Including segment [$FROM_LINE-$TO_LINE] from $logf" -- else -- debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" -- fi -- else -- dumplog $sourcef $FROM_LINE $TO_LINE -- log "Including all logs after line $FROM_LINE from $logf" -- fi -- drop_tmp_file -- trap "" 0 --} -- --# --# find log/set of logs which are interesting for us --# --dumplogset() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- local logf_set=`arch_logs $logf $from_time $to_time` -- if [ x = "x$logf_set" ]; then -- return -- fi -- -- local num_logs=`echo "$logf_set" | wc -l` -- local oldest=`echo $logf_set | awk '{print $NF}'` -- local newest=`echo $logf_set | awk '{print $1}'` -- local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` -- -- # the first logfile: from $from_time to $to_time (or end) -- # logfiles in the middle: all -- # the last logfile: from beginning to $to_time (or end) -- case $num_logs in -- 1) print_logseg $newest $from_time $to_time;; -- *) -- print_logseg $oldest $from_time 0 -- for f in $mid_logfiles; do -- `find_decompressor $f` $f -- debug "including complete $f logfile" -- done -- print_logseg $newest 0 $to_time -- ;; -- esac --} -- --# cut out a stanza --getstanza() { -- awk -v name="$1" ' -- !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start -- if ($1 == name) -- in_stanza = 1 -- } -- in_stanza { print } -- in_stanza && NF==1 && $1 == "}" { exit } -- ' --} --# supply stanza in $1 and variable name in $2 --# (stanza is optional) --getcfvar() { -- cf_type=$1; shift; -- cf_var=$1; shift; -- cf_file=$* -- -- [ -f "$cf_file" ] || return -- case $cf_type in -- cman) -- grep $cf_var $cf_file | sed s/.*$cf_var=\"// | sed s/\".*// -- ;; -- corosync|openais) -- sed 's/#.*//' < $cf_file | -- if [ $# -eq 2 ]; then -- getstanza "$cf_var" -- shift 1 -- else -- cat -- fi | -- awk -v varname="$cf_var" ' -- NF==2 && match($1,varname":$")==1 { print $2; exit; } -- ' -- ;; -- heartbeat) -- sed 's/#.*//' < $cf_file | -- grep -w "^$cf_var" | -- sed 's/^[^[:space:]]*[[:space:]]*//' -- -- ;; -- logd) -- sed 's/#.*//' < $cf_file | -- grep -w "^$cf_var" | -- sed 's/^[^[:space:]]*[[:space:]]*//' -- -- ;; -- esac --} -- --pickfirst() { -- for x; do -- which $x >/dev/null 2>&1 && { -- echo $x -- return 0 -- } -- done -- return 1 --} -- --# --# figure out the cluster type, depending on the process list --# and existence of configuration files --# --get_cluster_type() { -- if -- ps -ef | egrep -qs '[c]orosync' -- then -- tool=`pickfirst corosync-objctl corosync-cmapctl` -- case $tool in -- *objctl) quorum=`$tool -a | grep quorum.provider | sed s/.*=//`;; -- *cmapctl) quorum=`$tool | grep quorum.provider | sed s/.*=//`;; -- esac -- if [ x"$quorum" = x"quorum_cman" ]; then -- stack="cman" -- else -- stack="corosync" -- fi -- -- elif -- ps -ef | egrep -qs '[a]isexec' -- then -- stack="openais" -- elif -- ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat' -- then -- stack="heartbeat" -- -- # Now we're guessing... 
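The bracketed first letter in patterns such as '[c]orosync' is deliberate: the regex still matches the daemon name, but the literal string '[c]orosync' on egrep's own command line does not match it, so the probe never sees itself in the ps listing. The same trick in isolation:

    # Without the brackets, grep may match its own entry in the ps output.
    ps -ef | egrep -qs '[c]orosync' && echo "corosync is running"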
-- -- elif [ -f /etc/cluster/cluster.conf ]; then -- stack="cman" -- -- # TODO: Technically these could be anywhere :-/ -- elif [ -f /etc/corosync/corosync.conf ]; then -- stack="corosync" -- -- elif [ -f /etc/ais/openais.conf ]; then -- stack="openais" -- -- else -- stack="heartbeat" -- fi -- -- debug "Detected the '$stack' cluster stack" -- echo $stack --} -- --find_cluster_cf() { -- case $1 in -- cman) echo "/etc/cluster/cluster.conf";; -- corosync) -- best_size=0 -- best_file="" -- -- # TODO: Technically these could be anywhere :-/ -- for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do -- if [ -f $cf ]; then -- size=`wc -l $cf | awk '{print $1}'` -- if [ $size -gt $best_size ]; then -- best_size=$size -- best_file=$cf -- fi -- fi -- done -- echo "$best_file" -- ;; -- openais) -- # TODO: Technically it could be anywhere :-/ -- cf="/etc/ais/openais.conf" -- if [ -f $cf ]; then -- echo "$cf" -- fi -- ;; -- heartbeat) -- cf="/etc/ha.d/ha.cf" -- if [ -f $cf ]; then -- echo "$cf" -- fi -- ;; -- *) -- warning "Unknown cluster type: $1" -- ;; -- esac --} -- --# --# check for the major prereq for a) parameter parsing and b) --# parsing logs --# --t=`get_time "12:00"` --if [ "$t" = "" ]; then -- fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" --fi -- -diff --git a/tools/report.common.in b/tools/report.common.in -new file mode 100644 -index 0000000..c4023a8 ---- /dev/null -+++ b/tools/report.common.in -@@ -0,0 +1,760 @@ -+ # Copyright (C) 2007 Dejan Muhamedagic -+ # Almost everything as part of hb_report -+ # Copyright (C) 2010 Andrew Beekhof -+ # Cleanups, refactoring, extensions -+ # -+ # -+ # This program is free software; you can redistribute it and/or -+ # modify it under the terms of the GNU General Public -+ # License as published by the Free Software Foundation; either -+ # version 2.1 of the License, or (at your option) any later version. -+ # -+ # This software is distributed in the hope that it will be useful, -+ # but WITHOUT ANY WARRANTY; without even the implied warranty of -+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ # General Public License for more details. 
-+ #
-+ # You should have received a copy of the GNU General Public
-+ # License along with this library; if not, write to the Free Software
-+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ #
-+
-+host=`uname -n`
-+shorthost=`echo $host | sed s:\\\\..*::`
-+if [ -z $verbose ]; then
-+ verbose=0
-+fi
-+
-+# Target Files
-+EVENTS_F=events.txt
-+ANALYSIS_F=analysis.txt
-+DESCRIPTION_F=description.txt
-+HALOG_F=cluster-log.txt
-+BT_F=backtraces.txt
-+SYSINFO_F=sysinfo.txt
-+SYSSTATS_F=sysstats.txt
-+DLM_DUMP_F=dlm_dump.txt
-+CRM_MON_F=crm_mon.txt
-+MEMBERSHIP_F=members.txt
-+HB_UUID_F=hb_uuid.txt
-+HOSTCACHE=hostcache
-+CRM_VERIFY_F=crm_verify.txt
-+PERMISSIONS_F=permissions.txt
-+CIB_F=cib.xml
-+CIB_TXT_F=cib.txt
-+
-+EVENT_PATTERNS="
-+state do_state_transition
-+membership pcmk_peer_update.*(lost|memb):
-+quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir)
-+pause Process.pause.detected
-+resources lrmd.*rsc:(start|stop)
-+stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=)
-+start_stop shutdown.decision|Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete
-+"
-+
-+PACKAGES="pacemaker pacemaker-libs libpacemaker3
-+pacemaker-pygui pacemaker-pymgmt pymgmt-client
-+openais libopenais2 libopenais3 corosync libcorosync4
-+resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord
-+heartbeat heartbeat-common heartbeat-resources libheartbeat2
-+ocfs2-tools ocfs2-tools-o2cb ocfs2console
-+ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace
-+drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace
-+drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen
-+lvm2 lvm2-clvm cmirrord
-+libdlm libdlm2 libdlm3
-+hawk ruby lighttpd
-+kernel-default kernel-pae kernel-xen
-+glibc
-+"
-+
-+#
-+# keep the user posted
-+#
-+
-+log() {
-+ printf "%-10s $*\n" "$shorthost:" 1>&2
-+}
-+
-+debug() {
-+ if [ $verbose -gt 0 ]; then
-+ log "Debug: $*"
-+ fi
-+}
-+
-+info() {
-+ log "$*"
-+}
-+
-+warning() {
-+ log "WARN: $*"
-+}
-+
-+fatal() {
-+ log "ERROR: $*"
-+ exit 1
-+}
-+
-+detect_host() {
-+ if [ -z "$maxdepth" ]; then
-+ depth="-maxdepth 5"
-+ else
-+ depth="-maxdepth $maxdepth"
-+ fi
-+
-+ local_state_dir=@localstatedir@
-+
-+ if [ -d $local_state_dir/run ]; then
-+ CRM_STATE_DIR=$local_state_dir/run/crm
-+ else
-+ info "Searching for where Pacemaker keeps runtime data... this may take a while"
-+ for d in `find / $depth -type d -name run`; do
-+ local_state_dir=`dirname $d`
-+ CRM_STATE_DIR=$d/crm
-+ break
-+ done
-+ info "Found: $CRM_STATE_DIR"
-+ fi
-+ debug "Machine runtime directory: $local_state_dir"
-+ debug "Pacemaker runtime data located in: $CRM_STATE_DIR"
-+
-+ CRM_DAEMON_DIR=
-+ for p in /usr /usr/local /opt/local @exec_prefix@; do
-+ for d in libexec lib64 lib; do
-+ if [ -e $p/$d/pacemaker/pengine ]; then
-+ CRM_DAEMON_DIR=$p/$d/pacemaker
-+ break
-+ elif [ -e $p/$d/heartbeat/pengine ]; then
-+ CRM_DAEMON_DIR=$p/$d/heartbeat
-+ break
-+ fi
-+ done
-+ done
-+
-+ if [ ! -d $CRM_DAEMON_DIR ]; then
-+ info "Searching for where Pacemaker daemons live... 
this may take a while" -+ for f in `find / $depth -type f -name pengine`; do -+ CRM_DAEMON_DIR=`dirname $f` -+ break -+ done -+ info "Found: $CRM_DAEMON_DIR" -+ fi -+ -+ if [ -z $CRM_DAEMON_DIR ]; then -+ fatal "Non-standard Pacemaker installation: daemons not found" -+ else -+ debug "Pacemaker daemons located under: $CRM_DAEMON_DIR" -+ fi -+ -+ CRM_CONFIG_DIR= -+ for d in pacemaker/cib heartbeat/crm; do -+ if [ -f $local_state_dir/lib/$d/cib.xml ]; then -+ CRM_CONFIG_DIR=$local_state_dir/lib/$d -+ break -+ fi -+ done -+ -+ if [ ! -d $CRM_CONFIG_DIR ]; then -+ info "Detecting where Pacemaker keeps config information... this may take a while" -+ for f in `find / $depth -type f -name cib.xml`; do -+ CRM_CONFIG_DIR=`dirname $f` -+ break -+ done -+ info "Found: $CRM_CONFIG_DIR" -+ fi -+ if [ -z $CRM_CONFIG_DIR ]; then -+ warning "Non-standard Pacemaker installation: config not found" -+ else -+ debug "Pacemaker config files located in: $CRM_CONFIG_DIR" -+ fi -+ -+ # Assume new layout -+ # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) -+ config_root=`dirname $CRM_CONFIG_DIR` -+ -+ # Older versions had none -+ BLACKBOX_DIR=$config_root/blackbox -+ debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" -+ -+ PE_STATE_DIR=$config_root/pengine -+ if [ ! -d $PE_STATE_DIR ]; then -+ info "Detecting where Pacemaker keeps Policy Engine inputs... this may take a while" -+ for d in `find / $depth -type d -name pengine`; do -+ PE_STATE_DIR=$d -+ break -+ done -+ info "Found: $PE_STATE_DIR" -+ fi -+ if [ -z $PE_STATE_DIR ]; then -+ fatal "Non-standard Pacemaker installation: Policy Engine directory not found" -+ else -+ debug "PE files located in: $PE_STATE_DIR" -+ fi -+ -+ HA_STATE_DIR=$local_state_dir/lib/heartbeat -+ debug "Assuming Heartbeat state files, if any, are located in: $HA_STATE_DIR" -+ -+ CRM_CORE_DIRS="" -+ for d in $config_root/cores $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do -+ if [ -d $d ]; then -+ CRM_CORE_DIRS="$CRM_CORE_DIRS $d" -+ fi -+ done -+ debug "Core files located under: $CRM_CORE_DIRS" -+} -+ -+time2str() { -+ perl -e "use POSIX; print strftime('%x %X',localtime($1));" -+} -+ -+get_time() { -+ perl -e "\$time=\"$*\";" -e ' -+ eval "use Date::Parse"; -+ if (index($time, ":") < 0) { -+ -+ } elsif (!$@) { -+ print str2time($time); -+ } else { -+ eval "use Date::Manip"; -+ if (!$@) { -+ print UnixDate(ParseDateString($time), "%s"); -+ } -+ } -+ ' -+} -+ -+get_time_() { -+ warning "Unknown time format used by: $*" -+} -+ -+get_time_syslog() { -+ awk '{print $1,$2,$3}' -+} -+ -+get_time_legacy() { -+ awk '{print $2}' | sed 's/_/ /' -+} -+ -+get_time_format_for_string() { -+ l="$*" -+ t=$(get_time `echo $l | get_time_syslog`) -+ if [ "x$t" != x ]; then -+ echo syslog -+ return -+ fi -+ -+ t=$(get_time `echo $l | get_time_legacy`) -+ if [ "x$t" != x ]; then -+ echo legacy -+ return -+ fi -+} -+ -+get_time_format() { -+ t=0 l="" func="" -+ trycnt=10 -+ while [ $trycnt -gt 0 ] && read l; do -+ func=$(get_time_format_for_string $l) -+ if [ "x$func" != x ]; then -+ break -+ fi -+ trycnt=$(($trycnt-1)) -+ done -+ #debug "Logfile uses the $func time format" -+ echo $func -+} -+ -+get_first_time() { -+ l="" -+ format=$1 -+ while read l; do -+ t=$(echo $l | get_time_$format) -+ ts=$(get_time $t) -+ if [ "x$ts" != x ]; then -+ echo "$ts" -+ return -+ fi -+ done -+} -+ -+get_last_time() { -+ l="" -+ best=`date +%s` # Now -+ format=$1 -+ while read l; do -+ t=$(echo $l | get_time_$format) -+ ts=$(get_time $t) -+ if [ "x$ts" != x ]; 
then -+ best=$ts -+ fi -+ done -+ echo $best -+} -+ -+linetime() { -+ l=`tail -n +$2 $1 | grep ":[0-5][0-9]:" | head -1` -+ format=`get_time_format_for_string $l` -+ t=`echo $l | get_time_$format` -+ get_time "$t" -+} -+ -+# Find pattern in a logfile somewhere -+# Return $max ordered results by age (newest first) -+findmsg() { -+ max=$1 -+ pattern=$2 -+ logfiles="" -+ syslogdirs="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster" -+ -+ for d in $syslogdirs; do -+ if [ -d $d ]; then -+ files=`find $d -type f -maxdepth 1` -+ for f in $files; do -+ local cat=`find_decompressor $f` -+ $cat $f | grep -l -e "$pattern" -+ if [ $? = 0 ]; then -+ logfiles="$logfiles $f" -+ fi -+ done -+ fi -+ done 2>/dev/null -+ -+ if [ "x$logfiles" != "x" ]; then -+ list=`ls -t $logfiles | head -n $max | tr '\n' ' '` -+ echo $list -+ debug "Pattern \'$pattern\' found in: [ $list ]" -+ else -+ debug "Pattern \'$pattern\' not found anywhere" -+ fi -+} -+ -+node_events() { -+ if [ -e $1 ]; then -+ Epatt=`echo "$EVENT_PATTERNS" | -+ while read title p; do [ -n "$p" ] && echo -n "|$p"; done | -+ sed 's/.//' -+ ` -+ grep -E "$Epatt" $1 -+ fi -+} -+ -+pickfirst() { -+ for x; do -+ which $x >/dev/null 2>&1 && { -+ echo $x -+ return 0 -+ } -+ done -+ return 1 -+} -+ -+shrink() { -+ olddir=$PWD -+ dir=`dirname $1` -+ base=`basename $1` -+ -+ target=$1.tar -+ tar_options="cf" -+ -+ variant=`pickfirst bzip2 gzip false` -+ case $variant in -+ bz*) -+ tar_options="jcf" -+ target="$target.bz2" -+ ;; -+ gz*) -+ tar_options="zcf" -+ target="$target.gz" -+ ;; -+ *) -+ warning "Could not find a compression program, the resulting tarball may be huge" -+ ;; -+ esac -+ -+ if [ -e $target ]; then -+ fatal "Destination $target already exists, specify an alternate name with --dest" -+ fi -+ -+ cd $dir >/dev/null 2>&1 -+ tar $tar_options $target $base >/dev/null 2>&1 -+ cd $olddir >/dev/null 2>&1 -+ -+ echo $target -+} -+ -+findln_by_time() { -+ local logf=$1 -+ local tm=$2 -+ local first=1 -+ local last=`wc -l < $logf` -+ while [ $first -le $last ]; do -+ mid=$((($last+$first)/2)) -+ trycnt=10 -+ while [ $trycnt -gt 0 ]; do -+ tmid=`linetime $logf $mid` -+ [ "$tmid" ] && break -+ warning "cannot extract time: $logf:$mid; will try the next one" -+ trycnt=$(($trycnt-1)) -+ # shift the whole first-last segment -+ first=$(($first-1)) -+ last=$(($last-1)) -+ mid=$((($last+$first)/2)) -+ done -+ if [ -z "$tmid" ]; then -+ warning "giving up on log..." 
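-+ # (ten window shifts found no line with a parsable timestamp
-+ # around the midpoint, so the binary search cannot make
-+ # progress in this log)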
-+ return -+ fi -+ if [ $tmid -gt $tm ]; then -+ last=$(($mid-1)) -+ elif [ $tmid -lt $tm ]; then -+ first=$(($mid+1)) -+ else -+ break -+ fi -+ done -+ echo $mid -+} -+ -+dumplog() { -+ local logf=$1 -+ local from_line=$2 -+ local to_line=$3 -+ [ "$from_line" ] || -+ return -+ tail -n +$from_line $logf | -+ if [ "$to_line" ]; then -+ head -$(($to_line-$from_line+1)) -+ else -+ cat -+ fi -+} -+ -+# -+# find log/set of logs which are interesting for us -+# -+# -+# find log slices -+# -+ -+find_decompressor() { -+ if echo $1 | grep -qs 'bz2$'; then -+ echo "bzip2 -dc" -+ elif echo $1 | grep -qs 'gz$'; then -+ echo "gzip -dc" -+ else -+ echo "cat" -+ fi -+} -+# -+# check if the log contains a piece of our segment -+# -+is_our_log() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ local cat=`find_decompressor $logf` -+ local format=`$cat $logf | get_time_format` -+ local first_time=`$cat $logf | head -10 | get_first_time $format` -+ local last_time=`$cat $logf | tail -10 | get_last_time $format` -+ -+ if [ x = "x$first_time" -o x = "x$last_time" ]; then -+ warning "Skipping bad logfile '$1': Could not determine log dates" -+ return 0 # skip (empty log?) -+ fi -+ if [ $from_time -gt $last_time ]; then -+ # we shouldn't get here anyway if the logs are in order -+ return 2 # we're past good logs; exit -+ fi -+ if [ $from_time -ge $first_time ]; then -+ return 3 # this is the last good log -+ fi -+ # have to go further back -+ if [ x = "x$to_time" -o $to_time -ge $first_time ]; then -+ return 1 # include this log -+ else -+ return 0 # don't include this log -+ fi -+} -+# -+# go through archived logs (timewise backwards) and see if there -+# are lines belonging to us -+# (we rely on untouched log files, i.e. that modify time -+# hasn't been changed) -+# -+arch_logs() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ # look for files such as: ha-log-20090308 or -+ # ha-log-20090308.gz (.bz2) or ha-log.0, etc -+ ls -t $logf $logf*[0-9z] 2>/dev/null | -+ while read next_log; do -+ is_our_log $next_log $from_time $to_time -+ case $? in -+ 0) ;; # noop, continue -+ 1) echo $next_log # include log and continue -+ debug "Found log $next_log" -+ ;; -+ 2) break;; # don't go through older logs! -+ 3) echo $next_log # include log and continue -+ debug "Found log $next_log" -+ break -+ ;; # don't go through older logs! -+ esac -+ done -+} -+ -+# -+# print part of the log -+# -+drop_tmp_file() { -+ [ -z "$tmp" ] || rm -f "$tmp" -+} -+ -+print_logseg() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ # uncompress to a temp file (if necessary) -+ local cat=`find_decompressor $logf` -+ if [ "$cat" != "cat" ]; then -+ tmp=`mktemp` -+ $cat $logf > $tmp -+ trap drop_tmp_file 0 -+ sourcef=$tmp -+ else -+ sourcef=$logf -+ tmp="" -+ fi -+ -+ if [ "$from_time" = 0 ]; then -+ FROM_LINE=1 -+ else -+ FROM_LINE=`findln_by_time $sourcef $from_time` -+ fi -+ if [ -z "$FROM_LINE" ]; then -+ warning "couldn't find line for time $from_time; corrupt log file?" -+ return -+ fi -+ -+ TO_LINE="" -+ if [ "$to_time" != 0 ]; then -+ TO_LINE=`findln_by_time $sourcef $to_time` -+ if [ -z "$TO_LINE" ]; then -+ warning "couldn't find line for time $to_time; corrupt log file?" 
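-+ # (the segment end could not be located, so nothing is dumped
-+ # from this log rather than guessing at a boundary)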
-+ return -+ fi -+ if [ $FROM_LINE -lt $TO_LINE ]; then -+ dumplog $sourcef $FROM_LINE $TO_LINE -+ log "Including segment [$FROM_LINE-$TO_LINE] from $logf" -+ else -+ debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" -+ fi -+ else -+ dumplog $sourcef $FROM_LINE $TO_LINE -+ log "Including all logs after line $FROM_LINE from $logf" -+ fi -+ drop_tmp_file -+ trap "" 0 -+} -+ -+# -+# find log/set of logs which are interesting for us -+# -+dumplogset() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ local logf_set=`arch_logs $logf $from_time $to_time` -+ if [ x = "x$logf_set" ]; then -+ return -+ fi -+ -+ local num_logs=`echo "$logf_set" | wc -l` -+ local oldest=`echo $logf_set | awk '{print $NF}'` -+ local newest=`echo $logf_set | awk '{print $1}'` -+ local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` -+ -+ # the first logfile: from $from_time to $to_time (or end) -+ # logfiles in the middle: all -+ # the last logfile: from beginning to $to_time (or end) -+ case $num_logs in -+ 1) print_logseg $newest $from_time $to_time;; -+ *) -+ print_logseg $oldest $from_time 0 -+ for f in $mid_logfiles; do -+ `find_decompressor $f` $f -+ debug "including complete $f logfile" -+ done -+ print_logseg $newest 0 $to_time -+ ;; -+ esac -+} -+ -+# cut out a stanza -+getstanza() { -+ awk -v name="$1" ' -+ !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start -+ if ($1 == name) -+ in_stanza = 1 -+ } -+ in_stanza { print } -+ in_stanza && NF==1 && $1 == "}" { exit } -+ ' -+} -+# supply stanza in $1 and variable name in $2 -+# (stanza is optional) -+getcfvar() { -+ cf_type=$1; shift; -+ cf_var=$1; shift; -+ cf_file=$* -+ -+ [ -f "$cf_file" ] || return -+ case $cf_type in -+ cman) -+ grep $cf_var $cf_file | sed s/.*$cf_var=\"// | sed s/\".*// -+ ;; -+ corosync|openais) -+ sed 's/#.*//' < $cf_file | -+ if [ $# -eq 2 ]; then -+ getstanza "$cf_var" -+ shift 1 -+ else -+ cat -+ fi | -+ awk -v varname="$cf_var" ' -+ NF==2 && match($1,varname":$")==1 { print $2; exit; } -+ ' -+ ;; -+ heartbeat) -+ sed 's/#.*//' < $cf_file | -+ grep -w "^$cf_var" | -+ sed 's/^[^[:space:]]*[[:space:]]*//' -+ -+ ;; -+ logd) -+ sed 's/#.*//' < $cf_file | -+ grep -w "^$cf_var" | -+ sed 's/^[^[:space:]]*[[:space:]]*//' -+ -+ ;; -+ esac -+} -+ -+pickfirst() { -+ for x; do -+ which $x >/dev/null 2>&1 && { -+ echo $x -+ return 0 -+ } -+ done -+ return 1 -+} -+ -+# -+# figure out the cluster type, depending on the process list -+# and existence of configuration files -+# -+get_cluster_type() { -+ if -+ ps -ef | egrep -qs '[c]orosync' -+ then -+ tool=`pickfirst corosync-objctl corosync-cmapctl` -+ case $tool in -+ *objctl) quorum=`$tool -a | grep quorum.provider | sed s/.*=//`;; -+ *cmapctl) quorum=`$tool | grep quorum.provider | sed s/.*=//`;; -+ esac -+ if [ x"$quorum" = x"quorum_cman" ]; then -+ stack="cman" -+ else -+ stack="corosync" -+ fi -+ -+ elif -+ ps -ef | egrep -qs '[a]isexec' -+ then -+ stack="openais" -+ elif -+ ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat' -+ then -+ stack="heartbeat" -+ -+ # Now we're guessing... 
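-+ # (no cluster process is running, so fall back to whichever
-+ # configuration file exists below; e.g. a node with only
-+ # /etc/corosync/corosync.conf present is reported as 'corosync'.
-+ # Callers capture the echoed result, roughly:
-+ # stack=`get_cluster_type`; cf=`find_cluster_cf $stack`)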
-+ -+ elif [ -f /etc/cluster/cluster.conf ]; then -+ stack="cman" -+ -+ # TODO: Technically these could be anywhere :-/ -+ elif [ -f /etc/corosync/corosync.conf ]; then -+ stack="corosync" -+ -+ elif [ -f /etc/ais/openais.conf ]; then -+ stack="openais" -+ -+ else -+ stack="heartbeat" -+ fi -+ -+ debug "Detected the '$stack' cluster stack" -+ echo $stack -+} -+ -+find_cluster_cf() { -+ case $1 in -+ cman) echo "/etc/cluster/cluster.conf";; -+ corosync) -+ best_size=0 -+ best_file="" -+ -+ # TODO: Technically these could be anywhere :-/ -+ for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do -+ if [ -f $cf ]; then -+ size=`wc -l $cf | awk '{print $1}'` -+ if [ $size -gt $best_size ]; then -+ best_size=$size -+ best_file=$cf -+ fi -+ fi -+ done -+ if [ -z "$best_file" ]; then -+ debug "Looking for corosync configuration file. This may take a while..." -+ for f in `find / $depth -type f -name corosync.conf`; do -+ best_file=$f -+ break -+ done -+ fi -+ debug "Located corosync config file: $best_file" -+ echo "$best_file" -+ ;; -+ openais) -+ # TODO: Technically it could be anywhere :-/ -+ cf="/etc/ais/openais.conf" -+ if [ -f $cf ]; then -+ echo "$cf" -+ fi -+ ;; -+ heartbeat) -+ cf="/etc/ha.d/ha.cf" -+ if [ -f $cf ]; then -+ echo "$cf" -+ fi -+ ;; -+ *) -+ warning "Unknown cluster type: $1" -+ ;; -+ esac -+} -+ -+# -+# check for the major prereq for a) parameter parsing and b) -+# parsing logs -+# -+t=`get_time "12:00"` -+if [ "$t" = "" ]; then -+ fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" -+fi -diff --git a/xml/resources-1.2.rng b/xml/resources-1.2.rng -index 0ef6066..81a8f82 100644 ---- a/xml/resources-1.2.rng -+++ b/xml/resources-1.2.rng -@@ -36,6 +36,7 @@ - upstart - service - systemd -+ nagios - - - -@@ -210,6 +211,7 @@ - restart - standby - fence -+ restart-container - - - diff --git a/pacemaker-7d8acec.patch b/pacemaker-7d8acec.patch deleted file mode 100644 index ce15322..0000000 --- a/pacemaker-7d8acec.patch +++ /dev/null @@ -1,3677 +0,0 @@ -diff --git a/cib/callbacks.c b/cib/callbacks.c -index 754e218..77853d9 100644 ---- a/cib/callbacks.c -+++ b/cib/callbacks.c -@@ -1391,7 +1391,6 @@ initiate_exit(void) - - extern int remote_fd; - extern int remote_tls_fd; --extern void terminate_cs_connection(void); - - void - terminate_cib(const char *caller, gboolean fast) -diff --git a/cib/main.c b/cib/main.c -index 6b56274..3328558 100644 ---- a/cib/main.c -+++ b/cib/main.c -@@ -371,15 +371,25 @@ ccm_connect(void) - #endif - - #if SUPPORT_COROSYNC --static gboolean --cib_ais_dispatch(int kind, const char *from, const char *data) -+static void -+cib_cs_dispatch(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { -+ uint32_t kind = 0; - xmlNode *xml = NULL; -+ const char *from = NULL; -+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); - -+ if(data == NULL) { -+ return; -+ } - if (kind == crm_class_cluster) { - xml = string2xml(data); - if (xml == NULL) { -- goto bail; -+ crm_err("Invalid XML: '%.120s'", data); -+ free(data); -+ return; - } - crm_xml_add(xml, F_ORIG, from); - /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ -@@ -387,16 +397,11 @@ cib_ais_dispatch(int kind, const char *from, const char *data) - } - - free_xml(xml); -- return TRUE; -- -- bail: -- crm_err("Invalid XML: '%.120s'", data); -- return TRUE; -- -+ free(data); - } - - static void --cib_ais_destroy(gpointer user_data) -+cib_cs_destroy(gpointer user_data) - 
{ - if (cib_shutdown_flag) { - crm_info("Corosync disconnection complete"); -@@ -463,8 +468,9 @@ cib_init(void) - { - if (is_openais_cluster()) { - #if SUPPORT_COROSYNC -- crm_cluster.destroy = cib_ais_destroy; -- crm_cluster.cs_dispatch = cib_ais_dispatch; -+ crm_cluster.destroy = cib_cs_destroy; -+ crm_cluster.cpg.cpg_deliver_fn = cib_cs_dispatch; -+ crm_cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; - #endif - } else if (is_heartbeat_cluster()) { - #if SUPPORT_HEARTBEAT -diff --git a/configure.ac b/configure.ac -index be8261a..7d2e384 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -132,7 +132,7 @@ try_extract_header_define() { - AC_MSG_RESULT($value) - fi - printf $value -- rm -rf ${Cfile}.cc ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno -+ rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno - } - - extract_header_define() { -@@ -669,14 +669,6 @@ else - fi - AC_MSG_RESULT(using $GLIBCONFIG) - --if -- $PKGCONFIG --exists systemd --then -- systemdunitdir=`$PKGCONFIG --variable=systemdsystemunitdir systemd` -- AC_SUBST(systemdunitdir) --fi --AM_CONDITIONAL(HAVE_SYSTEMD, test -n "$systemdunitdir" -a "x$systemdunitdir" != xno) -- - # - # Where is dlopen? - # -@@ -965,50 +957,37 @@ dnl ======================================================================== - dnl Profiling and GProf - dnl ======================================================================== - --case $SUPPORT_PROFILING in -+case $SUPPORT_GCOV in - 1|yes|true) - SUPPORT_PROFILING=1 -- -- dnl Enable gprof -- #LIBS="$LIBS -pg" -- #CFLAGS="$CFLAGS -pg" -- -- dnl Disable various compiler optimizations -- CFLAGS="$CFLAGS -fno-omit-frame-pointer" -- #CFLAGS="$CFLAGS -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls" -- dnl CFLAGS="$CFLAGS -fno-default-inline -fno-inline" -- -- dnl Update features -- PCMK_FEATURES="$PCMK_FEATURES gprof" - ;; -- *) SUPPORT_PROFILING=0;; - esac --AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for gprof profiling) - --case $SUPPORT_GCOV in -+case $SUPPORT_PROFILING in - 1|yes|true) -- SUPPORT_GCOV=1 -+ SUPPORT_PROFILING=1 - - dnl Enable gprof - #LIBS="$LIBS -pg" - #CFLAGS="$CFLAGS -pg" - - dnl Disable various compiler optimizations -- CFLAGS="$CFLAGS -fprofile-arcs -ftest-coverage -fno-inline" -+ CFLAGS="$CFLAGS -fno-omit-frame-pointer -fprofile-arcs -ftest-coverage -fno-inline" -+ #CFLAGS="$CFLAGS -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls" -+ dnl CFLAGS="$CFLAGS -fno-default-inline -fno-inline" - -- dnl Turn off optimization so code coverage tool -- dnl can get accurate line numbers -+ dnl Turn off optimization so code coverage tool can get accurate line numbers - AC_MSG_NOTICE(Old CFLAGS: $CFLAGS) -- CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g'` -+ CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g' -e 's/-D_FORTIFY_SOURCE=.\ //g'` - CFLAGS="$CFLAGS -O0" - AC_MSG_NOTICE(New CFLAGS: $CFLAGS) - - dnl Update features -- PCMK_FEATURES="$PCMK_FEATURES gcov" -+ PCMK_FEATURES="$PCMK_FEATURES profile" - ;; - *) SUPPORT_PROFILING=0;; - esac --AC_DEFINE_UNQUOTED(SUPPORT_GCOV, $SUPPORT_GCOV, Support for gcov coverage testing) -+AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for profiling) - - dnl ======================================================================== - dnl Cluster infrastructure - Heartbeat / LibQB -@@ -1192,14 +1171,25 @@ fi - AC_DEFINE_UNQUOTED(SUPPORT_UPSTART, $HAVE_upstart, Support upstart based system services) - 
AM_CONDITIONAL(BUILD_UPSTART, test $HAVE_upstart = 1)
-
-+if
-+ $PKGCONFIG --exists systemd
-+then
-+ systemdunitdir=`$PKGCONFIG --variable=systemdsystemunitdir systemd`
-+ AC_SUBST(systemdunitdir)
-+else
-+ enable_systemd=no
-+fi
-+
- if test $HAVE_gio = 1 -a "x${enable_systemd}" != xno; then
-- HAVE_systemd=1
-- PCMK_FEATURES="$PCMK_FEATURES systemd"
-+ if test -n "$systemdunitdir" -a "x$systemdunitdir" != xno; then
-+ HAVE_systemd=1
-+ PCMK_FEATURES="$PCMK_FEATURES systemd"
-+ fi
- fi
-+
- AC_DEFINE_UNQUOTED(SUPPORT_SYSTEMD, $HAVE_systemd, Support systemd based system services)
- AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1)
-
--
- case $SUPPORT_NAGIOS in
- 1|yes|true|try)
- SUPPORT_NAGIOS=1;;
-diff --git a/crmd/control.c b/crmd/control.c
-index 7f423db..0808f56 100644
---- a/crmd/control.c
-+++ b/crmd/control.c
-@@ -915,7 +915,7 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
- if (is_classic_ais_cluster()) {
- value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES);
- crm_debug("Sending expected-votes=%s to corosync", value);
-- send_ais_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais);
-+ send_cluster_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais);
- }
- #endif
-
-diff --git a/crmd/corosync.c b/crmd/corosync.c
-index 6385780..c4aef38 100644
---- a/crmd/corosync.c
-+++ b/crmd/corosync.c
-@@ -41,8 +41,10 @@ extern void crmd_ha_connection_destroy(gpointer user_data);
- /* A_HA_CONNECT */
- #if SUPPORT_COROSYNC
-
--static gboolean
--crmd_ais_dispatch(int kind, const char *from, const char *data)
-+static void
-+crmd_cs_dispatch(cpg_handle_t handle,
-+ const struct cpg_name *groupName,
-+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
- {
- int seq = 0;
- xmlNode *xml = NULL;
-@@ -50,10 +52,18 @@ crmd_ais_dispatch(int kind, const char *from, const char *data)
- crm_node_t *peer = NULL;
- enum crm_proc_flag flag = crm_proc_cpg;
-
-+ uint32_t kind = 0;
-+ const char *from = NULL;
-+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
-+
-+ if(data == NULL) {
-+ return;
-+ }
- xml = string2xml(data);
- if (xml == NULL) {
- crm_err("Could not parse message content (%d): %.100s", kind, data);
-- return TRUE;
-+ free(data);
-+ return;
- }
-
- switch (kind) {
-@@ -103,8 +113,8 @@ crmd_ais_dispatch(int kind, const char *from, const char *data)
- /* If we can still talk to our peer process on that node,
- * then its also part of the corosync membership
- */
-- crm_err("Receiving messages from a node we think is dead: %s[%d]", peer->uname,
-- peer->id);
-+ crm_warn("Receiving messages from a node we think is dead: %s[%d]", peer->uname,
-+ peer->id);
- crm_update_peer_proc(__FUNCTION__, peer, flag, ONLINESTATUS);
- }
- crmd_ha_msg_filter(xml);
-@@ -123,8 +133,8 @@ crmd_ais_dispatch(int kind, const char *from, const char *data)
- crm_err("Invalid message class (%d): %.100s", kind, data);
- }
-
-+ free(data);
- free_xml(xml);
-- return TRUE;
- }
-
- static gboolean
-@@ -148,7 +158,7 @@ crmd_quorum_destroy(gpointer user_data)
- }
-
- static void
--crmd_ais_destroy(gpointer user_data)
-+crmd_cs_destroy(gpointer user_data)
- {
- if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) {
- crm_err("connection terminated");
-@@ -182,8 +192,9 @@ crm_connect_corosync(crm_cluster_t * cluster)
-
- if (is_openais_cluster()) {
- crm_set_status_callback(&peer_update_callback);
-- cluster->cs_dispatch = crmd_ais_dispatch;
-- cluster->destroy = crmd_ais_destroy;
-+ cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
-+ 
cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; -+ cluster->destroy = crmd_cs_destroy; - - rc = crm_cluster_connect(cluster); - } -diff --git a/crmd/election.c b/crmd/election.c -index 1946858..25cb647 100644 ---- a/crmd/election.c -+++ b/crmd/election.c -@@ -518,7 +518,7 @@ do_dc_takeover(long long action, - - #if SUPPORT_COROSYNC - if (is_classic_ais_cluster()) { -- send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); -+ send_cluster_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); - } - #endif - -diff --git a/crmd/lrm.c b/crmd/lrm.c -index 31f00d7..15bad88 100644 ---- a/crmd/lrm.c -+++ b/crmd/lrm.c -@@ -1929,6 +1929,7 @@ do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_da - - } else { - crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); -+ send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); - goto cleanup; - } - -diff --git a/doc/Pacemaker_Remote/en-US/Revision_History.xml b/doc/Pacemaker_Remote/en-US/Revision_History.xml -index 26d8ab6..257ecbd 100644 ---- a/doc/Pacemaker_Remote/en-US/Revision_History.xml -+++ b/doc/Pacemaker_Remote/en-US/Revision_History.xml -@@ -8,13 +8,13 @@ - - - -- 1 -+ 1-0 - Tue Mar 19 2013 - DavidVosseldvossel@redhat.com - Import from Pages.app - - -- 2 -+ 2-0 - Tue May 13 2013 - DavidVosseldvossel@redhat.com - Added Future Features Section -diff --git a/extra/cluster-init b/extra/cluster-init -index 5dc71c2..fe0ff61 100755 ---- a/extra/cluster-init -+++ b/extra/cluster-init -@@ -294,10 +294,10 @@ esac - - case $DATE in - [Yy][Ee][Ss]|[Yy]) -- now=`date` - for host in $host_list; do - echo "Setting time on ${host}" - scp /etc/localtime root@${host}:/etc -+ now=`date` - ssh -l root ${host} -- date -s "'$now'" - echo "" - done -diff --git a/fencing/fence_dummy b/fencing/fence_dummy -index b202977..8cf5103 100644 ---- a/fencing/fence_dummy -+++ b/fencing/fence_dummy -@@ -5,7 +5,7 @@ - # Virsh 0.3.3 on RHEL 5.2 with xen-3.0.3-51 - # - --import sys, time, random -+import sys, time, random, os, atexit, getopt, re - - #BEGIN_VERSION_GENERATION - RELEASE_VERSION="3.1.6" -@@ -42,14 +42,28 @@ all_opt = { - "debug" : { - "getopt" : "D:", - "longopt" : "debug-file", -- "help" : "-D, --debug-file= Debugging to output file", -+ "help" : "-D, --debug-file=[debugfile] Debugging to output file", - "required" : "0", - "shortdesc" : "Write debug information to given file", - "order" : 52 }, -+ "random_sleep_range": { -+ "getopt" : "R:", -+ "required" : "0", -+ "longopt" : "random_sleep_range", -+ "help" : "--random_sleep-range=[seconds] Issue a sleep between 1 and [seconds]. Used for testing.", -+ "shortdesc" : "Issue a sleep between 1 and [seconds]", -+ "order" : 1 }, -+ "mode": { -+ "getopt" : "M:", -+ "longopt" : "mode", -+ "required" : "0", -+ "help" : "--mode=(pass|fail|random). 
Used for testing.", -+ "shortdesc" : "Should operations always pass, always fail or fail at random", -+ "order" : 1 }, - "delay" : { - "getopt" : "f:", - "longopt" : "delay", -- "help" : "--delay Wait X seconds before fencing is started", -+ "help" : "--delay [seconds] Wait X seconds before fencing is started", - "required" : "0", - "shortdesc" : "Wait X seconds before fencing is started", - "default" : "0", -@@ -57,7 +71,7 @@ all_opt = { - "action" : { - "getopt" : "o:", - "longopt" : "action", -- "help" : "-o, --action= Action: status, reboot (default), off or on", -+ "help" : "-o, --action=[action] Action: status, reboot (default), off or on", - "required" : "1", - "shortdesc" : "Fencing Action", - "default" : "reboot", -@@ -65,7 +79,7 @@ all_opt = { - "port" : { - "getopt" : "n:", - "longopt" : "plug", -- "help" : "-n, --plug= Physical plug number on device or\n" + -+ "help" : "-n, --plug=[id] Physical plug number on device or\n" + - " name of virtual machine", - "required" : "1", - "shortdesc" : "Physical plug number or name of virtual machine", -@@ -73,7 +87,7 @@ all_opt = { - "switch" : { - "getopt" : "s:", - "longopt" : "switch", -- "help" : "-s, --switch= Physical switch number on device", -+ "help" : "-s, --switch=[id] Physical switch number on device", - "required" : "0", - "shortdesc" : "Physical switch number on device", - "order" : 1 }, -@@ -86,8 +100,6 @@ all_opt = { - "order" : 1} - } - --common_opt = [ "retry_on", "delay" ] -- - def show_docs(options, docs = None): - device_opt = options["device_opt"] - -@@ -189,12 +201,6 @@ def metadata(avail_opt, options, docs): - - def process_input(avail_opt): - global all_opt -- global common_opt -- -- ## -- ## Add options which are available for every fence agent -- ##### -- avail_opt.extend(common_opt) - - ## - ## Set standard environment -@@ -290,24 +296,11 @@ def atexit_handler(): - os.close(1) - except IOError: - sys.stderr.write("%s failed to close standard output\n"%(sys.argv[0])) -- sys.exit(EC_GENERIC_ERROR) -+ sys.exit(1) - - def main(): - global all_opt -- device_opt = [ "help", "version", "verbose", "debug", "action", "port", -- "power_timeout", "random_sleep_range"] -- -- all_opt["random_sleep_range"] = { -- "getopt" : "R:", -- "longopt" : "random_sleep_range", -- "help" : "--random_sleep-range=Issue a sleep between 1 and . Used for testing.", -- "order" : 1 } -- -- all_opt["mode"] = { -- "getopt" : "M:", -- "longopt" : "mode", -- "help" : "--mode=(pass|fail|random). 
Used for testing.", -- "order" : 1 } -+ device_opt = [ "help", "version", "verbose", "debug", "action", "port", "mode", "random_sleep_range"] - - ## Defaults for fence agent - docs = { } -@@ -316,6 +309,7 @@ def main(): - - atexit.register(atexit_handler) - options = process_input(device_opt) -+ options["device_opt"] = device_opt - show_docs(options, docs) - - # random sleep for testing -diff --git a/fencing/main.c b/fencing/main.c -index c7b67a1..fee9f7a 100644 ---- a/fencing/main.c -+++ b/fencing/main.c -@@ -190,15 +190,25 @@ stonith_peer_hb_destroy(gpointer user_data) - #endif - - #if SUPPORT_COROSYNC --static gboolean --stonith_peer_ais_callback(int kind, const char *from, const char *data) -+static void -+stonith_peer_ais_callback(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { -+ uint32_t kind = 0; - xmlNode *xml = NULL; -+ const char *from = NULL; -+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); - -+ if(data == NULL) { -+ return; -+ } - if (kind == crm_class_cluster) { - xml = string2xml(data); - if (xml == NULL) { -- goto bail; -+ crm_err("Invalid XML: '%.120s'", data); -+ free(data); -+ return; - } - crm_xml_add(xml, F_ORIG, from); - /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ -@@ -206,18 +216,14 @@ stonith_peer_ais_callback(int kind, const char *from, const char *data) - } - - free_xml(xml); -- return TRUE; -- -- bail: -- crm_err("Invalid XML: '%.120s'", data); -- return TRUE; -- -+ free(data); -+ return; - } - - static void --stonith_peer_ais_destroy(gpointer user_data) -+stonith_peer_cs_destroy(gpointer user_data) - { -- crm_err("AIS connection terminated"); -+ crm_err("Corosync connection terminated"); - stonith_shutdown(0); - } - #endif -@@ -1084,8 +1090,9 @@ main(int argc, char **argv) - - if (is_openais_cluster()) { - #if SUPPORT_COROSYNC -- cluster.destroy = stonith_peer_ais_destroy; -- cluster.cs_dispatch = stonith_peer_ais_callback; -+ cluster.destroy = stonith_peer_cs_destroy; -+ cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback; -+ cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; - #endif - } - -diff --git a/include/crm/cluster.h b/include/crm/cluster.h -index cac863f..c999367 100644 ---- a/include/crm/cluster.h -+++ b/include/crm/cluster.h -@@ -26,9 +26,12 @@ - # include - # endif - -+# if SUPPORT_COROSYNC -+# include -+# endif -+ - extern gboolean crm_have_quorum; - extern GHashTable *crm_peer_cache; --extern GHashTable *crm_peer_id_cache; - extern unsigned long long crm_peer_seq; - - # ifndef CRM_SERVICE -@@ -73,21 +76,24 @@ typedef struct crm_peer_node_s { - - void crm_peer_init(void); - void crm_peer_destroy(void); --char *get_corosync_uuid(crm_node_t *peer); --int get_corosync_id(int id, const char *uuid); - - typedef struct crm_cluster_s { - char *uuid; - char *uname; - uint32_t nodeid; - -+ void (*destroy) (gpointer); -+ - # if SUPPORT_HEARTBEAT - ll_cluster_t *hb_conn; - void (*hb_dispatch) (HA_Message * msg, void *private); - # endif - -- gboolean(*cs_dispatch) (int kind, const char *from, const char *data); -- void (*destroy) (gpointer); -+# if SUPPORT_COROSYNC -+ struct cpg_name group; -+ cpg_callbacks_t cpg; -+ cpg_handle_t cpg_handle; -+# endif - - } crm_cluster_t; - -@@ -122,8 +128,6 @@ enum crm_ais_msg_types { - gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, - xmlNode * data, gboolean ordered); - --void destroy_crm_node(gpointer /* crm_node_t* */ data); -- - crm_node_t *crm_get_peer(unsigned int id, const 
char *uname); - - guint crm_active_peers(void); -@@ -138,8 +142,18 @@ gboolean crm_is_heartbeat_peer_active(const crm_node_t * node); - - # if SUPPORT_COROSYNC - extern int ais_fd_sync; -+uint32_t get_local_nodeid(cpg_handle_t handle); -+ -+gboolean cluster_connect_cpg(crm_cluster_t *cluster); -+void cluster_disconnect_cpg(crm_cluster_t * cluster); -+ -+void pcmk_cpg_membership(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ const struct cpg_address *member_list, size_t member_list_entries, -+ const struct cpg_address *left_list, size_t left_list_entries, -+ const struct cpg_address *joined_list, size_t joined_list_entries); - gboolean crm_is_corosync_peer_active(const crm_node_t * node); --gboolean send_ais_text(int class, const char *data, gboolean local, -+gboolean send_cluster_text(int class, const char *data, gboolean local, - crm_node_t * node, enum crm_ais_msg_types dest); - # endif - -@@ -180,4 +194,7 @@ gboolean is_heartbeat_cluster(void); - const char *get_local_node_name(void); - char *get_node_name(uint32_t nodeid); - -+char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *msg, -+ uint32_t *kind, const char **from); -+ - #endif -diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h -index 2fa8e08..791a1f9 100644 ---- a/include/crm/cluster/internal.h -+++ b/include/crm/cluster/internal.h -@@ -349,20 +349,24 @@ gboolean heartbeat_initialize_nodelist(void *cluster, gboolean force_member, xml - - # if SUPPORT_COROSYNC - -+gboolean send_cpg_iov(struct iovec * iov); -+ - # if SUPPORT_PLUGIN - char *classic_node_name(uint32_t nodeid); -+void plugin_handle_membership(AIS_Message *msg); -+bool send_plugin_text(int class, struct iovec *iov); - # else - char *corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid); - # endif - - gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent); - --gboolean send_ais_message(xmlNode * msg, gboolean local, -- crm_node_t * node, enum crm_ais_msg_types dest); -+gboolean send_cluster_message_cs(xmlNode * msg, gboolean local, -+ crm_node_t * node, enum crm_ais_msg_types dest); - - enum cluster_type_e find_corosync_variant(void); - --void terminate_cs_connection(void); -+void terminate_cs_connection(crm_cluster_t * cluster); - gboolean init_cs_connection(crm_cluster_t * cluster); - gboolean init_cs_connection_once(crm_cluster_t * cluster); - # endif -@@ -377,6 +381,8 @@ enum crm_quorum_source { - crm_quorum_pacemaker, - }; - -+int get_corosync_id(int id, const char *uuid); -+char *get_corosync_uuid(crm_node_t *peer); - enum crm_quorum_source get_quorum_source(void); - - void crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); -diff --git a/lib/cluster/Makefile.am b/lib/cluster/Makefile.am -index a5a70ff..744ff27 100644 ---- a/lib/cluster/Makefile.am -+++ b/lib/cluster/Makefile.am -@@ -33,6 +33,7 @@ libcrmcluster_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la $(top_buil - libcrmcluster_la_DEPENDENCIES = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la - - if BUILD_CS_SUPPORT -+libcrmcluster_la_SOURCES += cpg.c - if BUILD_CS_PLUGIN - libcrmcluster_la_SOURCES += legacy.c - else -diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c -index 9538816..5820c8d 100644 ---- a/lib/cluster/cluster.c -+++ b/lib/cluster/cluster.c -@@ -240,7 +240,7 @@ crm_cluster_disconnect(crm_cluster_t * cluster) - #if SUPPORT_COROSYNC - if (is_openais_cluster()) 
{ - crm_peer_destroy(); -- terminate_cs_connection(); -+ terminate_cs_connection(cluster); - crm_info("Disconnected from %s", type_str); - return; - } -@@ -274,7 +274,7 @@ send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode - - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { -- return send_ais_message(data, FALSE, node, service); -+ return send_cluster_message_cs(data, FALSE, node, service); - } - #endif - #if SUPPORT_HEARTBEAT -diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c -index 83a0c78..5a64fe1 100644 ---- a/lib/cluster/corosync.c -+++ b/lib/cluster/corosync.c -@@ -34,69 +34,16 @@ - #include - #include - #include --#include - #include - #include - #include - - #include - --cpg_handle_t pcmk_cpg_handle = 0; -- --struct cpg_name pcmk_cpg_group = { -- .length = 0, -- .value[0] = 0, --}; -- - quorum_handle_t pcmk_quorum_handle = 0; - - gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; - --#define cs_repeat(counter, max, code) do { \ -- code; \ -- if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ -- counter++; \ -- crm_debug("Retrying operation after %ds", counter); \ -- sleep(counter); \ -- } else { \ -- break; \ -- } \ -- } while(counter < max) -- --static uint32_t get_local_nodeid(cpg_handle_t handle) --{ -- int rc = CS_OK; -- int retries = 0; -- static uint32_t local_nodeid = 0; -- cpg_handle_t local_handle = handle; -- cpg_callbacks_t cb = { }; -- -- if(local_nodeid != 0) { -- return local_nodeid; -- } -- -- if(handle == 0) { -- crm_trace("Creating connection"); -- cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); -- } -- -- if (rc == CS_OK) { -- retries = 0; -- crm_trace("Performing lookup"); -- cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); -- } -- -- if (rc != CS_OK) { -- crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); -- } -- if(handle == 0) { -- crm_trace("Closing connection"); -- cpg_finalize(local_handle); -- } -- crm_debug("Local nodeid is %u", local_nodeid); -- return local_nodeid; --} -- - /* - * CFG functionality stolen from node_name() in corosync-quorumtool.c - * This resolves the first address assigned to a node and returns the name or IP address. -@@ -189,281 +136,12 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) - return name; - } - --enum crm_ais_msg_types --text2msg_type(const char *text) --{ -- int type = crm_msg_none; -- -- CRM_CHECK(text != NULL, return type); -- if (safe_str_eq(text, "ais")) { -- type = crm_msg_ais; -- } else if (safe_str_eq(text, "crm_plugin")) { -- type = crm_msg_ais; -- } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { -- type = crm_msg_cib; -- } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { -- type = crm_msg_crmd; -- } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { -- type = crm_msg_crmd; -- } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { -- type = crm_msg_te; -- } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { -- type = crm_msg_pe; -- } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { -- type = crm_msg_lrmd; -- } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { -- type = crm_msg_stonithd; -- } else if (safe_str_eq(text, "stonith-ng")) { -- type = crm_msg_stonith_ng; -- } else if (safe_str_eq(text, "attrd")) { -- type = crm_msg_attrd; -- -- } else { -- /* This will normally be a transient client rather than -- * a cluster daemon. 
Set the type to the pid of the client -- */ -- int scan_rc = sscanf(text, "%d", &type); -- -- if (scan_rc != 1) { -- /* Ensure its sane */ -- type = crm_msg_none; -- } -- } -- return type; --} -- --GListPtr cs_message_queue = NULL; --int cs_message_timer = 0; -- --static ssize_t crm_cs_flush(void); -- --static gboolean --crm_cs_flush_cb(gpointer data) --{ -- cs_message_timer = 0; -- crm_cs_flush(); -- return FALSE; --} -- --#define CS_SEND_MAX 200 --static ssize_t --crm_cs_flush(void) --{ -- int sent = 0; -- ssize_t rc = 0; -- int queue_len = 0; -- static unsigned int last_sent = 0; -- -- if (pcmk_cpg_handle == 0) { -- crm_trace("Connection is dead"); -- return pcmk_ok; -- } -- -- queue_len = g_list_length(cs_message_queue); -- if ((queue_len % 1000) == 0 && queue_len > 1) { -- crm_err("CPG queue has grown to %d", queue_len); -- -- } else if (queue_len == CS_SEND_MAX) { -- crm_warn("CPG queue has grown to %d", queue_len); -- } -- -- if (cs_message_timer) { -- /* There is already a timer, wait until it goes off */ -- crm_trace("Timer active %d", cs_message_timer); -- return pcmk_ok; -- } -- -- while (cs_message_queue && sent < CS_SEND_MAX) { -- AIS_Message *header = NULL; -- struct iovec *iov = cs_message_queue->data; -- -- errno = 0; -- rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, iov, 1); -- -- if (rc != CS_OK) { -- break; -- } -- -- sent++; -- header = iov->iov_base; -- last_sent = header->id; -- if (header->compressed_size) { -- crm_trace("CPG message %d (%d compressed bytes) sent", -- header->id, header->compressed_size); -- } else { -- crm_trace("CPG message %d (%d bytes) sent: %.200s", -- header->id, header->size, header->data); -- } -- -- cs_message_queue = g_list_remove(cs_message_queue, iov); -- free(iov[0].iov_base); -- free(iov); -- } -- -- queue_len -= sent; -- if (sent > 1 || cs_message_queue) { -- crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -- sent, queue_len, last_sent, ais_error2text(rc), rc); -- } else { -- crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -- sent, queue_len, last_sent, ais_error2text(rc), rc); -- } -- -- if (cs_message_queue) { -- uint32_t delay_ms = 100; -- if(rc != CS_OK) { -- /* Proportionally more if sending failed but cap at 1s */ -- delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); -- } -- cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, NULL); -- } -- -- return rc; --} -- --gboolean --send_ais_text(int class, const char *data, -- gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) --{ -- static int msg_id = 0; -- static int local_pid = 0; -- static int local_name_len = 0; -- static const char *local_name = NULL; -- -- char *target = NULL; -- struct iovec *iov; -- AIS_Message *ais_msg = NULL; -- enum crm_ais_msg_types sender = text2msg_type(crm_system_name); -- -- /* There are only 6 handlers registered to crm_lib_service in plugin.c */ -- CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); -- return FALSE); -- -- CRM_CHECK(dest != crm_msg_ais, return FALSE); -- -- if(local_name == NULL) { -- local_name = get_local_node_name(); -- } -- if(local_name_len == 0 && local_name) { -- local_name_len = strlen(local_name); -- } -- -- if (data == NULL) { -- data = ""; -- } -- -- if (local_pid == 0) { -- local_pid = getpid(); -- } -- -- if (sender == crm_msg_none) { -- sender = local_pid; -- } -- -- ais_msg = calloc(1, sizeof(AIS_Message)); -- -- ais_msg->id = msg_id++; -- ais_msg->header.id = class; -- ais_msg->header.error = CS_OK; -- -- ais_msg->host.type = dest; 
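-- /* "host" addresses the intended recipient (message type here,
-- * node id/name just below); "sender" identifies this process */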
-- ais_msg->host.local = local; -- -- if (node) { -- if (node->uname) { -- target = strdup(node->uname); -- ais_msg->host.size = strlen(node->uname); -- memset(ais_msg->host.uname, 0, MAX_NAME); -- memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size); -- } else { -- target = g_strdup_printf("%u", node->id); -- } -- ais_msg->host.id = node->id; -- } else { -- target = strdup("all"); -- } -- -- ais_msg->sender.id = 0; -- ais_msg->sender.type = sender; -- ais_msg->sender.pid = local_pid; -- ais_msg->sender.size = local_name_len; -- memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, local_name, ais_msg->sender.size); -- -- ais_msg->size = 1 + strlen(data); -- ais_msg->header.size = sizeof(AIS_Message) + ais_msg->size; -- -- if (ais_msg->size < CRM_BZ2_THRESHOLD) { -- ais_msg = realloc(ais_msg, ais_msg->header.size); -- memcpy(ais_msg->data, data, ais_msg->size); -- -- } else { -- char *compressed = NULL; -- unsigned int new_size = 0; -- char *uncompressed = strdup(data); -- -- if (crm_compress_string(uncompressed, ais_msg->size, 0, &compressed, &new_size)) { -- -- ais_msg->header.size = sizeof(AIS_Message) + new_size + 1; -- ais_msg = realloc(ais_msg, ais_msg->header.size); -- memcpy(ais_msg->data, compressed, new_size); -- ais_msg->data[new_size] = 0; -- -- ais_msg->is_compressed = TRUE; -- ais_msg->compressed_size = new_size; -- -- } else { -- ais_msg = realloc(ais_msg, ais_msg->header.size); -- memcpy(ais_msg->data, data, ais_msg->size); -- } -- -- free(uncompressed); -- free(compressed); -- } -- -- if (ais_msg->compressed_size) { -- crm_trace("Queueing CPG message %u to %s (%d compressed bytes)", -- ais_msg->id, target, ais_msg->compressed_size); -- } else { -- crm_trace("Queueing CPG message %u to %s (%d bytes)", -- ais_msg->id, target, ais_msg->size); -- } -- -- iov = calloc(1, sizeof(struct iovec)); -- iov->iov_base = ais_msg; -- iov->iov_len = ais_msg->header.size; -- cs_message_queue = g_list_append(cs_message_queue, iov); -- crm_cs_flush(); -- -- free(target); -- return TRUE; --} -- --gboolean --send_ais_message(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) --{ -- gboolean rc = TRUE; -- char *data = dump_xml_unformatted(msg); -- -- rc = send_ais_text(crm_class_cluster, data, local, node, dest); -- free(data); -- return rc; --} -- - void --terminate_cs_connection(void) -+terminate_cs_connection(crm_cluster_t *cluster) - { - crm_notice("Disconnecting from Corosync"); - -- if (pcmk_cpg_handle) { -- crm_trace("Disconnecting CPG"); -- cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); -- cpg_finalize(pcmk_cpg_handle); -- pcmk_cpg_handle = 0; -- -- } else { -- crm_info("No CPG connection"); -- } -+ cluster_disconnect_cpg(cluster); - - if (pcmk_quorum_handle) { - crm_trace("Disconnecting quorum"); -@@ -478,284 +156,6 @@ terminate_cs_connection(void) - int ais_membership_timer = 0; - gboolean ais_membership_force = FALSE; - --static gboolean --ais_dispatch_message(AIS_Message * msg, -- gboolean(*dispatch) (int kind, const char *from, const char *data)) --{ -- char *data = NULL; -- char *uncompressed = NULL; -- -- xmlNode *xml = NULL; -- -- CRM_ASSERT(msg != NULL); -- -- crm_trace("Got new%s message (size=%d, %d, %d)", -- msg->is_compressed ? 
" compressed" : "", -- ais_data_len(msg), msg->size, msg->compressed_size); -- -- data = msg->data; -- if (msg->is_compressed && msg->size > 0) { -- int rc = BZ_OK; -- unsigned int new_size = msg->size + 1; -- -- if (check_message_sanity(msg, NULL) == FALSE) { -- goto badmsg; -- } -- -- crm_trace("Decompressing message data"); -- uncompressed = calloc(1, new_size); -- rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); -- -- if (rc != BZ_OK) { -- crm_err("Decompression failed: %d", rc); -- goto badmsg; -- } -- -- CRM_ASSERT(rc == BZ_OK); -- CRM_ASSERT(new_size == msg->size); -- -- data = uncompressed; -- -- } else if (check_message_sanity(msg, data) == FALSE) { -- goto badmsg; -- -- } else if (safe_str_eq("identify", data)) { -- int pid = getpid(); -- char *pid_s = crm_itoa(pid); -- -- send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); -- free(pid_s); -- goto done; -- } -- -- if (msg->header.id != crm_class_members) { -- /* Is this even needed anymore? */ -- crm_get_peer(msg->sender.id, msg->sender.uname); -- } -- -- if (msg->header.id == crm_class_rmpeer) { -- uint32_t id = crm_int_helper(data, NULL); -- -- crm_info("Removing peer %s/%u", data, id); -- reap_crm_member(id, NULL); -- goto done; -- } -- -- crm_trace("Payload: %.200s", data); -- if (dispatch != NULL) { -- dispatch(msg->header.id, msg->sender.uname, data); -- } -- -- done: -- free(uncompressed); -- free_xml(xml); -- return TRUE; -- -- badmsg: -- crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" -- " min=%d, total=%d, size=%d, bz2_size=%d", -- msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), -- ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), -- msg->sender.pid, (int)sizeof(AIS_Message), -- msg->header.size, msg->size, msg->compressed_size); -- goto done; --} -- --static bool cpg_evicted = FALSE; --gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; -- --static int --pcmk_cpg_dispatch(gpointer user_data) --{ -- int rc = 0; -- -- pcmk_cpg_dispatch_fn = user_data; -- rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); -- if (rc != CS_OK) { -- crm_err("Connection to the CPG API failed: %d", rc); -- pcmk_cpg_handle = 0; -- return -1; -- -- } else if(cpg_evicted) { -- crm_err("Evicted from CPG membership"); -- return -1; -- } -- return 0; --} -- --static void --pcmk_cpg_deliver(cpg_handle_t handle, -- const struct cpg_name *groupName, -- uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) --{ -- AIS_Message *ais_msg = (AIS_Message *) msg; -- uint32_t local_nodeid = get_local_nodeid(handle); -- const char *local_name = get_local_node_name(); -- -- if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { -- crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); -- return; -- -- } else if (ais_msg->host.id != 0 && (local_nodeid != ais_msg->host.id)) { -- /* Not for us */ -- crm_trace("Not for us: %u != %u", ais_msg->host.id, local_nodeid); -- return; -- } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, local_name)) { -- /* Not for us */ -- crm_trace("Not for us: %s != %s", ais_msg->host.uname, local_name); -- return; -- } -- -- ais_msg->sender.id = nodeid; -- if (ais_msg->sender.size == 0) { -- crm_node_t *peer = crm_get_peer(nodeid, NULL); -- -- if (peer == NULL) { -- crm_err("Peer with nodeid=%u is unknown", nodeid); -- -- } else if (peer->uname == NULL) { -- crm_err("No uname for peer with nodeid=%u", nodeid); -- -- } else { -- 
crm_notice("Fixing uname for peer with nodeid=%u", nodeid); -- ais_msg->sender.size = strlen(peer->uname); -- memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); -- } -- } -- -- ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); --} -- --static void --pcmk_cpg_membership(cpg_handle_t handle, -- const struct cpg_name *groupName, -- const struct cpg_address *member_list, size_t member_list_entries, -- const struct cpg_address *left_list, size_t left_list_entries, -- const struct cpg_address *joined_list, size_t joined_list_entries) --{ -- int i; -- gboolean found = FALSE; -- static int counter = 0; -- uint32_t local_nodeid = get_local_nodeid(handle); -- -- for (i = 0; i < left_list_entries; i++) { -- crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); -- -- crm_info("Left[%d.%d] %s.%u ", counter, i, groupName->value, left_list[i].nodeid); -- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); -- } -- -- for (i = 0; i < joined_list_entries; i++) { -- crm_info("Joined[%d.%d] %s.%u ", counter, i, groupName->value, joined_list[i].nodeid); -- } -- -- for (i = 0; i < member_list_entries; i++) { -- crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); -- -- crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid); -- -- /* Anyone that is sending us CPG messages must also be a _CPG_ member. -- * But its _not_ safe to assume its in the quorum membership. -- * We may have just found out its dead and are processing the last couple of messages it sent -- */ -- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- if(peer && peer->state && crm_is_peer_active(peer) == FALSE) { -- time_t now = time(NULL); -- -- /* Co-opt the otherwise unused votes field */ -- if(peer->votes == 0) { -- peer->votes = now; -- -- } else if(now > (60 + peer->votes)) { -- /* On the otherhand, if we're still getting messages, at a certain point -- * we need to acknowledge our internal cache is probably wrong -- * -- * Set the threshold to 1 minute -- */ -- crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id); -- crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); -- peer->votes = 0; -- } -- } -- -- if (local_nodeid == member_list[i].nodeid) { -- found = TRUE; -- } -- } -- -- if (!found) { -- crm_err("We're not part of CPG group '%s' anymore!", groupName->value); -- cpg_evicted = TRUE; -- } -- -- counter++; --} -- --cpg_callbacks_t cpg_callbacks = { -- .cpg_deliver_fn = pcmk_cpg_deliver, -- .cpg_confchg_fn = pcmk_cpg_membership, --}; -- --static gboolean --init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char *data), -- void (*destroy) (gpointer), uint32_t * nodeid) --{ -- int rc = -1; -- int fd = 0; -- int retries = 0; -- uint32_t id = 0; -- crm_node_t *peer = NULL; -- -- struct mainloop_fd_callbacks cpg_fd_callbacks = { -- .dispatch = pcmk_cpg_dispatch, -- .destroy = destroy, -- }; -- -- cpg_evicted = FALSE; -- strncpy(pcmk_cpg_group.value, crm_system_name, 128); -- pcmk_cpg_group.length = strlen(crm_system_name) + 1; -- -- cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); -- if (rc != CS_OK) { -- crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); -- goto bail; -- } -- -- id = get_local_nodeid(pcmk_cpg_handle); -- if (id == 0) { -- crm_err("Could not get local node id from the CPG API"); -- goto bail; -- -- } else if(nodeid) { -- *nodeid = id; -- } -- -- 
retries = 0; -- cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); -- if (rc != CS_OK) { -- crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); -- goto bail; -- } -- -- rc = cpg_fd_get(pcmk_cpg_handle, &fd); -- if (rc != CS_OK) { -- crm_err("Could not obtain the CPG API connection: %d\n", rc); -- goto bail; -- } -- -- mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, dispatch, &cpg_fd_callbacks); -- -- bail: -- if (rc != CS_OK) { -- cpg_finalize(pcmk_cpg_handle); -- return FALSE; -- } -- -- peer = crm_get_peer(id, NULL); -- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- return TRUE; --} - - static int - pcmk_quorum_dispatch(gpointer user_data) -@@ -940,7 +340,7 @@ init_cs_connection_once(crm_cluster_t * cluster) - return FALSE; - } - -- if (init_cpg_connection(cluster->cs_dispatch, cluster->destroy, NULL) == FALSE) { -+ if (cluster_connect_cpg(cluster) == FALSE) { - return FALSE; - } - crm_info("Connection to '%s': established", name_for_cluster_type(stack)); -diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c -new file mode 100644 -index 0000000..903576e ---- /dev/null -+++ b/lib/cluster/cpg.c -@@ -0,0 +1,689 @@ -+/* -+ * Copyright (C) 2004 Andrew Beekhof -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */ -+ -+static bool cpg_evicted = FALSE; -+gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; -+ -+#define cs_repeat(counter, max, code) do { \ -+ code; \ -+ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ -+ counter++; \ -+ crm_debug("Retrying operation after %ds", counter); \ -+ sleep(counter); \ -+ } else { \ -+ break; \ -+ } \ -+ } while(counter < max) -+ -+void -+cluster_disconnect_cpg(crm_cluster_t *cluster) -+{ -+ pcmk_cpg_handle = 0; -+ if (cluster->cpg_handle) { -+ crm_trace("Disconnecting CPG"); -+ cpg_leave(cluster->cpg_handle, &cluster->group); -+ cpg_finalize(cluster->cpg_handle); -+ cluster->cpg_handle = 0; -+ -+ } else { -+ crm_info("No CPG connection"); -+ } -+} -+ -+uint32_t get_local_nodeid(cpg_handle_t handle) -+{ -+ int rc = CS_OK; -+ int retries = 0; -+ static uint32_t local_nodeid = 0; -+ cpg_handle_t local_handle = handle; -+ cpg_callbacks_t cb = { }; -+ -+ if(local_nodeid != 0) { -+ return local_nodeid; -+ } -+ -+#if 0 -+ /* Should not be necessary */ -+ if(get_cluster_type() == pcmk_cluster_classic_ais) { -+ get_ais_details(&local_nodeid, NULL); -+ goto done; -+ } -+#endif -+ -+ if(handle == 0) { -+ crm_trace("Creating connection"); -+ cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); -+ } -+ -+ if (rc == CS_OK) { -+ retries = 0; -+ crm_trace("Performing lookup"); -+ cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); -+ } -+ -+ if (rc != CS_OK) { -+ crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); -+ } -+ if(handle == 0) { -+ crm_trace("Closing connection"); -+ cpg_finalize(local_handle); -+ } -+ crm_debug("Local nodeid is %u", local_nodeid); -+ return local_nodeid; -+} -+ -+ -+GListPtr cs_message_queue = NULL; -+int cs_message_timer = 0; -+ -+static ssize_t crm_cs_flush(gpointer data); -+ -+static gboolean -+crm_cs_flush_cb(gpointer data) -+{ -+ cs_message_timer = 0; -+ crm_cs_flush(data); -+ return FALSE; -+} -+ -+#define CS_SEND_MAX 200 -+static ssize_t -+crm_cs_flush(gpointer data) -+{ -+ int sent = 0; -+ ssize_t rc = 0; -+ int queue_len = 0; -+ static unsigned int last_sent = 0; -+ cpg_handle_t *handle = (cpg_handle_t *)data; -+ -+ if (*handle == 0) { -+ crm_trace("Connection is dead"); -+ return pcmk_ok; -+ } -+ -+ queue_len = g_list_length(cs_message_queue); -+ if ((queue_len % 1000) == 0 && queue_len > 1) { -+ crm_err("CPG queue has grown to %d", queue_len); -+ -+ } else if (queue_len == CS_SEND_MAX) { -+ crm_warn("CPG queue has grown to %d", queue_len); -+ } -+ -+ if (cs_message_timer) { -+ /* There is already a timer, wait until it goes off */ -+ crm_trace("Timer active %d", cs_message_timer); -+ return pcmk_ok; -+ } -+ -+ while (cs_message_queue && sent < CS_SEND_MAX) { -+ struct iovec *iov = cs_message_queue->data; -+ -+ errno = 0; -+ rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1); -+ -+ if (rc != CS_OK) { -+ break; -+ } -+ -+ sent++; -+ last_sent++; -+ crm_trace("CPG message sent, size=%d", iov->iov_len); -+ -+ 
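When messages remain queued, the rest of crm_cs_flush() arms a one-shot g_timeout_add() timer rather than spinning. A sketch of just the delay computation it uses, assuming the CS_SEND_MAX value of 200 defined above (plain C stand-in for the QB_MIN()-based expression):

    #include <stdio.h>
    #include <stdbool.h>

    #define CS_SEND_MAX_STUB 200   /* mirrors CS_SEND_MAX above */

    /* Delay before the next flush attempt: 100ms normally, proportionally
     * longer when the last send failed, capped at one second */
    static unsigned int
    flush_retry_delay_ms(bool send_failed, unsigned int queue_len)
    {
        unsigned int delay_ms = 100;

        if (send_failed) {
            delay_ms = CS_SEND_MAX_STUB + (10 * queue_len);
            if (delay_ms > 1000) {
                delay_ms = 1000;
            }
        }
        return delay_ms;
    }

    int
    main(void)
    {
        printf("send ok,   5 queued:   %ums\n", flush_retry_delay_ms(false, 5));
        printf("send fail, 5 queued:   %ums\n", flush_retry_delay_ms(true, 5));
        printf("send fail, 500 queued: %ums\n", flush_retry_delay_ms(true, 500));
        return 0;
    }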
cs_message_queue = g_list_remove(cs_message_queue, iov); -+ free(iov[0].iov_base); -+ free(iov); -+ } -+ -+ queue_len -= sent; -+ if (sent > 1 || cs_message_queue) { -+ crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); -+ } else { -+ crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); -+ } -+ -+ if (cs_message_queue) { -+ uint32_t delay_ms = 100; -+ if(rc != CS_OK) { -+ /* Proportionally more if sending failed but cap at 1s */ -+ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); -+ } -+ cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data); -+ } -+ -+ return rc; -+} -+ -+gboolean -+send_cpg_iov(struct iovec * iov) -+{ -+ static unsigned int queued = 0; -+ -+ queued++; -+ crm_trace("Queueing CPG message %u (%d bytes)", queued, iov->iov_len); -+ cs_message_queue = g_list_append(cs_message_queue, iov); -+ crm_cs_flush(&pcmk_cpg_handle); -+ return TRUE; -+} -+ -+static int -+pcmk_cpg_dispatch(gpointer user_data) -+{ -+ int rc = 0; -+ crm_cluster_t *cluster = (crm_cluster_t*) user_data; -+ -+ rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ALL); -+ if (rc != CS_OK) { -+ crm_err("Connection to the CPG API failed: %s (%d)", ais_error2text(rc), rc); -+ cluster->cpg_handle = 0; -+ return -1; -+ -+ } else if(cpg_evicted) { -+ crm_err("Evicted from CPG membership"); -+ return -1; -+ } -+ return 0; -+} -+ -+/* -+static void -+pcmk_cpg_deliver_message(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) -+{ -+ uint32_t kind = 0; -+ const char *from = NULL; -+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); -+ -+ free(data); -+} -+*/ -+ -+char * -+pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, -+ uint32_t *kind, const char **from) -+{ -+ char *data = NULL; -+ AIS_Message *msg = (AIS_Message *) content; -+ -+ if(handle) { -+ /* 'msg' came from CPG not the plugin -+ * Do filtering and field massaging -+ */ -+ uint32_t local_nodeid = get_local_nodeid(handle); -+ const char *local_name = get_local_node_name(); -+ -+ if (msg->sender.id > 0 && msg->sender.id != nodeid) { -+ crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id); -+ return NULL; -+ -+ } else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) { -+ /* Not for us */ -+ crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid); -+ return NULL; -+ } else if (msg->host.size != 0 && safe_str_neq(msg->host.uname, local_name)) { -+ /* Not for us */ -+ crm_trace("Not for us: %s != %s", msg->host.uname, local_name); -+ return NULL; -+ } -+ -+ msg->sender.id = nodeid; -+ if (msg->sender.size == 0) { -+ crm_node_t *peer = crm_get_peer(nodeid, NULL); -+ -+ if (peer == NULL) { -+ crm_err("Peer with nodeid=%u is unknown", nodeid); -+ -+ } else if (peer->uname == NULL) { -+ crm_err("No uname for peer with nodeid=%u", nodeid); -+ -+ } else { -+ crm_notice("Fixing uname for peer with nodeid=%u", nodeid); -+ msg->sender.size = strlen(peer->uname); -+ memset(msg->sender.uname, 0, MAX_NAME); -+ memcpy(msg->sender.uname, peer->uname, msg->sender.size); -+ } -+ } -+ } -+ -+ crm_trace("Got new%s message (size=%d, %d, %d)", -+ msg->is_compressed ? 
" compressed" : "", -+ ais_data_len(msg), msg->size, msg->compressed_size); -+ -+ if (kind != NULL) { -+ *kind = msg->header.id; -+ } -+ if (from != NULL) { -+ *from = msg->sender.uname; -+ } -+ -+ if (msg->is_compressed && msg->size > 0) { -+ int rc = BZ_OK; -+ char *uncompressed = NULL; -+ unsigned int new_size = msg->size + 1; -+ -+ if (check_message_sanity(msg, NULL) == FALSE) { -+ goto badmsg; -+ } -+ -+ crm_trace("Decompressing message data"); -+ uncompressed = calloc(1, new_size); -+ rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); -+ -+ if (rc != BZ_OK) { -+ crm_err("Decompression failed: %d", rc); -+ goto badmsg; -+ } -+ -+ CRM_ASSERT(rc == BZ_OK); -+ CRM_ASSERT(new_size == msg->size); -+ -+ data = uncompressed; -+ -+ } else if (check_message_sanity(msg, data) == FALSE) { -+ goto badmsg; -+ -+ } else if (safe_str_eq("identify", data)) { -+ int pid = getpid(); -+ char *pid_s = crm_itoa(pid); -+ -+ send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); -+ free(pid_s); -+ return NULL; -+ -+ } else { -+ data = strdup(msg->data); -+ } -+ -+ if (msg->header.id != crm_class_members) { -+ /* Is this even needed anymore? */ -+ crm_get_peer(msg->sender.id, msg->sender.uname); -+ } -+ -+ if (msg->header.id == crm_class_rmpeer) { -+ uint32_t id = crm_int_helper(data, NULL); -+ -+ crm_info("Removing peer %s/%u", data, id); -+ reap_crm_member(id, NULL); -+ free(data); -+ return NULL; -+ -+#if SUPPORT_PLUGIN -+ } else if (is_classic_ais_cluster()) { -+ plugin_handle_membership(msg); -+#endif -+ } -+ -+ crm_trace("Payload: %.200s", data); -+ return data; -+ -+ badmsg: -+ crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" -+ " min=%d, total=%d, size=%d, bz2_size=%d", -+ msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), -+ ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), -+ msg->sender.pid, (int)sizeof(AIS_Message), -+ msg->header.size, msg->size, msg->compressed_size); -+ -+ free(data); -+ return NULL; -+} -+ -+void -+pcmk_cpg_membership(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ const struct cpg_address *member_list, size_t member_list_entries, -+ const struct cpg_address *left_list, size_t left_list_entries, -+ const struct cpg_address *joined_list, size_t joined_list_entries) -+{ -+ int i; -+ gboolean found = FALSE; -+ static int counter = 0; -+ uint32_t local_nodeid = get_local_nodeid(handle); -+ -+ for (i = 0; i < left_list_entries; i++) { -+ crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); -+ -+ crm_info("Left[%d.%d] %s.%u ", counter, i, groupName->value, left_list[i].nodeid); -+ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); -+ } -+ -+ for (i = 0; i < joined_list_entries; i++) { -+ crm_info("Joined[%d.%d] %s.%u ", counter, i, groupName->value, joined_list[i].nodeid); -+ } -+ -+ for (i = 0; i < member_list_entries; i++) { -+ crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); -+ -+ crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid); -+ -+ /* Anyone that is sending us CPG messages must also be a _CPG_ member. -+ * But its _not_ safe to assume its in the quorum membership. 
-+ * We may have just found out its dead and are processing the last couple of messages it sent -+ */ -+ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -+ if(peer && peer->state && crm_is_peer_active(peer) == FALSE) { -+ time_t now = time(NULL); -+ -+ /* Co-opt the otherwise unused votes field */ -+ if(peer->votes == 0) { -+ peer->votes = now; -+ -+ } else if(now > (60 + peer->votes)) { -+ /* On the otherhand, if we're still getting messages, at a certain point -+ * we need to acknowledge our internal cache is probably wrong -+ * -+ * Set the threshold to 1 minute -+ */ -+ crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id); -+ crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); -+ peer->votes = 0; -+ } -+ } -+ -+ if (local_nodeid == member_list[i].nodeid) { -+ found = TRUE; -+ } -+ } -+ -+ if (!found) { -+ crm_err("We're not part of CPG group '%s' anymore!", groupName->value); -+ cpg_evicted = TRUE; -+ } -+ -+ counter++; -+} -+ -+gboolean -+cluster_connect_cpg(crm_cluster_t *cluster) -+{ -+ int rc = -1; -+ int fd = 0; -+ int retries = 0; -+ uint32_t id = 0; -+ crm_node_t *peer = NULL; -+ cpg_handle_t handle = 0; -+ -+ struct mainloop_fd_callbacks cpg_fd_callbacks = { -+ .dispatch = pcmk_cpg_dispatch, -+ .destroy = cluster->destroy, -+ }; -+ -+ cpg_callbacks_t cpg_callbacks = { -+ .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn, -+ .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn, -+ /* .cpg_deliver_fn = pcmk_cpg_deliver, */ -+ /* .cpg_confchg_fn = pcmk_cpg_membership, */ -+ }; -+ -+ cpg_evicted = FALSE; -+ cluster->group.length = 0; -+ cluster->group.value[0] = 0; -+ -+ strncpy(cluster->group.value, crm_system_name, 128); -+ cluster->group.length = strlen(crm_system_name) + 1; -+ -+ cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks)); -+ if (rc != CS_OK) { -+ crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); -+ goto bail; -+ } -+ -+ id = get_local_nodeid(handle); -+ if (id == 0) { -+ crm_err("Could not get local node id from the CPG API"); -+ goto bail; -+ -+ } -+ cluster->nodeid = id; -+ -+ retries = 0; -+ cs_repeat(retries, 30, rc = cpg_join(handle, &cluster->group)); -+ if (rc != CS_OK) { -+ crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); -+ goto bail; -+ } -+ -+ rc = cpg_fd_get(handle, &fd); -+ if (rc != CS_OK) { -+ crm_err("Could not obtain the CPG API connection: %d\n", rc); -+ goto bail; -+ } -+ -+ pcmk_cpg_handle = handle; -+ cluster->cpg_handle = handle; -+ mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); -+ -+ bail: -+ if (rc != CS_OK) { -+ cpg_finalize(handle); -+ return FALSE; -+ } -+ -+ peer = crm_get_peer(id, NULL); -+ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -+ return TRUE; -+} -+ -+gboolean -+send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) -+{ -+ gboolean rc = TRUE; -+ char *data = NULL; -+ -+ data = dump_xml_unformatted(msg); -+ rc = send_cluster_text(crm_class_cluster, data, local, node, dest); -+ free(data); -+ return rc; -+} -+ -+gboolean -+send_cluster_text(int class, const char *data, -+ gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) -+{ -+ static int msg_id = 0; -+ static int local_pid = 0; -+ static int local_name_len = 0; -+ static const char *local_name = NULL; -+ -+ char *target = NULL; -+ struct iovec *iov; -+ AIS_Message *msg = NULL; -+ enum crm_ais_msg_types sender = 
text2msg_type(crm_system_name); -+ -+ /* There are only 6 handlers registered to crm_lib_service in plugin.c */ -+ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); -+ return FALSE); -+ -+#if !SUPPORT_PLUGIN -+ CRM_CHECK(dest != crm_msg_ais, return FALSE); -+#endif -+ -+ if(local_name == NULL) { -+ local_name = get_local_node_name(); -+ } -+ if(local_name_len == 0 && local_name) { -+ local_name_len = strlen(local_name); -+ } -+ -+ if (data == NULL) { -+ data = ""; -+ } -+ -+ if (local_pid == 0) { -+ local_pid = getpid(); -+ } -+ -+ if (sender == crm_msg_none) { -+ sender = local_pid; -+ } -+ -+ msg = calloc(1, sizeof(AIS_Message)); -+ -+ msg_id++; -+ msg->id = msg_id; -+ msg->header.id = class; -+ msg->header.error = CS_OK; -+ -+ msg->host.type = dest; -+ msg->host.local = local; -+ -+ if (node) { -+ if (node->uname) { -+ target = strdup(node->uname); -+ msg->host.size = strlen(node->uname); -+ memset(msg->host.uname, 0, MAX_NAME); -+ memcpy(msg->host.uname, node->uname, msg->host.size); -+ } else { -+ target = g_strdup_printf("%u", node->id); -+ } -+ msg->host.id = node->id; -+ } else { -+ target = strdup("all"); -+ } -+ -+ msg->sender.id = 0; -+ msg->sender.type = sender; -+ msg->sender.pid = local_pid; -+ msg->sender.size = local_name_len; -+ memset(msg->sender.uname, 0, MAX_NAME); -+ memcpy(msg->sender.uname, local_name, msg->sender.size); -+ -+ msg->size = 1 + strlen(data); -+ msg->header.size = sizeof(AIS_Message) + msg->size; -+ -+ if (msg->size < CRM_BZ2_THRESHOLD) { -+ msg = realloc(msg, msg->header.size); -+ memcpy(msg->data, data, msg->size); -+ -+ } else { -+ char *compressed = NULL; -+ unsigned int new_size = 0; -+ char *uncompressed = strdup(data); -+ -+ if (crm_compress_string(uncompressed, msg->size, 0, &compressed, &new_size)) { -+ -+ msg->header.size = sizeof(AIS_Message) + new_size + 1; -+ msg = realloc(msg, msg->header.size); -+ memcpy(msg->data, compressed, new_size); -+ msg->data[new_size] = 0; -+ -+ msg->is_compressed = TRUE; -+ msg->compressed_size = new_size; -+ -+ } else { -+ msg = realloc(msg, msg->header.size); -+ memcpy(msg->data, data, msg->size); -+ } -+ -+ free(uncompressed); -+ free(compressed); -+ } -+ -+ iov = calloc(1, sizeof(struct iovec)); -+ iov->iov_base = msg; -+ iov->iov_len = msg->header.size; -+ -+ if (msg->compressed_size) { -+ crm_trace("Queueing CPG message %u to %s (%d bytes, %d bytes compressed payload): %.200s", -+ msg->id, target, iov->iov_len, msg->compressed_size, data); -+ } else { -+ crm_trace("Queueing CPG message %u to %s (%d bytes, %d bytes payload): %.200s", -+ msg->id, target, iov->iov_len, msg->size, data); -+ } -+ -+#if SUPPORT_PLUGIN -+ /* The plugin is the only time we dont use CPG messaging */ -+ if(get_cluster_type() == pcmk_cluster_classic_ais) { -+ return send_plugin_text(class, iov); -+ } -+#endif -+ -+ send_cpg_iov(iov); -+ -+ free(target); -+ return TRUE; -+} -+ -+enum crm_ais_msg_types -+text2msg_type(const char *text) -+{ -+ int type = crm_msg_none; -+ -+ CRM_CHECK(text != NULL, return type); -+ if (safe_str_eq(text, "ais")) { -+ type = crm_msg_ais; -+ } else if (safe_str_eq(text, "crm_plugin")) { -+ type = crm_msg_ais; -+ } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { -+ type = crm_msg_cib; -+ } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { -+ type = crm_msg_crmd; -+ } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { -+ type = crm_msg_crmd; -+ } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { -+ type = crm_msg_te; -+ } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { -+ type = crm_msg_pe; -+ } else 
if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { -+ type = crm_msg_lrmd; -+ } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { -+ type = crm_msg_stonithd; -+ } else if (safe_str_eq(text, "stonith-ng")) { -+ type = crm_msg_stonith_ng; -+ } else if (safe_str_eq(text, "attrd")) { -+ type = crm_msg_attrd; -+ -+ } else { -+ /* This will normally be a transient client rather than -+ * a cluster daemon. Set the type to the pid of the client -+ */ -+ int scan_rc = sscanf(text, "%d", &type); -+ -+ if (scan_rc != 1) { -+ /* Ensure its sane */ -+ type = crm_msg_none; -+ } -+ } -+ return type; -+} -diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c -index 14749e4..8b16f7e 100644 ---- a/lib/cluster/legacy.c -+++ b/lib/cluster/legacy.c -@@ -31,12 +31,6 @@ - # include - # include - # include --cpg_handle_t pcmk_cpg_handle = 0; -- --struct cpg_name pcmk_cpg_group = { -- .length = 0, -- .value[0] = 0, --}; - #endif - - #if HAVE_CMAP -@@ -50,88 +44,8 @@ cman_handle_t pcmk_cman_handle = NULL; - - int ais_membership_timer = 0; - gboolean ais_membership_force = FALSE; --int ais_dispatch(gpointer user_data); -- --#define cs_repeat(counter, max, code) do { \ -- code; \ -- if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ -- counter++; \ -- crm_debug("Retrying operation after %ds", counter); \ -- sleep(counter); \ -- } else { \ -- break; \ -- } \ -- } while(counter < max) -- --enum crm_ais_msg_types --text2msg_type(const char *text) --{ -- int type = crm_msg_none; -- -- CRM_CHECK(text != NULL, return type); -- if (safe_str_eq(text, "ais")) { -- type = crm_msg_ais; -- } else if (safe_str_eq(text, "crm_plugin")) { -- type = crm_msg_ais; -- } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { -- type = crm_msg_cib; -- } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { -- type = crm_msg_crmd; -- } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { -- type = crm_msg_crmd; -- } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { -- type = crm_msg_te; -- } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { -- type = crm_msg_pe; -- } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { -- type = crm_msg_lrmd; -- } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { -- type = crm_msg_stonithd; -- } else if (safe_str_eq(text, "stonith-ng")) { -- type = crm_msg_stonith_ng; -- } else if (safe_str_eq(text, "attrd")) { -- type = crm_msg_attrd; -- -- } else { -- /* This will normally be a transient client rather than -- * a cluster daemon. 
Set the type to the pid of the client -- */ -- int scan_rc = sscanf(text, "%d", &type); -- -- if (scan_rc != 1 || type <= crm_msg_stonith_ng) { -- /* Ensure its sane */ -- type = crm_msg_none; -- } -- } -- return type; --} -+int plugin_dispatch(gpointer user_data); - --char * --get_ais_data(const AIS_Message * msg) --{ -- int rc = BZ_OK; -- char *uncompressed = NULL; -- unsigned int new_size = msg->size + 1; -- -- if (msg->is_compressed == FALSE) { -- crm_trace("Returning uncompressed message data"); -- uncompressed = strdup(msg->data); -- -- } else { -- crm_trace("Decompressing message data"); -- uncompressed = calloc(1, new_size); -- -- rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, (char *)msg->data, -- msg->compressed_size, 1, 0); -- -- CRM_ASSERT(rc == BZ_OK); -- CRM_ASSERT(new_size == msg->size); -- } -- -- return uncompressed; --} -- --#if SUPPORT_COROSYNC - int ais_fd_sync = -1; - int ais_fd_async = -1; /* never send messages via this channel */ - void *ais_ipc_ctx = NULL; -@@ -160,9 +74,6 @@ get_ais_details(uint32_t * id, char **uname) - header.id = crm_class_nodeid; - header.size = sizeof(cs_ipc_header_response_t); - -- CRM_CHECK(id != NULL, return FALSE); -- CRM_CHECK(uname != NULL, return FALSE); -- - iov.iov_base = &header; - iov.iov_len = header.size; - -@@ -203,140 +114,7 @@ get_ais_details(uint32_t * id, char **uname) - return TRUE; - } - --static uint32_t get_local_nodeid(cpg_handle_t handle) --{ -- int rc = CS_OK; -- int retries = 0; -- static uint32_t local_nodeid = 0; -- cpg_handle_t local_handle = handle; -- cpg_callbacks_t cb = { }; -- -- if(local_nodeid != 0) { -- return local_nodeid; -- } -- --#if 0 -- /* Should not be necessary */ -- if(get_cluster_type() == pcmk_cluster_classic_ais) { -- get_ais_details(&local_nodeid, NULL); -- goto done; -- } --#endif -- -- if(local_handle == 0) { -- crm_trace("Creating connection"); -- cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); -- } -- -- if (rc == CS_OK) { -- retries = 0; -- crm_trace("Performing lookup"); -- cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); -- } -- -- if (rc != CS_OK) { -- crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); -- } -- -- if(handle != local_handle) { -- crm_trace("Closing connection %u", local_handle); -- cpg_finalize(local_handle); -- } -- -- crm_debug("Local nodeid is %u", local_nodeid); -- return local_nodeid; --} -- --GListPtr cs_message_queue = NULL; --int cs_message_timer = 0; -- --static ssize_t crm_cs_flush(void); -- --static gboolean --crm_cs_flush_cb(gpointer data) --{ -- cs_message_timer = 0; -- crm_cs_flush(); -- return FALSE; --} -- --#define CS_SEND_MAX 200 --static ssize_t --crm_cs_flush(void) --{ -- int sent = 0; -- ssize_t rc = 0; -- int queue_len = 0; -- static unsigned int last_sent = 0; -- -- if (pcmk_cpg_handle == 0) { -- crm_trace("Connection is dead"); -- return pcmk_ok; -- } -- -- queue_len = g_list_length(cs_message_queue); -- if ((queue_len % 1000) == 0 && queue_len > 1) { -- crm_err("CPG queue has grown to %d", queue_len); -- -- } else if (queue_len == CS_SEND_MAX) { -- crm_warn("CPG queue has grown to %d", queue_len); -- } -- -- if (cs_message_timer) { -- /* There is already a timer, wait until it goes off */ -- crm_trace("Timer active %d", cs_message_timer); -- return pcmk_ok; -- } -- -- while (cs_message_queue && sent < CS_SEND_MAX) { -- AIS_Message *header = NULL; -- struct iovec *iov = cs_message_queue->data; -- -- errno = 0; -- rc = cpg_mcast_joined(pcmk_cpg_handle, 
CPG_TYPE_AGREED, iov, 1); -- -- if (rc != CS_OK) { -- break; -- } -- -- sent++; -- header = iov->iov_base; -- last_sent = header->id; -- if (header->compressed_size) { -- crm_trace("CPG message %d (%d compressed bytes) sent", -- header->id, header->compressed_size); -- } else { -- crm_trace("CPG message %d (%d bytes) sent: %.200s", -- header->id, header->size, header->data); -- } -- -- cs_message_queue = g_list_remove(cs_message_queue, iov); -- free(iov[0].iov_base); -- free(iov); -- } -- -- queue_len -= sent; -- if (sent > 1 || cs_message_queue) { -- crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -- sent, queue_len, last_sent, ais_error2text(rc), rc); -- } else { -- crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -- sent, queue_len, last_sent, ais_error2text(rc), rc); -- } -- -- if (cs_message_queue) { -- uint32_t delay_ms = 100; -- if(rc != CS_OK) { -- /* Proportionally more if sending failed but cap at 1s */ -- delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); -- } -- cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, NULL); -- } -- -- return rc; --} -- --static bool -+bool - send_plugin_text(int class, struct iovec *iov) - { - int rc = CS_OK; -@@ -386,154 +164,8 @@ send_plugin_text(int class, struct iovec *iov) - return (rc == CS_OK); - } - --gboolean --send_ais_text(int class, const char *data, -- gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) --{ -- static int msg_id = 0; -- static int local_pid = 0; -- static int local_name_len = 0; -- static const char *local_name = NULL; -- -- char *target = NULL; -- struct iovec *iov; -- AIS_Message *ais_msg = NULL; -- enum cluster_type_e cluster_type = get_cluster_type(); -- enum crm_ais_msg_types sender = text2msg_type(crm_system_name); -- -- /* There are only 6 handlers registered to crm_lib_service in plugin.c */ -- CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); -- return FALSE); -- -- CRM_CHECK(dest != crm_msg_ais, return FALSE); -- -- if(local_name == NULL) { -- local_name = get_local_node_name(); -- } -- if(local_name_len == 0 && local_name) { -- local_name_len = strlen(local_name); -- } -- -- if (data == NULL) { -- data = ""; -- } -- -- if (local_pid == 0) { -- local_pid = getpid(); -- } -- -- if (sender == crm_msg_none) { -- sender = local_pid; -- } -- -- ais_msg = calloc(1, sizeof(AIS_Message)); -- -- ais_msg->id = msg_id++; -- ais_msg->header.id = class; -- ais_msg->header.error = CS_OK; -- -- ais_msg->host.type = dest; -- ais_msg->host.local = local; -- -- if (node) { -- if (node->uname) { -- target = strdup(node->uname); -- ais_msg->host.size = strlen(node->uname); -- memset(ais_msg->host.uname, 0, MAX_NAME); -- memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size); -- } else { -- target = g_strdup_printf("%u", node->id); -- } -- ais_msg->host.id = node->id; -- } else { -- target = strdup("all"); -- } -- -- ais_msg->sender.id = 0; -- ais_msg->sender.type = sender; -- ais_msg->sender.pid = local_pid; -- ais_msg->sender.size = local_name_len; -- memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, local_name, ais_msg->sender.size); -- -- ais_msg->size = 1 + strlen(data); -- ais_msg->header.size = sizeof(AIS_Message) + ais_msg->size; -- -- if (ais_msg->size < CRM_BZ2_THRESHOLD) { -- ais_msg = realloc(ais_msg, ais_msg->header.size); -- memcpy(ais_msg->data, data, ais_msg->size); -- -- } else { -- char *compressed = NULL; -- unsigned int new_size = 0; -- char *uncompressed = strdup(data); -- -- if 
(crm_compress_string(uncompressed, ais_msg->size, 0, &compressed, &new_size)) { -- -- ais_msg->header.size = sizeof(AIS_Message) + new_size + 1; -- ais_msg = realloc(ais_msg, ais_msg->header.size); -- memcpy(ais_msg->data, compressed, new_size); -- ais_msg->data[new_size] = 0; -- -- ais_msg->is_compressed = TRUE; -- ais_msg->compressed_size = new_size; -- -- } else { -- ais_msg = realloc(ais_msg, ais_msg->header.size); -- memcpy(ais_msg->data, data, ais_msg->size); -- } -- -- free(uncompressed); -- free(compressed); -- } -- -- iov = calloc(1, sizeof(struct iovec)); -- iov->iov_base = ais_msg; -- iov->iov_len = ais_msg->header.size; -- -- if (ais_msg->compressed_size) { -- crm_trace("Queueing %s message %u to %s (%d compressed bytes)", -- cluster_type == pcmk_cluster_classic_ais?"plugin":"CPG", -- ais_msg->id, target, ais_msg->compressed_size); -- } else { -- crm_trace("Queueing %s message %u to %s (%d bytes)", -- cluster_type == pcmk_cluster_classic_ais?"plugin":"CPG", -- ais_msg->id, target, ais_msg->size); -- } -- -- /* The plugin is the only time we dont use CPG messaging */ -- if(cluster_type == pcmk_cluster_classic_ais) { -- return send_plugin_text(class, iov); -- } -- -- cs_message_queue = g_list_append(cs_message_queue, iov); -- crm_cs_flush(); -- -- free(target); -- return TRUE; --} -- --gboolean --send_ais_message(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) --{ -- gboolean rc = TRUE; -- char *data = NULL; -- -- if (is_classic_ais_cluster()) { -- if (ais_fd_async < 0) { -- crm_err("Not connected to AIS: %d", ais_fd_async); -- return FALSE; -- } -- } -- -- data = dump_xml_unformatted(msg); -- rc = send_ais_text(crm_class_cluster, data, local, node, dest); -- free(data); -- return rc; --} -- - void --terminate_cs_connection(void) -+terminate_cs_connection(crm_cluster_t *cluster) - { - crm_notice("Disconnecting from Corosync"); - -@@ -545,20 +177,8 @@ terminate_cs_connection(void) - } else { - crm_info("No plugin connection"); - } -- -- } else { -- if (pcmk_cpg_handle) { -- crm_info("Disconnecting CPG"); -- if (cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group) == CS_OK) { -- crm_info("Destroying CPG"); -- cpg_finalize(pcmk_cpg_handle); -- } -- pcmk_cpg_handle = 0; -- -- } else { -- crm_info("No CPG connection"); -- } - } -+ cluster_disconnect_cpg(cluster); - - # if SUPPORT_CMAN - if (is_cman_cluster()) { -@@ -578,155 +198,66 @@ terminate_cs_connection(void) - ais_fd_sync = -1; - } - --static crm_node_t * --crm_update_ais_node(xmlNode * member, long long seq) --{ -- const char *id_s = crm_element_value(member, "id"); -- const char *addr = crm_element_value(member, "addr"); -- const char *uname = crm_element_value(member, "uname"); -- const char *state = crm_element_value(member, "state"); -- const char *born_s = crm_element_value(member, "born"); -- const char *seen_s = crm_element_value(member, "seen"); -- const char *votes_s = crm_element_value(member, "votes"); -- const char *procs_s = crm_element_value(member, "processes"); -- -- int votes = crm_int_helper(votes_s, NULL); -- unsigned int id = crm_int_helper(id_s, NULL); -- unsigned int procs = crm_int_helper(procs_s, NULL); -- -- /* TODO: These values will contain garbage if version < 0.7.1 */ -- uint64_t born = crm_int_helper(born_s, NULL); -- uint64_t seen = crm_int_helper(seen_s, NULL); -- -- return crm_update_peer(__FUNCTION__, id, born, seen, votes, procs, uname, uname, addr, state); --} -- --static gboolean --ais_dispatch_message(AIS_Message * msg, -- gboolean(*dispatch) (int kind, const char 
*from, const char *data)) -+void -+plugin_handle_membership(AIS_Message *msg) - { -- char *data = NULL; -- char *uncompressed = NULL; -- -- xmlNode *xml = NULL; -+ if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { -+ xmlNode *member = NULL; -+ const char *value = NULL; -+ gboolean quorate = FALSE; -+ xmlNode *xml = string2xml(msg->data); - -- CRM_ASSERT(msg != NULL); -- -- crm_trace("Got new%s message (size=%d, %d, %d)", -- msg->is_compressed ? " compressed" : "", -- ais_data_len(msg), msg->size, msg->compressed_size); -- -- data = msg->data; -- if (msg->is_compressed && msg->size > 0) { -- int rc = BZ_OK; -- unsigned int new_size = msg->size + 1; -- -- if (check_message_sanity(msg, NULL) == FALSE) { -- goto badmsg; -+ if (xml == NULL) { -+ crm_err("Invalid membership update: %s", msg->data); -+ return; - } - -- crm_trace("Decompressing message data"); -- uncompressed = calloc(1, new_size); -- rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); -- -- if (rc != BZ_OK) { -- crm_err("Decompression failed: %d", rc); -- goto badmsg; -+ value = crm_element_value(xml, "quorate"); -+ CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); return); -+ if (crm_is_true(value)) { -+ quorate = TRUE; - } - -- CRM_ASSERT(rc == BZ_OK); -- CRM_ASSERT(new_size == msg->size); -- -- data = uncompressed; -- -- } else if (check_message_sanity(msg, data) == FALSE) { -- goto badmsg; -- -- } else if (safe_str_eq("identify", data)) { -- int pid = getpid(); -- char *pid_s = crm_itoa(pid); -- -- send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); -- free(pid_s); -- goto done; -- } -- -- if (msg->header.id != crm_class_members) { -- crm_get_peer(msg->sender.id, msg->sender.uname); -- } -- -- if (msg->header.id == crm_class_rmpeer) { -- uint32_t id = crm_int_helper(data, NULL); -- -- crm_info("Removing peer %s/%u", data, id); -- reap_crm_member(id, NULL); -- goto done; -- -- } else if (is_classic_ais_cluster()) { -- if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { -- xmlNode *node = NULL; -- const char *value = NULL; -- gboolean quorate = FALSE; -- -- xml = string2xml(data); -- if (xml == NULL) { -- crm_err("Invalid membership update: %s", data); -- goto badmsg; -- } -- -- value = crm_element_value(xml, "quorate"); -- CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); -- goto badmsg); -- if (crm_is_true(value)) { -- quorate = TRUE; -- } -- -- value = crm_element_value(xml, "id"); -- CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); -- goto badmsg); -- crm_peer_seq = crm_int_helper(value, NULL); -+ value = crm_element_value(xml, "id"); -+ CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); return); -+ crm_peer_seq = crm_int_helper(value, NULL); - -- if (quorate != crm_have_quorum) { -- crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost"); -- crm_have_quorum = quorate; -+ if (quorate != crm_have_quorum) { -+ crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost"); -+ crm_have_quorum = quorate; - -- } else { -- crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost"); -- } -- -- for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { -- crm_update_ais_node(node, crm_peer_seq); -- } -+ } else { -+ crm_info("Membership %s: quorum %s", value, quorate ? 
"retained" : "still lost"); - } -- } - -- crm_trace("Payload: %s", data); -- if (dispatch != NULL) { -- dispatch(msg->header.id, msg->sender.uname, data); -+ for (member = __xml_first_child(xml); member != NULL; member = __xml_next(member)) { -+ const char *id_s = crm_element_value(member, "id"); -+ const char *addr = crm_element_value(member, "addr"); -+ const char *uname = crm_element_value(member, "uname"); -+ const char *state = crm_element_value(member, "state"); -+ const char *born_s = crm_element_value(member, "born"); -+ const char *seen_s = crm_element_value(member, "seen"); -+ const char *votes_s = crm_element_value(member, "votes"); -+ const char *procs_s = crm_element_value(member, "processes"); -+ -+ int votes = crm_int_helper(votes_s, NULL); -+ unsigned int id = crm_int_helper(id_s, NULL); -+ unsigned int procs = crm_int_helper(procs_s, NULL); -+ -+ /* TODO: These values will contain garbage if version < 0.7.1 */ -+ uint64_t born = crm_int_helper(born_s, NULL); -+ uint64_t seen = crm_int_helper(seen_s, NULL); -+ -+ crm_update_peer(__FUNCTION__, id, born, seen, votes, procs, uname, uname, addr, state); -+ } - } -- -- done: -- free(uncompressed); -- free_xml(xml); -- return TRUE; -- -- badmsg: -- crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" -- " min=%d, total=%d, size=%d, bz2_size=%d", -- msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), -- ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), -- msg->sender.pid, (int)sizeof(AIS_Message), -- msg->header.size, msg->size, msg->compressed_size); -- goto done; - } - - int --ais_dispatch(gpointer user_data) -+plugin_dispatch(gpointer user_data) - { - int rc = CS_OK; -- gboolean good = TRUE; -- -- gboolean(*dispatch) (int kind, const char *from, const char *data) = user_data; -+ crm_cluster_t *cluster = (crm_cluster_t *) user_data; - - do { - char *buffer = NULL; -@@ -743,20 +274,20 @@ ais_dispatch(gpointer user_data) - /* NULL is a legal "no message afterall" value */ - return 0; - } -- good = ais_dispatch_message((AIS_Message *) buffer, dispatch); -+ /* -+ cpg_deliver_fn_t(cpg_handle_t handle, const struct cpg_name *group_name, -+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len); -+ */ -+ cluster->cpg.cpg_deliver_fn(0, NULL, 0, 0, buffer, 0); - coroipcc_dispatch_put(ais_ipc_handle); - -- } while (good && ais_ipc_handle); -- -- if (good) { -- return 0; -- } -+ } while (ais_ipc_handle); - -- return -1; -+ return 0; - } - - static void --ais_destroy(gpointer user_data) -+plugin_destroy(gpointer user_data) - { - crm_err("AIS connection terminated"); - ais_fd_sync = -1; -@@ -896,179 +427,6 @@ init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (* - } - - # ifdef SUPPORT_COROSYNC --gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; --static bool cpg_evicted = FALSE; -- --static int --pcmk_cpg_dispatch(gpointer user_data) --{ -- int rc = 0; -- -- pcmk_cpg_dispatch_fn = user_data; -- rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); -- if (rc != CS_OK) { -- crm_err("Connection to the CPG API failed: %d", rc); -- pcmk_cpg_handle = 0; -- return -1; -- -- } else if(cpg_evicted) { -- crm_err("Evicted from CPG membership"); -- return -1; -- } -- return 0; --} -- --static void --pcmk_cpg_deliver(cpg_handle_t handle, -- const struct cpg_name *groupName, -- uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) --{ -- AIS_Message *ais_msg = (AIS_Message *) msg; -- uint32_t local_nodeid = get_local_nodeid(handle); -- const char 
*local_name = get_local_node_name(); -- -- if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { -- crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); -- return; -- -- } else if (ais_msg->host.id != 0 && (local_nodeid != ais_msg->host.id)) { -- /* Not for us */ -- return; -- -- } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, local_name)) { -- /* Not for us */ -- return; -- } -- -- ais_msg->sender.id = nodeid; -- if (ais_msg->sender.size == 0) { -- crm_node_t *peer = crm_get_peer(nodeid, NULL); -- -- if (peer == NULL) { -- crm_err("Peer with nodeid=%u is unknown", nodeid); -- -- } else if (peer->uname == NULL) { -- crm_err("No uname for peer with nodeid=%u", nodeid); -- -- } else { -- crm_notice("Fixing uname for peer with nodeid=%u", nodeid); -- ais_msg->sender.size = strlen(peer->uname); -- memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); -- } -- } -- -- ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); --} -- --static void --pcmk_cpg_membership(cpg_handle_t handle, -- const struct cpg_name *groupName, -- const struct cpg_address *member_list, size_t member_list_entries, -- const struct cpg_address *left_list, size_t left_list_entries, -- const struct cpg_address *joined_list, size_t joined_list_entries) --{ -- int i; -- gboolean found = FALSE; -- static int counter = 0; -- uint32_t local_nodeid = get_local_nodeid(handle); -- -- for (i = 0; i < left_list_entries; i++) { -- crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); -- -- crm_info("Left[%d.%d] %s.%u ", counter, i, groupName->value, left_list[i].nodeid); -- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); -- } -- -- for (i = 0; i < joined_list_entries; i++) { -- crm_info("Joined[%d.%d] %s.%u ", counter, i, groupName->value, joined_list[i].nodeid); -- } -- -- for (i = 0; i < member_list_entries; i++) { -- crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); -- -- crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid); -- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- if (local_nodeid == member_list[i].nodeid) { -- found = TRUE; -- } -- } -- -- if (!found) { -- crm_err("We're not part of CPG group %s anymore!", groupName->value); -- cpg_evicted = TRUE; -- } -- -- counter++; --} -- --cpg_callbacks_t cpg_callbacks = { -- .cpg_deliver_fn = pcmk_cpg_deliver, -- .cpg_confchg_fn = pcmk_cpg_membership, --}; --# endif -- --static gboolean --init_cpg_connection(crm_cluster_t * cluster) --{ --# ifdef SUPPORT_COROSYNC -- int rc = -1; -- int fd = 0; -- int retries = 0; -- crm_node_t *peer = NULL; -- -- struct mainloop_fd_callbacks cpg_fd_callbacks = { -- .dispatch = pcmk_cpg_dispatch, -- .destroy = cluster->destroy, -- }; -- -- cpg_evicted = FALSE; -- strcpy(pcmk_cpg_group.value, crm_system_name); -- pcmk_cpg_group.length = strlen(crm_system_name) + 1; -- -- cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); -- if (rc != CS_OK) { -- crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); -- goto bail; -- } -- -- retries = 0; -- cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)&cluster->nodeid)); -- if (rc != CS_OK) { -- crm_err("Could not get local node id from the CPG API"); -- goto bail; -- } -- -- retries = 0; -- cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); -- if (rc != CS_OK) { -- crm_err("Could not join the CPG 
group '%s': %d", crm_system_name, rc); -- goto bail; -- } -- -- rc = cpg_fd_get(pcmk_cpg_handle, &fd); -- if (rc != CS_OK) { -- crm_err("Could not obtain the CPG API connection: %d\n", rc); -- goto bail; -- } -- -- mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster->cs_dispatch, &cpg_fd_callbacks); -- -- bail: -- if (rc != CS_OK) { -- cpg_finalize(pcmk_cpg_handle); -- return FALSE; -- } -- -- peer = crm_get_peer(cluster->nodeid, NULL); -- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- --# else -- crm_err("The Corosync CPG API is not supported in this build"); -- crm_exit(DAEMON_RESPAWN_STOP); --# endif -- return TRUE; --} - - gboolean - init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), -@@ -1086,9 +444,11 @@ init_cs_connection_classic(crm_cluster_t * cluster) - int pid = 0; - char *pid_s = NULL; - const char *name = NULL; -+ crm_node_t *peer = NULL; -+ enum crm_proc_flag proc = 0; - - struct mainloop_fd_callbacks ais_fd_callbacks = { -- .dispatch = ais_dispatch, -+ .dispatch = plugin_dispatch, - .destroy = cluster->destroy, - }; - -@@ -1099,7 +459,7 @@ init_cs_connection_classic(crm_cluster_t * cluster) - if (ais_ipc_handle) { - coroipcc_fd_get(ais_ipc_handle, &ais_fd_async); - } else { -- crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", -+ crm_info("Connection to our Corosync plugin (%d) failed: %s (%d)", - PCMK_SERVICE_ID, strerror(errno), errno); - return FALSE; - } -@@ -1108,7 +468,7 @@ init_cs_connection_classic(crm_cluster_t * cluster) - rc = CS_ERR_LIBRARY; - } - if (rc != CS_OK) { -- crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, -+ crm_info("Connection to our Corosync plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, - ais_error2text(rc), rc); - } - -@@ -1117,16 +477,15 @@ init_cs_connection_classic(crm_cluster_t * cluster) - } - - if (ais_fd_callbacks.destroy == NULL) { -- ais_fd_callbacks.destroy = ais_destroy; -+ ais_fd_callbacks.destroy = plugin_destroy; - } - -- mainloop_add_fd("corosync-plugin", G_PRIORITY_MEDIUM, ais_fd_async, cluster->cs_dispatch, -- &ais_fd_callbacks); -+ mainloop_add_fd("corosync-plugin", G_PRIORITY_MEDIUM, ais_fd_async, cluster, &ais_fd_callbacks); - crm_info("AIS connection established"); - - pid = getpid(); - pid_s = crm_itoa(pid); -- send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); -+ send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); - free(pid_s); - - cluster->nodeid = get_local_nodeid(0); -@@ -1141,6 +500,9 @@ init_cs_connection_classic(crm_cluster_t * cluster) - crm_exit(ENOTUNIQ); - } - -+ proc = text2proc(crm_system_name); -+ peer = crm_get_peer(cluster->nodeid, cluster->uname); -+ crm_update_peer_proc(__FUNCTION__, peer, proc|crm_proc_plugin, ONLINESTATUS); - - return TRUE; - } -@@ -1275,7 +637,7 @@ init_cs_connection_once(crm_cluster_t * cluster) - } - break; - case pcmk_cluster_cman: -- if (init_cpg_connection(cluster) == FALSE) { -+ if (cluster_connect_cpg(cluster) == FALSE) { - return FALSE; - } - cluster->uname = cman_node_name(0 /* CMAN_NODEID_US */ ); -diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c -index a1e044c..875c1c8 100644 ---- a/lib/cluster/membership.c -+++ b/lib/cluster/membership.c -@@ -125,7 +125,7 @@ crm_active_peers(void) - return count; - } - --void -+static void - destroy_crm_node(gpointer data) - { - crm_node_t *node = data; -@@ -143,14 +143,6 @@ destroy_crm_node(gpointer data) - void - crm_peer_init(void) - { -- static gboolean initialized = FALSE; -- -- if 
(initialized) { -- return; -- } -- initialized = TRUE; -- -- crm_peer_destroy(); - if (crm_peer_cache == NULL) { - crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_crm_node); - } -diff --git a/lib/common/logging.c b/lib/common/logging.c -index a1b01f2..155a068 100644 ---- a/lib/common/logging.c -+++ b/lib/common/logging.c -@@ -95,6 +95,10 @@ crm_glib_handler(const gchar * log_domain, GLogLevelFlags flags, const gchar * m - static void - crm_trigger_blackbox(int nsig) - { -+ if(nsig == SIGTRAP) { -+ /* Turn it on if it wasn't already */ -+ crm_enable_blackbox(nsig); -+ } - crm_write_blackbox(nsig, NULL); - } - -@@ -344,15 +348,6 @@ crm_enable_blackbox(int nsig) - - crm_update_callsites(); - -- /* Original meanings from signal(7) -- * -- * Signal Value Action Comment -- * SIGTRAP 5 Core Trace/breakpoint trap -- * -- * Our usage is as similar as possible -- */ -- mainloop_add_signal(SIGTRAP, crm_trigger_blackbox); -- - blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); - qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); - crm_trace("Trigger: %d is %d %d", blackbox_trigger, -@@ -762,7 +757,17 @@ crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - } - #endif - } -+ -+ /* Original meanings from signal(7) -+ * -+ * Signal Value Action Comment -+ * SIGTRAP 5 Core Trace/breakpoint trap -+ * SIGUSR1 30,10,16 Term User-defined signal 1 -+ * -+ * Our usage is as similar as possible -+ */ - mainloop_add_signal(SIGUSR1, crm_enable_blackbox); -+ mainloop_add_signal(SIGTRAP, crm_trigger_blackbox); - } - - crm_xml_init(); /* Sets buffer allocation strategy */ -diff --git a/lib/services/systemd.c b/lib/services/systemd.c -index 886cb35..2a66da5 100644 ---- a/lib/services/systemd.c -+++ b/lib/services/systemd.c -@@ -407,6 +407,8 @@ systemd_unit_exec_done(GObject * source_object, GAsyncResult * res, gpointer use - } - } - -+#define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/" -+ - gboolean - systemd_unit_exec(svc_action_t * op, gboolean synchronous) - { -@@ -453,9 +455,42 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous) - goto cleanup; - - } else if (g_strcmp0(action, "start") == 0) { -+ FILE *file_strm = NULL; -+ char *override_dir = g_strdup_printf("%s/%s", SYSTEMD_OVERRIDE_ROOT, unit); -+ char *override_file = g_strdup_printf("%s/50-pacemaker.conf", override_dir); -+ - action = "StartUnit"; -+ crm_build_path(override_dir, 0755); -+ -+ file_strm = fopen(override_file, "w"); -+ if (file_strm != NULL) { -+ int rc = fprintf(file_strm, "[Service]\nRestart=no"); -+ if (rc < 0) { -+ crm_perror(LOG_ERR, "Cannot write to systemd override file %s: %s (%d)", override_file, pcmk_strerror(errno), errno); -+ } -+ -+ } else { -+ crm_err("Cannot open systemd override file %s for writing: %s (%d)", override_file, pcmk_strerror(errno), errno); -+ } -+ -+ if (file_strm != NULL) { -+ fflush(file_strm); -+ fclose(file_strm); -+ } -+ systemd_daemon_reload(systemd_proxy, &error); -+ g_error_free(error); error = NULL; -+ free(override_file); -+ free(override_dir); -+ - } else if (g_strcmp0(action, "stop") == 0) { -+ char *override_file = g_strdup_printf("%s/%s/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, unit); -+ - action = "StopUnit"; -+ unlink(override_file); -+ free(override_file); -+ systemd_daemon_reload(systemd_proxy, &error); -+ g_error_free(error); error = NULL; -+ - } else if (g_strcmp0(action, "restart") == 0) { - action = "RestartUnit"; - } else { -diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am -index 
73f1d7e..82cb65f 100644 ---- a/lrmd/Makefile.am -+++ b/lrmd/Makefile.am -@@ -27,7 +27,7 @@ initdir = $(INITDIR) - init_SCRIPTS = pacemaker_remote - sbin_PROGRAMS = pacemaker_remoted - --if HAVE_SYSTEMD -+if BUILD_SYSTEMD - systemdunit_DATA = pacemaker_remote.service - endif - -diff --git a/mcp/Makefile.am b/mcp/Makefile.am -index 73a71c4..f98f286 100644 ---- a/mcp/Makefile.am -+++ b/mcp/Makefile.am -@@ -29,7 +29,7 @@ if BUILD_HELP - man8_MANS = $(sbin_PROGRAMS:%=%.8) - endif - --if HAVE_SYSTEMD -+if BUILD_SYSTEMD - systemdunit_DATA = pacemaker.service - endif - -diff --git a/mcp/corosync.c b/mcp/corosync.c -index 64d6eb5..ca37871 100644 ---- a/mcp/corosync.c -+++ b/mcp/corosync.c -@@ -43,13 +43,7 @@ - # include - #endif - --static struct cpg_name cpg_group = { -- .length = 0, -- .value[0] = 0, --}; -- - enum cluster_type_e stack = pcmk_cluster_unknown; --static cpg_handle_t cpg_handle; - static corosync_cfg_handle_t cfg_handle; - - /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ -@@ -155,169 +149,6 @@ cluster_connect_cfg(uint32_t * nodeid) - return FALSE; - } - --/* =::=::=::= CPG - Closed Process Group Messaging =::=::=::= */ -- --static int --pcmk_cpg_dispatch(gpointer user_data) --{ -- cpg_handle_t *handle = (cpg_handle_t *) user_data; -- cs_error_t rc = cpg_dispatch(*handle, CS_DISPATCH_ALL); -- -- if (rc != CS_OK) { -- return -1; -- } -- return 0; --} -- --static void --cpg_connection_destroy(gpointer user_data) --{ -- crm_err("Connection destroyed"); -- cpg_handle = 0; -- crm_exit(ENOTCONN); --} -- --static void --pcmk_cpg_deliver(cpg_handle_t handle, -- const struct cpg_name *groupName, -- uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) --{ -- if (nodeid != local_nodeid) { -- uint32_t procs = 0; -- xmlNode *xml = string2xml(msg); -- const char *uname = crm_element_value(xml, "uname"); -- -- crm_element_value_int(xml, "proclist", (int *)&procs); -- /* crm_debug("Got proclist %.32x from %s", procs, uname); */ -- if (update_node_processes(nodeid, uname, procs)) { -- update_process_clients(); -- } -- } --} -- --static void --pcmk_cpg_membership(cpg_handle_t handle, -- const struct cpg_name *groupName, -- const struct cpg_address *member_list, size_t member_list_entries, -- const struct cpg_address *left_list, size_t left_list_entries, -- const struct cpg_address *joined_list, size_t joined_list_entries) --{ -- /* Don't care about CPG membership */ -- update_process_peers(); --} -- --cpg_callbacks_t cpg_callbacks = { -- .cpg_deliver_fn = pcmk_cpg_deliver, -- .cpg_confchg_fn = pcmk_cpg_membership, --}; -- --gboolean --cluster_disconnect_cpg(void) --{ -- if (cpg_handle) { -- cpg_finalize(cpg_handle); -- cpg_handle = 0; -- } -- return TRUE; --} -- --gboolean --cluster_connect_cpg(void) --{ -- cs_error_t rc; -- unsigned int nodeid; -- int fd; -- int retries = 0; -- -- static struct mainloop_fd_callbacks cpg_fd_callbacks = { -- .dispatch = pcmk_cpg_dispatch, -- .destroy = cpg_connection_destroy, -- }; -- -- strcpy(cpg_group.value, "pcmk"); -- cpg_group.length = strlen(cpg_group.value) + 1; -- -- retries = 0; -- cs_repeat(retries, 30, rc = cpg_initialize(&cpg_handle, &cpg_callbacks)); -- if (rc != CS_OK) { -- crm_err("corosync cpg init error %d", rc); -- return FALSE; -- } -- -- rc = cpg_fd_get(cpg_handle, &fd); -- if (rc != CS_OK) { -- crm_err("corosync cpg fd_get error %d", rc); -- goto bail; -- } -- -- retries = 0; -- cs_repeat(retries, 30, rc = cpg_local_get(cpg_handle, &nodeid)); -- if (rc != CS_OK) { -- crm_err("corosync cpg local_get error %d", rc); -- goto bail; -- } -- -- 
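The removed mcp deliver callback above shows the pattern the daemon relies on: drop our own broadcasts by nodeid and parse every other node's announcement. A simplified stand-in with the types and parsing reduced to stubs (the real signature comes from corosync/cpg.h, and the real payload is the XML handled by string2xml() above):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t stub_local_nodeid = 1;

    /* Stand-in for the string2xml()/crm_element_value() parsing above */
    static void
    handle_peer_announcement(uint32_t nodeid, const char *msg)
    {
        printf("peer %u announced: %s\n", (unsigned int)nodeid, msg);
    }

    /* Shape of the CPG deliver callback: ignore traffic we multicast to
     * ourselves, process announcements from every other node */
    static void
    stub_cpg_deliver(uint32_t nodeid, const void *msg, size_t msg_len)
    {
        (void)msg_len;
        if (nodeid == stub_local_nodeid) {
            return;
        }
        handle_peer_announcement(nodeid, msg);
    }

    int
    main(void)
    {
        stub_cpg_deliver(1, "<node proclist=\"2\"/>", 21); /* self: ignored */
        stub_cpg_deliver(2, "<node proclist=\"2\"/>", 21); /* peer: handled */
        return 0;
    }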
crm_debug("Our nodeid: %d", nodeid); -- -- retries = 0; -- cs_repeat(retries, 30, rc = cpg_join(cpg_handle, &cpg_group)); -- -- if (rc != CS_OK) { -- crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); -- goto bail; -- } -- -- mainloop_add_fd("corosync-cpg", G_PRIORITY_DEFAULT, fd, &cpg_handle, &cpg_fd_callbacks); -- return TRUE; -- -- bail: -- cpg_finalize(cpg_handle); -- return FALSE; --} -- --gboolean --send_cpg_message(struct iovec * iov) --{ -- int rc = CS_OK; -- int retries = 0; -- -- errno = 0; -- -- do { -- rc = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 1); -- if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { -- cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; -- int rc2 = cpg_flow_control_state_get(cpg_handle, &fc_state); -- -- if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { -- crm_debug("Attempting to clear cpg dispatch queue"); -- rc2 = cpg_dispatch(cpg_handle, CS_DISPATCH_ALL); -- } -- -- if (rc2 != CS_OK) { -- crm_warn("Could not check/clear the cpg connection"); -- goto bail; -- -- } else { -- retries++; -- crm_debug("Retrying operation after %ds", retries); -- sleep(retries); -- } -- } else { -- break; -- } -- -- /* 5 retires is plenty, we'll resend once the membership reforms anyway */ -- } while (retries < 5); -- -- bail: -- if (rc != CS_OK) { -- crm_err("Sending message via cpg FAILED: (rc=%d) %s", rc, ais_error2text(rc)); -- } -- -- return (rc == CS_OK); --} -- - /* =::=::=::= Configuration =::=::=::= */ - #if HAVE_CONFDB - static int -@@ -447,7 +278,7 @@ read_config(void) - - #if HAVE_CONFDB - char *value = NULL; -- confdb_handle_t config; -+ confdb_handle_t config = 0; - confdb_handle_t top_handle = 0; - hdb_handle_t local_handle; - static confdb_callbacks_t callbacks = { }; -@@ -456,7 +287,8 @@ read_config(void) - rc = confdb_initialize(&config, &callbacks); - if (rc != CS_OK) { - retries++; -- printf("Connection setup failed: %d. Retrying in %ds\n", rc, retries); -+ printf("confdb connection setup failed: %s. Retrying in %ds\n", ais_error2text(rc), retries); -+ crm_info("confdb connection setup failed: %s. Retrying in %ds", ais_error2text(rc), retries); - sleep(retries); - - } else { -@@ -473,8 +305,8 @@ read_config(void) - rc = cmap_initialize(&local_handle); - if (rc != CS_OK) { - retries++; -- printf("API connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries); -- crm_info("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); -+ printf("cmap connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries); -+ crm_info("cmap connection setup failed: %s. 
Retrying in %ds", cs_strerror(rc), retries); - sleep(retries); - - } else { -diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c -index 47fdd68..6f8d9b9 100644 ---- a/mcp/pacemaker.c -+++ b/mcp/pacemaker.c -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -44,22 +45,6 @@ uint32_t local_nodeid = 0; - crm_trigger_t *shutdown_trigger = NULL; - const char *pid_file = "/var/run/pacemaker.pid"; - --/* *INDENT-OFF* */ --enum crm_proc_flag { -- crm_proc_none = 0x00000001, -- crm_proc_plugin = 0x00000002, -- crm_proc_lrmd = 0x00000010, -- crm_proc_cib = 0x00000100, -- crm_proc_crmd = 0x00000200, -- crm_proc_attrd = 0x00001000, -- crm_proc_stonithd = 0x00002000, -- crm_proc_pe = 0x00010000, -- crm_proc_te = 0x00020000, -- crm_proc_mgmtd = 0x00040000, -- crm_proc_stonith_ng = 0x00100000, --}; --/* *INDENT-ON* */ -- - typedef struct pcmk_child_s { - int pid; - long flag; -@@ -539,8 +524,10 @@ update_process_clients(void) - void - update_process_peers(void) - { -+ /* Do nothing for corosync-2 based clusters */ -+ - char buffer[1024]; -- struct iovec iov; -+ struct iovec *iov; - int rc = 0; - - memset(buffer, 0, SIZEOF(buffer)); -@@ -552,11 +539,11 @@ update_process_peers(void) - rc = snprintf(buffer, SIZEOF(buffer) - 1, "", get_process_list()); - } - -- iov.iov_base = buffer; -- iov.iov_len = rc + 1; -- - crm_trace("Sending %s", buffer); -- send_cpg_message(&iov); -+ iov = calloc(1, sizeof(struct iovec)); -+ iov->iov_base = strdup(buffer); -+ iov->iov_len = rc + 1; -+ send_cpg_iov(iov); - } - - gboolean -@@ -619,6 +606,7 @@ update_node_processes(uint32_t id, const char *uname, uint32_t procs) - return changed; - } - -+ - /* *INDENT-OFF* */ - static struct crm_option long_options[] = { - /* Top-level Options */ -@@ -779,6 +767,42 @@ init_children_processes(void) - } - } - -+static void -+mcp_cpg_destroy(gpointer user_data) -+{ -+ crm_err("Connection destroyed"); -+ crm_exit(ENOTCONN); -+} -+ -+static void -+mcp_cpg_deliver(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) -+{ -+ if (nodeid != local_nodeid) { -+ uint32_t procs = 0; -+ xmlNode *xml = string2xml(msg); -+ const char *uname = crm_element_value(xml, "uname"); -+ -+ crm_element_value_int(xml, "proclist", (int *)&procs); -+ /* crm_debug("Got proclist %.32x from %s", procs, uname); */ -+ if (update_node_processes(nodeid, uname, procs)) { -+ update_process_clients(); -+ } -+ } -+} -+ -+static void -+mcp_cpg_membership(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ const struct cpg_address *member_list, size_t member_list_entries, -+ const struct cpg_address *left_list, size_t left_list_entries, -+ const struct cpg_address *joined_list, size_t joined_list_entries) -+{ -+ /* Don't care about CPG membership, but we do want to broadcast our own presence */ -+ update_process_peers(); -+} -+ - int - main(int argc, char **argv) - { -@@ -795,6 +819,7 @@ main(int argc, char **argv) - crm_ipc_t *old_instance = NULL; - qb_ipcs_service_t *ipcs = NULL; - const char *facility = daemon_option("logfacility"); -+ static crm_cluster_t cluster; - - setenv("LC_ALL", "C", 1); - setenv("HA_LOGD", "no", 1); -@@ -951,12 +976,17 @@ main(int argc, char **argv) - crm_exit(EIO); - } - -+ /* Allows us to block shutdown */ - if (cluster_connect_cfg(&local_nodeid) == FALSE) { - crm_err("Couldn't connect to Corosync's CFG service"); - crm_exit(ENOPROTOOPT); - } - -- if (cluster_connect_cpg() == FALSE) { -+ cluster.destroy = mcp_cpg_destroy; -+ 
cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; -+ cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; -+ -+ if(cluster_connect_cpg(&cluster) == FALSE) { - crm_err("Couldn't connect to Corosync's CPG service"); - crm_exit(ENOPROTOOPT); - } -@@ -982,7 +1012,7 @@ main(int argc, char **argv) - - g_main_destroy(mainloop); - -- cluster_disconnect_cpg(); -+ cluster_disconnect_cpg(&cluster); - cluster_disconnect_cfg(); - - crm_info("Exiting %s", crm_system_name); -diff --git a/mcp/pacemaker.h b/mcp/pacemaker.h -index 224df93..8967966 100644 ---- a/mcp/pacemaker.h -+++ b/mcp/pacemaker.h -@@ -41,20 +41,16 @@ typedef struct pcmk_peer_s { - char *uname; - } pcmk_peer_t; - --extern gboolean read_config(void); -+gboolean read_config(void); - --extern gboolean cluster_connect_cfg(uint32_t * nodeid); --extern gboolean cluster_disconnect_cfg(void); -+gboolean cluster_connect_cfg(uint32_t * nodeid); -+gboolean cluster_disconnect_cfg(void); - --extern gboolean cluster_connect_cpg(void); --extern gboolean cluster_disconnect_cpg(void); --extern gboolean send_cpg_message(struct iovec *iov); -+void update_process_clients(void); -+void update_process_peers(void); -+gboolean update_node_processes(uint32_t node, const char *uname, uint32_t procs); - --extern void update_process_clients(void); --extern void update_process_peers(void); --extern gboolean update_node_processes(uint32_t node, const char *uname, uint32_t procs); -+void enable_mgmtd(gboolean enable); -+void enable_crmd_as_root(gboolean enable); - --extern void enable_mgmtd(gboolean enable); --extern void enable_crmd_as_root(gboolean enable); -- --extern void pcmk_shutdown(int nsig); -+void pcmk_shutdown(int nsig); -diff --git a/mcp/pacemaker.in b/mcp/pacemaker.in -index a6647fe..c96f1d1 100644 ---- a/mcp/pacemaker.in -+++ b/mcp/pacemaker.in -@@ -111,6 +111,7 @@ cman_pre_start() - pid=$(pidof corosync 2>/dev/null) - if [ $? 
-ne 0 ]; then - service cman start -+ sleep 2 - fi - } - -diff --git a/tools/attrd.c b/tools/attrd.c -index 1e834ea..2d485f9 100644 ---- a/tools/attrd.c -+++ b/tools/attrd.c -@@ -325,11 +325,19 @@ attrd_ha_callback(HA_Message * msg, void *private_data) - #endif - - #if SUPPORT_COROSYNC --static gboolean --attrd_ais_dispatch(int kind, const char *from, const char *data) -+static void -+attrd_cs_dispatch(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { -+ uint32_t kind = 0; - xmlNode *xml = NULL; -+ const char *from = NULL; -+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); - -+ if(data == NULL) { -+ return; -+ } - if (kind == crm_class_cluster) { - xml = string2xml(data); - if (xml == NULL) { -@@ -360,11 +368,11 @@ attrd_ais_dispatch(int kind, const char *from, const char *data) - free_xml(xml); - } - -- return TRUE; -+ free(data); - } - - static void --attrd_ais_destroy(gpointer unused) -+attrd_cs_destroy(gpointer unused) - { - if (need_shutdown) { - /* we signed out, so this is expected */ -@@ -405,7 +413,7 @@ update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) - { - attr_hash_entry_t *entry = value; - -- if (entry->value != NULL) { -+ if (entry->value != NULL || entry->stored_value != NULL) { - attrd_timer_callback(value); - } - } -@@ -537,8 +545,9 @@ main(int argc, char **argv) - - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { -- cluster.destroy = attrd_ais_destroy; -- cluster.cs_dispatch = attrd_ais_dispatch; -+ cluster.destroy = attrd_cs_destroy; -+ cluster.cpg.cpg_deliver_fn = attrd_cs_dispatch; -+ cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; - } - #endif - -diff --git a/tools/crm_node.c b/tools/crm_node.c -index a25b3b4..aacea76 100644 ---- a/tools/crm_node.c -+++ b/tools/crm_node.c -@@ -500,16 +500,23 @@ crm_add_member(gpointer key, gpointer value, gpointer user_data) - } - } - --static gboolean --ais_membership_dispatch(int kind, const char *from, const char *data) -+static void -+ais_membership_dispatch(cpg_handle_t handle, -+ const struct cpg_name *groupName, -+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { -+ uint32_t kind = 0; -+ const char *from = NULL; -+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); -+ - switch (kind) { - case crm_class_members: - case crm_class_notify: - case crm_class_quorum: - break; - default: -- return TRUE; -+ free(data); -+ return; - - break; - } -@@ -548,9 +555,10 @@ ais_membership_dispatch(int kind, const char *from, const char *data) - fprintf(stdout, "\n"); - } - -+ free(data); - crm_exit(pcmk_ok); - -- return TRUE; -+ return; - } - #endif - -@@ -695,7 +703,8 @@ try_openais(int command, enum cluster_type_e stack) - static crm_cluster_t cluster; - - cluster.destroy = ais_membership_destroy; -- cluster.cs_dispatch = ais_membership_dispatch; -+ cluster.cpg.cpg_deliver_fn = ais_membership_dispatch; -+ cluster.cpg.cpg_confchg_fn = NULL; - - if (init_cs_connection_once(&cluster)) { - -@@ -703,7 +712,7 @@ try_openais(int command, enum cluster_type_e stack) - - switch (command) { - case 'R': -- send_ais_text(crm_class_rmpeer, target_uname, TRUE, NULL, crm_msg_ais); -+ send_cluster_text(crm_class_rmpeer, target_uname, TRUE, NULL, crm_msg_ais); - cib_remove_node(0, target_uname); - crm_exit(pcmk_ok); - -@@ -713,13 +722,13 @@ try_openais(int command, enum cluster_type_e stack) - crm_exit(pcmk_ok); - - case 'q': -- send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); -+ 
send_cluster_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); - break; - - case 'l': - case 'p': - crm_info("Requesting the list of configured nodes"); -- send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais); -+ send_cluster_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais); - break; - - case 'i': diff --git a/pacemaker-d19719c.patch b/pacemaker-d19719c.patch deleted file mode 100644 index 626a6e7..0000000 --- a/pacemaker-d19719c.patch +++ /dev/null @@ -1,37184 +0,0 @@ -diff --git a/ChangeLog b/ChangeLog -index fa9cea4..180c363 100644 ---- a/ChangeLog -+++ b/ChangeLog -@@ -1,3 +1,102 @@ -+* Fri Mar 08 2013 Andrew Beekhof Pacemaker-1.1.9-1 -+- Update source tarball to revision: 7e42d77 -+- Statistics: -+ Changesets: 731 -+ Diff: 1301 files changed, 92909 insertions(+), 57455 deletions(-) -+ -+- Features added in Pacemaker-1.1.9 -+ + corosync: Allow cman and corosync 2.0 nodes to use a name other than uname() -+ + corosync: Use queues to avoid blocking when sending CPG messages -+ + ipc: Compress messages that exceed the configured IPC message limit -+ + ipc: Use queues to prevent slow clients from blocking the server -+ + ipc: Use shared memory by default -+ + lrmd: Support nagios remote monitoring -+ + lrmd: Pacemaker Remote Daemon for extending pacemaker functionality outside corosync cluster. -+ + pengine: Check for master/slave resources that are not OCF agents -+ + pengine: Support a 'requires' resource meta-attribute for controlling whether it needs quorum, fencing or nothing -+ + pengine: Support for resource container -+ + pengine: Support resources that require unfencing before start -+ -+- Changes since Pacemaker-1.1.8 -+ + attrd: Correctly handle deletion of non-existant attributes -+ + Bug cl#5135 - Improved detection of the active cluster type -+ + Bug rhbz#913093 - Use crm_node instead of uname -+ + cib: Avoid use-after-free by correctly support cib_no_children for non-xpath queries -+ + cib: Correctly process XML diff's involving element removal -+ + cib: Performance improvements for non-DC nodes -+ + cib: Prevent error message by correctly handling peer replies -+ + cib: Prevent ordering changes when applying xml diffs -+ + cib: Remove text nodes from cib replace operations -+ + cluster: Detect node name collisions in corosync -+ + cluster: Preserve corosync membership state when matching node name/id entries -+ + cman: Force fenced to terminate on shutdown -+ + cman: Ignore qdisk 'nodes' -+ + core: Drop per-user core directories -+ + corosync: Avoid errors when closing failed connections -+ + corosync: Ensure peer state is preserved when matching names to nodeids -+ + corosync: Clean up CMAP connections after querying node name -+ + corosync: Correctly detect corosync 2.0 clusters even if we don't have permission to access it -+ + crmd: Bug cl#5144 - Do not updated the expected status of failed nodes -+ + crmd: Correctly determin if cluster disconnection was abnormal -+ + crmd: Correctly relay messages for remote clients (bnc#805626, bnc#804704) -+ + crmd: Correctly stall the FSA when waiting for additional inputs -+ + crmd: Detect and recover when we are evicted from CPG -+ + crmd: Differentiate between a node that is up and coming up in peer_update_callback() -+ + crmd: Have cib operation timeouts scale with node count -+ + crmd: Improved continue/wait logic in do_dc_join_finalize() -+ + crmd: Prevent election storms caused by getrusage() values being too close -+ + crmd: Prevent timeouts when performing pacemaker level membership 
negotiation -+ + crmd: Prevent use-after-free of fsa_message_queue during exit -+ + crmd: Store all current actions when stalling the FSA -+ + crm_mon: Do not try to render a blank cib and indicate the previous output is now stale -+ + crm_mon: Fixes crm_mon crash when using snmp traps. -+ + crm_mon: Look for the correct error codes when applying configuration updates -+ + crm_report: Ensure policy engine logs are found -+ + crm_report: Fix node list detection -+ + crm_resource: Have crm_resource generate a valid transition key when sending resource commands to the crmd -+ + date/time: Bug cl#5118 - Correctly convert seconds-since-epoch to the current time -+ + fencing: Attempt to provide more information that just 'generic error' for failed actions -+ + fencing: Correctly record completed but previously unknown fencing operations -+ + fencing: Correctly terminate when all device options have been exhausted -+ + fencing: cov#739453 - String not null terminated -+ + fencing: Do not merge new fencing requests with stale ones from dead nodes -+ + fencing: Do not start fencing until entire device topology is found or query results timeout. -+ + fencing: Do not wait for the query timeout if all replies have arrived -+ + fencing: Fix passing of parameters from CMAN containing '=' -+ + fencing: Fix non-comparison when sorting devices by priority -+ + fencing: On failure, only try a topology device once from the remote level. -+ + fencing: Only try peers for non-topology based operations once -+ + fencing: Retry stonith device for duration of action's timeout period. -+ + heartbeat: Remove incorrect assert during cluster connect -+ + ipc: Bug cl#5110 - Prevent 100% CPU usage when looking for synchronous replies -+ + ipc: Use 50k as the default compression threshold -+ + legacy: Prevent assertion failure on routing ais messages (bnc#805626) -+ + legacy: Re-enable logging from the pacemaker plugin -+ + legacy: Relax the 'active' check for plugin based clusters to avoid false negatives -+ + legacy: Skip peer process check if the process list is empty in crm_is_corosync_peer_active() -+ + mcp: Only define HA_DEBUGLOG to avoid agent calls to ocf_log printing everything twice -+ + mcp: Re-attach to existing pacemaker components when mcp fails -+ + pengine: Any location constraint for the slave role applies to all roles -+ + pengine: Avoid leaking memory when cleaning up failcounts and using containers -+ + pengine: Bug cl#5101 - Ensure stop order is preserved for partially active groups -+ + pengine: Bug cl#5140 - Allow set members to be stopped when the subseqent set has require-all=false -+ + pengine: Bug cl#5143 - Prevent shuffling of anonymous master/slave instances -+ + pengine: Bug rhbz#880249 - Ensure orphan masters are demoted before being stopped -+ + pengine: Bug rhbz#880249 - Teach the PE how to recover masters into primitives -+ + pengine: cl#5025 - Automatically clear failcount for start/monitor failures after resource parameters change -+ + pengine: cl#5099 - Probe operation uses the timeout value from the minimum interval monitor by default (#bnc776386) -+ + pengine: cl#5111 - When clone/master child rsc has on-fail=stop, insure all children stop on failure. 
-+ + pengine: cl#5142 - Do not delete orphaned children of an anonymous clone -+ + pengine: Correctly unpack active anonymous clones -+ + pengine: Ensure previous migrations are closed out before attempting another one -+ + pengine: Introducing the whitebox container resources feature -+ + pengine: Prevent double-free for cloned primitive from template -+ + pengine: Process rsc_ticket dependencies earlier for correctly allocating resources (bnc#802307) -+ + pengine: Remove special cases for fencing resources -+ + pengine: rhbz#902459 - Remove rsc node status for orphan resources -+ + systemd: Gracefully handle unexpected DBus return types -+ + Replace the use of the insecure mktemp(3) with mkstemp(3) -+ - * Thu Sep 20 2012 Andrew Beekhof Pacemaker-1.1.8-1 - - - Update source tarball to revision: 1a5341f -@@ -650,7 +749,7 @@ - - No longer remove RPATH data, it prevents us finding libperl.so and no other - libraries were being hardcoded - - Compile in support for heartbeat --- Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements -+- Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements - depending on which stacks are supported - - * Mon Aug 17 2009 Andrew Beekhof - 1.0.5-1 -@@ -682,7 +781,7 @@ - * Fri Jul 24 2009 Andrew Beekhof - 1.0.4-3 - - Initial Fedora checkin - - Include an AUTHORS and license file in each package --- Change the library package name to pacemaker-libs to be more -+- Change the library package name to pacemaker-libs to be more - Fedora compliant - - Remove execute permissions from xml related files - - Reference the new cluster-glue devel package name -diff --git a/Makefile.am b/Makefile.am -index 4f742e4..8cd9342 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -34,6 +34,7 @@ ACLOCAL_AMFLAGS = -I m4 - - testdir = $(datadir)/$(PACKAGE)/tests/ - test_SCRIPTS = coverage.sh BasicSanity.sh -+test_DATA = valgrind-pcmk.suppressions - - # Scratch file for ad-hoc testing - scratch_SOURCES = scratch.c -diff --git a/cib/Makefile.am b/cib/Makefile.am -index 12493ee..220451d 100644 ---- a/cib/Makefile.am -+++ b/cib/Makefile.am -@@ -5,12 +5,12 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
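
The ChangeLog above credits corosync with "queues to avoid blocking when sending CPG messages"; the send_cpg_message() deleted earlier in this diff shows the inline flow-control loop those queues replace. A minimal sketch of that older pattern, assuming only the <corosync/cpg.h> calls that appear in the removed hunk (the helper name is illustrative, not Pacemaker's):

#include <unistd.h>
#include <sys/uio.h>
#include <corosync/cpg.h>

/* Retry a CPG multicast while corosync applies flow control,
 * draining our own dispatch queue between attempts. */
static int
cpg_send_with_backoff(cpg_handle_t handle, struct iovec *iov)
{
    cs_error_t rc = CS_OK;
    int retries = 0;

    do {
        rc = cpg_mcast_joined(handle, CPG_TYPE_AGREED, iov, 1);
        if (rc != CS_ERR_TRY_AGAIN && rc != CS_ERR_QUEUE_FULL) {
            break;  /* success, or an error that retrying will not fix */
        }

        cpg_flow_control_state_t fc = CPG_FLOW_CONTROL_DISABLED;

        if (cpg_flow_control_state_get(handle, &fc) == CS_OK
            && fc == CPG_FLOW_CONTROL_ENABLED) {
            /* Let corosync make progress before we retry */
            cpg_dispatch(handle, CS_DISPATCH_ALL);
        }

        retries++;
        sleep(retries);  /* linear backoff, as in the removed code */

        /* Five retries is plenty; the message is resent anyway
         * once the membership reforms. */
    } while (retries < 5);

    return (rc == CS_OK) ? 0 : -1;
}
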
-@@ -33,10 +33,11 @@ COMMONLIBS = $(top_builddir)/lib/common/libcrmcommon.la \ - halib_PROGRAMS = cib cibmon - - if BUILD_HELP --man8_MANS = -+man8_MANS = - %.8: % - echo Creating $@ -- chmod a+x $< -+ chmod a+x $(top_builddir)/cib/$< -+ $(top_builddir)/cib/$< --help - $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/cib/$< - endif - -@@ -50,7 +51,7 @@ cib_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(COMMONLIBS) $(CRYPTOLIB) $(CLUSTERLIBS) - - cibmon_SOURCES = cibmon.c --cibmon_LDADD = $(COMMONLIBS) -+cibmon_LDADD = $(COMMONLIBS) - - clean-generic: - rm -f *.log *.debug *.xml *~ -diff --git a/cib/callbacks.c b/cib/callbacks.c -index 07b0d45..754e218 100644 ---- a/cib/callbacks.c -+++ b/cib/callbacks.c -@@ -235,9 +235,14 @@ cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean - } - - crm_trace("Inbound: %.200s", data); -- if (op_request == NULL || cib_client == NULL) { -+ if (op_request == NULL) { -+ crm_trace("Invalid message from %p", c); - crm_ipcs_send_ack(cib_client, id, "nack", __FUNCTION__, __LINE__); - return 0; -+ -+ } else if(cib_client == NULL) { -+ crm_trace("Invalid client %p", c); -+ return 0; - } - - if (is_set(call_options, cib_sync_call)) { -@@ -692,12 +697,28 @@ cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean priv - } - - if (cib_status != pcmk_ok) { -+ const char *call = crm_element_value(request, F_CIB_CALLID); -+ - rc = cib_status; - crm_err("Operation ignored, cluster configuration is invalid." - " Please repair and restart: %s", pcmk_strerror(cib_status)); -- op_reply = cib_construct_reply(request, the_cib, cib_status); -+ -+ op_reply = create_xml_node(NULL, "cib-reply"); -+ crm_xml_add(op_reply, F_TYPE, T_CIB); -+ crm_xml_add(op_reply, F_CIB_OPERATION, op); -+ crm_xml_add(op_reply, F_CIB_CALLID, call); -+ crm_xml_add(op_reply, F_CIB_CLIENTID, client_id); -+ crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options); -+ crm_xml_add_int(op_reply, F_CIB_RC, rc); -+ -+ crm_trace("Attaching reply output"); -+ add_message_xml(op_reply, F_CIB_CALLDATA, the_cib); -+ -+ crm_log_xml_explicit(op_reply, "cib:reply"); - - } else if (process) { -+ time_t finished = 0; -+ - int now = time(NULL); - int level = LOG_INFO; - const char *section = crm_element_value(request, F_CIB_SECTION); -@@ -744,7 +765,9 @@ cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean priv - the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", - the_cib ? 
crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0"); - -- if ((now + 1) < time(NULL)) { -+ finished = time(NULL); -+ if (finished - now > 3) { -+ crm_trace("%s operation took %ds to complete", op, finished - now); - crm_write_blackbox(0, NULL); - } - -@@ -817,41 +840,6 @@ cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean priv - return; - } - --xmlNode * --cib_construct_reply(xmlNode * request, xmlNode * output, int rc) --{ -- int lpc = 0; -- xmlNode *reply = NULL; -- const char *name = NULL; -- const char *value = NULL; -- -- const char *names[] = { -- F_CIB_OPERATION, -- F_CIB_CALLID, -- F_CIB_CLIENTID, -- F_CIB_CALLOPTS -- }; -- static int max = DIMOF(names); -- -- crm_trace("Creating a basic reply"); -- reply = create_xml_node(NULL, "cib-reply"); -- crm_xml_add(reply, F_TYPE, T_CIB); -- -- for (lpc = 0; lpc < max; lpc++) { -- name = names[lpc]; -- value = crm_element_value(request, name); -- crm_xml_add(reply, name, value); -- } -- -- crm_xml_add_int(reply, F_CIB_RC, rc); -- -- if (output != NULL) { -- crm_trace("Attaching reply output"); -- add_message_xml(reply, F_CIB_CALLDATA, output); -- } -- return reply; --} -- - int - cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) - { -@@ -870,6 +858,7 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - - const char *op = NULL; - const char *section = NULL; -+ const char *call_id = crm_element_value(request, F_CIB_CALLID); - - int rc = pcmk_ok; - int rc2 = pcmk_ok; -@@ -1034,9 +1023,9 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - } - - if ((call_options & cib_inhibit_notify) == 0) { -- const char *call_id = crm_element_value(request, F_CIB_CALLID); - const char *client = crm_element_value(request, F_CIB_CLIENTNAME); - -+ crm_trace("Sending notifications"); - #ifdef SUPPORT_POSTNOTIFY - cib_post_notify(call_options, op, input, rc, the_cib); - #endif -@@ -1070,9 +1059,25 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - - done: - if ((call_options & cib_discard_reply) == 0) { -- *reply = cib_construct_reply(request, output, rc); -+ const char *caller = crm_element_value(request, F_CIB_CLIENTID); -+ -+ *reply = create_xml_node(NULL, "cib-reply"); -+ crm_xml_add(*reply, F_TYPE, T_CIB); -+ crm_xml_add(*reply, F_CIB_OPERATION, op); -+ crm_xml_add(*reply, F_CIB_CALLID, call_id); -+ crm_xml_add(*reply, F_CIB_CLIENTID, caller); -+ crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options); -+ crm_xml_add_int(*reply, F_CIB_RC, rc); -+ -+ if (output != NULL) { -+ crm_trace("Attaching reply output"); -+ add_message_xml(*reply, F_CIB_CALLDATA, output); -+ } -+ - crm_log_xml_explicit(*reply, "cib:reply"); - } -+ -+ crm_trace("cleanup"); - #if ENABLE_ACL - if (filtered_current_cib != NULL) { - free_xml(filtered_current_cib); -@@ -1082,6 +1087,7 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb - if (call_type >= 0) { - cib_op_cleanup(call_type, call_options, &input, &output); - } -+ crm_trace("done"); - return rc; - } - -@@ -1206,9 +1212,7 @@ cib_ccm_dispatch(gpointer user_data) - - /* eventually it might be nice to recover and reconnect... but until then... 
*/ - crm_err("Exiting to recover from CCM connection failure"); -- crm_exit(2); -- -- return -1; -+ return crm_exit(ENOTCONN); - } - - int current_instance = 0; -@@ -1419,9 +1423,9 @@ terminate_cib(const char *caller, gboolean fast) - qb_ipcs_destroy(ipcs_shm); - - if (fast) { -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - } else { -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - } - } -diff --git a/cib/cibmon.c b/cib/cibmon.c -index f0e173b..fa12d26 100644 ---- a/cib/cibmon.c -+++ b/cib/cibmon.c -@@ -251,5 +251,5 @@ cibmon_diff(const char *event, xmlNode * msg) - void - cibmon_shutdown(int nsig) - { -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } -diff --git a/cib/common.c b/cib/common.c -index 3fd1b73..0d66857 100644 ---- a/cib/common.c -+++ b/cib/common.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2008 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -219,11 +219,12 @@ cib_get_operation_id(const char *op, int *operation) - - operation_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); - for (lpc = 1; lpc < max_msg_types; lpc++) { -- /* coverity[returned_null] Ignore */ - int *value = malloc(sizeof(int)); - -- *value = lpc; -- g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value); -+ if(value) { -+ *value = lpc; -+ g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value); -+ } - } - } - -@@ -336,11 +337,13 @@ cib_op_can_run(int call_type, int call_options, gboolean privileged, gboolean gl - int - cib_op_prepare(int call_type, xmlNode * request, xmlNode ** input, const char **section) - { -+ crm_trace("Prepare %d", call_type); - return cib_server_ops[call_type].prepare(request, input, section); - } - - int - cib_op_cleanup(int call_type, int options, xmlNode ** input, xmlNode ** output) - { -+ crm_trace("Cleanup %d", call_type); - return cib_server_ops[call_type].cleanup(options, input, output); - } -diff --git a/cib/io.c b/cib/io.c -index 1fd020f..b94030f 100644 ---- a/cib/io.c -+++ b/cib/io.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -533,19 +533,13 @@ activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op) - } - - static void --cib_diskwrite_complete(GPid pid, gint status, gpointer user_data) -+cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- int exitcode = -1; -- -- if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); -- int core = WCOREDUMP(status); -- -+ if (signo) { - crm_notice("Disk write process terminated with signal %d (pid=%d, core=%d)", signo, pid, - core); - -- } else if (WIFEXITED(status)) { -- exitcode = WEXITSTATUS(status); -+ } else { - do_crm_log(exitcode == 0 ? LOG_TRACE : LOG_ERR, "Disk write process exited (pid=%d, rc=%d)", - pid, exitcode); - } -@@ -562,7 +556,7 @@ int - write_cib_contents(gpointer p) - { - int fd = -1; -- int exit_rc = EX_OK; -+ int exit_rc = pcmk_ok; - char *digest = NULL; - xmlNode *cib_status_root = NULL; - -@@ -608,7 +602,7 @@ write_cib_contents(gpointer p) - - if (pid) { - /* Parent */ -- g_child_watch_add(pid, cib_diskwrite_complete, NULL); -+ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete); - if (bb_state == QB_LOG_STATE_ENABLED) { - /* Re-enable now that it it safe */ - qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); -@@ -619,9 +613,6 @@ write_cib_contents(gpointer p) - - /* A-synchronous write out after a fork() */ - -- /* Don't log anything unless strictly necessary */ -- set_crm_log_level(LOG_ERR); -- - /* In theory we can scribble on "the_cib" here and not affect the parent - * But lets be safe anyway - */ -@@ -645,7 +636,7 @@ write_cib_contents(gpointer p) - /* check the admin didnt modify it underneath us */ - if (validate_on_disk_cib(primary_file, NULL) == FALSE) { - crm_err("%s was manually modified while the cluster was active!", primary_file); -- exit_rc = 1; -+ exit_rc = pcmk_err_cib_modified; - goto cleanup; - } - -@@ -657,14 +648,14 @@ write_cib_contents(gpointer p) - - rc = link(primary_file, backup_file); - if (rc < 0) { -- exit_rc = 4; -+ exit_rc = pcmk_err_cib_backup; - crm_perror(LOG_ERR, "Cannot link %s to %s", primary_file, backup_file); - goto cleanup; - } - - rc = link(digest_file, backup_digest); - if (rc < 0 && errno != ENOENT) { -- exit_rc = 5; -+ exit_rc = pcmk_err_cib_backup; - crm_perror(LOG_ERR, "Cannot link %s to %s", digest_file, backup_digest); - goto cleanup; - } -@@ -696,9 +687,9 @@ write_cib_contents(gpointer p) - umask(S_IWGRP | S_IWOTH | S_IROTH); - - tmp_cib_fd = mkstemp(tmp_cib); -- if (write_xml_fd(cib_local, tmp_cib, tmp_cib_fd, FALSE) <= 0) { -+ if (tmp_cib_fd < 0 || write_xml_fd(cib_local, tmp_cib, tmp_cib_fd, FALSE) <= 0) { - crm_err("Changes couldn't be written to %s", tmp_cib); -- exit_rc = 2; -+ exit_rc = pcmk_err_cib_save; - goto cleanup; - } - -@@ -708,9 +699,9 @@ write_cib_contents(gpointer p) - admin_epoch ? admin_epoch : "0", epoch ? 
epoch : "0", digest); - - tmp_digest_fd = mkstemp(tmp_digest); -- if (write_cib_digest(cib_local, tmp_digest, tmp_digest_fd, digest) <= 0) { -+ if (tmp_digest_fd < 0 || write_cib_digest(cib_local, tmp_digest, tmp_digest_fd, digest) <= 0) { - crm_err("Digest couldn't be written to %s", tmp_digest); -- exit_rc = 3; -+ exit_rc = pcmk_err_cib_save; - goto cleanup; - } - crm_debug("Wrote digest %s to disk", digest); -diff --git a/cib/main.c b/cib/main.c -index 878aad6..6b56274 100644 ---- a/cib/main.c -+++ b/cib/main.c -@@ -478,13 +478,13 @@ cib_init(void) - - if (startCib("cib.xml") == FALSE) { - crm_crit("Cannot start CIB... terminating"); -- crm_exit(1); -+ crm_exit(ENODATA); - } - - if (stand_alone == FALSE) { - if (crm_cluster_connect(&crm_cluster) == FALSE) { - crm_crit("Cannot sign in to the cluster... terminating"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - cib_our_uname = crm_cluster.uname; - if (is_openais_cluster()) { -@@ -522,31 +522,24 @@ cib_init(void) - cib_our_uname = strdup("localhost"); - } - -- ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, &ipc_ro_callbacks); -- ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, &ipc_rw_callbacks); -- ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, &ipc_rw_callbacks); -+ cib_ipc_servers_init(&ipcs_ro, -+ &ipcs_rw, -+ &ipcs_shm, -+ &ipc_ro_callbacks, -+ &ipc_rw_callbacks); - - if (stand_alone) { - cib_is_master = TRUE; - } - -- if (ipcs_ro != NULL && ipcs_rw != NULL && ipcs_shm != NULL) { -- /* Create the mainloop and run it... */ -- mainloop = g_main_new(FALSE); -- crm_info("Starting %s mainloop", crm_system_name); -+ /* Create the mainloop and run it... */ -+ mainloop = g_main_new(FALSE); -+ crm_info("Starting %s mainloop", crm_system_name); - -- g_main_run(mainloop); -+ g_main_run(mainloop); -+ cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); - -- } else { -- crm_err("Failed to create IPC servers: shutting down and inhibiting respawn"); -- crm_exit(100); -- } -- -- qb_ipcs_destroy(ipcs_ro); -- qb_ipcs_destroy(ipcs_rw); -- qb_ipcs_destroy(ipcs_shm); -- -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - gboolean -diff --git a/cib/messages.c b/cib/messages.c -index e2892f2..8ea57c4 100644 ---- a/cib/messages.c -+++ b/cib/messages.c -@@ -132,7 +132,7 @@ cib_process_quit(const char *op, int options, const char *section, xmlNode * req - crm_trace("Processing \"%s\" event", op); - - crm_warn("The CRMd has asked us to exit... complying"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - return result; - } - -diff --git a/cib/remote.c b/cib/remote.c -index caa7057..d260e35 100644 ---- a/cib/remote.c -+++ b/cib/remote.c -@@ -133,6 +133,11 @@ init_remote_listener(int port, gboolean encrypted) - - /* create server socket */ - ssock = malloc(sizeof(int)); -+ if(ssock == NULL) { -+ crm_perror(LOG_ERR, "Can not create server socket." ERROR_SUFFIX); -+ return -1; -+ } -+ - *ssock = socket(AF_INET, SOCK_STREAM, 0); - if (*ssock == -1) { - crm_perror(LOG_ERR, "Can not create server socket." 
ERROR_SUFFIX); -@@ -314,20 +319,6 @@ cib_remote_listen(gpointer data) - return TRUE; - } - -- if (ssock == remote_tls_fd) { --#ifdef HAVE_GNUTLS_GNUTLS_H -- /* create gnutls session for the server socket */ -- new_client->remote->tls_session = -- crm_create_anon_tls_session(csock, GNUTLS_SERVER, anon_cred_s); -- -- if (new_client->remote->tls_session == NULL) { -- crm_err("TLS session creation failed"); -- close(csock); -- return TRUE; -- } --#endif -- } -- - num_clients++; - - crm_client_init(); -@@ -338,19 +329,29 @@ cib_remote_listen(gpointer data) - - g_hash_table_insert(client_connections, new_client->id /* Should work */ , new_client); - -- /* clients have a few seconds to perform handshake. */ -- new_client->remote->auth_timeout = -- g_timeout_add(REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, new_client); -- - if (ssock == remote_tls_fd) { - #ifdef HAVE_GNUTLS_GNUTLS_H - new_client->kind = CRM_CLIENT_TLS; -+ -+ /* create gnutls session for the server socket */ -+ new_client->remote->tls_session = -+ crm_create_anon_tls_session(csock, GNUTLS_SERVER, anon_cred_s); -+ -+ if (new_client->remote->tls_session == NULL) { -+ crm_err("TLS session creation failed"); -+ close(csock); -+ return TRUE; -+ } - #endif - } else { - new_client->kind = CRM_CLIENT_TCP; - new_client->remote->tcp_socket = csock; - } - -+ /* clients have a few seconds to perform handshake. */ -+ new_client->remote->auth_timeout = -+ g_timeout_add(REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, new_client); -+ - new_client->remote->source = - mainloop_add_fd("cib-remote-client", G_PRIORITY_DEFAULT, csock, new_client, - &remote_client_fd_callbacks); -diff --git a/configure.ac b/configure.ac -index 454677a..bc59853 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -4,7 +4,7 @@ dnl - dnl License: GNU General Public License (GPL) - - dnl =============================================== --dnl Bootstrap -+dnl Bootstrap - dnl =============================================== - AC_PREREQ(2.59) - -@@ -19,7 +19,7 @@ dnl checks for compiler characteristics - dnl checks for library functions - dnl checks for system services - --AC_INIT(pacemaker, 1.1.8, pacemaker@oss.clusterlabs.org,,http://www.clusterlabs.org) -+AC_INIT(pacemaker, 1.1.9, pacemaker@oss.clusterlabs.org,,http://www.clusterlabs.org) - CRM_DTD_VERSION="1.2" - - PCMK_FEATURES="" -@@ -61,7 +61,7 @@ AC_SUBST(PACKAGE_SERIES) - AC_SUBST(PACKAGE_VERSION) - - dnl automake >= 1.11 offers --enable-silent-rules for suppressing the output from --dnl normal compilation. When a failure occurs, it will then display the full -+dnl normal compilation. 
When a failure occurs, it will then display the full - dnl command line - dnl Wrap in m4_ifdef to avoid breaking on older platforms - m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) -@@ -102,7 +102,7 @@ AC_CHECK_SIZEOF(long long) - AC_STRUCT_TIMEZONE - - dnl =============================================== --dnl Helpers -+dnl Helpers - dnl =============================================== - cc_supports_flag() { - local CFLAGS="$@" -@@ -164,7 +164,7 @@ AC_ARG_ENABLE([fatal-warnings], - [default=yes]]) - - AC_ARG_ENABLE([quiet], --[ --enable-quiet -+[ --enable-quiet - Supress make output unless there is an error - [default=no]]) - -@@ -173,7 +173,7 @@ AC_ARG_ENABLE([thread-safe], - [default=no]]) - - AC_ARG_ENABLE([bundled-ltdl], --[ --enable-bundled-ltdl Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]]) -+[ --enable-bundled-ltdl Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]]) - LTDL_LIBS="" - - AC_ARG_ENABLE([no-stack], -@@ -189,14 +189,14 @@ AC_ARG_ENABLE([systemd], - Do not build support for the Systemd init system [default=yes]]) - - AC_ARG_WITH(ais, -- [ --with-ais -+ [ --with-ais - Support the Corosync messaging and membership layer ], - [ SUPPORT_CS=$withval ], - [ SUPPORT_CS=try ], - ) - - AC_ARG_WITH(corosync, -- [ --with-corosync -+ [ --with-corosync - Support the Corosync messaging and membership layer ], - [ SUPPORT_CS=$withval ] - dnl initialized in AC_ARG_WITH(ais...) already, -@@ -204,21 +204,21 @@ dnl don't reset to try if it was given as --without-ais - ) - - AC_ARG_WITH(heartbeat, -- [ --with-heartbeat -+ [ --with-heartbeat - Support the Heartbeat messaging and membership layer ], - [ SUPPORT_HEARTBEAT=$withval ], - [ SUPPORT_HEARTBEAT=try ], - ) - - AC_ARG_WITH(cman, -- [ --with-cman -+ [ --with-cman - Support the consumption of membership and quorum from cman ], - [ SUPPORT_CMAN=$withval ], - [ SUPPORT_CMAN=try ], - ) - - AC_ARG_WITH(cpg, -- [ --with-cs-quorum -+ [ --with-cs-quorum - Support the consumption of membership and quorum from corosync ], - [ SUPPORT_CS_QUORUM=$withval ], - [ SUPPORT_CS_QUORUM=try ], -@@ -230,7 +230,7 @@ AC_ARG_WITH(nagios, - [ SUPPORT_NAGIOS=$withval ], - [ SUPPORT_NAGIOS=try ], - ) -- -+ - AC_ARG_WITH(nagios-plugin-dir, - [ --with-nagios-plugin-dir=DIR - Directory for nagios plugins [${NAGIOS_PLUGIN_DIR}]], -@@ -244,14 +244,14 @@ AC_ARG_WITH(nagios-metadata-dir, - ) - - AC_ARG_WITH(snmp, -- [ --with-snmp -+ [ --with-snmp - Support the SNMP protocol ], - [ SUPPORT_SNMP=$withval ], - [ SUPPORT_SNMP=try ], - ) - - AC_ARG_WITH(esmtp, -- [ --with-esmtp -+ [ --with-esmtp - Support the sending mail notifications with the esmtp library ], - [ SUPPORT_ESMTP=$withval ], - [ SUPPORT_ESMTP=try ], -@@ -264,14 +264,21 @@ AC_ARG_WITH(acl, - [ SUPPORT_ACL=no ], - ) - -+AC_ARG_WITH(cibsecrets, -+ [ --with-cibsecrets -+ Support CIB secrets ], -+ [ SUPPORT_CIBSECRETS=$withval ], -+ [ SUPPORT_CIBSECRETS=no ], -+) -+ - CSPREFIX="" - AC_ARG_WITH(ais-prefix, - [ --with-ais-prefix=DIR Prefix used when Corosync was installed [$prefix]], -- [ CSPREFIX=$withval ], -+ [ CSPREFIX=$withval ], - [ CSPREFIX=$prefix ]) - - LCRSODIR="" --AC_ARG_WITH(lcrso-dir, -+AC_ARG_WITH(lcrso-dir, - [ --with-lcrso-dir=DIR Corosync lcrso files. 
], - [ LCRSODIR="$withval" ]) - -@@ -282,7 +289,7 @@ AC_ARG_WITH(initdir, - - SUPPORT_PROFILING=0 - AC_ARG_WITH(profiling, -- [ --with-profiling -+ [ --with-profiling - Support gprof profiling ], - [ SUPPORT_PROFILING=$withval ]) - -@@ -381,7 +388,7 @@ case $libdir in - ;; - esac - --dnl Expand autoconf variables so that we dont end up with '${prefix}' -+dnl Expand autoconf variables so that we dont end up with '${prefix}' - dnl in #defines and python scripts - dnl NOTE: Autoconf deliberately leaves them unexpanded to allow - dnl make exec_prefix=/foo install -@@ -414,7 +421,7 @@ AC_SUBST(docdir) - for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \ - sharedstatedir localstatedir libdir includedir oldincludedir infodir \ - mandir INITDIR docdir --do -+do - dirname=`eval echo '${'${j}'}'` - if - test ! -d "$dirname" -@@ -426,7 +433,7 @@ done - dnl This OS-based decision-making is poor autotools practice; - dnl feature-based mechanisms are strongly preferred. - dnl --dnl So keep this section to a bare minimum; regard as a "necessary evil". -+dnl So keep this section to a bare minimum; regard as a "necessary evil". - - case "$host_os" in - *bsd*) LIBS="-L/usr/local/lib" -@@ -435,18 +442,18 @@ case "$host_os" in - ;; - *solaris*) - ;; --*linux*) -+*linux*) - AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform) - CFLAGS="$CFLAGS -I${prefix}/include" - ;; --darwin*) -+darwin*) - AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform) - LIBS="$LIBS -L${prefix}/lib" - CFLAGS="$CFLAGS -I${prefix}/include" - ;; - esac - --dnl Eventually remove this -+dnl Eventually remove this - CFLAGS="$CFLAGS -I${prefix}/include/heartbeat" - - AC_SUBST(INIT_EXT) -@@ -475,14 +482,14 @@ AC_COMPILE_IFELSE( - #include - ], - [ --int max = 512; -+int max = 512; - uint64_t bignum = 42; - char *buffer = malloc(max); - const char *random = "random"; - snprintf(buffer, max-1, "", bignum, random); - fprintf(stderr, "Result: %s\n", buffer); - ] -- )], -+ )], - [U64T="%lu"], - [U64T="%llu"] - ) -@@ -641,7 +648,7 @@ else - GPKGNAME="glib-2.0" - fi - --if -+if - $PKGCONFIG --exists $GPKGNAME - then - GLIBCONFIG="$PKGCONFIG $GPKGNAME" -@@ -652,22 +659,12 @@ else - $PKGCONFIG --cflags $GPKGNAME; echo $? - $PKGCONFIG $GPKGNAME; echo $? 
- set +x -- -+ - AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE}) - fi - AC_MSG_RESULT(using $GLIBCONFIG) - --AC_CHECK_LIB(glib-2.0, g_hash_table_get_values) --if test "x$ac_cv_lib_glib_2_0_g_hash_table_get_values" != x""yes; then -- AC_DEFINE_UNQUOTED(NEED_G_HASH_ITER, 1, glib-2.0 has no hashtable iterators) --fi -- --AC_CHECK_LIB(glib-2.0, g_list_free_full) --if test "x$ac_cv_lib_glib_2_0_g_list_free_full" != x""yes; then -- AC_DEFINE_UNQUOTED(NEED_G_LIST_FREE_FULL, 1, glib-2.0 has no g_list_free_full) --fi -- --if -+if - $PKGCONFIG --exists systemd - then - systemdunitdir=`$PKGCONFIG --variable=systemdsystemunitdir systemd` -@@ -754,6 +751,7 @@ AC_CHECK_HEADERS(sys/dir.h) - AC_CHECK_HEADERS(sys/ioctl.h) - AC_CHECK_HEADERS(sys/param.h) - AC_CHECK_HEADERS(sys/poll.h) -+AC_CHECK_HEADERS(sys/reboot.h) - AC_CHECK_HEADERS(sys/resource.h) - AC_CHECK_HEADERS(sys/select.h) - AC_CHECK_HEADERS(sys/socket.h) -@@ -770,7 +768,7 @@ AC_CHECK_HEADERS(time.h) - AC_CHECK_HEADERS(unistd.h) - AC_CHECK_HEADERS(winsock.h) - --dnl These headers need prerequisits before the tests will pass -+dnl These headers need prerequisits before the tests will pass - dnl AC_CHECK_HEADERS(net/if.h) - dnl AC_CHECK_HEADERS(netinet/icmp6.h) - dnl AC_CHECK_HEADERS(netinet/ip6.h) -@@ -920,7 +918,7 @@ if test "x$CURSESLIBS" != "x"; then - fi - - dnl Check for printw() prototype compatibility --if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then -+if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then - AC_MSG_CHECKING(whether printw() requires argument of "const char *") - ac_save_LIBS=$LIBS - LIBS="$CURSESLIBS $LIBS" -@@ -939,7 +937,7 @@ if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_fla - #endif - ], - [printw((const char *)"Test");] -- )], -+ )], - [ac_cv_compatible_printw=yes], - [ac_cv_compatible_printw=no] - ) -@@ -963,7 +961,7 @@ dnl Profiling and GProf - dnl ======================================================================== - - case $SUPPORT_PROFILING in -- 1|yes|true) -+ 1|yes|true) - SUPPORT_PROFILING=1 - - dnl Enable gprof -@@ -983,7 +981,7 @@ esac - AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for gprof profiling) - - case $SUPPORT_GCOV in -- 1|yes|true) -+ 1|yes|true) - SUPPORT_GCOV=1 - - dnl Enable gprof -@@ -995,10 +993,10 @@ case $SUPPORT_GCOV in - - dnl Turn off optimization so code coverage tool - dnl can get accurate line numbers -- AC_MSG_NOTICE(Old CFLAGS: $CFLAGS) -+ AC_MSG_NOTICE(Old CFLAGS: $CFLAGS) - CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g'` - CFLAGS="$CFLAGS -O0" -- AC_MSG_NOTICE(New CFLAGS: $CFLAGS) -+ AC_MSG_NOTICE(New CFLAGS: $CFLAGS) - - dnl Update features - PCMK_FEATURES="$PCMK_FEATURES gcov" -@@ -1021,19 +1019,17 @@ fi - - PKG_CHECK_MODULES(libqb, libqb, HAVE_libqb=1, HAVE_libqb=0) - AC_CHECK_HEADERS(qb/qbipc_common.h) --AC_CHECK_LIB(qb, qb_ipcc_is_connected) --AC_CHECK_FUNCS(qb_ipcc_is_connected) -+AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set) - - LIBQB_LOG=1 - PCMK_FEATURES="$PCMK_FEATURES libqb-logging libqb-ipc" - --if test $ac_cv_lib_qb_qb_ipcc_is_connected != yes; then -- AC_MSG_FAILURE(Version of IPC in libqb is not new enough) -+if -+ !pkg-config --atleast-version 0.13 libqb -+then -+ AC_MSG_FAILURE(Version of libqb is too old: v0.13 or greater requried) - fi - --AC_DEFINE_UNQUOTED(LIBQB_LOGGING, $LIBQB_LOG, Use libqb for logging) --AC_DEFINE_UNQUOTED(LIBQB_IPC, 0, Use libqb for IPC) -- - 
LIBS="$LIBS $libqb_LIBS" - - AC_CHECK_HEADERS(heartbeat/hb_config.h) -@@ -1160,7 +1156,7 @@ elif test -x $GIT -a -d .git; then - - else - # The current directory name make a reasonable default -- # Most generated archives will include the hash or tag -+ # Most generated archives will include the hash or tag - BASE=`basename $PWD` - BUILD_VERSION=`echo $BASE | sed s:.*[[Pp]]acemaker-::` - AC_MSG_RESULT(directory based hash: $BUILD_VERSION) -@@ -1201,7 +1197,7 @@ AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1) - case $SUPPORT_NAGIOS in - 1|yes|true|try) - SUPPORT_NAGIOS=1;; -- *) -+ *) - SUPPORT_NAGIOS=0;; - esac - -@@ -1295,13 +1291,13 @@ else - SUPPORT_CS=1 - CFLAGS="$CFLAGS $oldipc_FLAGS $cpg_FLAGS $cfg_FLAGS" - COROSYNC_LIBS="$COROSYNC_LIBS $oldipc_LIBS $cpg_LIBS $cfg_LIBS" -- -+ - elif test $HAVE_libqb = 1; then - SUPPORT_CS=1 - CS_USES_LIBQB=1 - CFLAGS="$CFLAGS $libqb_FLAGS $cpg_FLAGS $cfg_FLAGS" - COROSYNC_LIBS="$COROSYNC_LIBS $libqb_LIBS $cpg_LIBS $cfg_LIBS" -- AC_CHECK_LIB(corosync_common, cs_strerror) -+ AC_CHECK_LIB(corosync_common, cs_strerror) - - else - aisreason="corosync/libqb IPC libraries not found by pkg_config" -@@ -1318,7 +1314,7 @@ if test $SUPPORT_CS = 1 -a x$HAVE_oldipc = x0 ; then - dnl The only option now is the built-in quorum API - CFLAGS="$CFLAGS $cmap_CFLAGS $quorum_CFLAGS" - COROSYNC_LIBS="$COROSYNC_LIBS $cmap_LIBS $quorum_LIBS" -- -+ - STACKS="$STACKS corosync-native" - AC_DEFINE_UNQUOTED(SUPPORT_CS_QUORUM, 1, Support the consumption of membership and quorum from corosync) - fi -@@ -1352,9 +1348,9 @@ if test $SUPPORT_CS = 1; then - elif test $SUPPORT_CS != 0; then - SUPPORT_CS=0 - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support Corosync: $aisreason) -+ AC_MSG_WARN(Unable to support Corosync: $aisreason) - else -- AC_MSG_FAILURE(Unable to support Corosync: $aisreason) -+ AC_MSG_FAILURE(Unable to support Corosync: $aisreason) - fi - fi - -@@ -1453,9 +1449,9 @@ else - SNMPLIBS="" - SUPPORT_SNMP=0 - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support SNMP) -+ AC_MSG_WARN(Unable to support SNMP) - else -- AC_MSG_FAILURE(Unable to support SNMP) -+ AC_MSG_FAILURE(Unable to support SNMP) - fi - else - SUPPORT_SNMP=1 -@@ -1512,9 +1508,9 @@ else - if test $SUPPORT_ESMTP = no; then - SUPPORT_ESMTP=0 - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support ESMTP) -+ AC_MSG_WARN(Unable to support ESMTP) - else -- AC_MSG_FAILURE(Unable to support ESMTP) -+ AC_MSG_FAILURE(Unable to support ESMTP) - fi - else - SUPPORT_ESMTP=1 -@@ -1527,7 +1523,7 @@ AM_CONDITIONAL(ENABLE_ESMTP, test "$SUPPORT_ESMTP" = "1") - AC_DEFINE_UNQUOTED(ENABLE_ESMTP, $SUPPORT_ESMTP, Build in support for sending mail notifications with ESMTP) - - dnl ======================================================================== --dnl ACL -+dnl ACL - dnl ======================================================================== - - case $SUPPORT_ACL in -@@ -1551,9 +1547,9 @@ else - - if test $SUPPORT_ACL = 0; then - if test $missingisfatal = 0; then -- AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0) -+ AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0) - else -- AC_MSG_FAILURE(Unable to support ACL. You need to use libqb > 0.13.0) -+ AC_MSG_FAILURE(Unable to support ACL. 
You need to use libqb > 0.13.0) - fi - fi - fi -@@ -1566,6 +1562,32 @@ AM_CONDITIONAL(ENABLE_ACL, test "$SUPPORT_ACL" = "1") - AC_DEFINE_UNQUOTED(ENABLE_ACL, $SUPPORT_ACL, Build in support for CIB ACL) - - dnl ======================================================================== -+dnl CIB secrets -+dnl ======================================================================== -+ -+case $SUPPORT_CIBSECRETS in -+ 1|yes|true|try) -+ SUPPORT_CIBSECRETS=1;; -+ *) -+ SUPPORT_CIBSECRETS=0;; -+esac -+ -+AC_DEFINE_UNQUOTED(SUPPORT_CIBSECRETS, $SUPPORT_CIBSECRETS, Support CIB secrets) -+AM_CONDITIONAL(BUILD_CIBSECRETS, test $SUPPORT_CIBSECRETS = 1) -+ -+if test $SUPPORT_CIBSECRETS = 1; then -+ PCMK_FEATURES="$PCMK_FEATURES cibsecrets" -+ -+ LRM_CIBSECRETS_DIR="${localstatedir}/lib/pacemaker/lrm/secrets" -+ AC_DEFINE_UNQUOTED(LRM_CIBSECRETS_DIR,"$LRM_CIBSECRETS_DIR", Location for CIB secrets) -+ AC_SUBST(LRM_CIBSECRETS_DIR) -+ -+ LRM_LEGACY_CIBSECRETS_DIR="${localstatedir}/lib/heartbeat/lrm/secrets" -+ AC_DEFINE_UNQUOTED(LRM_LEGACY_CIBSECRETS_DIR,"$LRM_LEGACY_CIBSECRETS_DIR", Legacy location for CIB secrets) -+ AC_SUBST(LRM_LEGACY_CIBSECRETS_DIR) -+fi -+ -+dnl ======================================================================== - dnl GnuTLS - dnl ======================================================================== - -@@ -1652,21 +1674,21 @@ else - -fstack-protector-all - -Wall - -Waggregate-return -- -Wbad-function-cast -- -Wcast-align -+ -Wbad-function-cast -+ -Wcast-align - -Wdeclaration-after-statement - -Wendif-labels - -Wfloat-equal - -Wformat=2 - -Wformat-security - -Wformat-nonliteral -- -Wmissing-prototypes -- -Wmissing-declarations -+ -Wmissing-prototypes -+ -Wmissing-declarations - -Wnested-externs - -Wno-long-long - -Wno-strict-aliasing - -Wunused-but-set-variable -- -Wpointer-arith -+ -Wpointer-arith - -Wstrict-prototypes - -Wunsigned-char - -Wwrite-strings" -@@ -1740,7 +1762,7 @@ AC_SUBST(LIBADD_INTL) dnl extra flags for GNU gettext stuff... 
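
Further up, the cib/io.c hunks stop trusting mkstemp(3) blindly: the returned fd is now checked before write_xml_fd()/write_cib_digest() run, and the ChangeLog notes the insecure mktemp(3) was dropped entirely. A minimal sketch of the safe pattern, assuming only POSIX; write() stands in for Pacemaker's XML writers and the path template is illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static int
write_temp_copy(const char *dir, const char *payload)
{
    char path[256];
    int fd = -1;

    snprintf(path, sizeof(path), "%s/cib.XXXXXX", dir);

    fd = mkstemp(path);   /* creates the file atomically, unlike mktemp(3) */
    if (fd < 0) {         /* the fix: check before writing anything */
        perror("mkstemp");
        return -1;
    }

    if (write(fd, payload, strlen(payload)) < 0) {
        perror("write");
        close(fd);
        unlink(path);
        return -1;
    }

    close(fd);
    return 0;
}
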
- - AC_SUBST(LOCALE) - --dnl Options for cleaning up the compiler output -+dnl Options for cleaning up the compiler output - QUIET_LIBTOOL_OPTS="" - QUIET_MAKE_OPTS="" - if test "x${enable_quiet}" = "xyes"; then -@@ -1778,6 +1800,7 @@ pengine/Makefile \ - doc/Makefile \ - doc/Pacemaker_Explained/publican.cfg \ - doc/Clusters_from_Scratch/publican.cfg \ -+ doc/Pacemaker_Remote/publican.cfg \ - include/Makefile \ - include/crm/Makefile \ - include/crm/cib/Makefile \ -@@ -1806,20 +1829,26 @@ lib/Makefile \ - mcp/Makefile \ - mcp/pacemaker \ - mcp/pacemaker.service \ -+ mcp/pacemaker.upstart \ -+ mcp/pacemaker.combined.upstart \ - fencing/Makefile \ - fencing/regression.py \ - lrmd/Makefile \ - lrmd/regression.py \ -+ lrmd/pacemaker_remote.service \ -+ lrmd/pacemaker_remote \ - extra/Makefile \ - extra/resources/Makefile \ - extra/rgmanager/Makefile \ - tools/Makefile \ - tools/crm_report \ -+ tools/report.common \ -+ tools/cibsecret \ - xml/Makefile \ - lib/gnu/Makefile \ - ) - --dnl Now process the entire list of files added by previous -+dnl Now process the entire list of files added by previous - dnl calls to AC_CONFIG_FILES() - AC_OUTPUT() - -@@ -1850,4 +1879,3 @@ AC_MSG_RESULT([]) - AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) - AC_MSG_RESULT([ Libraries = ${LIBS}]) - AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) -- -diff --git a/crmd/callbacks.c b/crmd/callbacks.c -index 954473f..f88fc93 100644 ---- a/crmd/callbacks.c -+++ b/crmd/callbacks.c -@@ -194,7 +194,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d - /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */ - stop_te_timer(down->timer); - -- erase_node_from_join(node->uname); -+ crm_update_peer_join(__FUNCTION__, node, crm_join_none); - crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN); - check_join_state(fsa_state, __FUNCTION__); - -@@ -208,8 +208,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d - } else if (appeared == FALSE) { - crm_notice("Stonith/shutdown of %s not matched", node->uname); - -- erase_node_from_join(node->uname); -- crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN); -+ crm_update_peer_join(__FUNCTION__, node, crm_join_none); - check_join_state(fsa_state, __FUNCTION__); - - abort_transition(INFINITY, tg_restart, "Node failure", NULL); -diff --git a/crmd/control.c b/crmd/control.c -index 50db30b..7f423db 100644 ---- a/crmd/control.c -+++ b/crmd/control.c -@@ -118,6 +118,10 @@ do_ha_control(long long action, - } - fsa_our_uname = cluster->uname; - fsa_our_uuid = cluster->uuid; -+ if(cluster->uuid == NULL) { -+ crm_err("Could not obtain local uuid"); -+ registered = FALSE; -+ } - - if (registered == FALSE) { - set_bit(fsa_input_register, R_HA_DISCONNECTED); -@@ -193,6 +197,7 @@ extern xmlNode *max_generation_xml; - extern GHashTable *resource_history; - extern GHashTable *voted; - extern GHashTable *reload_hash; -+extern char *te_client_id; - - void log_connected_client(gpointer key, gpointer value, gpointer user_data); - -@@ -205,20 +210,92 @@ log_connected_client(gpointer key, gpointer value, gpointer user_data) - } - - int -+crmd_fast_exit(int rc) -+{ -+ if (is_set(fsa_input_register, R_STAYDOWN)) { -+ crm_warn("Inhibiting respawn: %d -> %d", rc, 100); -+ rc = 100; -+ } -+ -+ if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) { -+ crm_err("Could not recover from internal error"); -+ rc = pcmk_err_generic; -+ } -+ return crm_exit(rc); -+} -+ -+int - crmd_exit(int rc) - { - 
GListPtr gIter = NULL; -+ GMainLoop *mloop = crmd_mainloop; -+ -+ static bool in_progress = FALSE; -+ -+ if(in_progress && rc == 0) { -+ crm_debug("Exit is already in progress"); -+ return rc; -+ -+ } else if(in_progress) { -+ crm_notice("Error during shutdown process, terminating now: %s (%d)", pcmk_strerror(rc), rc); -+ crm_write_blackbox(SIGTRAP, NULL); -+ crmd_fast_exit(rc); -+ } -+ -+ in_progress = TRUE; -+ crm_trace("Preparing to exit: %d", rc); -+ -+ /* Suppress secondary errors resulting from us disconnecting everything */ -+ set_bit(fsa_input_register, R_HA_DISCONNECTED); -+ -+/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ -+ -+ if(ipcs) { -+ crm_trace("Closing IPC server"); -+ mainloop_del_ipc_server(ipcs); -+ ipcs = NULL; -+ } - - if (attrd_ipc) { -+ crm_trace("Closing attrd connection"); - crm_ipc_close(attrd_ipc); - crm_ipc_destroy(attrd_ipc); -+ attrd_ipc = NULL; - } -- if (crmd_mainloop) { -- g_main_loop_quit(crmd_mainloop); -- g_main_loop_unref(crmd_mainloop); -+ -+ if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) { -+ crm_trace("Disconnecting Policy Engine"); -+ qb_ipcs_disconnect(pe_subsystem->client->ipcs); -+ } -+ -+ if(stonith_api) { -+ crm_trace("Disconnecting fencing API"); -+ clear_bit(fsa_input_register, R_ST_REQUIRED); -+ stonith_api->cmds->free(stonith_api); stonith_api = NULL; -+ } -+ -+ if (rc == pcmk_ok && crmd_mainloop == NULL) { -+ crm_debug("No mainloop detected"); -+ rc = EPROTO; - } -+ -+ /* On an error, just get out. -+ * -+ * Otherwise, make the effort to have mainloop exit gracefully so -+ * that it (mostly) cleans up after itself and valgrind has less -+ * to report on - allowing real errors stand out -+ */ -+ if(rc != pcmk_ok) { -+ crm_notice("Forcing immediate exit: %s (%d)", pcmk_strerror(rc), rc); -+ crm_write_blackbox(SIGTRAP, NULL); -+ return crmd_fast_exit(rc); -+ } -+ -+/* Clean up as much memory as possible for valgrind */ -+ - #if SUPPORT_HEARTBEAT - if (fsa_cluster_conn) { -+ crm_trace("Disconnecting heartbeat"); - fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); - fsa_cluster_conn = NULL; - } -@@ -233,58 +310,104 @@ crmd_exit(int rc) - fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); - delete_fsa_input(fsa_data); - } -- g_list_free(fsa_message_queue); -- fsa_message_queue = NULL; - -- crm_client_cleanup(); -- empty_uuid_cache(); -- crm_peer_destroy(); - clear_bit(fsa_input_register, R_MEMBERSHIP); -+ g_list_free(fsa_message_queue); fsa_message_queue = NULL; - -- if (te_subsystem->client && te_subsystem->client->ipcs) { -- crm_debug("Full destroy: TE"); -- qb_ipcs_disconnect(te_subsystem->client->ipcs); -- } -- free(te_subsystem); -- -- if (pe_subsystem->client && pe_subsystem->client->ipcs) { -- crm_debug("Full destroy: PE"); -- qb_ipcs_disconnect(pe_subsystem->client->ipcs); -- } -- free(pe_subsystem); -- -- free(cib_subsystem); -+ free(pe_subsystem); pe_subsystem = NULL; -+ free(te_subsystem); te_subsystem = NULL; -+ free(cib_subsystem); cib_subsystem = NULL; - - if (reload_hash) { -- g_hash_table_destroy(reload_hash); -+ crm_trace("Destroying reload cache with %d members", g_hash_table_size(reload_hash)); -+ g_hash_table_destroy(reload_hash); reload_hash = NULL; - } -+ - if (voted) { -- g_hash_table_destroy(voted); -+ crm_trace("Destroying voted cache with %d members", g_hash_table_size(voted)); -+ g_hash_table_destroy(voted); voted = NULL; - } - - cib_delete(fsa_cib_conn); - fsa_cib_conn = NULL; - -+ verify_stopped(fsa_state, LOG_WARNING); -+ 
clear_bit(fsa_input_register, R_LRM_CONNECTED); - lrm_state_destroy_all(); - -- free(transition_timer); -- free(integration_timer); -- free(finalization_timer); -- free(election_trigger); -- free(election_timeout); -- free(shutdown_escalation_timer); -- free(wait_timer); -- free(recheck_timer); -+ /* This basically will not work, since mainloop has a reference to it */ -+ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; -+ -+ mainloop_destroy_trigger(config_read); config_read = NULL; -+ mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; -+ mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; - -- free(fsa_our_dc_version); -- free(fsa_our_uname); -- free(fsa_our_uuid); -- free(fsa_our_dc); -+ crm_client_cleanup(); -+ crm_peer_destroy(); - -- free(max_generation_from); -- free_xml(max_generation_xml); -+ crm_timer_stop(transition_timer); -+ crm_timer_stop(integration_timer); -+ crm_timer_stop(finalization_timer); -+ crm_timer_stop(election_trigger); -+ crm_timer_stop(election_timeout); -+ crm_timer_stop(shutdown_escalation_timer); -+ crm_timer_stop(wait_timer); -+ crm_timer_stop(recheck_timer); -+ -+ free(transition_timer); transition_timer = NULL; -+ free(integration_timer); integration_timer = NULL; -+ free(finalization_timer); finalization_timer = NULL; -+ free(election_trigger); election_trigger = NULL; -+ free(election_timeout); election_timeout = NULL; -+ free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; -+ free(wait_timer); wait_timer = NULL; -+ free(recheck_timer); recheck_timer = NULL; -+ -+ free(fsa_our_dc_version); fsa_our_dc_version = NULL; -+ free(fsa_our_uname); fsa_our_uname = NULL; -+ free(fsa_our_uuid); fsa_our_uuid = NULL; -+ free(fsa_our_dc); fsa_our_dc = NULL; -+ -+ free(te_uuid); te_uuid = NULL; -+ free(te_client_id); te_client_id = NULL; -+ free(fsa_pe_ref); fsa_pe_ref = NULL; -+ free(failed_stop_offset); failed_stop_offset = NULL; -+ free(failed_start_offset); failed_start_offset = NULL; -+ -+ free(max_generation_from); max_generation_from = NULL; -+ free_xml(max_generation_xml); max_generation_xml = NULL; -+ -+ mainloop_destroy_signal(SIGUSR1); -+ mainloop_destroy_signal(SIGTERM); -+ mainloop_destroy_signal(SIGTRAP); -+ mainloop_destroy_signal(SIGCHLD); -+ -+ if (mloop) { -+ int lpc = 0; -+ GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); -+ -+ /* Don't re-enter this block */ -+ crmd_mainloop = NULL; -+ -+ crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); -+ -+ while(g_main_context_pending(ctx) && lpc < 10) { -+ lpc++; -+ crm_trace("Iteration %d", lpc); -+ g_main_context_dispatch(ctx); -+ } - -- return crm_exit(rc); -+ crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); -+ g_main_loop_quit(mloop); -+ -+ /* Won't do anything yet, since we're inside it now */ -+ g_main_loop_unref(mloop); -+ -+ crm_trace("Done %d", rc); -+ } -+ -+ /* Graceful */ -+ return rc; - } - - /* A_EXIT_0, A_EXIT_1 */ -@@ -293,31 +416,22 @@ do_exit(long long action, - enum crmd_fsa_cause cause, - enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) - { -- int exit_code = 0; -+ int exit_code = pcmk_ok; - int log_level = LOG_INFO; - const char *exit_type = "gracefully"; - - if (action & A_EXIT_1) { -- exit_code = 1; -+ /* exit_code = pcmk_err_generic; */ - log_level = LOG_ERR; - exit_type = "forcefully"; -+ exit_code = pcmk_err_generic; - } - - verify_stopped(cur_state, LOG_ERR); - 
do_crm_log(log_level, "Performing %s - %s exiting the CRMd", - fsa_action2string(action), exit_type); - -- if (is_set(fsa_input_register, R_IN_RECOVERY)) { -- crm_err("Could not recover from internal error"); -- exit_code = 2; -- } -- if (is_set(fsa_input_register, R_STAYDOWN)) { -- crm_warn("Inhibiting respawn by Heartbeat"); -- exit_code = 100; -- } -- - crm_info("[%s] stopped (%d)", crm_system_name, exit_code); -- delete_fsa_input(msg_data); - crmd_exit(exit_code); - } - -@@ -335,6 +449,7 @@ do_startup(long long action, - - fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); - config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); -+ transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); - - crm_debug("Creating CIB and LRM objects"); - fsa_cib_conn = cib_new(); -@@ -539,7 +654,7 @@ crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) - crm_log_xml_trace(msg, "CRMd[inbound]"); - - crm_xml_add(msg, F_CRM_SYS_FROM, client->id); -- if (crmd_authorize_message(msg, client)) { -+ if (crmd_authorize_message(msg, client, NULL)) { - route_message(C_IPC_MESSAGE, msg); - } - -@@ -603,7 +718,8 @@ do_stop(long long action, - stop_subsystem(pe_subsystem, FALSE); - } - -- mainloop_del_ipc_server(ipcs); -+ crm_trace("Closing IPC server"); -+ mainloop_del_ipc_server(ipcs); ipcs = NULL; - register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); - } - -@@ -671,7 +787,7 @@ do_started(long long action, - } - - crm_debug("Init server comms"); -- ipcs = mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, &crmd_callbacks); -+ ipcs = crmd_ipc_server_init(&crmd_callbacks); - if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); -@@ -697,7 +813,7 @@ do_recover(long long action, - enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) - { - set_bit(fsa_input_register, R_IN_RECOVERY); -- crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); -+ crm_warn("Fast-tracking shutdown in response to errors"); - - register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); - } -@@ -861,6 +977,6 @@ crm_shutdown(int nsig) - - } else { - crm_info("exit from shutdown"); -- crmd_exit(EX_OK); -+ crmd_exit(pcmk_ok); - } - } -diff --git a/crmd/corosync.c b/crmd/corosync.c -index 989d25f..6385780 100644 ---- a/crmd/corosync.c -+++ b/crmd/corosync.c -@@ -140,7 +140,7 @@ crmd_quorum_destroy(gpointer user_data) - { - if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { - crm_err("connection terminated"); -- crmd_exit(1); -+ crmd_exit(ENOLINK); - - } else { - crm_info("connection closed"); -@@ -152,7 +152,7 @@ crmd_ais_destroy(gpointer user_data) - { - if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { - crm_err("connection terminated"); -- crmd_exit(1); -+ crmd_exit(ENOLINK); - - } else { - crm_info("connection closed"); -@@ -165,7 +165,7 @@ crmd_cman_destroy(gpointer user_data) - { - if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { - crm_err("connection terminated"); -- crmd_exit(1); -+ crmd_exit(ENOLINK); - - } else { - crm_info("connection closed"); -diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h -index 0fddb21..d0ca58c 100644 ---- a/crmd/crmd_lrm.h -+++ b/crmd/crmd_lrm.h -@@ -1,27 +1,28 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * 
License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - - extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level); --extern void lrm_clear_last_failure(const char *rsc_id); -+extern void lrm_clear_last_failure(const char *rsc_id, const char *node_name); - void lrm_op_callback(lrmd_event_data_t * op); - - typedef struct resource_history_s { - char *id; -+ uint32_t last_callid; - lrmd_rsc_info_t rsc; - lrmd_event_data_t *last; - lrmd_event_data_t *failed; -@@ -46,7 +47,9 @@ struct recurring_op_s { - - typedef struct lrm_state_s { - const char *node_name; -+ /* reserved for lrm_state.c usage only */ - void *conn; -+ /* reserved for remote_lrmd_ra.c usage only */ - void *remote_ra_data; - - GHashTable *resource_history; -@@ -64,7 +67,7 @@ struct pending_deletion_op_s { - xmlNode *do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace); - - /*! -- * \brief Clear all state information from a single state entry. -+ * \brief Clear all state information from a single state entry. - * \note This does not close the lrmd connection - */ - void lrm_state_reset_tables(lrm_state_t * lrm_state); -diff --git a/crmd/crmd_messages.h b/crmd/crmd_messages.h -index 50a56cd..6688e92 100644 ---- a/crmd/crmd_messages.h -+++ b/crmd/crmd_messages.h -@@ -100,7 +100,10 @@ extern gboolean add_pending_outgoing_reply(const char *originating_node_name, - const char *crm_msg_reference, - const char *sys_to, const char *sys_from); - --extern gboolean crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client); -+gboolean crmd_is_proxy_session(const char *session); -+void crmd_proxy_send(const char *session, xmlNode *msg); -+ -+extern gboolean crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session); - - extern gboolean send_request(xmlNode * msg, char **msg_reference); - -diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h -index a26b114..d0ef040 100644 ---- a/crmd/crmd_utils.h -+++ b/crmd/crmd_utils.h -@@ -78,6 +78,7 @@ xmlNode *create_node_state(const char *uname, const char *in_cluster, - const char *exp_state, gboolean clear_shutdown, const char *src); - - int crmd_exit(int rc); -+int crmd_fast_exit(int rc); - gboolean stop_subsystem(struct crm_subsystem_s *centry, gboolean force_quit); - gboolean start_subsystem(struct crm_subsystem_s *centry); - -@@ -85,18 +86,19 @@ void fsa_dump_actions(long long action, const char *text); - void fsa_dump_inputs(int log_level, const char *text, long long input_register); - - gboolean update_dc(xmlNode * msg); --void erase_node_from_join(const char *node); -+void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); - xmlNode *do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *source); - void populate_cib_nodes(enum node_update_flags flags, const char *source); - void crm_update_quorum(gboolean quorum, gboolean force_update); - void erase_status_tag(const char *uname, const char *tag, 
int options); --void update_attrd(const char *host, const char *name, const char *value, const char *user_name); -+void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); - - int crmd_join_phase_count(enum crm_join_phase phase); - void crmd_join_phase_log(int level); - - const char *get_timer_desc(fsa_timer_t * timer); - gboolean too_many_st_failures(void); -+void reset_st_fail_count(const char * target); - - # define fsa_register_cib_callback(id, flag, data, fn) do { \ - fsa_cib_conn->cmds->register_callback( \ -diff --git a/crmd/election.c b/crmd/election.c -index daa0f66..1946858 100644 ---- a/crmd/election.c -+++ b/crmd/election.c -@@ -180,22 +180,6 @@ struct election_data_s { - unsigned int winning_bornon; - }; - --static void --log_member_name(gpointer key, gpointer value, gpointer user_data) --{ -- const crm_node_t *node = value; -- -- if (crm_is_peer_active(node)) { -- crm_err("%s: %s proc=%.32x", (char *)user_data, (char *)key, node->processes); -- } --} -- --static void --log_node(gpointer key, gpointer value, gpointer user_data) --{ -- crm_err("%s: %s", (char *)user_data, (char *)key); --} -- - void - do_election_check(long long action, - enum crmd_fsa_cause cause, -@@ -220,15 +204,21 @@ do_election_check(long long action, - crm_timer_stop(election_timeout); - register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); - if (voted_size > num_members) { -- char *data = NULL; -- -- data = strdup("member"); -- g_hash_table_foreach(crm_peer_cache, log_member_name, data); -- free(data); -+ GHashTableIter gIter; -+ const crm_node_t *node; -+ char *key = NULL; -+ -+ g_hash_table_iter_init(&gIter, crm_peer_cache); -+ while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { -+ if (crm_is_peer_active(node)) { -+ crm_err("member: %s proc=%.32x", node->uname, node->processes); -+ } -+ } - -- data = strdup("voted"); -- g_hash_table_foreach(voted, log_node, data); -- free(data); -+ g_hash_table_iter_init(&gIter, voted); -+ while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) { -+ crm_err("voted: %s", key); -+ } - - } - crm_debug("Destroying voted hash"); -@@ -280,7 +270,9 @@ do_election_count_vote(long long action, - CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return); - - if(crm_peer_cache == NULL) { -- CRM_LOG_ASSERT(is_set(fsa_input_register, R_SHUTDOWN)); -+ if(is_not_set(fsa_input_register, R_SHUTDOWN)) { -+ crm_err("Internal error, no peer cache"); -+ } - return; - } - -@@ -492,6 +484,7 @@ feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, vo - if (rc != pcmk_ok) { - fsa_data_t *msg_data = NULL; - -+ crm_notice("Update failed: %s (%d)", pcmk_strerror(rc), rc); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } - } -@@ -513,7 +506,8 @@ do_dc_takeover(long long action, - - for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { - char *target = gIter->data; -- const char *uuid = get_uuid(target); -+ crm_node_t *target_node = crm_get_peer(0, target); -+ const char *uuid = crm_peer_uuid(target_node); - - crm_notice("Marking %s, target of a previous stonith action, as clean", target); - send_stonith_update(NULL, target, uuid); -diff --git a/crmd/fsa.c b/crmd/fsa.c -index 92490b6..b5c442b 100644 ---- a/crmd/fsa.c -+++ b/crmd/fsa.c -@@ -48,14 +48,14 @@ char *fsa_our_uname = NULL; - ll_cluster_t *fsa_cluster_conn; - #endif - --fsa_timer_t *wait_timer = NULL; --fsa_timer_t *recheck_timer = NULL; --fsa_timer_t *election_trigger = 
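
The election.c hunk above replaces g_hash_table_foreach() callbacks (which needed a strdup'd user_data string) with GHashTableIter, keeping the loop body inline. A self-contained sketch of that iteration style, using a plain string table with illustrative contents:

    #include <glib.h>
    #include <stdio.h>

    /* Walk a hash table in place with GHashTableIter; no callback
     * function or user_data plumbing is needed. */
    static void dump_table(GHashTable *table)
    {
        GHashTableIter iter;
        gpointer key = NULL;
        gpointer value = NULL;

        g_hash_table_iter_init(&iter, table);
        while (g_hash_table_iter_next(&iter, &key, &value)) {
            printf("member: %s = %s\n", (char *)key, (char *)value);
        }
    }
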
NULL; --fsa_timer_t *election_timeout = NULL; --fsa_timer_t *transition_timer = NULL; -+fsa_timer_t *wait_timer = NULL; /* How long to wait before retrying to connect to the cib/lrmd/ccm */ -+fsa_timer_t *recheck_timer = NULL; /* Periodically re-run the PE to account for time based rules/preferences */ -+fsa_timer_t *election_trigger = NULL; /* How long to wait at startup, or after an election, for the DC to make contact */ -+fsa_timer_t *election_timeout = NULL; /* How long to declare an election over - even if not everyone voted */ -+fsa_timer_t *transition_timer = NULL; /* How long to delay the start of a new transition with the expectation something else might happen too */ - fsa_timer_t *integration_timer = NULL; - fsa_timer_t *finalization_timer = NULL; --fsa_timer_t *shutdown_escalation_timer = NULL; -+fsa_timer_t *shutdown_escalation_timer = NULL; /* How long to wait for the DC to stop all resources and give us the all-clear to shut down */ - - volatile gboolean do_fsa_stall = FALSE; - volatile long long fsa_input_register = 0; -@@ -362,6 +362,7 @@ s_crmd_fsa_actions(fsa_data_t * fsa_data) - - /* - * Medium priority actions -+ * - Membership - */ - } else if (fsa_actions & A_DC_TAKEOVER) { - do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); -@@ -373,10 +374,6 @@ s_crmd_fsa_actions(fsa_data_t * fsa_data) - do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); - } else if (fsa_actions & A_ELECTION_START) { - do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); -- } else if (fsa_actions & A_TE_HALT) { -- do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); -- } else if (fsa_actions & A_TE_CANCEL) { -- do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); - } else if (fsa_actions & A_DC_JOIN_OFFER_ALL) { - do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); - } else if (fsa_actions & A_DC_JOIN_OFFER_ONE) { -@@ -385,24 +382,28 @@ s_crmd_fsa_actions(fsa_data_t * fsa_data) - do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); - } else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) { - do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); -+ } else if (fsa_actions & A_DC_JOIN_FINALIZE) { -+ do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); -+ } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { -+ do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); - - /* - * Low(er) priority actions - * Make sure the CIB is always updated before invoking the - * PE, and the PE before the TE - */ -- } else if (fsa_actions & A_DC_JOIN_FINALIZE) { -- do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); -+ } else if (fsa_actions & A_TE_HALT) { -+ do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); -+ } else if (fsa_actions & A_TE_CANCEL) { -+ do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); - } else if (fsa_actions & A_LRM_INVOKE) { - do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); - } else if (fsa_actions & A_PE_INVOKE) { - do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); - } else if (fsa_actions & A_TE_INVOKE) { - do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); -- } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { -- do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); - -- /* sub-system stop */ -+ /* Shutdown actions */ - } else if (fsa_actions & A_DC_RELEASED) { - do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); - } else if (fsa_actions & A_PE_STOP) { -@@ -489,6 +490,8 @@ do_state_transition(long long actions, - level = LOG_NOTICE; - } else if (cur_state == S_ELECTION) { - level = 
LOG_NOTICE; -+ } else if (cur_state == S_STARTING) { -+ level = LOG_NOTICE; - } else if (next_state == S_RECOVERY) { - level = LOG_WARNING; - } -diff --git a/crmd/heartbeat.c b/crmd/heartbeat.c -index 568e529..1d63190 100644 ---- a/crmd/heartbeat.c -+++ b/crmd/heartbeat.c -@@ -424,13 +424,15 @@ crmd_client_status_callback(const char *node, const char *client, const char *st - crm_notice("Status update: Client %s/%s now has status [%s] (DC=%s)", - node, client, status, AM_I_DC ? "true" : "false"); - -+ peer = crm_get_peer(0, node); -+ - if (safe_str_eq(status, ONLINESTATUS)) { - /* remove the cached value in case it changed */ - crm_trace("Uncaching UUID for %s", node); -- unget_uuid(node); -+ free(peer->uuid); -+ peer->uuid = NULL; - } - -- peer = crm_get_peer(0, node); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_crmd, status); - - if (AM_I_DC) { -diff --git a/crmd/join_client.c b/crmd/join_client.c -index aba2d56..70b3246 100644 ---- a/crmd/join_client.c -+++ b/crmd/join_client.c -@@ -263,8 +263,8 @@ do_cl_join_finalize_respond(long long action, - - /* Just in case attrd was still around too */ - if (is_not_set(fsa_input_register, R_SHUTDOWN)) { -- update_attrd(fsa_our_uname, "terminate", NULL, NULL); -- update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, NULL, NULL); -+ update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE); -+ update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, NULL, NULL, FALSE); - } - } - -@@ -273,7 +273,7 @@ do_cl_join_finalize_respond(long long action, - - if (AM_I_DC == FALSE) { - register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__); -- update_attrd(NULL, NULL, NULL, NULL); -+ update_attrd(NULL, NULL, NULL, NULL, FALSE); - } - - free_xml(tmp1); -diff --git a/crmd/join_dc.c b/crmd/join_dc.c -index 473e323..b45fff2 100644 ---- a/crmd/join_dc.c -+++ b/crmd/join_dc.c -@@ -40,18 +40,49 @@ static int current_join_id = 0; - unsigned long long saved_ccm_membership_id = 0; - - void -+crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) -+{ -+ enum crm_join_phase last = 0; -+ -+ if(node == NULL) { -+ crm_err("%s: Could not set join-%u to %d for NULL", source, current_join_id, phase); -+ return; -+ } -+ -+ last = node->join; -+ -+ if(phase == last) { -+ crm_trace("%s: Node %s[%u] - join-%u phase still %u", -+ source, node->uname, node->id, current_join_id, last); -+ -+ } else if (phase <= crm_join_none) { -+ node->join = phase; -+ crm_info("%s: Node %s[%u] - join-%u phase %u -> %u", -+ source, node->uname, node->id, current_join_id, last, phase); -+ -+ } else if(phase == last + 1) { -+ node->join = phase; -+ crm_info("%s: Node %s[%u] - join-%u phase %u -> %u", -+ source, node->uname, node->id, current_join_id, last, phase); -+ } else { -+ crm_err("%s: Node %s[%u] - join-%u phase cannot transition from %u to %u", -+ source, node->uname, node->id, current_join_id, last, phase); -+ -+ } -+} -+ -+void - initialize_join(gboolean before) - { - GHashTableIter iter; - crm_node_t *peer = NULL; -- char *key = NULL; - - /* clear out/reset a bunch of stuff */ - crm_debug("join-%d: Initializing join data (flag=%s)", - current_join_id, before ? 
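
The crm_update_peer_join() function introduced above validates phase changes: a peer may stay at its current phase, be reset to the base phase, or advance exactly one step; anything else is logged as an error and refused. A minimal sketch of that guard, with stand-in enum values rather than the real crm_join_phase constants:

    #include <stdio.h>

    enum join_phase { JOIN_NONE = 0, JOIN_WELCOMED, JOIN_INTEGRATED,
                      JOIN_FINALIZED, JOIN_CONFIRMED };

    /* Allow: no-op, reset to base, or a single forward step. */
    static int set_join_phase(enum join_phase *current, enum join_phase phase)
    {
        if (phase == *current) {
            return 0;                        /* no change */

        } else if (phase <= JOIN_NONE || phase == *current + 1) {
            *current = phase;                /* reset, or advance one step */
            return 0;
        }
        fprintf(stderr, "illegal transition %d -> %d\n", *current, phase);
        return -1;
    }
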
"true" : "false"); - - g_hash_table_iter_init(&iter, crm_peer_cache); -- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) &peer)) { -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { - crm_update_peer_join(__FUNCTION__, peer, crm_join_none); - } - -@@ -69,16 +100,6 @@ initialize_join(gboolean before) - } - } - --void --erase_node_from_join(const char *uname) --{ -- -- if (uname != NULL) { -- crm_node_t *peer = crm_get_peer(0, uname); -- crm_update_peer_join(__FUNCTION__, peer, crm_join_none); -- } --} -- - static void - join_make_offer(gpointer key, gpointer value, gpointer user_data) - { -@@ -107,7 +128,7 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) - return; - } - -- erase_node_from_join(join_to); -+ crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none); - - if (crm_is_peer_active(member)) { - crm_node_t *peer = crm_get_peer(0, join_to); -@@ -206,7 +227,7 @@ do_dc_join_offer_one(long long action, - crm_info("join-%d: Processing %s request from %s in state %s", - current_join_id, op, join_to, fsa_state2string(cur_state)); - -- erase_node_from_join(join_to); -+ crm_update_peer_join(__FUNCTION__, member, crm_join_none); - join_make_offer(NULL, member, NULL); - - /* always offer to the DC (ourselves) -@@ -516,7 +537,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) - crm_trace("Creating node entry for %s", join_to); - - tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); -- set_uuid(tmp1, XML_ATTR_UUID, join_to); -+ set_uuid(tmp1, XML_ATTR_UUID, join_node); - crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); - - fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1, -@@ -617,19 +638,18 @@ do_dc_join_final(long long action, - enum crmd_fsa_input current_input, fsa_data_t * msg_data) - { - crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); -- update_attrd(NULL, NULL, NULL, NULL); -+ update_attrd(NULL, NULL, NULL, NULL, FALSE); - crm_update_quorum(crm_have_quorum, TRUE); - } - - int crmd_join_phase_count(enum crm_join_phase phase) - { - int count = 0; -- const char *key; - crm_node_t *peer; - GHashTableIter iter; - - g_hash_table_iter_init(&iter, crm_peer_cache); -- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) &peer)) { -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { - if(peer->join == phase) { - count++; - } -@@ -639,12 +659,11 @@ int crmd_join_phase_count(enum crm_join_phase phase) - - void crmd_join_phase_log(int level) - { -- const char *key; - crm_node_t *peer; - GHashTableIter iter; - - g_hash_table_iter_init(&iter, crm_peer_cache); -- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) &peer)) { -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { - const char *state = "unknown"; - switch(peer->join) { - case crm_join_nack: -diff --git a/crmd/lrm.c b/crmd/lrm.c -index b2e1a6b..31f00d7 100644 ---- a/crmd/lrm.c -+++ b/crmd/lrm.c -@@ -141,6 +141,7 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ - return; - } - -+ entry->last_callid = op->call_id; - target_rc = rsc_op_expected_rc(op); - if (op->op_status == PCMK_LRM_OP_CANCELLED) { - if (op->interval > 0) { -@@ -255,6 +256,9 @@ do_lrm_control(long long action, - - lrm_state_t *lrm_state = NULL; - -+ if(fsa_our_uname == NULL) { -+ return; /* Nothing to do */ -+ } - lrm_state = lrm_state_find_or_create(fsa_our_uname); - if (lrm_state == NULL) { - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); -@@ -270,6 +274,7 @@ 
do_lrm_control(long long action, - } - - clear_bit(fsa_input_register, R_LRM_CONNECTED); -+ crm_info("Disconnecting from the LRM"); - lrm_state_disconnect(lrm_state); - lrm_state_reset_tables(lrm_state); - crm_notice("Disconnected from the LRM"); -@@ -300,7 +305,7 @@ do_lrm_control(long long action, - } - - set_bit(fsa_input_register, R_LRM_CONNECTED); -- crm_debug("LRM connection established"); -+ crm_info("LRM connection established"); - } - - if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { -@@ -417,6 +422,7 @@ lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, - static char * - get_rsc_metadata(const char *type, const char *class, const char *provider) - { -+ int rc = 0; - char *metadata = NULL; - - /* Always use a local connection for this operation */ -@@ -431,7 +437,7 @@ get_rsc_metadata(const char *type, const char *class, const char *provider) - } - - crm_trace("Retreiving metadata for %s::%s:%s", type, class, provider); -- lrm_state_get_metadata(lrm_state, class, provider, type, &metadata, 0); -+ rc = lrm_state_get_metadata(lrm_state, class, provider, type, &metadata, 0); - - if (metadata) { - /* copy the metadata because the LRM likes using -@@ -443,7 +449,7 @@ get_rsc_metadata(const char *type, const char *class, const char *provider) - metadata = m_copy; - - } else { -- crm_warn("No metadata found for %s::%s:%s", type, class, provider); -+ crm_warn("No metadata found for %s::%s:%s: %s (%d)", type, class, provider, pcmk_strerror(rc), rc); - } - - return metadata; -@@ -496,11 +502,11 @@ get_rsc_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) - } - - len = strlen(rsc->type) + strlen(rsc->class) + strlen(provider) + 4; -- /* coverity[returned_null] Ignore */ - key = malloc(len); -- snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider); -- -- reload = g_hash_table_lookup(reload_hash, key); -+ if(key) { -+ snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider); -+ reload = g_hash_table_lookup(reload_hash, key); -+ } - - if (reload && ((now - 9) > reload->last_query) - && safe_str_eq(op->op_type, RSC_START)) { -@@ -518,6 +524,10 @@ get_rsc_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) - key = NULL; - reload->metadata = get_rsc_metadata(rsc->type, rsc->class, provider); - -+ if(reload->metadata == NULL) { -+ goto cleanup; -+ } -+ - metadata = string2xml(reload->metadata); - if (metadata == NULL) { - crm_err("Metadata for %s::%s:%s is not valid XML", -@@ -972,7 +982,7 @@ delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc - } - - void --lrm_clear_last_failure(const char *rsc_id) -+lrm_clear_last_failure(const char *rsc_id, const char *node_name) - { - char *attr = NULL; - GHashTableIter iter; -@@ -982,10 +992,17 @@ lrm_clear_last_failure(const char *rsc_id) - - attr = generate_op_key(rsc_id, "last_failure", 0); - -- /* This clears last failure for every lrm state that has this rsc. 
*/ -+ /* This clears last failure for every lrm state that has this rsc.*/ - for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { - lrm_state_t *lrm_state = state_entry->data; - -+ if (node_name != NULL) { -+ if (strcmp(node_name, lrm_state->node_name) != 0) { -+ /* filter by node_name if node_name is present */ -+ continue; -+ } -+ } -+ - delete_op_entry(lrm_state, NULL, rsc_id, attr, 0); - - if (!lrm_state->resource_history) { -@@ -1001,7 +1018,7 @@ lrm_clear_last_failure(const char *rsc_id) - } - } - free(attr); -- -+ g_list_free(lrm_state_list); - } - - static gboolean -@@ -1192,17 +1209,24 @@ do_lrm_invoke(long long action, - const char *operation = NULL; - ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); - const char *user_name = NULL; -- const char *remote_node = NULL; -+ const char *target_node = NULL; -+ gboolean is_remote_node = FALSE; - - if (input->xml != NULL) { - /* Remote node operations are routed here to their remote connections */ -- remote_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); -+ target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); -+ } -+ if (target_node == NULL) { -+ target_node = fsa_our_uname; -+ } else if (safe_str_neq(target_node, fsa_our_uname)) { -+ is_remote_node = TRUE; - } -- lrm_state = lrm_state_find(remote_node ? remote_node : fsa_our_uname); - -- if (lrm_state == NULL && remote_node) { -+ lrm_state = lrm_state_find(target_node); -+ -+ if (lrm_state == NULL && is_remote_node) { - crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.", -- remote_node, fsa_our_uname); -+ target_node, fsa_our_uname); - return; - } - -@@ -1245,14 +1269,16 @@ do_lrm_invoke(long long action, - * we want to fail. We then pass that event to the lrmd client callback - * so it will be processed as if it actually came from the lrmd. 
*/ - op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); -+ CRM_ASSERT(op != NULL); -+ - free((char *)op->user_data); - op->user_data = NULL; - entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); - /* Make sure the call id is greater than the last successful operation, - * otherwise the failure will not result in a possible recovery of the resource - * as it could appear the failure occurred before the successful start */ -- if (entry && entry->last) { -- op->call_id = entry->last->call_id + 1; -+ if (entry) { -+ op->call_id = entry->last_callid + 1; - if (op->call_id < 0) { - op->call_id = 1; - } -@@ -1260,7 +1286,8 @@ do_lrm_invoke(long long action, - op->interval = 0; - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_EXECRA_UNKNOWN_ERROR; -- CRM_ASSERT(op != NULL); -+ op->t_run = time(NULL); -+ op->t_rcchange = op->t_run; - - #if ENABLE_ACL - if (user_name && is_privileged(user_name) == FALSE) { -@@ -1297,6 +1324,20 @@ do_lrm_invoke(long long action, - - fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); - crm_info("Forced a local LRM refresh: call=%d", rc); -+ -+ if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { -+ xmlNode *reply = create_request( -+ CRM_OP_INVOKE_LRM, fragment, -+ from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); -+ -+ crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); -+ -+ if (relay_message(reply, TRUE) == FALSE) { -+ crm_log_xml_err(reply, "Unable to route reply"); -+ } -+ free_xml(reply); -+ } -+ - free_xml(fragment); - - } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { -@@ -1311,7 +1352,7 @@ do_lrm_invoke(long long action, - free_xml(data); - - } else if (safe_str_eq(operation, CRM_OP_PROBED)) { -- update_attrd(NULL, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name); -+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); - - } else if (safe_str_eq(crm_op, CRM_OP_REPROBE)) { - GHashTableIter gIter; -@@ -1331,7 +1372,20 @@ do_lrm_invoke(long long action, - /* And finally, _delete_ the value in attrd - * Setting it to FALSE results in the PE sending us back here again - */ -- update_attrd(NULL, CRM_OP_PROBED, NULL, user_name); -+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); -+ -+ if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { -+ xmlNode *reply = create_request( -+ CRM_OP_INVOKE_LRM, NULL, -+ from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); -+ -+ crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); -+ -+ if (relay_message(reply, TRUE) == FALSE) { -+ crm_log_xml_err(reply, "Unable to route reply"); -+ } -+ free_xml(reply); -+ } - - } else if (operation != NULL) { - lrmd_rsc_info_t *rsc = NULL; -@@ -1428,17 +1482,15 @@ do_lrm_invoke(long long action, - free(op_key); - lrmd_free_event(op); - -- } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { -- int cib_rc = pcmk_ok; -- -- CRM_ASSERT(rsc != NULL); -+ } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) { - -- cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); -+#if ENABLE_ACL -+ int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); - if (cib_rc != pcmk_ok) { - lrmd_event_data_t *op = NULL; - - crm_err -- ("Attempt of deleting resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", -+ ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", - rsc->id, from_sys, user_name ? 
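
The fail-injection change above stamps the synthetic operation with a call id just past the last one recorded for the resource, so recovery logic cannot mistake the injected failure for something that happened before the last real result. The shape of that check, with illustrative field names:

    /* Give a synthetic op a call id newer than anything recorded. */
    struct fake_op { int call_id; };

    static void stamp_after(struct fake_op *op, int last_callid)
    {
        op->call_id = last_callid + 1;
        if (op->call_id < 0) {
            op->call_id = 1;   /* guard against signed wraparound */
        }
    }
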
user_name : "unknown", from_host, cib_rc, - pcmk_strerror(cib_rc)); - -@@ -1454,7 +1506,7 @@ do_lrm_invoke(long long action, - lrmd_free_event(op); - return; - } -- -+#endif - delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input); - - } else if (rsc != NULL) { -@@ -1637,6 +1689,7 @@ verify_stopped(enum crmd_fsa_state cur_state, int log_level) - } - - set_bit(fsa_input_register, R_SENT_RSC_STOP); -+ g_list_free(lrm_state_list); lrm_state_list = NULL; - return res; - } - -@@ -1710,7 +1763,7 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat - } - - /* now do the op */ -- crm_debug("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); -+ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); - - if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) { - if (safe_str_neq(operation, "fail") -@@ -1839,13 +1892,21 @@ do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_da - iter = create_xml_node(iter, XML_CIB_TAG_STATE); - - if (safe_str_eq(lrm_state->node_name, fsa_our_uname)) { -- set_uuid(iter, XML_ATTR_UUID, lrm_state->node_name); - uuid = fsa_our_uuid; -+ - } else { - /* remote nodes uuid and uname are equal */ -- crm_xml_add(iter, XML_ATTR_UUID, lrm_state->node_name); - uuid = lrm_state->node_name; -+ crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); - } -+ -+ CRM_LOG_ASSERT(uuid != NULL); -+ if(uuid == NULL) { -+ rc = -EINVAL; -+ goto done; -+ } -+ -+ crm_xml_add(iter, XML_ATTR_UUID, uuid); - crm_xml_add(iter, XML_ATTR_UNAME, lrm_state->node_name); - crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); - -@@ -1871,6 +1932,8 @@ do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_da - goto cleanup; - } - -+ crm_log_xml_trace(update, __FUNCTION__); -+ - /* make it an asyncronous call and be done with it - * - * Best case: -@@ -1893,7 +1956,7 @@ do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_da - if (rc > 0) { - last_resource_update = rc; - } -- -+ done: - /* the return code is a call number, not an error code */ - crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, - op->op_type, op->interval, op->rsc_id); -diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c -index 0c15926..2eec178 100644 ---- a/crmd/lrm_state.c -+++ b/crmd/lrm_state.c -@@ -18,6 +18,7 @@ - - #include - #include -+#include - - #include - #include -@@ -26,6 +27,20 @@ - #include - - GHashTable *lrm_state_table = NULL; -+GHashTable *proxy_table = NULL; -+int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); -+void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); -+ -+typedef struct remote_proxy_s { -+ char *node_name; -+ char *session_id; -+ -+ gboolean is_local; -+ -+ crm_ipc_t *ipc; -+ mainloop_io_t *source; -+ -+} remote_proxy_t; - - static void - history_cache_destroy(gpointer data) -@@ -70,14 +85,16 @@ free_recurring_op(gpointer value) - lrm_state_t * - lrm_state_create(const char *node_name) - { -+ lrm_state_t *state = NULL; - -- lrm_state_t *state = calloc(1, sizeof(lrm_state_t)); -+ if (!node_name) { -+ crm_err("No node name given for lrm state object"); -+ return NULL; -+ } - -+ state = calloc(1, sizeof(lrm_state_t)); - if (!state) { - return NULL; -- } else if (!node_name) { -- crm_err("No node name given for lrm state object"); -- return NULL; - } - - state->node_name = 
strdup(node_name); -@@ -102,6 +119,19 @@ lrm_state_destroy(const char *node_name) - g_hash_table_remove(lrm_state_table, node_name); - } - -+static gboolean -+remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) -+{ -+ remote_proxy_t *proxy = value; -+ const char *node_name = user_data; -+ -+ if (safe_str_eq(node_name, proxy->node_name)) { -+ return TRUE; -+ } -+ -+ return FALSE; -+} -+ - static void - internal_lrm_state_destroy(gpointer data) - { -@@ -111,16 +141,21 @@ internal_lrm_state_destroy(gpointer data) - return; - } - -+ crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); -+ g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); - remote_ra_cleanup(lrm_state); - lrmd_api_delete(lrm_state->conn); - - if (lrm_state->resource_history) { -+ crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history)); - g_hash_table_destroy(lrm_state->resource_history); - } - if (lrm_state->deletion_ops) { -+ crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops)); - g_hash_table_destroy(lrm_state->deletion_ops); - } - if (lrm_state->pending_ops) { -+ crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops)); - g_hash_table_destroy(lrm_state->pending_ops); - } - -@@ -143,6 +178,20 @@ lrm_state_reset_tables(lrm_state_t * lrm_state) - } - } - -+static void -+remote_proxy_free(gpointer data) -+{ -+ remote_proxy_t *proxy = data; -+ crm_debug("Signing out of the IPC Service"); -+ -+ if (proxy->source != NULL) { -+ mainloop_del_ipc_client(proxy->source); -+ } -+ -+ free(proxy->node_name); -+ free(proxy->session_id); -+} -+ - gboolean - lrm_state_init_local(void) - { -@@ -156,6 +205,13 @@ lrm_state_init_local(void) - return FALSE; - } - -+ proxy_table = -+ g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, remote_proxy_free); -+ if (!proxy_table) { -+ g_hash_table_destroy(lrm_state_table); -+ return FALSE; -+ } -+ - return TRUE; - } - -@@ -163,7 +219,12 @@ void - lrm_state_destroy_all(void) - { - if (lrm_state_table) { -- g_hash_table_destroy(lrm_state_table); -+ crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table)); -+ g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; -+ } -+ if(proxy_table) { -+ crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); -+ g_hash_table_destroy(proxy_table); proxy_table = NULL; - } - } - -@@ -246,6 +307,221 @@ lrm_state_ipc_connect(lrm_state_t * lrm_state) - return ret; - } - -+static void -+remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) -+{ -+ /* sending to the remote node that an ipc connection has been destroyed */ -+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); -+ lrmd_internal_proxy_send(lrmd, msg); -+ free_xml(msg); -+} -+ -+static void -+remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg) -+{ -+ /* sending to the remote node an event msg. 
*/ -+ xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(event, F_LRMD_IPC_OP, "event"); -+ crm_xml_add(event, F_LRMD_IPC_SESSION, session_id); -+ add_message_xml(event, F_LRMD_IPC_MSG, msg); -+ lrmd_internal_proxy_send(lrmd, event); -+ free_xml(event); -+} -+ -+static void -+remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id) -+{ -+ /* sending to the remote node a response msg. */ -+ xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(response, F_LRMD_IPC_OP, "response"); -+ crm_xml_add(response, F_LRMD_IPC_SESSION, session_id); -+ crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); -+ add_message_xml(response, F_LRMD_IPC_MSG, msg); -+ lrmd_internal_proxy_send(lrmd, response); -+ free_xml(response); -+} -+ -+static int -+remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) -+{ -+ xmlNode *xml = NULL; -+ remote_proxy_t *proxy = userdata; -+ lrm_state_t *lrm_state = lrm_state_find(proxy->node_name); -+ -+ if (lrm_state == NULL) { -+ return 0; -+ } -+ -+ xml = string2xml(buffer); -+ if (xml == NULL) { -+ crm_warn("Received a NULL msg from IPC service."); -+ return 1; -+ } -+ -+ remote_proxy_relay_event(lrm_state->conn, proxy->session_id, xml); -+ free_xml(xml); -+ return 1; -+} -+ -+static void -+remote_proxy_disconnected(void *userdata) -+{ -+ remote_proxy_t *proxy = userdata; -+ lrm_state_t *lrm_state = lrm_state_find(proxy->node_name); -+ -+ crm_trace("destroying %p", userdata); -+ -+ proxy->source = NULL; -+ proxy->ipc = NULL; -+ -+ if (lrm_state && lrm_state->conn) { -+ remote_proxy_notify_destroy(lrm_state->conn, proxy->session_id); -+ } -+ g_hash_table_remove(proxy_table, proxy->session_id); -+} -+ -+static remote_proxy_t * -+remote_proxy_new(const char *node_name, const char *session_id, const char *channel) -+{ -+ static struct ipc_client_callbacks proxy_callbacks = { -+ .dispatch = remote_proxy_dispatch_internal, -+ .destroy = remote_proxy_disconnected -+ }; -+ remote_proxy_t *proxy = calloc(1, sizeof(remote_proxy_t)); -+ -+ proxy->node_name = strdup(node_name); -+ proxy->session_id = strdup(session_id); -+ -+ if (safe_str_eq(channel, CRM_SYSTEM_CRMD)) { -+ proxy->is_local = TRUE; -+ } else { -+ proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 512 * 1024 /* 512k */ , proxy, &proxy_callbacks); -+ proxy->ipc = mainloop_get_ipc_client(proxy->source); -+ -+ if (proxy->source == NULL) { -+ remote_proxy_free(proxy); -+ return NULL; -+ } -+ } -+ -+ g_hash_table_insert(proxy_table, proxy->session_id, proxy); -+ -+ return proxy; -+} -+ -+gboolean -+crmd_is_proxy_session(const char *session) -+{ -+ return g_hash_table_lookup(proxy_table, session) ? 
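
The proxy table used above is keyed by a session id that lives inside the value struct, so only the value gets a GDestroyNotify (the key destroy function is NULL) and removing an entry frees the whole proxy. A self-contained sketch of that ownership arrangement; proxy_t here is an illustrative stand-in for remote_proxy_t:

    #include <glib.h>

    typedef struct {
        char *session_id;
        char *node_name;
    } proxy_t;

    static void proxy_free(gpointer data)
    {
        proxy_t *p = data;
        g_free(p->session_id);   /* also frees the table's key */
        g_free(p->node_name);
        g_free(p);
    }

    int main(void)
    {
        GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal,
                                                  NULL, proxy_free);
        proxy_t *p = g_new0(proxy_t, 1);

        p->session_id = g_strdup("sess-1");
        p->node_name = g_strdup("node-a");
        g_hash_table_insert(table, p->session_id, p);  /* key owned by value */

        g_hash_table_remove(table, "sess-1");          /* runs proxy_free() */
        g_hash_table_destroy(table);
        return 0;
    }
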
TRUE : FALSE; -+} -+ -+void -+crmd_proxy_send(const char *session, xmlNode *msg) -+{ -+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); -+ lrm_state_t *lrm_state = NULL; -+ -+ if (!proxy) { -+ return; -+ } -+ lrm_state = lrm_state_find(proxy->node_name); -+ if (lrm_state) { -+ remote_proxy_relay_event(lrm_state->conn, session, msg); -+ } -+} -+ -+static void -+crmd_proxy_dispatch(const char *user, -+ const char *session, -+ xmlNode *msg) -+{ -+ -+#if ENABLE_ACL -+ determine_request_user(user, msg, F_CRM_USER); -+#endif -+ crm_log_xml_trace(msg, "CRMd-PROXY[inbound]"); -+ -+ crm_xml_add(msg, F_CRM_SYS_FROM, session); -+ if (crmd_authorize_message(msg, NULL, session)) { -+ route_message(C_IPC_MESSAGE, msg); -+ } -+ -+ trigger_fsa(fsa_source); -+} -+ -+static void -+remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) -+{ -+ lrm_state_t *lrm_state = userdata; -+ xmlNode *op_reply = NULL; -+ const char *op = crm_element_value(msg, F_LRMD_IPC_OP); -+ const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); -+ const char *user = crm_element_value(msg, F_LRMD_IPC_USER); -+ int msg_id = 0; -+ -+ /* sessions are raw ipc connections to IPC, -+ * all we do is proxy requests/responses exactly -+ * like they are given to us at the ipc level. */ -+ -+ CRM_CHECK(op != NULL, return); -+ CRM_CHECK(session != NULL, return); -+ -+ crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); -+ -+ /* This is msg from remote ipc client going to real ipc server */ -+ if (safe_str_eq(op, "new")) { -+ const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); -+ -+ CRM_CHECK(channel != NULL, return); -+ -+ if (remote_proxy_new(lrm_state->node_name, session, channel) == NULL) { -+ remote_proxy_notify_destroy(lrmd, session); -+ } -+ crm_info("new remote proxy client established, session id %s", session); -+ } else if (safe_str_eq(op, "destroy")) { -+ g_hash_table_remove(proxy_table, session); -+ -+ } else if (safe_str_eq(op, "request")) { -+ int flags = 0; -+ xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); -+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); -+ -+ CRM_CHECK(request != NULL, return); -+ -+ if (proxy == NULL) { -+ /* proxy connection no longer exists */ -+ remote_proxy_notify_destroy(lrmd, session); -+ return; -+ } else if ((proxy->is_local == FALSE) && (crm_ipc_connected(proxy->ipc) == FALSE)) { -+ g_hash_table_remove(proxy_table, session); -+ return; -+ } -+ crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); -+ -+ if (proxy->is_local) { -+ /* this is for the crmd, which we are, so don't try -+ * and connect/send to ourselves over ipc. instead -+ * do it directly. */ -+ if (flags & crm_ipc_client_response) { -+ op_reply = create_xml_node(NULL, "ack"); -+ crm_xml_add(op_reply, "function", __FUNCTION__); -+ crm_xml_add_int(op_reply, "line", __LINE__); -+ } -+ crmd_proxy_dispatch(user, session, request); -+ } else { -+ /* TODO make this async. 
*/ -+ crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply); -+ } -+ } -+ -+ if (op_reply) { -+ remote_proxy_relay_response(lrmd, session, op_reply, msg_id); -+ free_xml(op_reply); -+ } -+} -+ - int - lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port, - int timeout_ms) -@@ -258,6 +534,7 @@ lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int - return -1; - } - ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback); -+ lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, remote_proxy_cb); - } - - crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms); -diff --git a/crmd/main.c b/crmd/main.c -index 1ae4c7a..749c4b7 100644 ---- a/crmd/main.c -+++ b/crmd/main.c -@@ -62,6 +62,7 @@ main(int argc, char **argv) - int index = 0; - int argerr = 0; - -+ crmd_mainloop = g_main_new(FALSE); - crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_set_options(NULL, "[options]", long_options, - "Daemon for aggregating resource and node failures as well as co-ordinating the cluster's response"); -@@ -138,7 +139,6 @@ crmd_init(void) - - if (state == S_PENDING || state == S_STARTING) { - /* Create the mainloop and run it... */ -- crmd_mainloop = g_main_new(FALSE); - crm_trace("Starting %s's mainloop", crm_system_name); - - #ifdef REALTIME_SUPPORT -@@ -163,6 +163,6 @@ crmd_init(void) - exit_code = 1; - } - -- crm_info("[%s] stopped (%d)", crm_system_name, exit_code); -- return crmd_exit(exit_code); -+ crm_info("%u stopped: %s (%d)", getpid(), pcmk_strerror(exit_code), exit_code); -+ return crmd_fast_exit(exit_code); - } -diff --git a/crmd/membership.c b/crmd/membership.c -index 18cd6b9..370d1a2 100644 ---- a/crmd/membership.c -+++ b/crmd/membership.c -@@ -40,48 +40,34 @@ int last_peer_update = 0; - - extern GHashTable *voted; - --struct update_data_s { -- const char *caller; -- xmlNode *parent; -- int flags; --}; -- - extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); - - static void --check_dead_member(const char *uname, GHashTable * members) -+reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) - { -- CRM_CHECK(uname != NULL, return); -- if (members != NULL && g_hash_table_lookup(members, uname) != NULL) { -- crm_err("%s didnt really leave the membership!", uname); -- return; -- } -- -- erase_node_from_join(uname); -- if (voted != NULL) { -- g_hash_table_remove(voted, uname); -- } -+ crm_node_t *node = value; - -- if (safe_str_eq(fsa_our_uname, uname)) { -- crm_err("We're not part of the cluster anymore"); -- register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); -+ if (crm_is_peer_active(node) == FALSE) { -+ crm_update_peer_join(__FUNCTION__, node, crm_join_none); - -- } else if (AM_I_DC == FALSE && safe_str_eq(uname, fsa_our_dc)) { -- crm_warn("Our DC node (%s) left the cluster", uname); -- register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); -+ if(node->uname) { -+ if (voted != NULL) { -+ g_hash_table_remove(voted, node->uname); -+ } - -- } else if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) { -- check_join_state(fsa_state, __FUNCTION__); -- } --} -+ if (safe_str_eq(fsa_our_uname, node->uname)) { -+ crm_err("We're not part of the cluster anymore"); -+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); - --static void --reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -+ } else if (AM_I_DC == FALSE && safe_str_eq(node->uname, 
fsa_our_dc)) { -+ crm_warn("Our DC node (%s) left the cluster", node->uname); -+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); -+ } -+ } - -- if (crm_is_peer_active(node) == FALSE) { -- check_dead_member(node->uname, NULL); -+ if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) { -+ check_join_state(fsa_state, __FUNCTION__); -+ } - fail_incompletable_actions(transition_graph, node->uuid); - } - } -@@ -129,8 +115,13 @@ crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, - if (rc == pcmk_ok) { - crm_trace("Node update %d complete", call_id); - -+ } else if(call_id < pcmk_ok) { -+ crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); -+ crm_log_xml_debug(msg, "failed"); -+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); -+ - } else { -- crm_err("Node update %d failed", call_id); -+ crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); - crm_log_xml_debug(msg, "failed"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } -@@ -142,7 +133,7 @@ do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *s - const char *value = NULL; - xmlNode *node_state = create_xml_node(parent, XML_CIB_TAG_STATE); - -- set_uuid(node_state, XML_ATTR_UUID, node->uname); -+ set_uuid(node_state, XML_ATTR_UUID, node); - - if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) { - crm_info("Node update for %s cancelled: no id", node->uname); -@@ -189,37 +180,20 @@ do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *s - return node_state; - } - --static void --ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -- struct update_data_s *data = (struct update_data_s *)user_data; -- -- do_update_node_cib(node, data->flags, data->parent, data->caller); --} -- --static void --create_cib_node_definition(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -- xmlNode *cib_nodes = user_data; -- xmlNode *cib_new_node = NULL; -- -- crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); -- cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE); -- crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid); -- crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname); --} - - static void - node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) - { -- if (rc != pcmk_ok) { -- fsa_data_t *msg_data = NULL; -+ fsa_data_t *msg_data = NULL; - -- crm_err("CIB Update %d failed: %s", call_id, pcmk_strerror(rc)); -- crm_log_xml_warn(output, "update:failed"); -+ if(call_id < pcmk_ok) { -+ crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); -+ crm_log_xml_debug(msg, "update:failed"); -+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - -+ } else if(rc < pcmk_ok) { -+ crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); -+ crm_log_xml_debug(msg, "update:failed"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } - } -@@ -247,11 +221,21 @@ populate_cib_nodes(enum node_update_flags flags, const char *source) - #endif - - if (from_hashtable) { -- /* if(uname_is_uuid()) { */ -- /* g_hash_table_foreach(crm_peer_id_cache, create_cib_node_definition, node_list); */ -- /* } else { */ -- g_hash_table_foreach(crm_peer_cache, create_cib_node_definition, node_list); -- /* } */ -+ GHashTableIter iter; -+ crm_node_t *node = NULL; -+ -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, 
(gpointer *) &node)) { -+ xmlNode *new_node = NULL; -+ -+ crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); -+ if(node->uuid && node->uname) { -+ /* We need both to be valid */ -+ new_node = create_xml_node(node_list, XML_CIB_TAG_NODE); -+ crm_xml_add(new_node, XML_ATTR_ID, node->uuid); -+ crm_xml_add(new_node, XML_ATTR_UNAME, node->uname); -+ } -+ } - } - - crm_trace("Populating section from %s", from_hashtable ? "hashtable" : "cluster"); -@@ -261,20 +245,20 @@ populate_cib_nodes(enum node_update_flags flags, const char *source) - - free_xml(node_list); - -- if (crm_peer_cache != NULL && AM_I_DC) { -+ if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) { - /* - * There is no need to update the local CIB with our values if - * we've not seen valid membership data - */ -- struct update_data_s update_data; -+ GHashTableIter iter; -+ crm_node_t *node = NULL; - - node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS); - -- update_data.caller = source; -- update_data.parent = node_list; -- update_data.flags = flags; -- -- g_hash_table_foreach(crm_peer_cache, ghash_update_cib_node, &update_data); -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ do_update_node_cib(node, flags, node_list, source); -+ } - - fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL); - fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete); -@@ -293,7 +277,7 @@ cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, - crm_trace("Quorum update %d complete", call_id); - - } else { -- crm_err("Quorum update %d failed", call_id); -+ crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); - crm_log_xml_debug(msg, "failed"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } -@@ -310,7 +294,7 @@ crm_update_quorum(gboolean quorum, gboolean force_update) - - update = create_xml_node(NULL, XML_TAG_CIB); - crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum); -- set_uuid(update, XML_ATTR_DC_UUID, fsa_our_uname); -+ crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid); - - fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL); - crm_debug("Updating quorum status to %s (call=%d)", quorum ? 
"true" : "false", call_id); -diff --git a/crmd/messages.c b/crmd/messages.c -index 9780090..dec84f9 100644 ---- a/crmd/messages.c -+++ b/crmd/messages.c -@@ -159,7 +159,7 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, - case C_STARTUP: - crm_err("Copying %s data (from %s)" - " not yet implemented", fsa_cause2string(cause), raised_from); -- crmd_exit(1); -+ crmd_exit(pcmk_err_generic); - break; - } - crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); -@@ -256,7 +256,7 @@ delete_fsa_input(fsa_data_t * fsa_data) - if (fsa_data->data != NULL) { - crm_err("Dont know how to free %s data from %s", - fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); -- crmd_exit(1); -+ crmd_exit(pcmk_err_generic); - } - break; - } -@@ -466,6 +466,10 @@ relay_message(xmlNode * msg, gboolean originated_locally) - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { - dest = text2msg_type(sys_to); -+ -+ if (dest == crm_msg_none || dest > crm_msg_stonith_ng) { -+ dest = crm_msg_crmd; -+ } - } - #endif - ROUTER_RESULT("Message result: External relay"); -@@ -517,7 +521,7 @@ process_hello_message(xmlNode * hello, - } - - gboolean --crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) -+crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session) - { - char *client_name = NULL; - char *major_version = NULL; -@@ -526,8 +530,9 @@ crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) - - xmlNode *xml = NULL; - const char *op = crm_element_value(client_msg, F_CRM_TASK); -+ const char *uuid = curr_client ? curr_client->id : proxy_session; - -- if (curr_client == NULL) { -+ if (uuid == NULL) { - crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); - return FALSE; - -@@ -541,7 +546,7 @@ crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) - if (auth_result == TRUE) { - if (client_name == NULL) { - crm_err("Bad client details (client_name=%s, uuid=%s)", -- crm_str(client_name), curr_client->id); -+ crm_str(client_name), uuid); - auth_result = FALSE; - } - } -@@ -559,15 +564,19 @@ crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client) - } - - if (auth_result == TRUE) { -- crm_trace("Accepted client %s", crm_client_name(curr_client)); -- curr_client->userdata = strdup(client_name); -+ crm_trace("Accepted client %s", client_name); -+ if (curr_client) { -+ curr_client->userdata = strdup(client_name); -+ } - - crm_trace("Triggering FSA: %s", __FUNCTION__); - mainloop_set_trigger(fsa_source); - - } else { - crm_warn("Rejected client logon request"); -- qb_ipcs_disconnect(curr_client->ipcs); -+ if (curr_client) { -+ qb_ipcs_disconnect(curr_client->ipcs); -+ } - } - - free(minor_version); -@@ -602,26 +611,33 @@ static enum crmd_fsa_input - handle_failcount_op(xmlNode * stored_msg) - { - const char *rsc = NULL; -+ const char *uname = NULL; -+ gboolean is_remote_node = FALSE; - xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); - - if (xml_rsc) { - rsc = ID(xml_rsc); - } - -+ uname = crm_element_value(stored_msg, XML_LRM_ATTR_TARGET); -+ if (crm_element_value(stored_msg, XML_LRM_ATTR_ROUTER_NODE)) { -+ is_remote_node = TRUE; -+ } -+ - if (rsc) { - char *attr = NULL; - - crm_info("Removing failcount for %s", rsc); - - attr = crm_concat("fail-count", rsc, '-'); -- update_attrd(NULL, attr, NULL, NULL); -+ update_attrd(uname, attr, NULL, NULL, is_remote_node); - free(attr); - - attr = 
crm_concat("last-failure", rsc, '-'); -- update_attrd(NULL, attr, NULL, NULL); -+ update_attrd(uname, attr, NULL, NULL, is_remote_node); - free(attr); - -- lrm_clear_last_failure(rsc); -+ lrm_clear_last_failure(rsc, uname); - } else { - crm_log_xml_warn(stored_msg, "invalid failcount op"); - } -@@ -766,7 +782,9 @@ handle_request(xmlNode * stored_msg) - crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); - - msg = create_reply(stored_msg, ping); -- relay_message(msg, TRUE); -+ if(msg) { -+ relay_message(msg, TRUE); -+ } - - free_xml(ping); - free_xml(msg); -@@ -851,7 +869,7 @@ handle_shutdown_request(xmlNode * stored_msg) - crm_log_xml_trace(stored_msg, "message"); - - now_s = crm_itoa(now); -- update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL); -+ update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE); - free(now_s); - - /* will be picked up by the TE as long as its running */ -@@ -900,6 +918,9 @@ send_msg_via_ipc(xmlNode * msg, const char *sys) - #endif - do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); - -+ } else if (sys != NULL && crmd_is_proxy_session(sys)) { -+ crmd_proxy_send(sys, msg); -+ - } else { - crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); - send_ok = FALSE; -diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c -index 5e51f5e..07cd67c 100644 ---- a/crmd/remote_lrmd_ra.c -+++ b/crmd/remote_lrmd_ra.c -@@ -396,7 +396,7 @@ handle_remote_ra_exec(gpointer user_data) - fsa_cib_delete(XML_CIB_TAG_STATUS, status, cib_quorum_override, rc, NULL); - crm_info("Forced a remote LRM refresh before connection start: call=%d", rc); - crm_log_xml_trace(status, "CLEAR LRM"); -- free(status); -+ free_xml(status); - - rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout); - if (rc == 0) { -diff --git a/crmd/subsystems.c b/crmd/subsystems.c -index ce12f42..a4d07b3 100644 ---- a/crmd/subsystems.c -+++ b/crmd/subsystems.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -45,26 +45,19 @@ - #include - - static void --crmdManagedChildDied(GPid pid, gint status, gpointer user_data) -+crmd_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- struct crm_subsystem_s *the_subsystem = user_data; -- -- if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); -- int core = WCOREDUMP(status); -+ /* struct crm_subsystem_s *the_subsystem = mainloop_child_userdata(p); */ -+ const char *name = mainloop_child_name(p); - -+ if (signo) { - crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)", -- the_subsystem->name, signo, the_subsystem->pid, core); -- -- } else if (WIFEXITED(status)) { -- int exitcode = WEXITSTATUS(status); -- -- do_crm_log(exitcode == 0 ? 
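
The subsystems.c hunk above moves to a child-exit handler that receives pre-decoded signal and exit-code information instead of a raw wait status. Decoding such a status yourself looks like the following plain POSIX sketch (WCOREDUMP is a widespread extension rather than strict POSIX, and none of this is Pacemaker API):

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    /* Report how a child from waitpid() ended: by signal or by exit. */
    static void report_child(pid_t pid, int status)
    {
        if (WIFSIGNALED(status)) {
            fprintf(stderr, "child %d killed by signal %d (core=%d)\n",
                    (int)pid, WTERMSIG(status), WCOREDUMP(status));

        } else if (WIFEXITED(status)) {
            fprintf(stderr, "child %d exited with rc=%d\n",
                    (int)pid, WEXITSTATUS(status));
        }
    }
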
LOG_INFO : LOG_ERR, -- "Child process %s exited (pid=%d, rc=%d)", the_subsystem->name, -- the_subsystem->pid, exitcode); -+ name, signo, pid, core); - - } else { -- crm_err("Process %s:[%d] exited?", the_subsystem->name, the_subsystem->pid); -+ do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR, -+ "Child process %s exited (pid=%d, rc=%d)", name, -+ pid, exitcode); - } - } - -@@ -150,7 +143,7 @@ start_subsystem(struct crm_subsystem_s * the_subsystem) - return FALSE; - - default: /* Parent */ -- g_child_watch_add(pid, crmdManagedChildDied, the_subsystem); -+ mainloop_child_add(pid, 0, the_subsystem->name, the_subsystem, crmd_child_exit); - crm_trace("Client %s is has pid: %d", the_subsystem->name, pid); - the_subsystem->pid = pid; - return TRUE; -@@ -185,6 +178,5 @@ start_subsystem(struct crm_subsystem_s * the_subsystem) - /* Should not happen */ - crm_perror(LOG_ERR, "FATAL: Cannot exec %s", the_subsystem->command); - -- crmd_exit(100); /* Suppress respawning */ -- return TRUE; /* never reached */ -+ return crm_exit(DAEMON_RESPAWN_STOP); /* Suppress respawning */ - } -diff --git a/crmd/te_actions.c b/crmd/te_actions.c -index 895a809..b533f58 100644 ---- a/crmd/te_actions.c -+++ b/crmd/te_actions.c -@@ -69,10 +69,6 @@ send_stonith_update(crm_action_t * action, const char *target, const char *uuid) - CRM_CHECK(target != NULL, return); - CRM_CHECK(uuid != NULL, return); - -- if (get_node_uuid(0, target) == NULL) { -- set_node_uuid(target, uuid); -- } -- - /* Make sure the membership and join caches are accurate */ - peer = crm_get_peer(0, target); - if (peer->uuid == NULL) { -@@ -82,7 +78,7 @@ send_stonith_update(crm_action_t * action, const char *target, const char *uuid) - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL); - crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); - crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); -- erase_node_from_join(target); -+ crm_update_peer_join(__FUNCTION__, peer, crm_join_none); - - node_state = - do_update_node_cib(peer, -@@ -464,7 +460,9 @@ te_rsc_command(crm_graph_t * graph, crm_action_t * action) - } - - value = crm_meta_value(action->params, XML_OP_ATTR_PENDING); -- if (crm_is_true(value) && safe_str_neq(task, CRMD_ACTION_CANCEL)) { -+ if (crm_is_true(value) -+ && safe_str_neq(task, CRMD_ACTION_CANCEL) -+ && safe_str_neq(task, CRMD_ACTION_DELETE)) { - /* write a "pending" entry to the CIB, inhibit notification */ - crm_debug("Recording pending op %s in the CIB", task_uuid); - cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_EXECRA_STATUS_UNKNOWN); -@@ -504,11 +502,15 @@ notify_crmd(crm_graph_t * graph) - case tg_restart: - type = "restart"; - if (fsa_state == S_TRANSITION_ENGINE) { -- if (transition_timer->period_ms > 0) { -- crm_timer_stop(transition_timer); -- crm_timer_start(transition_timer); -- } else if (too_many_st_failures() == FALSE) { -- event = I_PE_CALC; -+ if (too_many_st_failures() == FALSE) { -+ if (transition_timer->period_ms > 0) { -+ crm_timer_stop(transition_timer); -+ crm_timer_start(transition_timer); -+ } else { -+ event = I_PE_CALC; -+ } -+ } else { -+ event = I_TE_SUCCESS; - } - - } else if (fsa_state == S_POLICY_ENGINE) { -diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c -index a91f192..3c04277 100644 ---- a/crmd/te_callbacks.c -+++ b/crmd/te_callbacks.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the 
Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -63,7 +63,7 @@ process_resource_updates(xmlXPathObject * xpathObj) - - - */ -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ int lpc = 0, max = numXpathResults(xpathObj); - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *rsc_op = getXpathResult(xpathObj, lpc); -@@ -76,6 +76,7 @@ process_resource_updates(xmlXPathObject * xpathObj) - void - te_update_diff(const char *event, xmlNode * msg) - { -+ int lpc, max; - int rc = -1; - const char *op = NULL; - -@@ -119,7 +120,7 @@ te_update_diff(const char *event, xmlNode * msg) - crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, - diff_del_admin_epoch, diff_del_epoch, diff_del_updates, - diff_add_admin_epoch, diff_add_epoch, diff_add_updates, fsa_state2string(fsa_state)); -- log_cib_diff(LOG_DEBUG_2, diff, op); -+ log_cib_diff(LOG_DEBUG_2, diff, __FUNCTION__); - - if (cib_config_changed(NULL, NULL, &diff)) { - abort_transition(INFINITY, tg_restart, "Non-status change", diff); -@@ -130,79 +131,72 @@ te_update_diff(const char *event, xmlNode * msg) - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - xmlNode *aborted = getXpathResult(xpathObj, 0); - - abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); - goto bail; - -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* Tickets Attributes - Removed */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - xmlNode *aborted = getXpathResult(xpathObj, 0); - - abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); - goto bail; -- -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* Transient Attributes - Added/Updated */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" - XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -- int lpc; -+ max = numXpathResults(xpathObj); - -- for (lpc = 0; lpc < xpathObj->nodesetval->nodeNr; lpc++) { -- xmlNode *attr = getXpathResult(xpathObj, lpc); -- const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); -- const char *value = NULL; -- -- if (safe_str_eq(CRM_OP_PROBED, name)) { -- value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); -- } -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *attr = getXpathResult(xpathObj, lpc); -+ const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); -+ const char *value = NULL; - -- if (crm_is_true(value) == FALSE) { -- abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); -- crm_log_xml_trace(attr, "Abort"); -- goto bail; -- } -+ if 
(safe_str_eq(CRM_OP_PROBED, name)) { -+ value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); - } - -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ if (crm_is_true(value) == FALSE) { -+ abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); -+ crm_log_xml_trace(attr, "Abort"); -+ goto bail; -+ } - } - -+ freeXpathObject(xpathObj); -+ - /* Transient Attributes - Removed */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" - XML_TAG_TRANSIENT_NODEATTRS); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - xmlNode *aborted = getXpathResult(xpathObj, 0); - - abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); - goto bail; - -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* - * Check for and fast-track the processing of LRM refreshes - * In large clusters this can result in _huge_ speedups - * - * Unfortunately we can only do so when there are no pending actions -- * Otherwise we could miss updates we're waiting for and stall -+ * Otherwise we could miss updates we're waiting for and stall - * - */ - xpathObj = NULL; -@@ -213,84 +207,71 @@ te_update_diff(const char *event, xmlNode * msg) - XML_LRM_TAG_RESOURCE); - } - -- if (xpathObj) { -- int updates = xpathObj->nodesetval->nodeNr; -- -- if (updates > 1) { -- /* Updates by, or in response to, TE actions will never contain updates -- * for more than one resource at a time -- */ -- crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", -- updates); -- crm_log_xml_trace(diff, "lrm-refresh"); -- abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); -- goto bail; -- } -- xmlXPathFreeObject(xpathObj); -+ max = numXpathResults(xpathObj); -+ if (max > 1) { -+ /* Updates by, or in response to, TE actions will never contain updates -+ * for more than one resource at a time -+ */ -+ crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", max); -+ crm_log_xml_trace(diff, "lrm-refresh"); -+ abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); -+ goto bail; - } -+ freeXpathObject(xpathObj); - - /* Process operation updates */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj) { -+ if (numXpathResults(xpathObj)) { - process_resource_updates(xpathObj); -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* Detect deleted (as opposed to replaced or added) actions - eg. 
crm_resource -C */ - xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -- -- for (lpc = 0; lpc < max; lpc++) { -- int max = 0; -- const char *op_id = NULL; -- char *rsc_op_xpath = NULL; -- xmlXPathObject *op_match = NULL; -- xmlNode *match = getXpathResult(xpathObj, lpc); -- -- CRM_CHECK(match != NULL, continue); -- -- op_id = ID(match); -- -- max = strlen(rsc_op_template) + strlen(op_id) + 1; -- rsc_op_xpath = calloc(1, max); -- snprintf(rsc_op_xpath, max, rsc_op_template, op_id); -- -- op_match = xpath_search(diff, rsc_op_xpath); -- if (op_match == NULL || op_match->nodesetval->nodeNr == 0) { -- /* Prevent false positives by matching cancelations too */ -- const char *node = get_node_id(match); -- crm_action_t *cancelled = get_cancel_action(op_id, node); -- -- if (cancelled == NULL) { -- crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, -- node); -- abort_transition(INFINITY, tg_restart, "Resource op removal", match); -- if (op_match) { -- xmlXPathFreeObject(op_match); -- } -- free(rsc_op_xpath); -- goto bail; -- -- } else { -- crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", -- op_id, node, cancelled->id); -- } -- } -+ max = numXpathResults(xpathObj); -+ for (lpc = 0; lpc < max; lpc++) { -+ int path_max = 0; -+ const char *op_id = NULL; -+ char *rsc_op_xpath = NULL; -+ xmlXPathObject *op_match = NULL; -+ xmlNode *match = getXpathResult(xpathObj, lpc); -+ -+ CRM_CHECK(match != NULL, continue); -+ -+ op_id = ID(match); -+ -+ path_max = strlen(rsc_op_template) + strlen(op_id) + 1; -+ rsc_op_xpath = calloc(1, path_max); -+ snprintf(rsc_op_xpath, path_max, rsc_op_template, op_id); -+ -+ op_match = xpath_search(diff, rsc_op_xpath); -+ if (numXpathResults(op_match) == 0) { -+ /* Prevent false positives by matching cancelations too */ -+ const char *node = get_node_id(match); -+ crm_action_t *cancelled = get_cancel_action(op_id, node); -+ -+ if (cancelled == NULL) { -+ crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, -+ node); -+ abort_transition(INFINITY, tg_restart, "Resource op removal", match); -+ freeXpathObject(op_match); -+ free(rsc_op_xpath); -+ goto bail; - -- if (op_match) { -- xmlXPathFreeObject(op_match); -+ } else { -+ crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", -+ op_id, node, cancelled->id); - } -- free(rsc_op_xpath); - } -+ -+ freeXpathObject(op_match); -+ free(rsc_op_xpath); - } - - bail: -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - } - - gboolean -@@ -323,13 +304,13 @@ process_te_message(xmlNode * msg, xmlNode * xml_data) - crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from); - - xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj) { -+ if (numXpathResults(xpathObj)) { - process_resource_updates(xpathObj); -- xmlXPathFreeObject(xpathObj); -- xpathObj = NULL; -+ freeXpathObject(xpathObj); - - } else { - crm_log_xml_err(msg, "Invalid (N)ACK"); -+ freeXpathObject(xpathObj); - return FALSE; - } - -@@ -369,6 +350,20 @@ too_many_st_failures(void) - } - - void -+reset_st_fail_count(const char *target) -+{ -+ struct st_fail_rec *rec = NULL; -+ -+ if (stonith_failures) { -+ rec = g_hash_table_lookup(stonith_failures, target); -+ } -+ -+ if (rec) { -+ rec->count = 0; -+ } -+} -+ -+void - tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) - { - char *uuid = NULL; -@@ 
-539,7 +534,7 @@ action_timer_callback(gpointer data) - - if (timer->action->type != action_type_rsc) { - send_update = FALSE; -- } else if (safe_str_eq(task, "cancel")) { -+ } else if (safe_str_eq(task, RSC_CANCEL)) { - /* we dont need to update the CIB with these */ - send_update = FALSE; - } -diff --git a/crmd/te_events.c b/crmd/te_events.c -index e289a8b..521cef6 100644 ---- a/crmd/te_events.c -+++ b/crmd/te_events.c -@@ -97,8 +97,34 @@ fail_incompletable_actions(crm_graph_t * graph, const char *down_node) - return FALSE; - } - -+static const char * -+get_uname_from_event(xmlNode * event) -+{ -+ xmlNode *node = event; -+ -+ while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) { -+ node = node->parent; -+ } -+ -+ CRM_CHECK(node != NULL, return NULL); -+ return crm_element_value(node, XML_ATTR_UNAME); -+} -+ -+static gboolean -+get_is_remote_from_event(xmlNode * event) -+{ -+ xmlNode *node = event; -+ -+ while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) { -+ node = node->parent; -+ } -+ -+ CRM_CHECK(node != NULL, return FALSE); -+ return crm_element_value(node, XML_NODE_IS_REMOTE) ? TRUE : FALSE; -+} -+ - static gboolean --update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, gboolean do_update) -+update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc, gboolean do_update) - { - int interval = 0; - -@@ -108,7 +134,8 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - - const char *value = NULL; - const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); -- const char *on_uname = get_uname(event_node); -+ const char *on_uname = get_uname_from_event(event); -+ const char *origin = crm_element_value(event, XML_ATTR_ORIGIN); - - if (rc == 99) { - /* this is an internal code for "we're busy, try again" */ -@@ -118,6 +145,12 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - return FALSE; - } - -+ if (safe_str_eq(origin, "build_active_RAs")) { -+ crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh", -+ id, rc, on_uname); -+ return FALSE; -+ } -+ - if (failed_stop_offset == NULL) { - failed_stop_offset = strdup(INFINITY_S); - } -@@ -126,7 +159,12 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - failed_start_offset = strdup(INFINITY_S); - } - -- CRM_CHECK(on_uname != NULL, return TRUE); -+ if (on_uname == NULL) { -+ /* uname not in event, check cache */ -+ on_uname = crm_peer_uname(event_node_uuid); -+ CRM_CHECK(on_uname != NULL, return TRUE); -+ } -+ - CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event)); - goto bail); - CRM_CHECK(task != NULL, goto bail); -@@ -160,16 +198,17 @@ update_failcount(xmlNode * event, const char *event_node, int rc, int target_rc, - - if (do_update) { - char *now = crm_itoa(time(NULL)); -+ gboolean is_remote_node = get_is_remote_from_event(event); - - crm_warn("Updating failcount for %s on %s after failed %s:" - " rc=%d (update=%s, time=%s)", rsc_id, on_uname, task, rc, value, now); - - attr_name = crm_concat("fail-count", rsc_id, '-'); -- update_attrd(on_uname, attr_name, value, NULL); -+ update_attrd(on_uname, attr_name, value, NULL, is_remote_node); - free(attr_name); - - attr_name = crm_concat("last-failure", rsc_id, '-'); -- update_attrd(on_uname, attr_name, now, NULL); -+ update_attrd(on_uname, attr_name, now, NULL, is_remote_node); - free(attr_name); - - free(now); -diff --git 
a/crmd/te_utils.c b/crmd/te_utils.c -index 0bac2f7..2a962e0 100644 ---- a/crmd/te_utils.c -+++ b/crmd/te_utils.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -90,7 +90,9 @@ tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e) - } - - /* cbchan will be garbage at this point, arrange for it to be reset */ -- stonith_api->state = stonith_disconnected; -+ if(stonith_api) { -+ stonith_api->state = stonith_disconnected; -+ } - - if (AM_I_DC) { - fail_incompletable_stonith(transition_graph); -@@ -102,24 +104,64 @@ tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e) - # include - #endif - -+char *te_client_id = NULL; -+ -+#ifdef HAVE_SYS_REBOOT_H -+# include -+# include -+#endif -+ - static void - tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) - { -+ if(te_client_id == NULL) { -+ te_client_id = g_strdup_printf("%s.%d", crm_system_name, getpid()); -+ } -+ - if (st_event == NULL) { - crm_err("Notify data not found"); - return; - } - - if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { -- crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner, -- st_event->origin); -- register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); -+ crm_crit("We were alegedly just fenced by %s for %s with %s!", st_event->executioner, -+ st_event->origin, st_event->device); /* Dumps blackbox if enabled */ -+ -+ qb_log_fini(); /* Try to get the above log message to disk - somehow */ -+ -+ /* Get out ASAP and do not come back up. -+ * -+ * Triggering a reboot is also not the worst idea either since -+ * the rest of the cluster thinks we're safely down -+ */ -+ -+#ifdef RB_HALT_SYSTEM -+ reboot(RB_HALT_SYSTEM); -+#endif -+ -+ /* -+ * If reboot() fails or is not supported, coming back up will -+ * probably lead to a situation where the other nodes set our -+ * status to 'lost' because of the fencing callback and will -+ * discard subsequent election votes with: -+ * -+ * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) -+ * -+ * So just stay dead, something is seriously messed up anyway. -+ * -+ */ -+ exit(100); /* None of our wrappers since we already called qb_log_fini() */ - return; - } - -+ if (st_event->result == pcmk_ok && -+ safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { -+ reset_st_fail_count(st_event->target); -+ } -+ - crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", - st_event->target, st_event->result == pcmk_ok ? "" : " not", -- st_event->operation, -+ st_event->action, - st_event->executioner ? st_event->executioner : "", - st_event->origin, pcmk_strerror(st_event->result), st_event->id, - st_event->client_origin ? 
st_event->client_origin : ""); -@@ -144,11 +186,27 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) - } - #endif - -- if (st_event->result == pcmk_ok) { -- gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); -+ if (st_event->result == pcmk_ok) { -+ crm_node_t *peer = crm_get_peer(0, st_event->target); -+ const char *uuid = crm_peer_uuid(peer); -+ gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); - - crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); -- if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { -+ if(AM_I_DC) { -+ /* The DC always sends updates */ -+ send_stonith_update(NULL, st_event->target, uuid); -+ -+ if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { -+ -+ /* Abort the current transition graph if it wasn't us -+ * that invoked stonith to fence someone -+ */ -+ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); -+ abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); -+ } -+ -+ /* Assume it was our leader if we dont currently have one */ -+ } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { - crm_notice("Target %s our leader %s (recorded: %s)", - fsa_our_dc ? "was" : "may have been", st_event->target, - fsa_our_dc ? fsa_our_dc : ""); -@@ -158,26 +216,18 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) - * have them do so too after the election - */ - if (we_are_executioner) { -- const char *uuid = get_uuid(st_event->target); -- - send_stonith_update(NULL, st_event->target, uuid); - } - stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); - -- } else if (AM_I_DC && -- st_event->client_origin && -- safe_str_neq(st_event->client_origin, crm_system_name)) { -- const char *uuid = get_uuid(st_event->target); -- -- /* If a remote process outside of pacemaker invoked stonith to -- * fence someone, report the fencing result to the cib -- * and abort the transition graph. 
*/ -- crm_info("External fencing operation from %s fenced %s", st_event->client_origin, -- st_event->target); -- send_stonith_update(NULL, st_event->target, uuid); -- abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); - } -- } -+ -+ /* Everyone records them as safely down */ -+ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL); -+ crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); -+ crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); -+ crm_update_peer_join(__FUNCTION__, peer, crm_join_none); -+ } - } - - gboolean -@@ -307,8 +357,8 @@ te_graph_trigger(gpointer user_data) - void - trigger_graph_processing(const char *fn, int line) - { -- mainloop_set_trigger(transition_trigger); - crm_trace("%s:%d - Triggered graph processing", fn, line); -+ mainloop_set_trigger(transition_trigger); - } - - void -@@ -327,32 +377,44 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action, - int diff_del_updates = 0; - int diff_del_epoch = 0; - int diff_del_admin_epoch = 0; -+ -+ const char *uname = ""; -+ xmlNode *search = reason; - xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2); - - magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); - -+ while(search) { -+ const char *kind = TYPE(search); -+ if(safe_str_eq(XML_CIB_TAG_STATE, kind) -+ || safe_str_eq(XML_CIB_TAG_NODE, kind)) { -+ uname = crm_peer_uname(ID(search)); -+ } -+ search = search->parent; -+ } -+ - if (diff) { - cib_diff_version_details(diff, - &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, - &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); - if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) { - crm_info -- ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s", -- fn, line, transition_graph->complete, TYPE(reason), ID(reason), NAME(reason), -+ ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s", -+ fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), NAME(reason), - VALUE(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, - diff_add_updates, abort_text); - } else { - crm_info -- ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", -- fn, line, transition_graph->complete, TYPE(reason), ID(reason), -+ ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", -+ fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), - magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates, - abort_text); - } - - } else { - crm_info -- ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s", -- fn, line, transition_graph->complete, TYPE(reason), ID(reason), -+ ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s) : %s", -+ fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), - magic ? 
magic : "NA", abort_text); - } - -@@ -388,7 +450,7 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action, - if (transition_timer->period_ms > 0) { - crm_timer_stop(transition_timer); - crm_timer_start(transition_timer); -- } else if (too_many_st_failures() == FALSE) { -+ } else { - register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); - } - return; -diff --git a/crmd/tengine.c b/crmd/tengine.c -index 9ff458c..8e236f1 100644 ---- a/crmd/tengine.c -+++ b/crmd/tengine.c -@@ -106,10 +106,6 @@ do_te_control(long long action, - te_uuid = crm_generate_uuid(); - crm_info("Registering TE UUID: %s", te_uuid); - -- if (transition_trigger == NULL) { -- transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); -- } -- - if (pcmk_ok != - fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { - crm_err("Could not set CIB notification callback"); -diff --git a/crmd/utils.c b/crmd/utils.c -index d06b785..07c71b2 100644 ---- a/crmd/utils.c -+++ b/crmd/utils.c -@@ -131,6 +131,7 @@ crm_timer_popped(gpointer data) - - if (timer == election_trigger && election_trigger->counter > 5) { - crm_notice("We appear to be in an election loop, something may be wrong"); -+ crm_write_blackbox(0, NULL); - election_trigger->counter = 0; - } - -@@ -983,7 +984,7 @@ update_dc(xmlNode * msg) - crm_info("Set DC to %s (%s)", crm_str(fsa_our_dc), crm_str(fsa_our_dc_version)); - - } else if (last_dc != NULL) { -- crm_debug("Unset DC. Was %s", crm_str(last_dc)); -+ crm_info("Unset DC. Was %s", crm_str(last_dc)); - } - - free(last_dc); -@@ -1018,12 +1019,36 @@ erase_status_tag(const char *uname, const char *tag, int options) - - crm_ipc_t *attrd_ipc = NULL; - -+static int -+update_without_attrd(const char *host_uuid, const char *name, const char *value, const char *user_name) -+{ -+ if (fsa_cib_conn == NULL) { -+ return -1; -+ } -+ -+ crm_trace("updating status for host_uuid %s, %s=%s", host_uuid, name ? name : "", value ? 
value : ""); -+ return update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_STATUS, host_uuid, NULL, NULL, -+ NULL, name, value, FALSE, user_name); -+} -+ - void --update_attrd(const char *host, const char *name, const char *value, const char *user_name) -+update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node) - { - gboolean rc; - int max = 5; - -+ /* TODO eventually we will want to update/replace the attrd with -+ * something that can handle remote nodes as well as cluster nodes */ -+ if (is_remote_node) { -+ /* host is required for updating a remote node */ -+ CRM_CHECK(host != NULL, return;); -+ /* remote node uname and uuid are equal */ -+ if (update_without_attrd(host, name, value, user_name) < pcmk_ok) { -+ crm_err("Could not update attribute %s for remote-node %s", name, host); -+ } -+ return; -+ } -+ - if (attrd_ipc == NULL) { - attrd_ipc = crm_ipc_new(T_ATTRD, 0); - } -@@ -1037,10 +1062,10 @@ update_attrd(const char *host, const char *name, const char *value, const char * - - rc = attrd_update_delegate(attrd_ipc, 'U', host, name, value, XML_CIB_TAG_STATUS, NULL, - NULL, user_name); -- if (rc > 0) { -+ if (rc == pcmk_ok) { - break; - -- } else if (rc != -EAGAIN && rc != -EREMOTEIO) { -+ } else if (rc != -EAGAIN && rc != -EALREADY) { - crm_info("Disconnecting from attrd: %s (%d)", pcmk_strerror(rc), rc); - crm_ipc_close(attrd_ipc); - } -@@ -1049,7 +1074,7 @@ update_attrd(const char *host, const char *name, const char *value, const char * - - } while (max--); - -- if (rc < 0) { -+ if (rc != pcmk_ok) { - if (name) { - crm_err("Could not send attrd %s update%s: %s (%d)", - name, is_set(fsa_input_register, R_SHUTDOWN) ? " at shutdown" : "", -diff --git a/doc/Clusters_from_Scratch/en-US/Revision_History.xml b/doc/Clusters_from_Scratch/en-US/Revision_History.xml -index 59e961f..19dd319 100644 ---- a/doc/Clusters_from_Scratch/en-US/Revision_History.xml -+++ b/doc/Clusters_from_Scratch/en-US/Revision_History.xml -@@ -8,43 +8,43 @@ - - - -- 1 -+ 1-0 - Mon May 17 2010 - AndrewBeekhofandrew@beekhof.net - Import from Pages.app - - -- 2 -+ 2-0 - Wed Sep 22 2010 - RaoulScarazzinirasca@miamammausalinux.org - Italian translation - - -- 3 -+ 3-0 - Wed Feb 9 2011 - AndrewBeekhofandrew@beekhof.net - Updated for Fedora 13 - - -- 4 -+ 4-0 - Wed Oct 5 2011 - AndrewBeekhofandrew@beekhof.net - Update the GFS2 section to use CMAN - - -- 5 -+ 5-0 - Fri Feb 10 2012 - AndrewBeekhofandrew@beekhof.net - Generate docbook content from asciidoc sources - - -- 6 -+ 6-0 - Tues July 3 2012 - AndrewBeekhofandrew@beekhof.net - Updated for Fedora 17 - - -- 7 -+ 7-0 - Fri Sept 14 2012 - DavidVosseldvossel@redhat.com - Updated for pcs -diff --git a/doc/Makefile.am b/doc/Makefile.am -index 1661df6..663315e 100644 ---- a/doc/Makefile.am -+++ b/doc/Makefile.am -@@ -7,12 +7,12 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-@@ -22,12 +22,12 @@ MAINTAINERCLEANFILES = Makefile.in - helpdir = $(datadir)/$(PACKAGE) - - ascii = crm_fencing.txt acls.txt --docbook = Pacemaker_Explained Clusters_from_Scratch -+docbook = Pacemaker_Explained Clusters_from_Scratch Pacemaker_Remote - doc_DATA = README.hb2openais $(ascii) $(generated_docs) - - publican_docs = --generated_docs = --generated_mans = -+generated_docs = -+generated_mans = - - DOCBOOK_FORMATS := html-desktop - DOCBOOK_LANGS := en-US -@@ -35,8 +35,10 @@ DOTs = $(wildcard */en-US/images/*.dot) - SVG = $(wildcard */en-US/images/pcmk-*.svg) $(DOTs:%.dot=%.svg) - - PNGS = $(SVG:%.svg=%-small.png) $(SVG:%.svg=%.png) $(SVG:%.svg=%-large.png) \ -- Pacemaker_Explained/en-US/images/Policy-Engine-big.png -- Pacemaker_Explained/en-US/images/Policy-Engine-small.png -+ Pacemaker_Explained/en-US/images/Policy-Engine-big.png \ -+ Pacemaker_Explained/en-US/images/Policy-Engine-small.png \ -+ Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png \ -+ Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png - - BRAND_PNGS = publican-clusterlabs/en-US/images/title_logo.png \ - publican-clusterlabs/en-US/images/image_left.png \ -@@ -62,7 +64,7 @@ publican_docs += $(docbook) - endif - endif - --EXTRA_DIST = $(docbook:%=%.xml) -+EXTRA_DIST = $(docbook:%=%.xml) - - %.html: %.txt - $(ASCIIDOC) --unsafe --backend=xhtml11 $< -@@ -80,11 +82,11 @@ EXTRA_DIST = $(docbook:%=%.xml) - sed -i.sed 's///' $@ - sed -i.sed 's/ //' $@ # Fix line endings - sed -i.sed 's/\ lang="en"//' $@ # Never specify a language in the chapters -- sed -i.sed 's/simpara/para/g' $@ # publican doesn't correctly render footnotes with simpara -+ sed -i.sed 's/simpara/para/g' $@ # publican doesn't correctly render footnotes with simpara - sed -i.sed 's/.*.*//g' $@ # Remove dangling tag - sed -i.sed 's/.*preface>//g' $@ # Remove preface elements - sed -i.sed 's:::g' $@ # Remove empty title -- sed -i.sed 's/chapter/section/g' $@ # Chapters become sections, so that books can become chapters -+ sed -i.sed 's/chapter/section/g' $@ # Chapters become sections, so that books can become chapters - sed -i.sed 's/<.*bookinfo.*>//g' $@ # Strip out bookinfo, we don't need it - -grep -qis "//' $@ # We just want the appendix tag - -grep -vqis "/chapter>/g' $@ # Rename to chapter -@@ -94,7 +96,7 @@ CFS_TXT=$(wildcard Clusters_from_Scratch/en-US/*.txt) - CFS_XML=$(CFS_TXT:%.txt=%.xml) - - # We have to hardcode the book name --# With '%' the test for 'newness' fails -+# With '%' the test for 'newness' fails - Clusters_from_Scratch.build: $(PNGS) $(wildcard Clusters_from_Scratch/en-US/*.xml) $(CFS_XML) - @echo Building $(@:%.build=%) because of $? - rm -rf $(@:%.build=%)/publish/* -@@ -106,7 +108,7 @@ PE_TXT=$(wildcard Pacemaker_Explained/en-US/*.txt) - PE_XML=$(PE_TXT:%.txt=%.xml) - - # We have to hardcode the book name --# With '%' the test for 'newness' fails -+# With '%' the test for 'newness' fails - Pacemaker_Explained.build: $(PNGS) $(wildcard Pacemaker_Explained/en-US/*.xml) $(PE_XML) - @echo Building $(@:%.build=%) because of $? - rm -rf $(@:%.build=%)/publish/* -@@ -114,6 +116,19 @@ Pacemaker_Explained.build: $(PNGS) $(wildcard Pacemaker_Explained/en-US/*.xml) $ - rm -rf $(@:%.build=%)/tmp - touch $@ - -+ -+PR_TXT=$(wildcard Pacemaker_Remote/en-US/*.txt) -+PR_XML=$(PR_TXT:%.txt=%.xml) -+ -+# We have to hardcode the book name -+# With '%' the test for 'newness' fails -+Pacemaker_Remote.build: $(PNGS) $(wildcard Pacemaker_Remote/en-US/*.xml) $(PR_XML) -+ @echo Building $(@:%.build=%) because of $? 
-+ rm -rf $(@:%.build=%)/publish/* -+ cd $(@:%.build=%) && RPM_BUILD_DIR="" $(PUBLICAN) build --publish --langs=$(DOCBOOK_LANGS) --formats=$(DOCBOOK_FORMATS) -+ rm -rf $(@:%.build=%)/tmp -+ touch $@ -+ - # Update the translation template - pot: - for book in $(docbook); do \ -@@ -161,7 +176,7 @@ pdf: - make DOCBOOK_FORMATS="pdf" ASCIIDOC_CLI_TYPE=$(ASCIIDOC_CLI_TYPE) all-local - - # Make sure www-(pcs|crmsh) happen in serial --www: -+www: - make www-pcs - make www-crmsh - make $(generated_docs) $(ascii) -@@ -183,7 +198,6 @@ www-cli: - if BUILD_DOCBOOK - for book in $(docbook); do \ - echo Uploading $$book...; \ -- echo "Requires Corosync 2.x and optimized for the $(ASCIIDOC_CLI_TYPE) CLI
" > $$book/publish/build-$(PACKAGE_SERIES)-$(ASCIIDOC_CLI_TYPE).txt; \ - echo "Generated on `date` from version: $(shell git log --pretty="format:%h %d" -n 1)" >> $$book/publish/build-$(PACKAGE_SERIES)-$(ASCIIDOC_CLI_TYPE).txt; \ - for lang in `ls -1 $$book/publish | grep [a-z][a-z]-[A-Z][A-Z]`; do \ - mv $$book/publish/$$lang/Pacemaker/$(PACKAGE_SERIES)-$(ASCIIDOC_CLI_TYPE)/epub/$$book/Pacemaker-1.1{-$(ASCIIDOC_CLI_TYPE),}-$$book-$$lang.epub; \ -@@ -195,7 +209,7 @@ if BUILD_DOCBOOK - endif - - clean-local: -- -rm -rf $(generated_docs) $(generated_mans) $(docbook_build) $(CFS_XML) $(PE_XML) -+ -rm -rf $(generated_docs) $(generated_mans) $(docbook_build) $(CFS_XML) $(PE_XML) $(PR_XML) - for book in $(docbook); do rm -rf $$book/tmp $$book/publish; done - - foo: -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt b/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt -index e199bf5..2acb9fe 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt -@@ -484,6 +484,49 @@ limit to the number of sets that can be specified. - .Visual representation of the start order for the three sets defined above - image::images/three-sets.png["Three ordered sets",width="16cm",height="7.5cm",align="center"] - -+ -+== Resource Set OR Logic == -+ -+The unordered set logic discussed so far has all been "AND" logic. -+To illustrate this take the 3 resource set figure in the previous section. -+Those sets can be expressed, +(A and B) then (C) then (D) then (E and F)+ -+ -+Say for example we want change the first set, (A and B), to use "OR" logic -+so the sets look like this, +(A or B) then (C) then (D) then (E and F)+. -+This functionality can be achieved through the use of the +require-all+ -+option. By default this option is 'require-all=true' which is why the -+"AND" logic is used by default. Changing +require-all=false+ means only one -+resource in the set needs to be started before continuing on to the next set. -+ -+Note that the 'require-all=false' option only makes sense to use in conjunction -+with unordered sets, 'sequential=false'. Think of it like this, 'sequential=false' -+modifies the set to be an unordered set that uses "AND" logic by default, by adding -+'require-all=false' the unordered set's "AND" logic is flipped to "OR" logic. -+ -+.Resource Set "OR" logic. Three ordered sets, where the first set is internally unordered with "OR" logic. -+====== -+[source,XML] -+------- -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+------- -+====== -+ -+ - [[s-resource-sets-collocation]] - == Collocating Sets of Resources == - -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt -index 0f46bbd..7b7d2db 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt -@@ -7,7 +7,7 @@ with the rest of cluster options is simply a matter of parsing. These - options are used by the configuration database which is, by design, - mostly ignorant of the content it holds. So the decision was made to - place them in an easy to find location. -- -+ - == Configuration Version == - - indexterm:[Configuration Version,Cluster] -@@ -34,7 +34,7 @@ _Never set this value to zero_, in such cases the cluster cannot tell - the difference between your configuration and the "empty" one used - when nothing is found on disk. 
- --| epoch | -+| epoch | - indexterm:[epoch,Cluster Option] - indexterm:[Cluster,Option,epoch] - Incremented every time the configuration is updated (usually by the admin) -@@ -43,7 +43,7 @@ Incremented every time the configuration is updated (usually by the admin) - indexterm:[num_updates,Cluster Option] - indexterm:[Cluster,Option,num_updates] - Incremented every time the configuration or status is updated (usually by the cluster) -- -+ - |========================================================= - - == Other Fields == -@@ -51,7 +51,7 @@ Incremented every time the configuration or status is updated (usually by the cl - [width="95%",cols="2m,5<",options="header",align="center"] - |========================================================= - |Field |Description -- -+ - | validate-with | - indexterm:[validate-with,Cluster Option] - indexterm:[Cluster,Option,validate-with] - Determines the type of validation being done on the configuration. If - set to "none", the cluster will not verify that updates conform to the - DTD (nor reject ones that don't). This option can be useful when - operating a mixed version cluster during an upgrade. -- -+ - |========================================================= - - == Fields Maintained by the Cluster == -@@ -69,7 +69,7 @@ operating a mixed version cluster during an upgrade. - |========================================================= - |Field |Description - --|cib-last-written | -+|cib-last-written | - indexterm:[cib-last-written,Cluster Property] - indexterm:[Cluster,Property,cib-last-written] - Indicates when the configuration was last written to disk. Informational purposes only. -@@ -87,7 +87,28 @@ indexterm:[Cluster,Property,have-quorum] - Indicates if the cluster has quorum. If false, this may mean that the - cluster cannot start resources or fence other nodes. See - +no-quorum-policy+ below. -- -+ -+| dc-version | -+indexterm:[dc-version,Cluster Property] -+indexterm:[Cluster,Property,dc-version] -+Version of Pacemaker on the cluster's DC. -+ -+Often includes the hash which identifies the exact Git changeset it -+was built from. Used for diagnostic purposes. -+ -+| cluster-infrastructure | -+indexterm:[cluster-infrastructure,Cluster Property] -+indexterm:[Cluster,Property,cluster-infrastructure] -+The messaging stack on which Pacemaker is currently running. -+Used for informational and diagnostic purposes. -+ -+| expected-quorum-votes | -+indexterm:[expected-quorum-votes,Cluster Property] -+indexterm:[Cluster,Property,expected-quorum-votes] -+The number of nodes expected to be in the cluster -+ -+Used to calculate quorum in Corosync 1.x (not CMAN) based clusters. -+ - |========================================================= - - Note that although these fields can be written to by the admin, in -@@ -130,7 +151,7 @@ For now we will describe the simple case where each option is present at most on - [width="95%",cols="5m,2,11<",options="header",align="center"] - |========================================================= - |Option |Default |Description -- -+ - | batch-limit | 30 | - indexterm:[batch-limit,Cluster Option] - indexterm:[Cluster,Option,batch-limit] -@@ -157,7 +178,7 @@ What to do when the cluster does not have quorum. Allowed values: - - * suicide - fence all nodes in the affected cluster partition - --| symmetric-cluster | TRUE | -+| symmetric-cluster | TRUE | - indexterm:[symmetric-cluster,Cluster Option] - indexterm:[Cluster,Option,symmetric-cluster] - Can all resources run on any node by default?
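To ground the three cluster-maintained properties documented above, here is a sketch of how they typically appear in the crm_config section alongside an ordinary administrator-set option. All ids and values are illustrative only:

[source,XML]
-------
<crm_config>
  <cluster_property_set id="cib-bootstrap-options">
    <!-- maintained by the cluster -->
    <nvpair id="opt-dc-version" name="dc-version" value="1.1.8-abc123de"/>
    <nvpair id="opt-stack" name="cluster-infrastructure" value="corosync"/>
    <nvpair id="opt-votes" name="expected-quorum-votes" value="3"/>
    <!-- set by the administrator -->
    <nvpair id="opt-quorum" name="no-quorum-policy" value="stop"/>
  </cluster_property_set>
</crm_config>
-------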
-@@ -170,7 +191,7 @@ shot? If you value your data, set up a STONITH device and enable this. - - If true, or unset, the cluster will refuse to start resources unless - one or more STONITH resources have been configured also. -- -+ - | stonith-action | reboot | - indexterm:[stonith-action,Cluster Option] - indexterm:[Cluster,Option,stonith-action] -@@ -185,28 +206,33 @@ Round trip delay over the network (excluding action execution). The - "correct" value will depend on the speed and load of your network and - cluster nodes. - --| stop-orphan-resources | TRUE | -+| stop-all-resources | FALSE | -+indexterm:[stop-all-resources,Cluster Option] -+indexterm:[Cluster,Option,stop-all-resources] -+Should the cluster stop all stop -+ -+| resources-orphan-resources | TRUE | - indexterm:[stop-orphan-resources,Cluster Option] - indexterm:[Cluster,Option,stop-orphan-resources] - Should deleted resources be stopped? - --| stop-orphan-actions | TRUE | -+| stop-orphan-actions | TRUE | - indexterm:[stop-orphan-actions,Cluster Option] - indexterm:[Cluster,Option,stop-orphan-actions] - Should deleted actions be cancelled? - --| start-failure-is-fatal | TRUE | -+| start-failure-is-fatal | TRUE | - indexterm:[start-failure-is-fatal,Cluster Option] - indexterm:[Cluster,Option,start-failure-is-fatal] - When set to FALSE, the cluster will instead use the resource's - +failcount+ and value for +resource-failure-stickiness+. - --| pe-error-series-max | -1 (all) | -+| pe-error-series-max | -1 (all) | - indexterm:[pe-error-series-max,Cluster Option] - indexterm:[Cluster,Option,pe-error-series-max] - The number of PE inputs resulting in ERRORs to save. Used when reporting problems. - --| pe-warn-series-max | -1 (all) | -+| pe-warn-series-max | -1 (all) | - indexterm:[pe-warn-series-max,Cluster Option] - indexterm:[Cluster,Option,pe-warn-series-max] - The number of PE inputs resulting in WARNINGs to save. Used when reporting problems. -@@ -215,12 +241,87 @@ The number of PE inputs resulting in WARNINGs to save. Used when reporting probl - indexterm:[pe-input-series-max,Cluster Option] - indexterm:[Cluster,Option,pe-input-series-max] - The number of "normal" PE inputs to save. Used when reporting problems. -- -+ -+|default-resource-stickiness | 0 | -+indexterm:[default-resource-stickiness,Cluster Option] -+indexterm:[Cluster,Option,default-resource-stickiness] -++Deprecated:+ See <> instead -+ -+| is-managed-default | TRUE | -+indexterm:[is-managed-default,Cluster Option] -+indexterm:[Cluster,Option,is-managed-default] -++Deprecated:+ See <> instead -+ -+| maintenance-mode | FALSE | -+indexterm:[maintenance-mode,Cluster Option] -+indexterm:[Cluster,Option,maintenance-mode] -+Should the cluster monitor resources and start/stop them as required -+ -+| stonith-timeout | 60s | -+indexterm:[stonith-timeout,Cluster Option] -+indexterm:[Cluster,Option,stonith-timeout] -+How long to wait for the STONITH action to complete -+ -+| default-action-timeout | 20s | -+indexterm:[default-action-timeout,Cluster Option] -+indexterm:[Cluster,Option,default-action-timeout] -++Deprecated:+ See <> instead -+ -+| dc-deadtime | 20s | -+indexterm:[dc-deadtime,Cluster Option] -+indexterm:[Cluster,Option,dc-deadtime] -+How long to wait for a response from other nodes during startup. -+ -+The "correct" value will depend on the speed/load of your network and the type of switches used. 
-+ -+| cluster-recheck-interval | 15min | -+indexterm:[cluster-recheck-interval,Cluster Option] -+indexterm:[Cluster,Option,cluster-recheck-interval] -+Polling interval for time based changes to options, resource parameters and constraints. -+ -+The Cluster is primarily event driven, however the configuration can have elements that change based on time. To ensure these changes take effect, we can optionally poll the cluster's status for changes. -+ -+Allowed values: Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min) -+ -+| election-timeout | 2min | -+indexterm:[election-timeout,Cluster Option] -+indexterm:[Cluster,Option,election-timeout] -++Advanced Use Only+ -+ -+If need to adjust this value, it probably indicates the presence of a bug. -+ -+| shutdown-escalation | 20min | -+indexterm:[shutdown-escalation,Cluster Option] -+indexterm:[Cluster,Option,shutdown-escalation] -++Advanced Use Only+ -+ -+If need to adjust this value, it probably indicates the presence of a bug. -+ -+| crmd-integration-timeout | 3min | -+indexterm:[crmd-integration-timeout,Cluster Option] -+indexterm:[Cluster,Option,crmd-integration-timeout] -++Advanced Use Only+ -+ -+If need to adjust this value, it probably indicates the presence of a bug. -+ -+| crmd-finalization-timeout | 30min | -+indexterm:[crmd-finalization-timeout,Cluster Option] -+indexterm:[Cluster,Option,crmd-finalization-timeout] -++Advanced Use Only+ -+ -+If need to adjust this value, it probably indicates the presence of a bug. -+ -+| crmd-transition-delay | | -+indexterm:[crmd-transition-delay,Cluster Option] -+indexterm:[Cluster,Option,crmd-transition-delay] -++Advanced Use Only+ Enabling this option will slow down cluster recovery under all conditions. -+ -+Delay cluster recovery for the configured interval to allow for additional/related events to occur. Useful if your configuration is sensitive to the order in which ping updates arrive. -+ - |========================================================= - - You can always obtain an up-to-date list of cluster options, including --their default values, by running the `pengine --metadata` command. -+their default values, by running the `man pengine` and `man crmd` commands. - - == Querying and Setting Cluster Options == - -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt -index 8eacb05..3436bf8 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt -@@ -12,7 +12,7 @@ The cluster doesn't need to understand how the resource works because - it relies on the resource agent to do the right thing when given a - +start+, +stop+ or +monitor+ command. - --For this reason it is crucial that resource agents are well tested. -+For this reason it is crucial that resource agents are well tested. - - Typically resource agents come in the form of shell scripts, however - they can be written using any technology (such as C, Python or Perl) -@@ -23,7 +23,7 @@ that the author is comfortable with. - - indexterm:[Resource,class] - --There are five classes of agents supported by Pacemaker: -+There are six classes of agents supported by Pacemaker: - - * OCF - * LSB -@@ -31,6 +31,7 @@ There are five classes of agents supported by Pacemaker: - * Systemd - * Fencing - * Service -+* Nagios - - indexterm:[Resource,Heartbeat] - indexterm:[Heartbeat,Resources] -@@ -83,7 +84,7 @@ of as ip it will be passed to the script as +OCF_RESKEY_ip+. 
The - number and purpose of the parameters is completely arbitrary, however - your script should advertise any that it supports using the - +meta-data+ command. -- -+ - - The OCF class is the most preferred one as it is an industry standard, - highly flexible (allowing parameters to be passed to agents in a -@@ -183,6 +184,23 @@ There is also an additional class, STONITH, which is used exclusively - for fencing related resources. This is discussed later in - <>. - -+=== Nagios Plugins === -+indexterm:[Resource,Nagios Plugins] -+indexterm:[Nagios Plugins,Resources] -+ -+Nagios plugins allow us to monitor services on the remote hosts. -+http://nagiosplugins.org[Nagios Plugins]. -+ -+Pacemaker is able to do remote monitoring with the plugins _if they are -+present_. -+ -+An use case is to configure them as resources belonging to a resource -+container, which usually is a VM, and the container will be restarted -+if any of them has failed. While they can also be configured as ordinary -+resources to be just used for monitoring hosts or services via network. -+ -+The supported parameters are same as the long options of a nagios plugin. -+ - [[primitive-resource]] - == Resource Properties == - -@@ -235,7 +253,7 @@ might produce: - - ===== - --[NOTE] -+[NOTE] - ===== - One of the main drawbacks to system services (such as LSB, Systemd and - Upstart) resources is that they do not allow any parameters! -@@ -267,7 +285,7 @@ behave and can be easily set using the `--meta` option of the - |Field - |Default - |Description -- -+ - |priority - |+0+ - |If not all resources can be active, the cluster will stop lower -@@ -356,6 +374,22 @@ indexterm:[Resource,Option,target-role] - indexterm:[multiple-active,Resource Option] - indexterm:[Resource,Option,multiple-active] - -+|remote-node -+|++ (disabled) -+|The name of the remote-node this resource defines. This both enables the resource as a remote-node and defines the unique name used to identify the remote-node. If no other parameters are set, this value will also be assumed as the hostname to connect to at port 3121. +WARNING+ This value cannot overlap with any resource or node IDs. -+ -+|remote-port -+|+3121+ -+|Configure a custom port to use for the guest connection to pacemaker_remote. -+ -+|remote-addr -+|+remote-node+ value used as hostname -+|The ip address or hostname to connect to if remote-node's name is not the hostname of the guest. -+ -+|+remote-connect-timeout+ -+|+60s+ -+|How long before a pending guest connection will time out. -+ - |========================================================= - - If you performed the following commands on the previous LSB Email resource -@@ -428,7 +462,7 @@ The list of instance attributes supported by an OCF script can be - found by calling the resource script with the `meta-data` command. - The output contains an XML description of all the supported - attributes, their purpose and default values. -- -+ - .Displaying the metadata for the Dummy resource agent template - ===== - [source,C] -@@ -442,14 +476,14 @@ attributes, their purpose and default values. - - - 1.0 -- -+ - - This is a Dummy Resource Agent. It does absolutely nothing except - keep track of whether its running or not. - Its purpose in life is for testing and to serve as a template for RA writers. - - Dummy resource agent -- -+ - - - -@@ -458,7 +492,7 @@ attributes, their purpose and default values. - State file - - -- -+ - - - Dummy attribute that can be changed to cause a reload -@@ -467,7 +501,7 @@ attributes, their purpose and default values. 
- - - -- -+ - - - -@@ -491,7 +525,7 @@ indexterm:[Resource,Action] - By default, the cluster will not ensure your resources are still - healthy. To instruct the cluster to do this, you need to add a - +monitor+ operation to the resource's definition. -- -+ - .An OCF resource with a recurring health check - ===== - [source,XML] -@@ -575,7 +609,7 @@ To set a default value for a operation option, simply add it to the - would default each operation's +timeout+ to 20 seconds. If an - operation's definition also includes a value for +timeout+, then that - value would be used instead (for that operation only). -- -+ - ==== When Resources Take a Long Time to Start/Stop ==== - - There are a number of implicit operations that the cluster will always -@@ -584,7 +618,7 @@ perform - +start+, +stop+ and a non-recurring +monitor+ operation - of these is taking too long, then you can create an entry for them and - simply specify a new value. - --.An OCF resource with custom timeouts for its implicit actions -+.An OCF resource with custom timeouts for its implicit actions - ===== - [source,XML] - ------- -@@ -613,11 +647,11 @@ provide each monitor with a different value for a common parameter. - The OCF standard creates a special parameter called +OCF_CHECK_LEVEL+ - for this purpose and dictates that it is _"made available to the - resource agent without the normal +OCF_RESKEY+ prefix"_. -- -+ - Whatever name you choose, you can specify it by adding an - +instance_attributes+ block to the op tag. Note that it is up to each - resource agent to look for the parameter and decide how to use it. -- -+ - .An OCF resource with two recurring health checks, performing different levels of checks - specified via +OCF_CHECK_LEVEL+. - ===== - [source,XML] -@@ -649,7 +683,7 @@ However, there can be times when you only want to disable it - temporarily. In such cases, simply add +enabled="false"+ to the - operation's definition. - --.Example of an OCF resource with a disabled health check -+.Example of an OCF resource with a disabled health check - ===== - [source,XML] - ------- -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt -index 1df1b9f..f6108a1 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt -@@ -1,6 +1,6 @@ - = Configure STONITH = - --//// -+//// - We prefer [[ch-stonith]], but older versions of asciidoc dont deal well - with that construct for chapter headings - //// -@@ -9,7 +9,6 @@ indexterm:[STONITH, Configuration] - - == What Is STONITH == - -- - STONITH is an acronym for Shoot-The-Other-Node-In-The-Head and it - protects your data from being corrupted by rogue nodes or concurrent - access. -@@ -39,38 +38,241 @@ from a network fault. - Likewise, any device that relies on the machine being active (such as - SSH-based "devices" used during testing) are inappropriate. - --== Configuring STONITH == -+== Differences of STONITH Resources == -+ -+Stonith resources are somewhat special in Pacemaker. -+ -+In previous versions, only "running" resources could be used by -+Pacemaker for fencing. This requirement has been relaxed to allow -+other parts of the cluster (such as resources like DRBD) to reliably -+initiate fencing. 
footnote:[Fencing a node while Pacemaker was moving -+stonith resources around would otherwise fail] -+ -+Now all nodes have access to their definitions and instantiate them -+on-the-fly when needed, however preference is given to 'verified' -+instances which are the ones the cluster has explicitly started. -+ -+In the case of a cluster split, the partition with a verified instance -+will have a slight advantage as stonith-ng in the other partition will -+have to hear from all its current peers before choosing a node to -+perform the fencing. -+ -+[NOTE] -+=========== -+To disable a fencing device/resource, 'target-role' can be set as you would for a normal resource. -+=========== -+ -+[NOTE] -+=========== -+To prevent a specific node from using a fencing device, location constraints will work as expected. -+=========== -+ -+[IMPORTANT] -+=========== -+ -+Currently there is a limitation that fencing resources may only have -+one set of meta-attributes and one set of instance-attributes. This -+can be revisited if it becomes a significant limitation for people. -+ -+=========== -+ -+.Properties of Fencing Devices -+[width="95%",cols="1m,1m,1m,5<",options="header",align="center"] -+|========================================================= -+ -+|Field -+|Type -+|Default -+|Description -+ -+|stonith-timeout -+|time -+|60s -+|How long to wait for the STONITH action to complete per stonith device. -+ Overrides the stonith-timeout cluster property -+ indexterm:[stonith-timeout,Fencing] -+ indexterm:[Fencing,Property,stonith-timeout] -+ -+|priority -+|integer -+|0 -+|The priority of the stonith resource. Devices are tried in order of highest priority to lowest. -+ indexterm:[priority,Fencing] -+ indexterm:[Fencing,Property,priority] -+ -+|pcmk_host_argument -+|string -+|port -+|Advanced use only: An alternate parameter to supply instead of 'port' -+ Some devices do not support the standard 'port' parameter or may provide additional ones. Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced. A value of 'none' can be used to tell the cluster not to supply any additional parameters. -+ indexterm:[pcmk_host_argument,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_argument] -+ -+|pcmk_host_map -+|string -+| -+|A mapping of host names to port numbers for devices that do not support host names. -+ Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2 -+ indexterm:[pcmk_host_map,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_map] -+ -+|pcmk_host_list -+|string -+| -+|A list of machines controlled by this device (Optional unless pcmk_host_check=static-list). -+ indexterm:[pcmk_host_list,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_list] -+ -+|pcmk_host_check -+|string -+|dynamic-list -+|How to determine which machines are controlled by the device. -+ Allowed values: dynamic-list (query the device), static-list (check the pcmk_host_list attribute), none (assume every device can fence every machine) -+ indexterm:[pcmk_host_check,Fencing] -+ indexterm:[Fencing,Property,pcmk_host_check] -+ -+|pcmk_reboot_action -+|string -+|reboot -+|Advanced use only: An alternate command to run instead of 'reboot' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'reboot' action.
-+ indexterm:[pcmk_reboot_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_reboot_action] -+ -+|pcmk_reboot_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'reboot' actions. -+ indexterm:[pcmk_reboot_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_reboot_timeout] -+ -+|pcmk_reboot_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'reboot' actions before giving up. -+ indexterm:[pcmk_reboot_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_reboot_retries] -+ -+|pcmk_off_action -+|string -+|off -+|Advanced use only: An alternate command to run instead of 'off' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'off' action. -+ indexterm:[pcmk_off_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_off_action] -+ -+|pcmk_off_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'off' actions. -+ indexterm:[pcmk_off_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_off_timeout] -+ -+|pcmk_off_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'off' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'off' actions before giving up. -+ indexterm:[pcmk_off_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_off_retries] -+ -+|pcmk_list_action -+|string -+|list -+|Advanced use only: An alternate command to run instead of 'list' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'list' action. -+ indexterm:[pcmk_list_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_list_action] -+ -+|pcmk_list_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'list' actions. -+ indexterm:[pcmk_list_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_list_timeout] -+ -+|pcmk_list_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'list' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'list' actions before giving up. 
-+ indexterm:[pcmk_list_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_list_retries] -+ -+|pcmk_monitor_action -+|string -+|monitor -+|Advanced use only: An alternate command to run instead of 'monitor' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'monitor' action. -+ indexterm:[pcmk_monitor_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_monitor_action] -+ -+|pcmk_monitor_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'monitor' actions. -+ indexterm:[pcmk_monitor_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_monitor_timeout] -+ -+|pcmk_monitor_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'monitor' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'monitor' actions before giving up. -+ indexterm:[pcmk_monitor_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_monitor_retries] -+ -+|pcmk_status_action -+|string -+|status -+|Advanced use only: An alternate command to run instead of 'status' -+ Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'status' action. -+ indexterm:[pcmk_status_action,Fencing] -+ indexterm:[Fencing,Property,pcmk_status_action] -+ -+|pcmk_status_timeout -+|time -+|60s -+|Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout -+ Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'status' actions. -+ indexterm:[pcmk_status_timeout,Fencing] -+ indexterm:[Fencing,Property,pcmk_status_timeout] -+ -+|pcmk_status_retries -+|integer -+|2 -+|Advanced use only: The maximum number of times to retry the 'status' command within the timeout period -+ Some devices do not support multiple connections. Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries 'status' actions before giving up. -+ indexterm:[pcmk_status_retries,Fencing] -+ indexterm:[Fencing,Property,pcmk_status_retries] -+ -+|========================================================= - --ifdef::pcs[] --. Find the correct driver: +pcs stonith list+ -- --. Find the parameters associated with the device: +pcs stonith describe + -+== Configuring STONITH == - --. Create a local config to make changes to +pcs cluster cib stonith_cfg+ -+[NOTE] -+=========== - --. Create the fencing resource using +pcs -f stonith_cfg stonith create -- [stonith device options]+ -+Both configuration shells include functionality to simplify the -+process below, particularly the step for deciding which parameters are -+required. However since this document deals only with core -+components, you should refer to the Stonith chapter of +Clusters from -+Scratch+ for those details. - --. Set stonith-enable to true. 
+pcs -f stonith_cfg property set stonith-enabled=true+
--endif::pcs[]
-+===========
-
--ifdef::crmsh[]
- . Find the correct driver: +stonith_admin --list-installed+
-
--. Since every device is different, the parameters needed to configure
--  it will vary. To find out the parameters associated with the device,
--  run: +stonith_admin --metadata --agent type+
-+. Find the required parameters associated with the device: +stonith_admin --metadata --agent <type>+
-
--  The output should be XML formatted text containing additional
--  parameter descriptions. We will endevor to make the output more
--  friendly in a later version.
--
--. Enter the shell crm Create an editable copy of the existing
--  configuration +cib new stonith+ Create a fencing resource containing a
--  primitive resource with a class of stonith, a type of type and a
--  parameter for each of the values returned in step 2: +configure
--  primitive ...+
--endif::crmsh[]
-+. Create a file called +stonith.xml+ containing a primitive resource
-+  with a class of 'stonith', a type of <type>, and a parameter
-+  for each of the values returned in step 2.
-
- . If the device does not know how to fence nodes based on their uname,
-   you may also need to set the special +pcmk_host_map+ parameter.  See
-@@ -84,19 +286,15 @@ endif::crmsh[]
-   port parameter, you may also need to set the special
-   +pcmk_host_argument+ parameter. See +man stonithd+ for details.
-
--ifdef::crmsh[]
--. Upload it into the CIB from the shell: +cib commit stonith+
--endif::crmsh[]
-+. Upload it into the CIB using cibadmin: +cibadmin -C -o resources --xml-file stonith.xml+
-
--ifdef::pcs[]
--. Commit the new configuration. +pcs cluster push cib stonith_cfg+
--endif::pcs[]
-+. Set stonith-enabled to true. +crm_attribute -t crm_config -n stonith-enabled -v true+
-
- . Once the stonith resource is running, you can test it by executing
-   +stonith_admin --reboot nodename+, although you might want to stop the
-   cluster on that machine first.
-
--== Example ==
-+=== Example ===
-
- Assuming we have a chassis containing four nodes and an IPMI device
- active on 10.0.0.1, then we would choose the fence_ipmilan driver in step
-@@ -104,33 +302,11 @@ active on 10.0.0.1, then we would choose the fence_ipmilan driver in step
-
- .Obtaining a list of STONITH Parameters
-
--ifdef::pcs[]
--[source,Bash]
------
--# pcs stonith describe fence_ipmilan
--Stonith options for: fence_ipmilan
--  auth: IPMI Lan Auth type (md5, password, or none)
--  ipaddr: IPMI Lan IP to talk to
--  passwd: Password (if required) to control power on IPMI device
--  passwd_script: Script to retrieve password (if required)
--  lanplus: Use Lanplus
--  login: Username/Login (if required) to control power on IPMI device
--  action: Operation to perform.
Valid operations: on, off, reboot, status, list, diag, monitor or metadata -- timeout: Timeout (sec) for IPMI operation -- cipher: Ciphersuite to use (same as ipmitool -C parameter) -- method: Method to fence (onoff or cycle) -- power_wait: Wait X seconds after on/off operation -- delay: Wait X seconds before fencing is started -- privlvl: Privilege level on IPMI device -- verbose: Verbose mode ------ --endif::pcs[] -- --ifdef::crmsh[] - [source,C] - ---- - # stonith_admin --metadata -a fence_ipmilan - ---- -+ - [source,XML] - ---- - -@@ -218,97 +394,107 @@ To use fence_ipmilan with HP iLO 3 you have to enable lanplus option (lanplus / - - - ---- --endif::crmsh[] - - from which we would create a STONITH resource fragment that might look --like this -+like this: - - .Sample STONITH Resource --ifdef::pcs[] --[source,Bash] ------ --# pcs cluster cib stonith_cfg --# pcs -f stonith_cfg stonith create impi-fencing fence_ipmilan \ -- pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser \ -- passwd=acd123 op monitor interval=60s --# pcs -f stonith_cfg stonith -- impi-fencing (stonith:fence_ipmilan) Stopped ------ --endif::pcs[] -- --ifdef::crmsh[] --[source,Bash] -+[source,XML] - ---- --# crm crm(live)# cib new stonith --INFO: stonith shadow CIB created --crm(stonith)# configure primitive impi-fencing stonith::fence_ipmilan \ -- params pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser passwd=abc123 \ -- op monitor interval="60s" -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - ---- --endif::crmsh[] - - And finally, since we disabled it earlier, we need to re-enable STONITH. --At this point we should have the following configuration. - --ifdef::pcs[] - [source,Bash] - ---- --# pcs -f stonith_cfg property set stonith-enabled=true --# pcs -f stonith_cfg property --dc-version: 1.1.8-1.el7-60a19ed12fdb4d5c6a6b6767f52e5391e447fec0 --cluster-infrastructure: corosync --no-quorum-policy: ignore --stonith-enabled: true -+# crm_attribute -t crm_config -n stonith-enabled -v true - ---- --endif::pcs[] - --Now push the configuration into the cluster. -+== Advanced Fencing Configurations == - --ifdef::pcs[] --[source,C] ------ --# pcs cluster push cib stonith_cfg ------ --endif::pcs[] -+Some people consider that having one fencing device is a single point -+of failure footnote:[Not true, since a node or resource must fail -+before fencing even has a chance to], others prefer removing the node -+from the storage and network instead of turning it off. - --ifdef::crmsh[] --[source,Bash] -+Whatever the reason, Pacemaker supports fencing nodes with multiple -+devices through a feature called fencing topologies. -+ -+Simply create the individual devices as you normally would and then -+define one or more fencing levels in the fencing-topology section in -+the configuration. -+ -+* Each level is attempted in +ascending index+ order -+* If a device fails, +processing terminates+ for the current level. -+ No further devices in that level are exercised and the next level is attempted instead. -+* If the operation succeeds for all the listed devices in a level, the level is deemed to have passed -+* The operation is finished +when a level has passed+ (success), or all levels have been attempted (failed) -+* If the operation failed, the next step is determined by the Policy Engine and/or crmd. 
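-+
-+For instance, with two levels defined for a node, level 1 is tried
-+first and level 2 only if level 1 fails. A minimal sketch of what the
-+XML might look like follows; the ids, target, and device names are
-+placeholders, and loading the fragment with cibadmin as shown is one
-+plausible route (assuming cibadmin accepts the 'configuration' scope
-+here):
-+
-+[source,Bash]
-+----
-+# cat <<'EOF' > topology.xml
-+<fencing-topology>
-+  <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill"/>
-+  <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power"/>
-+</fencing-topology>
-+EOF
-+# cibadmin -C -o configuration --xml-file topology.xml
-+----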
-+ -+Some possible uses of topologies include: -+ -+* try poison-pill and fail back to power -+* try disk and network, and fall back to power if either fails -+* initiate a kdump and then poweroff the node -+ -+.Properties of Fencing Levels -+[width="95%",cols="1m,6<",options="header",align="center"] -+|========================================================= -+ -+|Field -+|Description -+ -+|id -+|Your name for the level -+ indexterm:[id,fencing-level] -+ indexterm:[Fencing,fencing-level,id] -+ -+|target -+|The node to which this level applies -+ indexterm:[target,fencing-level] -+ indexterm:[Fencing,fencing-level,target] -+ -+|index -+|The order in which to attempt the levels. -+ Levels are attempted in +ascending index+ order +until one succeeds+. -+ indexterm:[index,fencing-level] -+ indexterm:[Fencing,fencing-level,index] -+ -+|devices -+|A comma separated list of devices for which the -+ indexterm:[devices,fencing-level] -+ indexterm:[Fencing,fencing-level,devices] -+ -+|========================================================= -+ -+.Example use of Fencing Topologies -+[source,XML] - ---- --crm(stonith)# configure property stonith-enabled="true" --crm(stonith)# configure shownode pcmk-1 --node pcmk-2 --primitive WebData ocf:linbit:drbd \ -- params drbd_resource="wwwdata" \ -- op monitor interval="60s" --primitive WebFS ocf:heartbeat:Filesystem \ -- params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2" --primitive WebSite ocf:heartbeat:apache \ -- params configfile="/etc/httpd/conf/httpd.conf" \ -- op monitor interval="1min" --primitive ClusterIP ocf:heartbeat:IPaddr2 \ -- params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \ -- op monitor interval="30s"primitive ipmi-fencing stonith::fence_ipmilan \ params pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser passwd=abc123 \ op monitor interval="60s"ms WebDataClone WebData \ -- meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" --clone WebFSClone WebFS --clone WebIP ClusterIP \ -- meta globally-unique="true" clone-max="2" clone-node-max="2" --clone WebSiteClone WebSite --colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone --colocation fs_on_drbd inf: WebFSClone WebDataClone:Master --colocation website-with-ip inf: WebSiteClone WebIP --order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start --order WebSite-after-WebFS inf: WebFSClone WebSiteClone --order apache-after-ip inf: WebIP WebSiteClone --property $id="cib-bootstrap-options" \ -- dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \ -- cluster-infrastructure="openais" \ -- expected-quorum-votes="2" \ -- stonith-enabled="true" \ -- no-quorum-policy="ignore" --rsc_defaults $id="rsc-options" \ -- resource-stickiness="100" --crm(stonith)# cib commit stonithINFO: commited 'stonith' shadow CIB to the cluster --crm(stonith)# quit --bye -+ -+ -+ ... -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ ... 
-+ -+ -+ - ---- --endif::crmsh[] -diff --git a/doc/Pacemaker_Explained/en-US/Revision_History.xml b/doc/Pacemaker_Explained/en-US/Revision_History.xml -index a351d9c..0afc90b 100644 ---- a/doc/Pacemaker_Explained/en-US/Revision_History.xml -+++ b/doc/Pacemaker_Explained/en-US/Revision_History.xml -@@ -6,19 +6,19 @@ - - - -- 1 -+ 1-0 - 19 Oct 2009 - AndrewBeekhofandrew@beekhof.net - Import from Pages.app - - -- 2 -+ 2-0 - 26 Oct 2009 - AndrewBeekhofandrew@beekhof.net - Cleanup and reformatting of docbook xml complete - - -- 3 -+ 3-0 - Tue Nov 12 2009 - AndrewBeekhofandrew@beekhof.net - -@@ -29,7 +29,7 @@ - - - -- 4 -+ 4-0 - Mon Oct 8 2012 - AndrewBeekhofandrew@beekhof.net - -diff --git a/doc/Pacemaker_Remote/en-US/Author_Group.xml b/doc/Pacemaker_Remote/en-US/Author_Group.xml -new file mode 100644 -index 0000000..3d9056e ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Author_Group.xml -@@ -0,0 +1,11 @@ -+ -+ -+ -+ -+ DavidVossel -+ Red Hat -+ Primary author -+ dvossel@redhat.com -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Book_Info.xml b/doc/Pacemaker_Remote/en-US/Book_Info.xml -new file mode 100644 -index 0000000..426599e ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Book_Info.xml -@@ -0,0 +1,56 @@ -+ -+ -+%BOOK_ENTITIES; -+]> -+ -+ Pacemaker Remote -+ Extending High Availablity into Virtual Nodes -+ 1 -+ 0 -+ -+ -+ The document exists as both a reference and deployment guide for the Pacemaker Remote service. -+ -+ -+ The KVM and Linux Container walk-through tutorials will use: -+ -+ -+ -+ &DISTRO; &DISTRO_VERSION; as the host operating system -+ -+ -+ -+ -+ Pacemaker Remote to perform resource management within virtual nodes -+ -+ -+ -+ -+ libvirt to manage KVM and LXC virtual nodes -+ -+ -+ -+ -+ Corosync to provide messaging and membership services on the host nodes -+ -+ -+ -+ -+ Pacemaker to perform resource management on host nodes -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Ch-Example.txt b/doc/Pacemaker_Remote/en-US/Ch-Example.txt -new file mode 100644 -index 0000000..ca94044 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-Example.txt -@@ -0,0 +1,107 @@ -+= Quick Example = -+ -+If you already know how to use pacemaker, you'll likely be able to grasp this new concept of remote-nodes by reading through this quick example without having to sort through all the detailed walk-through steps. Here are the key configuration ingredients that make this possible using libvirt and KVM virtual guests. These steps strip everything down to the very basics. -+ -+== Mile High View of Configuration Steps == -+ -+* +Put an authkey with this path, /etc/pacemaker/authkey, on every cluster-node and virtual machine+. This secures remote communication and authentication. -+ -+Run this command if you want to make a somewhat random authkey. -+ -+[source,C] -+---- -+dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 -+---- -+ -+* +Install pacemaker_remote packages every virtual machine, enable pacemaker_remote on startup, and poke hole in firewall for tcp port 3121.+ -+ -+[source,C] -+---- -+yum install pacemaker-remote resource-agents -+systemctl enable pacemaker_remote -+# If you just want to see this work, disable iptables and ip6tables on most distros. -+# You may have to put selinux in permissive mode as well for the time being. 
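-+# For example, on the systemd-based distros this tutorial targets
-+# (hypothetical convenience commands, adjust for your distro):
-+#   systemctl stop iptables.service ip6tables.service
-+#   setenforce 0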
-+firewall-cmd --add-port 3121/tcp --permanent -+---- -+ -+* +Give each virtual machine a static network address and unique hostname+ -+ -+* +Tell pacemaker to launch a virtual machine and that the virtual machine is a remote-node capable of running resources by using the "remote-node" meta-attribute.+ -+ -+with pcs -+ -+[source,C] -+---- -+# pcs resource create vm-guest1 VirtualDomain hypervisor="qemu:///system" config="vm-guest1.xml" meta +remote-node=guest1+ -+---- -+ -+raw xml -+[source,XML] -+---- -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+---- -+ -+In the example above the meta-attribute 'remote-node=guest1' tells pacemaker that this resource is a remote-node with the hostname 'guest1' that is capable of being integrated into the cluster. The cluster will attempt to contact the virtual machine's pacemaker_remote service at the hostname 'guest1' after it launches. -+ -+== What those steps just did == -+ -+Those steps just told pacemaker to launch a virtual machine called vm-guest1 and integrate that virtual machine as a remote-node called 'guest1'. -+ -+Example crm_mon output after guest1 is integrated into cluster. -+ -+[source,C] -+---- -+Last updated: Wed Mar 13 13:52:39 2013 -+Last change: Wed Mar 13 13:25:17 2013 via crmd on node1 -+Stack: corosync -+Current DC: node1 (24815808) - partition with quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+2 Resources configured. -+ -+Online: [ node1 guest1] -+ -+vm-guest1 (ocf::heartbeat:VirtualDomain): Started node1 -+---- -+ -+Now, you could place a resource, such as a webserver on guest1. -+ -+[source,C] -+---- -+# pcs resource create webserver apache params configfile=/etc/httpd/conf/httpd.conf op monitor interval=30s -+# pcs constraint webserver prefers guest1 -+---- -+ -+Now the crm_mon output would show a webserver launched on the guest1 remote-node. -+ -+[source,C] -+---- -+Last updated: Wed Mar 13 13:52:39 2013 -+Last change: Wed Mar 13 13:25:17 2013 via crmd on node1 -+Stack: corosync -+Current DC: node1 (24815808) - partition with quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+2 Resources configured. -+ -+Online: [ node1 guest1] -+ -+vm-guest1 (ocf::heartbeat:VirtualDomain): Started node1 -+webserver (ocf::heartbeat::apache): Started guest1 -+---- -+ -+== Accessing Cluster from Remote-node == -+ -+It is worth noting that after 'guest1' is integrated into the cluster, all the pacemaker cli tools immediately become available to the remote node. This means things like crm_mon, crm_resource, and crm_attribute will work natively on the remote-node as long as the connection between the remote-node and cluster-node exists. This is particularly important for any master/slave resources executing on the remote-node that need access to crm_master to set the nodes transient attributes. -+ -diff --git a/doc/Pacemaker_Remote/en-US/Ch-Future.txt b/doc/Pacemaker_Remote/en-US/Ch-Future.txt -new file mode 100644 -index 0000000..93c082f ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-Future.txt -@@ -0,0 +1,15 @@ -+= Future Features = -+ -+Basic KVM and Linux container integration was the first phase of development for pacemaker_remote and was completed for Pacemaker v1.1.10. Here are some planned features that expand upon this initial functionality. -+ -+== Libvirt Sandbox Support == -+ -+Once the libvirt-sandbox project is integrated with pacemaker_remote, we will gain the ability to preform per-resource linux container isolation with very little performance impact. 
This functionality will allow resources living on a single node to be isolated from one another. At that point CPU and memory limits could be set per-resource dynamically just using the cluster config. -+ -+== Bare-metal Support == -+ -+The pacemaker_remote daemon already has the ability to run on bare-metal hardware nodes, but the policy engine logic for integrating bare-metal nodes is not complete. There are some complications involved with understanding a bare-metal node's state that virtual nodes don't have. Once this logic is complete, pacemaker will be able to integrate bare-metal nodes in the same way virtual remote-nodes currently are. Some special considerations for fencing will need to be addressed. -+ -+== KVM Migration Support == -+ -+Pacemaker's policy engine is limited in its ability to perform live migrations of KVM resources when resource dependencies are involved. This limitation affects how resources living within a KVM remote-node are handled when a live migration takes place. Currently when a live migration is performed on a KVM remote-node, all the resources within that remote-node have to be stopped before the migration takes place and started once again after migration has finished. This policy engine limitation is fully explained in this bug report, http://bugs.clusterlabs.org/show_bug.cgi?id=5055#c3 -diff --git a/doc/Pacemaker_Remote/en-US/Ch-Intro.txt b/doc/Pacemaker_Remote/en-US/Ch-Intro.txt -new file mode 100644 -index 0000000..c7b3001 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-Intro.txt -@@ -0,0 +1,55 @@ -+= Extending High Availability Cluster into Virtual Nodes = -+ -+== Overview == -+The recent addition of the +pacemaker_remote+ service supported by +Pacemaker version 1.1.10 and greater+ allows nodes not running the cluster stack (pacemaker+corosync) to integrate into the cluster and have the cluster manage their resources just as if they were a real cluster node. This means that pacemaker clusters are now capable of managing both launching virtual environments (KVM/LXC) as well as launching the resources that live within those virtual environments without requiring the virtual environments to run pacemaker or corosync. -+ -+== Terms == -++cluster-node+ - A baremetal hardware node running the High Availability stack (pacemaker + corosync) -+ -++remote-node+ - A virtual guest node running the pacemaker_remote service. -+ -++pacemaker_remote+ - A service daemon capable of performing remote application management within virtual guests (kvm and lxc) in both pacemaker cluster environments and standalone (non-cluster) environments. This service is an enhanced version of pacemaker's local resource manage daemon (LRMD) that is capable of managing and monitoring LSB, OCF, upstart, and systemd resources on a guest remotely. It also allows for most of pacemaker's cli tools (crm_mon, crm_resource, crm_master, crm_attribute, ect..) to work natively on remote-nodes. -+ -++LXC+ - A Linux Container defined by the libvirt-lxc Linux container driver. http://libvirt.org/drvlxc.html -+ -+== Virtual Machine Use Case == -+The use of pacemaker_remote in virtual machines solves a deployment scenario that has traditionally been difficult to solve. -+ -++"I want a pacemaker cluster to manage virtual machine resources, but I also want pacemaker to be able to manage the resources that live within those virtual machines."+ -+ -+In the past, users desiring this deployment had to make a decision. 
They would either have to sacrifice the ability to monitor resources residing within virtual guests by running the cluster stack on the baremetal nodes, or run another cluster instance on the virtual guests where they potentially run into corosync scalability issues. There is a third scenario where the virtual guests run the cluster stack and join the same network as the baremetal nodes, but that can quickly hit issues with scalability as well.
-+
-+With the pacemaker_remote service we have a new option.
-+
-+* The baremetal cluster-nodes run the cluster stack (pacemaker+corosync).
-+* The virtual remote-nodes run the pacemaker_remote service (nearly zero configuration required on the virtual machine side).
-+* The cluster stack on the cluster-nodes launches the virtual machines and immediately connects to the pacemaker_remote service, allowing the virtual machines to integrate into the cluster just as if they were a real cluster-node.
-+
-+The key difference here between the virtual machine remote-nodes and the cluster-nodes is that the remote-nodes are not running the cluster stack. This means the remote-nodes will never become the DC, and they do not take part in quorum. On the other hand, this also means that the remote-nodes are not bound to the scalability limits associated with the cluster stack either. +No 16 node corosync member limits+ to deal with. That isn't to say remote-nodes can scale indefinitely, but the expectation is that remote-nodes scale horizontally much further than cluster-nodes. Other than the quorum limitation, these remote-nodes behave just like cluster nodes with respect to resource management. The cluster is fully capable of managing and monitoring resources on each remote-node. You can build constraints against remote-nodes, put them in standby, or whatever else you'd expect to be able to do with normal cluster-nodes. They even show up in the crm_mon output as you would expect cluster-nodes to.
-+
-+To solidify the concept, an example cluster deployment integrating remote-nodes could look like this.
-+
-+* 16 cluster-nodes running the corosync+pacemaker stack.
-+* 64 pacemaker-managed virtual machine resources running pacemaker_remote configured as remote-nodes.
-+* 64 pacemaker-managed webserver and database resources configured to run on the 64 remote-nodes.
-+
-+With this deployment, you would have 64 webservers and databases running on 64 virtual machines on 16 hardware nodes, all of which are managed and monitored by the same pacemaker deployment.
-+
-+== Linux Container Use Case ==
-+
-++I want to isolate and limit the system resources (cpu, memory, filesystem) a cluster resource can consume without using virtual machines.+
-+
-+Using pacemaker_remote with Linux containers (libvirt-lxc) opens up some interesting possibilities for isolating resources in the cluster without the use of a hypervisor. We now have the ability to define a contained environment with cpu and memory utilization limits, and then assign resources to that contained environment, all managed from within pacemaker. The LXC Walk-through section of this document outlines how pacemaker_remote can be used to bring Linux containers into the cluster as remote-nodes capable of executing resources.
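-+
-+As a taste of what this looks like in practice, a single resource definition is all it takes to pull a container in as a remote-node. The command below is the same one used later in the LXC walk-through; the config path and names are specific to that tutorial's generated environment.
-+
-+[source,C]
-+----
-+# pcs resource create container1 VirtualDomain force_stop="true" hypervisor="lxc:///" config="/root/lxc/lxc1.xml" meta remote-node=lxc1
-+----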
-+ -+== Expanding the Cluster Stack == -+ -+=== Traditional HA Stack === -+ -+image::images/pcmk-ha-cluster-stack.png["The Traditional Pacemaker Corosync HA Stack.",width="17cm",height="9cm",align="center"] -+ -+ -+=== Remote-Node Enabled HA Stack === -+ -+The stack grows one additional layer vertical so we can go further horizontal. -+ -+image::images/pcmk-ha-remote-stack.png["Placing Pacemaker Remote into the Traditional HA Stack.",width="20cm",height="10cm",align="center"] -diff --git a/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt b/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt -new file mode 100644 -index 0000000..fe00775 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt -@@ -0,0 +1,483 @@ -+= KVM Walk-through = -+ -++What this tutorial is:+ This tutorial is an in-depth walk-through of how to get pacemaker to manage a KVM guest instance and integrate that guest into the cluster as a remote-node. -+ -++What this tutorial is not:+ This tutorial is not a realistic deployment scenario. The steps shown here are meant to get users familiar with the concept of remote-nodes as quickly as possible. -+ -+== Step 1: Setup the Host == -+ -+This tutorial was created using Fedora 18 on the host and guest nodes. Anything that is capable of running libvirt and pacemaker v1.1.10 or greater will do though. An installation guide for installing Fedora 18 can be found here, http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/. -+ -+Fedora 18 (or similar distro) host preparation steps. -+ -+=== SElinux and Firewall === -+In order to simply this tutorial we will disable the selinux and the firewall on the host. -++WARNING:+ These actions will open a significant security threat to machines exposed to the outside world. -+[source,C] -+---- -+# setenforce 0 -+# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config -+# systemctl disable iptables.service -+# systemctl disable ip6tables.service -+# rm '/etc/systemd/system/basic.target.wants/iptables.service' -+# rm '/etc/systemd/system/basic.target.wants/ip6tables.service' -+# systemctl stop iptables.service -+# systemctl stop ip6tables.service -+---- -+ -+=== Install Cluster Software === -+ -+[source,C] -+---- -+# yum install -y pacemaker corosync pcs resource-agents -+---- -+ -+=== Setup Corosync === -+ -+Running the command below will attempt to detect the network address corosync should bind to. -+ -+[source,C] -+---- -+# export corosync_addr=`ip addr | grep "inet " | tail -n 1 | awk '{print $4}' | sed s/255/0/g` -+---- -+ -+Display and verify that address is correct -+ -+[source,C] -+---- -+# echo $corosync_addr -+---- -+ -+In many cases the address will be 192.168.1.0 if you are behind a standard home router. -+ -+Now copy over the example corosync.conf. This code will inject your bindaddress and enable the vote quorum api which is required by pacemaker. -+ -+[source,C] -+---- -+# cp /etc/corosync/corosync.conf.example /etc/corosync/corosync.conf -+# sed -i.bak "s/.*\tbindnetaddr:.*/bindnetaddr:\ $corosync_addr/g" /etc/corosync/corosync.conf -+# cat << END >> /etc/corosync/corosync.conf -+quorum { -+ provider: corosync_votequorum -+ expected_votes: 2 -+} -+END -+---- -+ -+=== Verify Cluster Software === -+ -+Start the cluster -+ -+[source,C] -+---- -+# pcs cluster start -+---- -+ -+Verify corosync membership -+ -+[source,C] -+---- -+# pcs status corosync -+ -+Membership information -+ Nodeid Votes Name -+1795270848 1 example-host (local) -+---- -+ -+Verify pacemaker status. 
At first the 'pcs cluster status' output will look like this. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:26:00 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: -+ Version: 1.1.10 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+---- -+ -+After about a minute you should see your host as a single node in the cluster. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:28:23 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: example-host (1795270848) - partition WITHOUT quorum -+ Version: 1.1.8-9b13ea1 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+ -+ Online: [ example-host ] -+---- -+ -+Go ahead and stop the cluster for now after verifying everything is in order. -+ -+[source,C] -+---- -+# pcs cluster stop -+---- -+ -+=== Install Virtualization Software === -+ -+[source,C] -+---- -+# yum install -y kvm libvirt qemu-system qemu-kvm bridge-utils virt-manager -+# systemctl enable libvirtd.service -+---- -+ -+reboot the host -+ -+== Step2: Create the KVM guest == -+ -+I am not going to outline the installation steps required to create a kvm guest. There are plenty of tutorials available elsewhere that do that. I recommend using a Fedora 18 or greater distro as your guest as that is what I am testing this tutorial with. -+ -+=== Setup Guest Network === -+ -+Run the commands below to set up a static ip address (192.168.122.10) and hostname (guest1). -+ -+[source,C] -+---- -+export remote_hostname=guest1 -+export remote_ip=192.168.122.10 -+export remote_gateway=192.168.122.1 -+ -+yum remove -y NetworkManager -+ -+rm -f /etc/hostname -+cat << END >> /etc/hostname -+$remote_hostname -+END -+ -+hostname $remote_hostname -+ -+cat << END >> /etc/sysconfig/network -+HOSTNAME=$remote_hostname -+GATEWAY=$remote_gateway -+END -+ -+sed -i.bak "s/.*BOOTPROTO=.*/BOOTPROTO=none/g" /etc/sysconfig/network-scripts/ifcfg-eth0 -+ -+cat << END >> /etc/sysconfig/network-scripts/ifcfg-eth0 -+IPADDR0=$remote_ip -+PREFIX0=24 -+GATEWAY0=$remote_gateway -+DNS1=$remote_gateway -+END -+ -+systemctl restart network -+systemctl enable network.service -+systemctl enable sshd -+systemctl start sshd -+ -+echo "checking connectivity" -+ping www.google.com -+---- -+ -+To simplify the tutorial we'll go ahead and disable selinux on the guest. We'll also need to poke a hole through the firewall on port 3121 (the default port for pacemaker_remote) so the host can contact the guest. -+ -+[source,C] -+---- -+# setenforce 0 -+# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config -+ -+# firewall-cmd --add-port 3121/tcp --permanent -+---- -+ -+If you still encounter connection issues just disable iptables and ipv6tables on the guest like we did on the host to guarantee you'll be able to contact the guest from the host. -+ -+At this point you should be able to ssh into the guest from the host. -+ -+=== Setup Pacemaker Remote === -+ -+On the +HOST+ machine run these commands to generate an authkey and copy it to the /etc/pacemaker folder on both the host and guest. -+ -+[source,C] -+---- -+# mkdir /etc/pacemaker -+# dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 -+# scp -r /etc/pacemaker root@192.168.122.10:/etc/ -+---- -+ -+Now on the +GUEST+ install pacemaker-remote package and enable the daemon to run at startup. 
In the commands below you will notice the 'pacemaker' and 'pacemaker_remote' packages are being installed. The 'pacemaker' package is not required. The only reason it is being installed for this tutorial is because it contains the 'Dummy' resource agent we will be using later on to test the remote-node.
-+
-+[source,C]
-+----
-+# yum install -y pacemaker pacemaker-remote resource-agents
-+# systemctl enable pacemaker_remote.service
-+----
-+
-+Now start pacemaker_remote on the guest and verify the start was successful.
-+
-+[source,C]
-+----
-+# systemctl start pacemaker_remote.service
-+
-+# systemctl status pacemaker_remote
-+
-+  pacemaker_remote.service - Pacemaker Remote Service
-+	  Loaded: loaded (/usr/lib/systemd/system/pacemaker_remote.service; enabled)
-+	  Active: active (running) since Thu 2013-03-14 18:24:04 EDT; 2min 8s ago
-+	Main PID: 1233 (pacemaker_remot)
-+	  CGroup: name=systemd:/system/pacemaker_remote.service
-+		  └─1233 /usr/sbin/pacemaker_remoted
-+
-+  Mar 14 18:24:04 guest1 systemd[1]: Starting Pacemaker Remote Service...
-+  Mar 14 18:24:04 guest1 systemd[1]: Started Pacemaker Remote Service.
-+  Mar 14 18:24:04 guest1 pacemaker_remoted[1233]: notice: lrmd_init_remote_tls_server: Starting a tls listener on port 3121.
-+----
-+
-+=== Verify Host Connection to Guest ===
-+
-+Before moving forward, it's worth verifying that the host can contact the guest on port 3121. Here's a trick you can use: connect using telnet from the host. The connection will get destroyed, but how it is destroyed tells you whether it worked or not.
-+
-+First add guest1 to the host machine's /etc/hosts file if you haven't already. This is required unless you have DNS set up in a way where guest1's address can be discovered.
-+
-+[source,C]
-+----
-+# cat << END >> /etc/hosts
-+192.168.122.10    guest1
-+END
-+----
-+
-+If running the telnet command on the host results in this output before disconnecting, the connection works.
-+[source,C]
-+----
-+# telnet guest1 3121
-+ Trying 192.168.122.10...
-+ Connected to guest1.
-+ Escape character is '^]'.
-+ Connection closed by foreign host.
-+----
-+
-+If you see this, the connection is not working.
-+[source,C]
-+----
-+# telnet guest1 3121
-+Trying 192.168.122.10...
-+telnet: connect to address 192.168.122.10: No route to host
-+----
-+
-+Once you can successfully connect to the guest from the host, shut down the guest. Pacemaker will be managing the virtual machine from this point forward.
-+
-+== Step 3: Integrate KVM guest into Cluster. ==
-+
-+Now the fun part: integrating the virtual machine you've just created into the cluster. It is incredibly simple.
-+
-+=== Start the Cluster ===
-+On the host, start pacemaker.
-+
-+[source,C]
-+----
-+# pcs cluster start
-+----
-+
-+Wait for the host to become the DC. The output of 'pcs status' should look similar to this after about a minute.
-+
-+[source,C]
-+----
-+Last updated: Thu Mar 14 16:41:22 2013
-+Last change: Thu Mar 14 16:41:08 2013 via crmd on example-host
-+Stack: corosync
-+Current DC: example-host (1795270848) - partition WITHOUT quorum
-+Version: 1.1.10
-+1 Nodes configured, unknown expected votes
-+0 Resources configured.
-+
-+
-+Online: [ example-host ]
-+----
-+
-+Now enable the cluster to work without quorum or stonith. This is required just for the sake of getting this tutorial to work with a single cluster-node.
-+ -+[source,C] -+---- -+# pcs property set stonith-enabled=false -+# pcs property set no-quorum-policy=ignore -+---- -+ -+=== Integrate KVM Guest as remote-node === -+ -+If you didn't already do this earlier in the verify host to guest connection section, add the KVM guest's ip to the host's /etc/hosts file so we can connect by hostname. The command below will do that if you used the same ip address I used earlier. -+ -+[source,C] -+---- -+# cat << END >> /etc/hosts -+192.168.122.10 guest1 -+END -+---- -+ -+We will use the +VirtualDomain+ resource agent for the management of the virtual machine. This agent requires the virtual machine's xml config to be dumped to a file on disk. To do this pick out the name of the virtual machine you just created from the output of this list. -+ -+[source,C] -+---- -+# virsh list --all -+ Id Name State -+______________________________________________ -+ - guest1 shut off -+---- -+ -+In my case I named it guest1. Dump the xml to a file somewhere on the host using the following command. -+ -+[source,C] -+---- -+# virsh dumpxml guest1 > /root/guest1.xml -+---- -+ -+Now just register the resource with pacemaker and you're set! -+ -+[source,C] -+---- -+# pcs resource create vm-guest1 VirtualDomain hypervisor="qemu:///system" config="/root/guest1.xml" meta remote-node=guest1 -+---- -+ -+Once the 'vm-guest1' resource is started you will see 'guest1' appear in the 'pcs status' output as a node. The final 'pcs status' output should look something like this. -+ -+[source,C] -+---- -+Last updated: Fri Mar 15 09:30:30 2013 -+Last change: Thu Mar 14 17:21:35 2013 via cibadmin on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+2 Resources configured. -+ -+ -+Online: [ example-host guest1 ] -+ -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+---- -+ -+=== Starting Resources on KVM Guest === -+ -+The commands below demonstrate how resources can be executed on both the remote-node and the cluster-node. -+ -+Create a few Dummy resources. Dummy resources are real resource agents used just for testing purposes. They actually execute on the host they are assigned to just like an apache server or database would, except their execution just means a file was created. When the resource is stopped, that the file it created is removed. -+ -+[source,C] -+---- -+# pcs resource create FAKE1 ocf:pacemaker:Dummy -+# pcs resource create FAKE2 ocf:pacemaker:Dummy -+# pcs resource create FAKE3 ocf:pacemaker:Dummy -+# pcs resource create FAKE4 ocf:pacemaker:Dummy -+# pcs resource create FAKE5 ocf:pacemaker:Dummy -+---- -+ -+Now check your 'pcs status' output. In the resource section you should see something like the following, where some of the resources got started on the cluster-node, and some started on the remote-node. -+ -+[source,C] -+---- -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE2 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE3 (ocf::pacemaker:Dummy): Started example-host -+ FAKE4 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+---- -+ -+ -+The remote-node, 'guest1', reacts just like any other node in the cluster. For example, pick out a resource that is running on your cluster-node. For my purposes I am picking FAKE3 from the output above. 
We can force FAKE3 to run on 'guest1' in the exact same way we would any other node. -+ -+[source,C] -+---- -+# pcs constraint FAKE3 prefers guest1 -+---- -+ -+Now looking at the bottom of the 'pcs status' output you'll see FAKE3 is on 'guest1'. -+ -+[source,C] -+---- -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE2 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE3 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE4 (ocf::pacemaker:Dummy): Started example-host -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+---- -+ -+=== Testing Remote-node Recovery and Fencing === -+ -+Pacemaker's policy engine is smart enough to know fencing remote-nodes associated with a virtual machine means shutting off/rebooting the virtual machine. No special configuration is necessary to make this happen. If you are interested in testing this functionality out, trying stopping the guest's pacemaker_remote daemon. This would be equivalent of abruptly terminating a cluster-node's corosync membership without properly shutting it down. -+ -+ssh into the guest and run this command. -+ -+[source,C] -+---- -+# kill -9 `pidof pacemaker_remoted` -+---- -+ -+After a few seconds or so you'll see this in your 'pcs status' output. The 'guest1' node will be show as offline as it is being recovered. -+ -+[source,C] -+---- -+Last updated: Fri Mar 15 11:00:31 2013 -+Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+7 Resources configured. -+ -+ -+Online: [ example-host ] -+OFFLINE: [ guest1 ] -+ -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Stopped -+ FAKE2 (ocf::pacemaker:Dummy): Stopped -+ FAKE3 (ocf::pacemaker:Dummy): Stopped -+ FAKE4 (ocf::pacemaker:Dummy): Started example-host -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+ -+Failed actions: -+ guest1_monitor_30000 (node=example-host, call=3, rc=7, status=complete): not running -+---- -+ -+Once recovery of the guest is complete, you'll see it automatically get re-integrated into the cluster. The final 'pcs status' output should look something like this. -+ -+[source,C] -+---- -+Last updated: Fri Mar 15 11:03:17 2013 -+Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+2 Nodes configured, unknown expected votes -+7 Resources configured. -+ -+ -+Online: [ example-host guest1 ] -+ -+Full list of resources: -+ -+ vm-guest1 (ocf::heartbeat:VirtualDomain): Started example-host -+ FAKE1 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE2 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE3 (ocf::pacemaker:Dummy): Started guest1 -+ FAKE4 (ocf::pacemaker:Dummy): Started example-host -+ FAKE5 (ocf::pacemaker:Dummy): Started example-host -+ -+Failed actions: -+ guest1_monitor_30000 (node=example-host, call=3, rc=7, status=complete): not running -+---- -+ -+=== Accessing Cluster Tools from Remote-node === -+ -+Besides just allowing the cluster to manage resources on a remote-node, pacemaker_remote has one other trick. 
+The pacemaker_remote daemon allows nearly all the pacemaker tools (crm_resource, crm_mon, crm_attribute, crm_master) to work on remote nodes natively.+ -+ -+Try it, run +crm_mon+ or +pcs status+ on the guest after pacemaker has integrated the remote-node into the cluster. These tools just work. These means resource agents such as master/slave resources which need access to tools like crm_master work seamlessly on the remote-nodes. -+ -diff --git a/doc/Pacemaker_Remote/en-US/Ch-LXC-Tutorial.txt b/doc/Pacemaker_Remote/en-US/Ch-LXC-Tutorial.txt -new file mode 100644 -index 0000000..c3459c0 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Ch-LXC-Tutorial.txt -@@ -0,0 +1,328 @@ -+= Linux Container (LXC) Walk-through = -+ -++What this tutorial is:+ This tutorial demonstrates how pacemaker_remote can be used with Linux containers (managed by libvirt-lxc) to run cluster resources in an isolated environment. -+ -++What this tutorial is not:+ This tutorial is not a realistic deployment scenario. The steps shown here are meant to introduce users to the concept of managing Linux container environments with Pacemaker. -+ -+== Step 1: Setup LXC Host == -+ -+This tutorial was tested with Fedora 18. Anything that is capable of running libvirt and pacemaker v1.1.10 or greater will do though. An installation guide for installing Fedora 18 can be found here, http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/. -+ -+Fedora 18 (or similar distro) host preparation steps. -+ -+=== SElinux and Firewall Rules === -+In order to simply this tutorial we will disable the selinux and the firewall on the host. -+WARNING: These actions pose a significant security issues to machines exposed to the outside world. Basically, just don't do this on your production system. -+[source,C] -+---- -+# setenforce 0 -+# sed -i.bak "s/SELINUX=enforcing/SELINUX=permissive/g" /etc/selinux/config -+# firewall-cmd --add-port 3121/tcp --permanent -+ -+# systemctl disable iptables.service -+# systemctl disable ip6tables.service -+# rm '/etc/systemd/system/basic.target.wants/iptables.service' -+# rm '/etc/systemd/system/basic.target.wants/ip6tables.service' -+# systemctl stop iptables.service -+# systemctl stop ip6tables.service -+---- -+ -+=== Install Cluster Software on Host === -+ -+[source,C] -+---- -+# yum install -y pacemaker pacemaker-remote corosync pcs resource-agents -+---- -+ -+=== Configure Corosync === -+ -+Running the command below will attempt to detect the network address corosync should bind to. -+ -+[source,C] -+---- -+# export corosync_addr=`ip addr | grep "inet " | tail -n 1 | awk '{print $4}' | sed s/255/0/g` -+---- -+ -+Display and verify the address is correct -+ -+[source,C] -+---- -+# echo $corosync_addr -+---- -+ -+In most cases the address will be 192.168.1.0 if you are behind a standard home router. -+ -+Now copy over the example corosync.conf. This code will inject your bindaddress and enable the vote quorum api which is required by pacemaker. 
-+ -+[source,C] -+---- -+# cp /etc/corosync/corosync.conf.example /etc/corosync/corosync.conf -+# sed -i.bak "s/.*\tbindnetaddr:.*/bindnetaddr:\ $corosync_addr/g" /etc/corosync/corosync.conf -+# cat << END >> /etc/corosync/corosync.conf -+quorum { -+ provider: corosync_votequorum -+ expected_votes: 2 -+} -+END -+---- -+ -+=== Verify Cluster === -+ -+Start the cluster -+ -+[source,C] -+---- -+# pcs cluster start -+---- -+ -+Verify corosync membership -+ -+[source,C] -+---- -+# pcs status corosync -+ -+Membership information -+ Nodeid Votes Name -+1795270848 1 example-host (local) -+---- -+ -+Verify pacemaker status. At first the 'pcs cluster status' output will look like this. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:26:00 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: -+ Version: 1.1.10 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+---- -+ -+After about a minute you should see your host as a single node in the cluster. -+ -+[source,C] -+---- -+# pcs status -+ -+ Last updated: Thu Mar 14 12:28:23 2013 -+ Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host -+ Stack: corosync -+ Current DC: example-host (1795270848) - partition WITHOUT quorum -+ Version: 1.1.8-9b13ea1 -+ 1 Nodes configured, unknown expected votes -+ 0 Resources configured. -+ -+ Online: [ example-host ] -+---- -+ -+Go ahead and stop the cluster for now after verifying everything is in order. -+ -+[source,C] -+---- -+# pcs cluster stop -+---- -+ -+== Step 2: Setup LXC Environment == -+ -+=== Install Libvirt LXC software === -+ -+[source,C] -+---- -+# yum install -y libvirt libvirt-daemon-lxc wget -+# systemctl enable libvirtd -+---- -+ -+At this point, restart the host. -+ -+=== Generate Libvirt LXC domains === -+ -+I've attempted to simply this tutorial by creating a script to auto generate the libvirt-lxc xml domain definitions. -+ -+Download the script to whatever directory you want the containers to live in. In this example I am using the /root/lxc/ directory. -+ -+[source,C] -+---- -+# mkdir /root/lxc/ -+# cd /root/lxc/ -+# wget https://raw.github.com/davidvossel/pcmk-lxc-autogen/master/lxc-autogen -+# chmod 755 lxc-autogen -+---- -+ -+Now execute the script. -+ -+[source,C] -+---- -+# ./lxc-autogen -+---- -+ -+After executing the script you will see a bunch of directories and xml files are generated. Those xml files are the libvirt-lxc domain definitions, and the directories are used as some special mount points for each container. If you open up one of the xml files you'll be able to see how the cpu, memory, and filesystem resources for the container are defined. You can use the libvirt-lxc driver's documentation found here, http://libvirt.org/drvlxc.html, as a reference to help understand all the parts of the xml file. The lxc-autogen script is not complicated and is worth exploring in order to grasp how the environment is generated. -+ -+It is worth noting that this environment is dependent on use of libvirt's default network interface. Verify the commands below look the same as your environment. The default network address 192.168.122.1 should have been generated by automatically when you installed the virtualization software. 
-+ -+[source,C] -+---- -+# virsh net-list -+Name State Autostart Persistent -+________________________________________________________ -+default active yes yes -+ -+# virsh net-dumpxml default | grep -e "ip address=" -+ -+ -+---- -+ -+=== Generate the Authkey === -+ -+Generate the authkey used to secure connections between the host and the lxc guest pacemaker_remote instances. This is sort of a funny case because the lxc guests and the host will share the same key file in the /etc/pacemaker/ directory. If in a different deployment where the lxc guests do not share the host's /etc/pacemaker directory, this key will have to be copied into each lxc guest. -+ -+[source,C] -+---- -+# dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 -+---- -+ -+== Step 3: Integrate LXC guests into Cluster. == -+ -+=== Start Cluster === -+On the host, start pacemaker. -+ -+[source,C] -+---- -+# pcs cluster start -+---- -+ -+Wait for the host to become the DC. The output of 'pcs status' should look similar to this after about a minute. -+ -+[source,C] -+---- -+Last updated: Thu Mar 14 16:41:22 2013 -+Last change: Thu Mar 14 16:41:08 2013 via crmd on example-host -+Stack: corosync -+Current DC: example-host (1795270848) - partition WITHOUT quorum -+Version: 1.1.10 -+1 Nodes configured, unknown expected votes -+0 Resources configured. -+ -+ -+Online: [ example-host ] -+---- -+ -+Now enable the cluster to work without quorum or stonith. This is required just for the sake of getting this tutorial to work with a single cluster-node. -+ -+[source,C] -+---- -+# pcs property set stonith-enabled=false -+# pcs property set no-quorum-policy=ignore -+---- -+ -+=== Integrate LXC Guests as remote-nodes === -+ -+If you ran the 'lxc-autogen' script with default parameters, 3 lxc domain definitions were created as .xml files. If you used the same directory I used for the lxc environment, the config files will be located in /root/lxc. Replace the 'config' parameters in the following pcs commands if yours should be different. -+ -+The pcs commands below each configure a lxc guest as a remote-node in pacemaker. Behind the scenes each lxc guest is launching an instance of pacemaker_remote allowing pacemaker to integrate the lxc guests as remote-nodes. The meta-attribute 'remote-node=' used in each command is what tells pacemaker that the lxc guest is both a resource and a remote-node capable of running resources. In this case, the 'remote-node' attribute also indicates to pacemaker that it can contact each lxc's pacemaker_remote service by using the remote-node name as the hostname. If you look in the /etc/hosts/ file you will see entries for each lxc guest. These entries were auto-generated earlier by the 'lxc-autogen' script. -+ -+[source,C] -+---- -+# pcs resource create container1 VirtualDomain force_stop="true" hypervisor="lxc:///" config="/root/lxc/lxc1.xml" meta remote-node=lxc1 -+# pcs resource create container2 VirtualDomain force_stop="true" hypervisor="lxc:///" config="/root/lxc/lxc2.xml" meta remote-node=lxc2 -+# pcs resource create container3 VirtualDomain force_stop="true" hypervisor="lxc:///" config="/root/lxc/lxc3.xml" meta remote-node=lxc3 -+---- -+ -+ -+After creating the container resources you 'pcs status' should look like this. 
-+
-+[source,C]
-+----
-+Last updated: Mon Mar 18 17:15:46 2013
-+Last change: Mon Mar 18 17:15:26 2013 via cibadmin on guest1
-+Stack: corosync
-+Current DC: example-host (175810752) - partition WITHOUT quorum
-+Version: 1.1.10
-+4 Nodes configured, unknown expected votes
-+6 Resources configured.
-+
-+Online: [ example-host lxc1 lxc2 lxc3 ]
-+
-+Full list of resources:
-+
-+ container3	(ocf::heartbeat:VirtualDomain):	Started example-host
-+ container1	(ocf::heartbeat:VirtualDomain):	Started example-host
-+ container2	(ocf::heartbeat:VirtualDomain):	Started example-host
-+----
-+
-+
-+=== Starting Resources on LXC Guests ===
-+
-+Now that the lxc guests are integrated into the cluster, let's generate some Dummy resources to run on them.
-+
-+Dummy resources are real resource agents used just for testing purposes. They actually execute on the node they are assigned to just like an apache server or database would, except their execution just means a file was created. When the resource is stopped, the file it created is removed.
-+
-+[source,C]
-+----
-+# pcs resource create FAKE1 ocf:pacemaker:Dummy
-+# pcs resource create FAKE2 ocf:pacemaker:Dummy
-+# pcs resource create FAKE3 ocf:pacemaker:Dummy
-+# pcs resource create FAKE4 ocf:pacemaker:Dummy
-+# pcs resource create FAKE5 ocf:pacemaker:Dummy
-+----
-+
-+
-+After creating the Dummy resources, you will see that they got distributed among all the nodes. The 'pcs status' output should look similar to this.
-+
-+[source,C]
-+----
-+Last updated: Mon Mar 18 17:31:54 2013
-+Last change: Mon Mar 18 17:31:05 2013 via cibadmin on example-host
-+Stack: corosync
-+Current DC: example-host (175810752) - partition WITHOUT quorum
-+Version: 1.1.10
-+4 Nodes configured, unknown expected votes
-+11 Resources configured.
-+
-+
-+Online: [ example-host lxc1 lxc2 lxc3 ]
-+
-+Full list of resources:
-+
-+ container3	(ocf::heartbeat:VirtualDomain):	Started example-host
-+ container1	(ocf::heartbeat:VirtualDomain):	Started example-host
-+ container2	(ocf::heartbeat:VirtualDomain):	Started example-host
-+ FAKE1	(ocf::pacemaker:Dummy):	Started lxc1
-+ FAKE2	(ocf::pacemaker:Dummy):	Started lxc2
-+ FAKE3	(ocf::pacemaker:Dummy):	Started lxc3
-+ FAKE4	(ocf::pacemaker:Dummy):	Started lxc1
-+ FAKE5	(ocf::pacemaker:Dummy):	Started lxc2
-+----
-+
-+To witness that the Dummy agents are running within the lxc guests, browse one of the lxc domains' filesystem folders. Each lxc guest has a custom mount point for the '/var/run/' directory, which is the location the Dummy resources write their state files to.
-+
-+[source,C]
-+----
-+# ls lxc1-filesystem/var/run/
-+Dummy-FAKE4.state  Dummy-FAKE.state
-+----
-+
-+If you are curious, take a look at lxc1.xml to see how the filesystem is mounted.
-+
-+=== Testing LXC Guest Failure ===
-+
-+You will be able to see each pacemaker_remoted process running in each lxc guest from the host machine.
-+
-+[source,C]
-+----
-+# ps -A | grep -e pacemaker_remote*
-+ 9142 pts/2    00:00:00 pacemaker_remot
-+10148 pts/4    00:00:00 pacemaker_remot
-+10942 pts/6    00:00:00 pacemaker_remot
-+----
-+
-+To see how the cluster reacts to a failed lxc guest, try killing one of the pacemaker_remote instances.
-+
-+[source,C]
-+----
-+# kill -9 9142
-+----
-+
-+After a few moments the lxc guest that was running that instance of pacemaker_remote will be recovered along with all the resources running within that container.
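-+
-+If you want to watch the recovery as it happens, run a monitor in a second terminal before killing the daemon; either of the status tools used throughout this tutorial will do (a small usage sketch, nothing tutorial-specific):
-+
-+[source,C]
-+----
-+# crm_mon
-+# pcs status
-+----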
-diff --git a/doc/Pacemaker_Remote/en-US/Ch-Options.txt b/doc/Pacemaker_Remote/en-US/Ch-Options.txt
-new file mode 100644
-index 0000000..9e14b31
---- /dev/null
-+++ b/doc/Pacemaker_Remote/en-US/Ch-Options.txt
-@@ -0,0 +1,51 @@
-+= Configuration Explained =
-+
-+The walk-through examples use some of these options, but don't explain exactly what they mean or do. This section is meant to be the go-to resource for all the options available for configuring remote-nodes.
-+
-+== Resource Options ==
-+
-+When configuring a virtual machine or lxc resource to act as a remote-node, these are the metadata options available both to enable the resource as a remote-node and to define the connection parameters.
-+
-+.Metadata Options for configuring KVM/LXC resources as remote-nodes
-+[width="95%",cols="1m,1,4<",options="header",align="center"]
-+|=========================================================
-+
-+|Option
-+|Default
-+|Description
-+
-+|+remote-node+
-+|
-+|The name of the remote-node this resource defines. This both enables the resource as a remote-node and defines the unique name used to identify the remote-node. If no other parameters are set, this value will also be assumed as the hostname to connect to at port 3121. +WARNING+ This value cannot overlap with any resource or node IDs.
-+
-+|+remote-port+
-+|3121
-+|Configure a custom port to use for the guest connection to pacemaker_remote.
-+
-+|+remote-addr+
-+|+remote-node+ value used as hostname
-+|The IP address or hostname to connect to if the remote-node's name is not the hostname of the guest.
-+
-+|+remote-connect-timeout+
-+|60s
-+|How long before a pending guest connection will time out.
-+
-+|=========================================================
-+
-+== Host and Guest Authentication ==
-+
-+Authentication and encryption of the connection between cluster-nodes (pacemaker) and remote-nodes (pacemaker_remote) is achieved using TLS with PSK encryption/authentication on +tcp port 3121+. This means both the cluster-node and remote-node must share the same private key. By default this +key must be placed at "/etc/pacemaker/authkey" on both cluster-nodes and remote-nodes+.
-+
-+== Pacemaker and pacemaker_remote Options ==
-+
-+If you need to change the default port or authkey location for either pacemaker or pacemaker_remote, there are environment variables that affect both of those daemons. They can be set by placing them in the /etc/sysconfig/pacemaker file.
-+[source,C]
-+----
-+#==#==# Pacemaker Remote
-+# Use a custom directory for finding the authkey.
-+PCMK_authkey_location=/etc/pacemaker/authkey -+# -+# Specify a custom port for Pacemaker Remote connections -+PCMK_remote_port=3121 -+---- -+ -diff --git a/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.ent b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.ent -new file mode 100644 -index 0000000..65d8bad ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.ent -@@ -0,0 +1,6 @@ -+ -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml -new file mode 100644 -index 0000000..9ee710c ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml -@@ -0,0 +1,17 @@ -+ -+ -+%BOOK_ENTITIES; -+]> -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/Revision_History.xml b/doc/Pacemaker_Remote/en-US/Revision_History.xml -new file mode 100644 -index 0000000..26d8ab6 ---- /dev/null -+++ b/doc/Pacemaker_Remote/en-US/Revision_History.xml -@@ -0,0 +1,25 @@ -+ -+ -+%BOOK_ENTITIES; -+]> -+ -+ Revision History -+ -+ -+ -+ 1 -+ Tue Mar 19 2013 -+ DavidVosseldvossel@redhat.com -+ Import from Pages.app -+ -+ -+ 2 -+ Tue May 13 2013 -+ DavidVosseldvossel@redhat.com -+ Added Future Features Section -+ -+ -+ -+ -+ -diff --git a/doc/Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png -new file mode 100644 -index 0000000..163ba45 -Binary files /dev/null and b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-cluster-stack.png differ -diff --git a/doc/Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png -new file mode 100644 -index 0000000..11985a7 -Binary files /dev/null and b/doc/Pacemaker_Remote/en-US/images/pcmk-ha-remote-stack.png differ -diff --git a/doc/Pacemaker_Remote/publican.cfg.in b/doc/Pacemaker_Remote/publican.cfg.in -new file mode 100644 -index 0000000..314bb3d ---- /dev/null -+++ b/doc/Pacemaker_Remote/publican.cfg.in -@@ -0,0 +1,14 @@ -+# Config::Simple 4.59 -+# Fri Apr 23 15:33:52 2010 -+ -+docname: Pacemaker_Remote -+xml_lang: en-US -+#edition: 1 -+type: Book -+version: @PACKAGE_SERIES@ -+brand: @PUBLICAN_BRAND@ -+product: Pacemaker -+ -+chunk_first: 0 -+chunk_section_depth: 3 -+generate_section_toc_level: 4 -diff --git a/doc/openstack.md b/doc/openstack.md -index 6f994fb..7509a16 100644 ---- a/doc/openstack.md -+++ b/doc/openstack.md -@@ -15,15 +15,17 @@ Export your OpenStack credentials - export OS_REGION_NAME=... - export OS_TENANT_NAME=... - export OS_AUTH_URL=... -- export OS_USERNAME=... -+ export OS_USERNAME=... - export OS_PASSWORD=... - -+ export IMAGE_USER=fedora -+ - Allocate 5 floating IPs. For the purposes of the setup instructions --(and probably your sanity), they need to be consecutive and should --ideally start with a multiple of 10. Below we will assume -+(and probably your sanity), they need to be consecutive and to remain -+sane, should ideally start with a multiple of 10. Below we will assume - 10.16.16.60-64 - -- for n in 1 2 3 4 5; do nova floating-ip-create; done -+ for n in `seq 1 5`; do nova floating-ip-create; done - - Create some variables based on the IP addresses nova created for you: - -@@ -32,7 +34,11 @@ Create some variables based on the IP addresses nova created for you: - and a function for calculating offsets - - function nth_ipaddr() { -- echo $IP_BASE | awk -F. -v offset=$1 '{ printf "%s.%s.%s.%s\n", $1, $2, $3, $4 + offset }' -+ echo $IP_BASE | awk -F. 
-v offset=$1 '{ printf "%s.%s.%s.%s\n", $1, $2, $3, $4 + offset }' -+ } -+ -+ function ip_net() { -+ echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*\n", $1, $2, $3 }' - } - - Upload a public key that we can use to log into the images we create. -@@ -40,6 +46,15 @@ I created one especially for cluster testing and left it without a password. - - nova keypair-add --pub-key ~/.ssh/cluster Cluster - -+Make sure it gets used when connecting to the CTS master -+ -+ cat << EOF >> ~/.ssh/config -+ Host cts-master \`echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*", \$1, \$2, \$3 }'\` -+ User root -+ IdentityFile ~/.ssh/cluster -+ UserKnownHostsFile ~/.ssh/known.openstack -+ EOF -+ - Punch a hole in the firewall for SSH access and ping - - nova secgroup-add-rule default tcp 23 23 10.0.0.0/8 -@@ -54,93 +69,73 @@ Add the CTS master to /etc/hosts - Create helper scripts on a local host - - cat << END > ./master.sh -- -+ - echo export OS_REGION_NAME=$OS_REGION_NAME >> ~/.bashrc - echo export OS_TENANT_NAME=$OS_TENANT_NAME >> ~/.bashrc - echo export OS_AUTH_URL=$OS_AUTH_URL >> ~/.bashrc - echo export OS_USERNAME=$OS_USERNAME >> ~/.bashrc - echo export OS_PASSWORD=$OS_PASSWORD >> ~/.bashrc -- -+ - function nth_ipaddr() { -- echo $IP_BASE | awk -F. -v offset=\$1 '{ printf "%s.%s.%s.%s\n", \$1, \$2, \$3, \$4 + offset }' -+ echo $IP_BASE | awk -F. -v offset=\$1 '{ printf "%s.%s.%s.%s\n", \$1, \$2, \$3, \$4 + offset }' - } -- -+ - yum install -y python-novaclient git screen pdsh pdsh-mod-dshgroup -- -- git clone git://github.com/beekhof/fence_openstack.git -+ -+ git clone --depth 0 git://github.com/beekhof/fence_openstack.git - ln -s /root/fence_openstack/fence_openstack /sbin - - mkdir -p /root/.dsh/group/ - echo export cluster_name=openstack >> ~/.bashrc -- -+ - rm -f /root/.dsh/group/openstack -- for n in 1 2 3 4; do -+ for n in `seq 1 4`; do - echo "cluster-\$n" >> /root/.dsh/group/openstack - echo \`nth_ipaddr \$n\` cluster-\$n >> /etc/hosts - done -- -- cat << EOF >> /root/.ssh/config -- Host \`echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*", \$1, \$2, \$3 }'\` -- User root -- IdentityFile ~/.ssh/cluster - -- Host cts-master -+ cat << EOF >> /root/.ssh/config -+ Host cts-master \`echo $IP_BASE | awk -F. '{ printf "%s.%s.%s.*", \$1, \$2, \$3 }'\` - User root - IdentityFile ~/.ssh/cluster - EOF -- -+ - END - --Another script: -+Some images do not allow root to log in by default and insist on a -+'fedora' user. Create a script to disable this "feature": - - cat << EOF > fix-guest.sh - #!/bin/bash - # Re-allow root to log in - sudo sed -i s/.*ssh-/ssh-/ /root/.ssh/authorized_keys - EOF -- --## CTS master (Fedora-17) -+ -+## CTS master (Fedora-18) - - Create and update the master - -- nova boot --poll --image "Fedora 17" --key_name Cluster --flavor m1.tiny cts-master -+ nova boot --poll --image "Fedora 18" --key_name Cluster --flavor m1.tiny cts-master - nova add-floating-ip cts-master `nth_ipaddr 0` - --Some images do not allow root to log in by default and insist on a 'stack' user. --Disable this "feature". 
-+If your image does not allow root to log in by default, disable this -+"feature" with the script we created earlier: - -- scp fix-guest.sh stack@cts-master: -- ssh -l stack -t cts-master -- bash ./fix-guest.sh -+ scp fix-guest.sh $IMAGE_USER@cts-master: -+ ssh -l $IMAGE_USER -t cts-master -- bash ./fix-guest.sh - - Now we can set up the CTS master with the script we created earlier: - -- scp ~/.ssh/cluster root@cts-master:.ssh/id_rsa -- scp master.sh root@cts-master: -- ssh root@cts-master -- bash ./master.sh -- --## Create Guest Base Image -- --Create a guest that we can tweak -+ scp ~/.ssh/cluster cts-master:.ssh/id_rsa -+ scp master.sh cts-master: -+ ssh cts-master -- bash ./master.sh - -- nova boot --poll --image "Fedora 18 Alpha" --key_name Cluster --flavor m1.tiny TempGuest -- nova add-floating-ip TempGuest `nth_ipaddr 1` -- scp fix-guest.sh stack@`nth_ipaddr 1`: -- --Create snapshot with our changes called Fedora-18-base -- -- nova image-create --poll TempGuest Fedora-18-base -- --Release the IP and delete the temporary guest -- -- nova remove-floating-ip TempGuest `nth_ipaddr 1` -- nova delete TempGuest -- --### Create Guests -+### Create the Guests - - First create the guests - -- for n in 1 2 3 4; do -- nova boot --poll --image Fedora-18-base --key_name Cluster --flavor m1.tiny cluster-$n; -+ for n in `seq 1 4`; do -+ nova boot --poll --image "Fedora 18" --key_name Cluster --flavor m1.tiny cluster-$n; - nova add-floating-ip cluster-$n `nth_ipaddr $n` - done - -@@ -148,11 +143,14 @@ Then wait for everything to settle - - sleep 10 - --Now you can fix them -+### Fix the Guests -+ -+If your image does not allow root to log in by default, disable this -+"feature" with the script we created earlier: - -- for n in 1 2 3 4; do -- ssh -l stack -t `nth_ipaddr $n` -- bash ./fix-guest.sh; -- scp /etc/hosts root@`nth_ipaddr $n`:/etc/; -+ for n in `seq 1 4`; do -+ scp fix-guest.sh $IMAGE_USER@`nth_ipaddr $n`: -+ ssh -l $IMAGE_USER -t `nth_ipaddr $n` -- bash ./fix-guest.sh; - done - - ## Run CTS -@@ -161,20 +159,22 @@ Now you can fix them - - Switch to the CTS master - -- ssh -l root cts-master -+ ssh cts-master - - Clone Pacemaker for the latest version of CTS: - -- git clone git://github.com/ClusterLabs/pacemaker.git -- echo 'export PATH=\$PATH:/root/pacemaker/extra::/root/pacemaker/cts' >> ~/.bashrc -+ git clone --depth 0 git://github.com/ClusterLabs/pacemaker.git -+ echo 'export PATH=$PATH:/root/pacemaker/extra:/root/pacemaker/cts' >> ~/.bashrc -+ echo alias c=\'cluster-helper\' >> ~/.bashrc -+ . 
~/.bashrc - --Now set up CTS to run from the local source tree -+Now set up CTS to run from the local source tree - - cts local-init - - Configure a cluster (this will install all needed packages and configure corosync on the guests in the $cluster_name group) - -- cluster-init -g openstack --yes --unicast fedora-17 -+ cluster-init -g openstack --yes --unicast --hosts fedora-18 - - ### Run - -diff --git a/doc/pcs-crmsh-quick-ref.md b/doc/pcs-crmsh-quick-ref.md -new file mode 100644 -index 0000000..a28960f ---- /dev/null -+++ b/doc/pcs-crmsh-quick-ref.md -@@ -0,0 +1,159 @@ -+## Display the configuration -+ -+ crmsh # crm configure show -+ pcs # pcs cluster cib -+ -+## Display the current status -+ -+ crmsh # crm_mon -1 -+ pcs # pcs status -+ -+## Node standby -+ -+ crmsh # crm node standby -+ pcs # pcs cluster standby pcmk-1 -+ -+ crmsh # crm node online -+ pcs # pcs cluster unstandby pcmk-1 -+ -+## Setting configuration options -+ -+ crmsh # crm configure property stonith-enabled=false -+ pcs # pcs property set stonith-enabled=false -+ -+## Listing available resources -+ -+ crmsh # crm ra classes -+ pcs # pcs resource standards -+ -+ crmsh # crm ra list ocf pacemaker -+ pcs # pcs resource agents ocf:pacemaker -+ -+## Creating a resource -+ -+ crmsh # crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \ -+ params ip=192.168.122.120 cidr_netmask=32 \ -+ op monitor interval=30s -+ pcs # pcs resource create ClusterIP IPaddr2 ip=192.168.0.120 cidr_netmask=32 -+ -+The standard and provider (`ocf:heartbeat`) are determined automatically since `IPaddr2` is unique. -+The monitor operation is automatically created based on the agent's metadata. -+ -+## Start a resource -+ crmsh # crm resource start ClusterIP -+ pcs # pcs resource start ClusterIP -+ -+## Stop a resource -+ -+ crmsh # crm resource stop ClusterIP -+ pcs # pcs resource stop ClusterIP -+ -+## Remove a resource -+ -+ crmsh # crm configure delete ClusterIP -+ pcs # -+ -+## Update a resource -+ crmsh # crm configure edit ClusterIP -+ pcs # pcs resource update ClusterIP clusterip_hash=sourceip -+ -+## Display a resource -+ -+ crmsh # -+ pcs # pcs resource show WebFS -+ -+## Resource defaults -+ -+ crmsh # crm configure rsc_defaults resource-stickiness=100 -+ pcs # pcs rsc defaults resource-stickiness=100 -+ -+Listing the current defaults: -+ -+ pcs # pcs rsc defaults -+ -+## Operation defaults -+ -+ crmsh # crm configure op_defaults timeout=240s -+ pcs # pcs resource op defaults timeout=240s -+ -+Listing the current defaults: -+ pcs # pcs resource op defaults -+ -+## Colocation -+ -+ crmsh # crm configure colocation website-with-ip INFINITY: WebSite ClusterIP -+ pcs # pcs constraint colocation add WebSite ClusterIP INFINITY -+ -+With roles -+ -+ crmsh # -+ pcs # -+ -+## Start/stop ordering -+ -+ crmsh # crm configure order apache-after-ip mandatory: ClusterIP WebSite -+ pcs # pcs constraint order ClusterIP then WebSite -+ -+With roles: -+ -+ crmsh # -+ pcs # -+ -+## Preferred locations -+ -+ crmsh # crm configure location prefer-pcmk-1 WebSite 50: pcmk-1 -+ pcs # pcs constraint location WebSite prefers pcmk-1=50 -+ -+With roles: -+ -+ crmsh # -+ pcs # -+ -+## Moving resources -+ -+ crmsh # crm resource move WebSite pcmk-1 -+ pcs # pcs constraint location WebSite prefers pcmk-1=INFINITY -+ -+ crmsh # crm resource unmove WebSite -+ pcs # pcs constraint rm location-WebSite-pcmk-1-INFINITY -+ -+## Creating a clone -+ -+ crmsh # configure clone WebIP ClusterIP meta globally-unique="true" clone-max="2" clone-node-max="2" -+ pcs # pcs resource 
clone ClusterIP globally-unique=true clone-max=2 clone-node-max=2 -+ -+## Creating a master/slave clone -+ -+ crmsh # crm configure ms WebDataClone WebData \ -+ meta master-max=1 master-node-max=1 \ -+ clone-max=2 clone-node-max=1 notify=true -+ pcs # resource master WebDataClone WebData \ -+ master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 \ -+ notify=true -+ -+## ... -+ crmsh # -+ pcs # -+ -+ crmsh # -+ pcs # -+ -+ -+## Batch changes -+ -+ crmsh # crm -+ crmsh # cib new drbd_cfg -+ crmsh # configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \ -+ op monitor interval=60s -+ crmsh # configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \ -+ clone-max=2 clone-node-max=1 notify=true -+ crmsh # cib commit drbd_cfg -+ crmsh # quit -+ -+ -+ pcs # pcs cluster cib drbd_cfg -+ pcs # pcs -f drbd_cfg resource create WebData ocf:linbit:drbd drbd_resource=wwwdata \ -+ op monitor interval=60s -+ pcs # pcs -f drbd_cfg resource master WebDataClone WebData master-max=1 master-node-max=1 \ -+ clone-max=2 clone-node-max=1 notify=true -+ pcs # pcs cluster push cib drbd_cfg -diff --git a/extra/cluster-clean b/extra/cluster-clean -index cf6e84f..d088a31 100755 ---- a/extra/cluster-clean -+++ b/extra/cluster-clean -@@ -36,7 +36,9 @@ cluster-helper --list bullet $target - - if [ $kill != 0 ]; then - echo "Cleaning processes" -- cluster-helper $target -- "killall -q -9 corosync aisexec heartbeat pacemakerd ccm stonithd ha_logd lrmd crmd pengine attrd pingd mgmtd cib fenced dlm_controld gfs_controld" &> /dev/null -+ cluster-helper $target -- "killall -q -9 corosync aisexec heartbeat pacemakerd pacemaker-remoted ccm stonithd ha_logd lrmd crmd pengine attrd pingd mgmtd cib fenced dlm_controld gfs_controld" &> /dev/null -+ -+ cluster-helper $target -- 'kill -9 `pidof valgrind`' &> /dev/null - - # Bah. 
Force systemd to actually look at the process and realize its dead" - cluster-helper $target -- "service corosync stop" &> /dev/null -@@ -58,6 +60,7 @@ log_files="$log_files pacemaker.log" - - state_files="" - state_files="$state_files 'cib.xml*'" -+state_files="$state_files 'valgrind-*'" - state_files="$state_files 'cib-*'" - state_files="$state_files 'core.*'" - state_files="$state_files hostcache" -@@ -74,7 +77,7 @@ done - - cluster-helper $target -- "find /dev/shm -name 'qb-*' -exec rm -f \{\} \;" - cluster-helper $target -- "find /var/lib/pacemaker/blackbox -name '*.*' -exec rm -f \{\} \;" --cluster-helper $target -- "find /tmp -name 'cts-*.valgrind' -exec rm -f \{\} \;" -+cluster-helper $target -- "find /tmp -name '*.valgrind' -exec rm -f \{\} \;" - - cluster-helper $target -- service rsyslog restart 2>&1 > /dev/null - cluster-helper $target -- logger -i -p daemon.info __clean_logs__ -diff --git a/extra/cluster-init b/extra/cluster-init -index 57ec42b..1e4fdc6 100755 ---- a/extra/cluster-init -+++ b/extra/cluster-init -@@ -2,6 +2,7 @@ - - accept_defaults=0 - do_raw=0 -+ETCHOSTS=0 - CMAN=0 - do_heartbeat=0 - plugin_ver=-1 -@@ -11,6 +12,7 @@ nodelist=0 - pkgs="corosync xinetd nmap abrt-cli fence-agents perl-TimeDate gdb" - - transport="multicast" -+inaddr_any="no" - - INSTALL= - cs_conf= -@@ -76,6 +78,7 @@ function helptext() { - echo "" - echo "-d, --debug Enable debug logging for the cluster" - echo "-10 install stable-1.0 packages, implies: -p 0 -R rpm-test -I" -+ echo "--hosts Copy /etc/hosts from the test master to the nodes" - echo "-e, --extra package-list" - echo " Extra packages to install" - exit $1 -@@ -102,6 +105,7 @@ while true; do - - -R|--repo) rpm_repo=$2; shift; shift;; - -I|--install) INSTALL=Yes; shift;; -+ --hosts) ETCHOSTS=1; shift;; - - cman|--cman) CTYPE=cman; shift;; - -h|--heartbeat) CTYPE=heartbeat; shift;; -@@ -109,7 +113,7 @@ while true; do - -C|--nodelist) CTYPE=corosync; nodelist=1; shift;; - -o|--openais) CTYPE=openais; shift;; - --plugin|-p) CTYPE=plugin; plugin_ver=$2; shift; shift;; -- -u|--unicast) nodelist=1; transport=udpu; shift;; -+ -u|--unicast) nodelist=1; transport=udpu; inaddr_any="yes"; shift;; - -e|--extra) pkgs="$pkgs $2"; shift; shift;; - -t|--test) rpm_repo=rpm-test-next; pkgs="$pkgs valgrind"; shift;; - -@@ -541,6 +545,7 @@ totem { - secauth: off - - transport: $transport -+ inaddr_any: $inaddr_any - - # interface: define at least one interface to communicate - # over. If you define more than one interface stanza, you must -@@ -769,6 +774,10 @@ for host in $host_list; do - fi - fi - -+ if [ $ETCHOSTS = 1 ]; then -+ scp /etc/hosts root@${host}:/etc/hosts -+ fi -+ - if [ $pcmk_ver = 10 ]; then - scp /etc/hosts root@${host}:/etc/hosts - scp ~/.ssh/id_dsa.suse root@${host}:.ssh/id_dsa -diff --git a/extra/pcmk_snmp_helper.sh b/extra/pcmk_snmp_helper.sh -new file mode 100644 -index 0000000..7eca8e6 ---- /dev/null -+++ b/extra/pcmk_snmp_helper.sh -@@ -0,0 +1,54 @@ -+#!/bin/bash -+ -+# -+# Copyright (C) 2013 Florian CROUZAT -+# -+# This program is free software; you can redistribute it and/or -+# modify it under the terms of the GNU General Public -+# License as published by the Free Software Foundation; either -+# version 2 of the License, or (at your option) any later version. -+# -+# This software is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public -+# License along with this library; if not, write to the Free Software -+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ -+# Resources: -+# crm ra meta ocf:pacemaker:ClusterMon -+# man 8 crm_mon -+ -+# Sample configuration -+# ================================ -+# primitive ClusterMon ocf:pacemaker:ClusterMon \ -+# params user="root" update="30" extra_options="-E /path/to/pcmk_snmp_helper.sh -e 192.168.1.2" \ -+# op monitor on-fail="restart" interval="10" -+# -+# clone ClusterMon-clone ClusterMon \ -+# meta target-role="Started" -+# ================================ -+ -+# The external agent is fed with environment variables allowing us to know -+# what transition happened and to react accordingly: -+# http://clusterlabs.org/doc/en-US/Pacemaker/1.1-crmsh/html/Pacemaker_Explained/s-notification-external.html -+ -+# Generates SNMP alerts for any failing monitor operation -+# OR -+# for any operations (even successful) that are not a monitor -+if [[ ${CRM_notify_rc} != 0 && ${CRM_notify_task} == "monitor" ]] || [[ ${CRM_notify_task} != "monitor" ]] ; then -+ # This trap is compliant with PACEMAKER MIB -+ # https://github.com/ClusterLabs/pacemaker/blob/master/extra/PCMK-MIB.txt -+ /usr/bin/snmptrap -v 2c -c public ${CRM_notify_recipient} "" PACEMAKER-MIB::pacemakerNotification \ -+ PACEMAKER-MIB::pacemakerNotificationNode s "${CRM_notify_node}" \ -+ PACEMAKER-MIB::pacemakerNotificationResource s "${CRM_notify_rsc}" \ -+ PACEMAKER-MIB::pacemakerNotificationOperation s "${CRM_notify_task}" \ -+ PACEMAKER-MIB::pacemakerNotificationDescription s "${CRM_notify_desc}" \ -+ PACEMAKER-MIB::pacemakerNotificationStatus i "${CRM_notify_status}" \ -+ PACEMAKER-MIB::pacemakerNotificationReturnCode i ${CRM_notify_rc} \ -+ PACEMAKER-MIB::pacemakerNotificationTargetReturnCode i ${CRM_notify_target_rc} && exit 0 || exit 1 -+fi -+ -+exit 0 -diff --git a/extra/resources/ping b/extra/resources/ping -index dd1662e..b9a69b8 100755 ---- a/extra/resources/ping -+++ b/extra/resources/ping -@@ -112,6 +112,15 @@ A catch all for any other options that need to be passed to ping. - - - -+ -+ -+Resource is failed if the score is less than failure_score. -+Default never fails. -+ -+failure_score -+ -+ -+ - - - Enables to use default attrd_updater verbose logging on every call. -@@ -172,7 +181,10 @@ ping_stop() { - ping_monitor() { - if [ -f ${OCF_RESKEY_pidfile} ]; then - ping_update -- return $OCF_SUCCESS -+ if [ $? 
-eq 0 ]; then -+ return $OCF_SUCCESS -+ fi -+ return $OCF_ERR_GENERIC - fi - return $OCF_NOT_RUNNING - } -@@ -277,7 +289,15 @@ ping_update() { - 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; - *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; - esac -- return $rc -+ if [ $rc -ne 0 ]; then -+ return $rc -+ fi -+ -+ if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then -+ ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" -+ return 1 -+ fi -+ return 0 - } - - : ${OCF_RESKEY_name:="pingd"} -@@ -285,6 +305,7 @@ ping_update() { - : ${OCF_RESKEY_attempts:="3"} - : ${OCF_RESKEY_multiplier:="1"} - : ${OCF_RESKEY_debug:="false"} -+: ${OCF_RESKEY_failure_score:="0"} - - : ${OCF_RESKEY_CRM_meta_timeout:="20000"} - : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} -diff --git a/fencing/Makefile.am b/fencing/Makefile.am -index 03d1a32..1fcd706 100644 ---- a/fencing/Makefile.am -+++ b/fencing/Makefile.am -@@ -5,19 +5,19 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - # - MAINTAINERCLEANFILES = Makefile.in - --SUBDIRS = -+SUBDIRS = - - ## binary progs - testdir = $(datadir)/$(PACKAGE)/tests/fencing -@@ -31,8 +31,8 @@ sbin_SCRIPTS = fence_legacy fence_pcmk - - noinst_HEADERS = internal.h - --man7_MANS = --man8_MANS = -+man7_MANS = -+man8_MANS = - - if BUILD_XML_HELP - man7_MANS += stonithd.7 -@@ -44,10 +44,11 @@ stonithd.7: stonithd.xml - endif - - if BUILD_HELP --man8_MANS += $(sbin_PROGRAMS:%=%.8) $(sbin_SCRIPTS:%=%.8) -+man8_MANS += $(sbin_PROGRAMS:%=%.8) $(sbin_SCRIPTS:%=%.8) - %.8: % - echo Creating $@ -- chmod a+x $< -+ chmod a+x $(top_builddir)/fencing/$< -+ $(top_builddir)/fencing/$< --help - $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/fencing/$< - endif - -@@ -67,6 +68,7 @@ stonith_admin_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ - $(CRYPTOLIB) $(CLUSTERLIBS) - -+stonithd_CFLAGS = -I$(top_srcdir)/pengine - stonithd_SOURCES = main.c commands.c remote.c - if BUILD_STONITH_CONFIG - BUILT_SOURCES = standalone_config.h -@@ -74,7 +76,7 @@ BUILT_SOURCES = standalone_config.h - stonithd_SOURCES += standalone_config.c config.y config.l - stonithd_AM_LFLAGS = -o$(LEX_OUTPUT_ROOT).c - --# lex/yacc issues: -+# lex/yacc issues: - - endif - stonithd_YFLAGS = -d -@@ -82,6 +84,8 @@ stonithd_YFLAGS = -d - stonithd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ -+ $(top_builddir)/lib/pengine/libpe_status.la \ -+ $(top_builddir)/pengine/libpengine.la \ - $(CRYPTOLIB) $(CLUSTERLIBS) - - CFLAGS = $(CFLAGS_COPY:-Werror=) -diff --git a/fencing/admin.c b/fencing/admin.c -index 52780b4..c029861 100644 ---- a/fencing/admin.c -+++ b/fencing/admin.c -@@ -464,20 +464,21 @@ 
main(int argc, char **argv) - } - - if (hp->state == st_failed) { -- printf("%s failed to %s node %s on behalf of %s at %s\n", -- hp->delegate ? hp->delegate : "We", action_s, hp->target, hp->origin, -- ctime(&complete)); -+ printf("%s failed to %s node %s on behalf of %s from %s at %s\n", -+ hp->delegate ? hp->delegate : "We", action_s, hp->target, -+ hp->client, hp->origin, ctime(&complete)); - - } else if (hp->state == st_done && hp->delegate) { -- printf("%s was able to %s node %s on behalf of %s at %s\n", -- hp->delegate, action_s, hp->target, hp->origin, ctime(&complete)); -+ printf("%s was able to %s node %s on behalf of %s from %s at %s\n", -+ hp->delegate, action_s, hp->target, -+ hp->client, hp->origin, ctime(&complete)); - - } else if (hp->state == st_done) { -- printf("We were able to %s node %s on behalf of %s at %s\n", -- action_s, hp->target, hp->origin, ctime(&complete)); -+ printf("We were able to %s node %s on behalf of %s from %s at %s\n", -+ action_s, hp->target, hp->client, hp->origin, ctime(&complete)); - } else { -- printf("%s wishes to %s node %s - %d %d\n", -- hp->origin, action_s, hp->target, hp->state, hp->completed); -+ printf("%s at %s wishes to %s node %s - %d %d\n", -+ hp->client, hp->origin, action_s, hp->target, hp->state, hp->completed); - } - - free(action_s); -@@ -498,9 +499,9 @@ main(int argc, char **argv) - action_s = strdup(latest->action); - } - -- printf("%s was able to %s node %s on behalf of %s at %s\n", -+ printf("%s was able to %s node %s on behalf of %s from %s at %s\n", - latest->delegate ? latest->delegate : "We", action_s, latest->target, -- latest->origin, ctime(&complete)); -+ latest->client, latest->origin, ctime(&complete)); - - free(action_s); - } -diff --git a/fencing/commands.c b/fencing/commands.c -index 3ebbba3..698dd94 100644 ---- a/fencing/commands.c -+++ b/fencing/commands.c -@@ -42,6 +42,10 @@ - #include - #include - -+#if SUPPORT_CIBSECRETS -+# include -+#endif -+ - #include - - GHashTable *device_list = NULL; -@@ -144,6 +148,7 @@ free_async_command(async_command_t * cmd) - free(cmd->client); - free(cmd->client_name); - free(cmd->origin); -+ free(cmd->mode); - free(cmd->op); - free(cmd); - } -@@ -182,26 +187,6 @@ create_async_command(xmlNode * msg) - return cmd; - } - --static int --stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) --{ -- async_command_t *cmd = create_async_command(msg); -- xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); -- -- if (cmd == NULL) { -- return -EINVAL; -- } -- -- cmd->device = strdup("manual_ack"); -- cmd->remote_op_id = strdup(op->id); -- -- crm_notice("Injecting manual confirmation that %s is safely off/down", -- crm_element_value(dev, F_STONITH_TARGET)); -- -- cmd->done_cb(0, 0, NULL, cmd); -- return pcmk_ok; --} -- - static gboolean - stonith_device_execute(stonith_device_t * device) - { -@@ -229,6 +214,23 @@ stonith_device_execute(stonith_device_t * device) - return TRUE; - } - -+#if SUPPORT_CIBSECRETS -+ if (replace_secret_params(device->id, device->params) < 0) { -+ /* replacing secrets failed! */ -+ if (safe_str_eq(cmd->action,"stop")) { -+ /* don't fail on stop! 
*/ -+ crm_info("proceeding with the stop operation for %s", device->id); -+ -+ } else { -+ crm_err("failed to get secrets for %s, " -+ "considering resource not configured", device->id); -+ exec_rc = PCMK_EXECRA_NOT_CONFIGURED; -+ cmd->done_cb(0, exec_rc, NULL, cmd); -+ return TRUE; -+ } -+ } -+#endif -+ - action = stonith_action_create(device->agent, - cmd->action, - cmd->victim, -@@ -386,15 +388,12 @@ build_port_aliases(const char *hostmap, GListPtr * targets) - } - - static void --parse_host_line(const char *line, GListPtr * output) -+parse_host_line(const char *line, int max, GListPtr * output) - { - int lpc = 0; -- int max = 0; - int last = 0; - -- if (line) { -- max = strlen(line); -- } else { -+ if (max <= 0) { - return; - } - -@@ -404,7 +403,7 @@ parse_host_line(const char *line, GListPtr * output) - return; - } - -- crm_trace("Processing: %s", line); -+ crm_trace("Processing %d bytes: [%s]", max, line); - /* Skip initial whitespace */ - for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { - last = lpc + 1; -@@ -458,16 +457,24 @@ parse_host_list(const char *hosts) - for (lpc = 0; lpc <= max; lpc++) { - if (hosts[lpc] == '\n' || hosts[lpc] == 0) { - char *line = NULL; -+ int len = lpc - last; -+ -+ if(len > 1) { -+ line = malloc(1 + len); -+ } - -- line = calloc(1, 2 + lpc - last); -- snprintf(line, 1 + lpc - last, "%s", hosts + last); -- parse_host_line(line, &output); -- free(line); -+ if(line) { -+ snprintf(line, 1 + len, "%s", hosts + last); -+ line[len] = 0; /* Because it might be '\n' */ -+ parse_host_line(line, len, &output); -+ free(line); -+ } - - last = lpc + 1; - } - } - -+ crm_trace("Parsed %d entries from '%s'", g_list_length(output), hosts); - return output; - } - -@@ -503,11 +510,14 @@ is_nodeid_required(xmlNode * xml) - if (!xml) { - return FALSE; - } -+ - xpath = xpath_search(xml, "//parameter[@name='nodeid']"); -- if (!xpath || xpath->nodesetval->nodeNr <= 0) { -+ if (numXpathResults(xpath) <= 0) { -+ freeXpathObject(xpath); - return FALSE; - } - -+ freeXpathObject(xpath); - return TRUE; - } - -@@ -524,13 +534,13 @@ get_on_target_actions(xmlNode * xml) - } - - xpath = xpath_search(xml, "//action"); -+ max = numXpathResults(xpath); - -- if (!xpath || !xpath->nodesetval) { -+ if (max <= 0) { -+ freeXpathObject(xpath); - return NULL; - } - -- max = xpath->nodesetval->nodeNr; -- - actions = calloc(1, 512); - - for (lpc = 0; lpc < max; lpc++) { -@@ -551,6 +561,8 @@ get_on_target_actions(xmlNode * xml) - } - } - -+ freeXpathObject(xpath); -+ - if (!strlen(actions)) { - free(actions); - actions = NULL; -@@ -660,6 +672,8 @@ string_in_list(GListPtr list, const char *item) - - if (safe_str_eq(item, value)) { - return TRUE; -+ } else { -+ crm_trace("%d: '%s' != '%s'", lpc, item, value); - } - } - return FALSE; -@@ -673,6 +687,8 @@ status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; - gboolean can = FALSE; - -+ free_async_command(cmd); -+ - if (!dev) { - search_devices_record_result(search, NULL, FALSE); - return; -@@ -702,6 +718,8 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - stonith_device_t *dev = cmd->device ? 
g_hash_table_lookup(device_list, cmd->device) : NULL; - gboolean can_fence = FALSE; - -+ free_async_command(cmd); -+ - /* Host/alias must be in the list output to be eligable to be fenced - * - * Will cause problems if down'd nodes aren't listed or (for virtual nodes) -@@ -1488,7 +1506,7 @@ static gint - sort_device_priority(gconstpointer a, gconstpointer b) - { - const stonith_device_t *dev_a = a; -- const stonith_device_t *dev_b = a; -+ const stonith_device_t *dev_b = b; - - if (dev_a->priority > dev_b->priority) { - return -1; -@@ -1526,7 +1544,7 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data) - } - - /* no device found! */ -- stonith_send_async_reply(cmd, NULL, -EHOSTUNREACH, 0); -+ stonith_send_async_reply(cmd, NULL, -ENODEV, 0); - - free_async_command(cmd); - g_list_free_full(devices, free); -@@ -1536,7 +1554,6 @@ static int - stonith_fence(xmlNode * msg) - { - const char *device_id = NULL; -- int rc = -EHOSTUNREACH; - stonith_device_t *device = NULL; - async_command_t *cmd = create_async_command(msg); - xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); -@@ -1550,10 +1567,10 @@ stonith_fence(xmlNode * msg) - device = g_hash_table_lookup(device_list, device_id); - if (device == NULL) { - crm_err("Requested device '%s' is not available", device_id); -- } else { -- schedule_stonith_command(cmd, device); -- rc = -EINPROGRESS; -+ return -ENODEV; - } -+ schedule_stonith_command(cmd, device); -+ - } else { - const char *host = crm_element_value(dev, F_STONITH_TARGET); - -@@ -1567,10 +1584,9 @@ stonith_fence(xmlNode * msg) - } - get_capable_devices(host, cmd->action, cmd->default_timeout, cmd, - stonith_fence_get_devices_cb); -- rc = -EINPROGRESS; - } - -- return rc; -+ return -EINPROGRESS; - } - - xmlNode * -@@ -1703,7 +1719,7 @@ stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, - if (remote_peer) { - send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); - } else { -- do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote_peer != NULL); -+ do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL); - } - } - -@@ -1798,8 +1814,12 @@ handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * req - rc = stonith_fence(request); - - } else if (call_options & st_opt_manual_ack) { -- remote_fencing_op_t *rop = initiate_remote_stonith_op(client, request, TRUE); -+ remote_fencing_op_t *rop = NULL; -+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); -+ const char *target = crm_element_value(dev, F_STONITH_TARGET); - -+ crm_notice("Recieved manual confirmation that %s is fenced", target); -+ rop = initiate_remote_stonith_op(client, request, TRUE); - rc = stonith_manual_ack(request, rop); - - } else { -@@ -1927,6 +1947,13 @@ handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * req - * If in progress, a reply will happen async after the request - * processing is finished */ - if (rc != -EINPROGRESS) { -+ crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, -+ id, is_set(call_options, st_opt_sync_call), call_options, -+ crm_element_value(request, F_STONITH_CALLOPTS)); -+ -+ if (is_set(call_options, st_opt_sync_call)) { -+ CRM_ASSERT(client == NULL || client->request_id == id); -+ } - reply = stonith_construct_reply(request, output, data, rc); - stonith_send_reply(reply, call_options, remote_peer, client_id); - } -@@ -1979,8 +2006,8 @@ 
stonith_command(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * re - } - - crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); -- crm_debug("Processing %s%s from %s (%16x)", op, is_reply ? " reply" : "", -- client ? client->name : remote_peer, call_options); -+ crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "", -+ id, client ? client->name : remote_peer, call_options); - - if (is_set(call_options, st_opt_sync_call)) { - CRM_ASSERT(client == NULL || client->request_id == id); -diff --git a/fencing/fence_false b/fencing/fence_false -index 3e41751..27e0605 100755 ---- a/fencing/fence_false -+++ b/fencing/fence_false -@@ -53,7 +53,7 @@ def set_power_status(conn, options): - - def main(): - global all_opt -- device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug", "action", "port", -+ device_opt = [ "help", "version", "agent", "verbose", "debug", "action", "port", - "no_password", "power_wait", "power_timeout", "random_sleep_range"] - - all_opt["random_sleep_range"] = { -@@ -77,7 +77,7 @@ def main(): - if options.has_key("-R"): - val = int(options["-R"]) - ran = random.randint(1, val) -- print "random sleep for %d seconds" % ran -+ sys.stderr.write("random sleep for %d seconds" % ran) - time.sleep(ran) - - if options.has_key("-o") and (options["-o"] == "monitor"): -diff --git a/fencing/fence_true b/fencing/fence_true -index d94e335..3968158 100755 ---- a/fencing/fence_true -+++ b/fencing/fence_true -@@ -48,7 +48,7 @@ def set_power_status(conn, options): - - def main(): - global all_opt -- device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug", "action", "port", -+ device_opt = [ "help", "version", "agent", "verbose", "debug", "action", "port", - "no_password", "power_wait", "power_timeout", "random_sleep_range"] - - all_opt["random_sleep_range"] = { -@@ -72,7 +72,7 @@ def main(): - if options.has_key("-R"): - val = int(options["-R"]) - ran = random.randint(1, val) -- print "random sleep for %d seconds" % ran -+ sys.stderr.write("random sleep for %d seconds" % ran) - time.sleep(ran) - - if options.has_key("-o") and (options["-o"] == "monitor"): -diff --git a/fencing/internal.h b/fencing/internal.h -index de23e44..737fc18 100644 ---- a/fencing/internal.h -+++ b/fencing/internal.h -@@ -93,6 +93,8 @@ typedef struct remote_fencing_op_s { - char *originator; - /*! The local client id that initiated the fencing request */ - char *client_id; -+ /*! The client's call_id that initiated the fencing request */ -+ int client_callid; - /*! The name of client that initiated the fencing request */ - char *client_name; - /*! 
List of the received query results for all the nodes in the cpg group */ -@@ -158,6 +160,8 @@ extern void free_topology_entry(gpointer data); - - bool fencing_peer_active(crm_node_t *peer); - -+int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op); -+ - extern char *stonith_our_uname; - extern gboolean stand_alone; - extern GHashTable *device_list; -diff --git a/fencing/main.c b/fencing/main.c -index 728f3dd..9195aca 100644 ---- a/fencing/main.c -+++ b/fencing/main.c -@@ -42,12 +42,15 @@ - #include - - #include -+#include -+#include - - #include - - #include - - char *stonith_our_uname = NULL; -+char *stonith_our_uuid = NULL; - - GMainLoop *mainloop = NULL; - -@@ -56,6 +59,10 @@ gboolean no_cib_connect = FALSE; - gboolean stonith_shutdown_flag = FALSE; - - qb_ipcs_service_t *ipcs = NULL; -+xmlNode *local_cib = NULL; -+ -+static cib_t *cib_api = NULL; -+static void *cib_library = NULL; - - static void stonith_shutdown(int nsig); - static void stonith_cleanup(void); -@@ -89,7 +96,10 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) - xmlNode *request = NULL; - crm_client_t *c = crm_client_get(qbc); - -- CRM_CHECK(c != NULL, goto cleanup); -+ if (c == NULL) { -+ crm_info("Invalid client: %p", qbc); -+ return 0; -+ } - - request = crm_ipcs_recv(c, data, size, &id, &flags); - if (request == NULL) { -@@ -106,7 +116,9 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) - c->name = g_strdup_printf("%s.%u", value, c->pid); - } - -+ crm_trace("Flags %u for command %u from %s", flags, id, crm_client_name(c)); - if (flags & crm_ipc_client_response) { -+ crm_trace("Need response"); - CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ - c->request_id = id; /* Reply only to the last one */ - } -@@ -118,11 +130,6 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) - crm_log_xml_trace(request, "Client[inbound]"); - stonith_command(c, id, flags, request, NULL); - -- cleanup: -- if (c == NULL) { -- crm_log_xml_notice(request, "Invalid client"); -- } -- - free_xml(request); - return 0; - } -@@ -432,11 +439,7 @@ topology_register_helper(const char *node, int level, stonith_key_value_t * devi - static void - remove_cib_device(xmlXPathObjectPtr xpathObj) - { -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; -@@ -460,11 +463,7 @@ remove_cib_device(xmlXPathObjectPtr xpathObj) - static void - remove_fencing_topology(xmlXPathObjectPtr xpathObj) - { -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); -@@ -492,67 +491,9 @@ remove_fencing_topology(xmlXPathObjectPtr xpathObj) - } - - static void --register_cib_device(xmlXPathObjectPtr xpathObj, gboolean force) --{ -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -- -- for (lpc = 0; lpc < max; lpc++) { -- const char *rsc_id = NULL; -- const char *agent = NULL; -- const char *standard = NULL; -- const char *provider = NULL; -- stonith_key_value_t *params = NULL; -- xmlNode *match = getXpathResult(xpathObj, lpc); -- xmlNode *attributes; -- xmlNode *attr; -- xmlNode *data; -- -- CRM_CHECK(match != NULL, continue); 
-- -- standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); -- agent = crm_element_value(match, XML_EXPR_ATTR_TYPE); -- provider = crm_element_value(match, XML_AGENT_ATTR_PROVIDER); -- -- if (safe_str_neq(standard, "stonith") || !agent) { -- continue; -- } -- -- rsc_id = crm_element_value(match, XML_ATTR_ID); -- attributes = find_xml_node(match, XML_TAG_ATTR_SETS, FALSE); -- -- for (attr = __xml_first_child(attributes); attr; attr = __xml_next(attr)) { -- const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); -- const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); -- -- if (!name || !value) { -- continue; -- } -- params = stonith_key_value_add(params, name, value); -- } -- -- data = create_device_registration_xml(rsc_id, provider, agent, params); -- -- if (force == FALSE && crm_element_value(match, XML_DIFF_MARKER)) { -- stonith_device_register(data, NULL, TRUE); -- } else { -- stonith_device_remove(rsc_id, TRUE); -- stonith_device_register(data, NULL, TRUE); -- } -- } --} -- --static void - register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) - { -- int max = 0, lpc = 0; -- -- if (xpathObj && xpathObj->nodesetval) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - int index = 0; -@@ -622,50 +563,158 @@ fencing_topology_init(xmlNode * msg) - - register_fencing_topology(xpathObj, TRUE); - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ freeXpathObject(xpathObj); -+} -+ -+#define rsc_name(x) x->clone_name?x->clone_name:x->id -+ -+static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) -+{ -+ node_t *node = NULL; -+ const char *value = NULL; -+ const char *rclass = NULL; -+ -+ if(rsc->children) { -+ GListPtr gIter = NULL; -+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { -+ cib_device_update(gIter->data, data_set); -+ if(rsc->variant == pe_clone || rsc->variant == pe_master) { -+ crm_trace("Only adding one copy of the clone %s", rsc->id); -+ break; -+ } -+ } -+ return; -+ } -+ -+ if(g_hash_table_lookup(device_list, rsc_name(rsc))) { -+ stonith_device_remove(rsc_name(rsc), TRUE); -+ } -+ -+ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -+ if(safe_str_neq(rclass, "stonith")) { -+ return; -+ } -+ -+ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); -+ if(value && strcmp(RSC_STOPPED, value) == 0) { -+ crm_info("Device %s has been disabled", rsc->id); -+ return; -+ -+ } else if(stonith_our_uname) { -+ GHashTableIter iter; -+ -+ g_hash_table_iter_init(&iter, rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -+ if(node && strcmp(node->details->uname, stonith_our_uname) == 0) { -+ break; -+ } -+ node = NULL; -+ } -+ } -+ -+ if(node == NULL) { -+ GHashTableIter iter; -+ -+ crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); -+ g_hash_table_iter_init(&iter, rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -+ crm_trace("Available: %s = %d", node->details->uname, node->weight); -+ } -+ -+ return; -+ -+ } else if(node->weight < 0) { -+ crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score2char(node->weight)); -+ return; -+ -+ } else { -+ xmlNode *data; -+ GHashTableIter gIter; -+ stonith_key_value_t *params = NULL; -+ -+ const char *name = NULL; -+ const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); -+ const char *provider = 
crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -+ -+ crm_info("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); -+ get_rsc_attributes(rsc->parameters, rsc, node, data_set); -+ -+ g_hash_table_iter_init(&gIter, rsc->parameters); -+ while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { -+ if (!name || !value) { -+ continue; -+ } -+ params = stonith_key_value_add(params, name, value); -+ crm_trace(" %s=%s", name, value); -+ } -+ -+ data = create_device_registration_xml(rsc_name(rsc), provider, agent, params); -+ stonith_device_register(data, NULL, TRUE); -+ -+ stonith_key_value_freeall(params, 1, 1); -+ free_xml(data); - } - } - -+extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now); -+extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); -+ - static void --cib_stonith_devices_init(xmlNode * msg) -+cib_devices_update(void) - { -- xmlXPathObjectPtr xpathObj = NULL; -- const char *xpath = "//" XML_CIB_TAG_RESOURCE; -+ GListPtr gIter = NULL; -+ pe_working_set_t data_set; - -- crm_trace("Pushing in stonith devices"); -+ set_working_set_defaults(&data_set); -+ data_set.input = local_cib; -+ data_set.now = crm_time_new(NULL); -+ data_set.flags |= pe_flag_quick_location; -+ data_set.localhost = stonith_our_uname; - -- /* Grab everything */ -- xpathObj = xpath_search(msg, xpath); -+ cluster_status(&data_set); -+ do_calculations(&data_set, NULL, NULL); - -- if (xpathObj) { -- register_cib_device(xpathObj, TRUE); -- xmlXPathFreeObject(xpathObj); -+ for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { -+ cib_device_update(gIter->data, &data_set); - } -+ data_set.input = NULL; /* Wasn't a copy */ -+ cleanup_alloc_calculations(&data_set); - } - - static void - update_cib_stonith_devices(const char *event, xmlNode * msg) - { -- -- const char *xpath_add = -- "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE; -- const char *xpath_del = -- "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE; -+ const char *reason = "none"; -+ gboolean needs_update = FALSE; - xmlXPathObjectPtr xpath_obj = NULL; - -+ /* process new constraints */ -+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); -+ if (numXpathResults(xpath_obj) > 0) { -+ /* Safest and simplest to always recompute */ -+ needs_update = TRUE; -+ reason = "new location constraint"; -+ } -+ freeXpathObject(xpath_obj); -+ - /* process deletions */ -- xpath_obj = xpath_search(msg, xpath_del); -- if (xpath_obj) { -+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); -+ if (numXpathResults(xpath_obj) > 0) { - remove_cib_device(xpath_obj); -- xmlXPathFreeObject(xpath_obj); - } -+ freeXpathObject(xpath_obj); - - /* process additions */ -- xpath_obj = xpath_search(msg, xpath_add); -- if (xpath_obj) { -- register_cib_device(xpath_obj, FALSE); -- xmlXPathFreeObject(xpath_obj); -+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); -+ if (numXpathResults(xpath_obj) > 0) { -+ needs_update = TRUE; -+ reason = "new resource"; -+ } -+ freeXpathObject(xpath_obj); -+ -+ if(needs_update) { -+ crm_info("Updating device list from the cib: %s", reason); -+ cib_devices_update(); - } - } - -@@ -680,25 +729,61 @@ update_fencing_topology(const char *event, xmlNode * msg) - xpathObj 
= xpath_search(msg, xpath); - - remove_fencing_topology(xpathObj); -- -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - - /* Process additions and changes */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - register_fencing_topology(xpathObj, FALSE); -- -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - } -+static bool have_cib_devices = FALSE; - - static void - update_cib_cache_cb(const char *event, xmlNode * msg) - { -+ int rc = pcmk_ok; -+ static int (*cib_apply_patch_event)(xmlNode *, xmlNode *, xmlNode **, int) = NULL; -+ -+ if(!have_cib_devices) { -+ crm_trace("Skipping updates until we get a full dump"); -+ return; -+ } -+ if (cib_apply_patch_event == NULL) { -+ cib_apply_patch_event = find_library_function(&cib_library, CIB_LIBRARY, "cib_apply_patch_event", TRUE); -+ } -+ -+ CRM_ASSERT(cib_apply_patch_event); -+ -+ /* Maintain a local copy of the CIB so that we have full access to the device definitions and location constraints */ -+ if (local_cib != NULL) { -+ xmlNode *cib_last = local_cib; -+ -+ local_cib = NULL; -+ rc = (*cib_apply_patch_event)(msg, cib_last, &local_cib, LOG_DEBUG); -+ free_xml(cib_last); -+ -+ switch (rc) { -+ case -pcmk_err_diff_resync: -+ case -pcmk_err_diff_failed: -+ crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); -+ case pcmk_ok: -+ break; -+ default: -+ crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); -+ } -+ } -+ -+ if (local_cib == NULL) { -+ crm_trace("Re-requesting the full cib after diff failure"); -+ rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); -+ if(rc != pcmk_ok) { -+ crm_err("Couldnt retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); -+ } -+ CRM_ASSERT(local_cib != NULL); -+ } -+ - update_fencing_topology(event, msg); - update_cib_stonith_devices(event, msg); - } -@@ -706,8 +791,12 @@ update_cib_cache_cb(const char *event, xmlNode * msg) - static void - init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) - { -+ crm_info("Updating device list from the cib: init"); -+ have_cib_devices = TRUE; -+ local_cib = copy_xml(output); -+ - fencing_topology_init(msg); -- cib_stonith_devices_init(msg); -+ cib_devices_update(); - } - - static void -@@ -719,12 +808,10 @@ stonith_shutdown(int nsig) - g_main_quit(mainloop); - } else { - stonith_cleanup(); -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - } - --cib_t *cib = NULL; -- - static void - cib_connection_destroy(gpointer user_data) - { -@@ -734,8 +821,8 @@ cib_connection_destroy(gpointer user_data) - } else { - crm_notice("Connection to the CIB terminated. 
Shutting down."); - } -- if (cib) { -- cib->cmds->signoff(cib); -+ if (cib_api) { -+ cib_api->cmds->signoff(cib_api); - } - stonith_shutdown(0); - } -@@ -743,8 +830,8 @@ cib_connection_destroy(gpointer user_data) - static void - stonith_cleanup(void) - { -- if (cib) { -- cib->cmds->signoff(cib); -+ if (cib_api) { -+ cib_api->cmds->signoff(cib_api); - } - - if (ipcs) { -@@ -753,6 +840,7 @@ stonith_cleanup(void) - crm_peer_destroy(); - crm_client_cleanup(); - free(stonith_our_uname); -+ free_xml(local_cib); - } - - /* *INDENT-OFF* */ -@@ -770,47 +858,39 @@ static struct crm_option long_options[] = { - static void - setup_cib(void) - { -- static void *cib_library = NULL; -- static cib_t *(*cib_new_fn) (void) = NULL; -- static const char *(*cib_err_fn) (int) = NULL; -- - int rc, retries = 0; -+ static cib_t *(*cib_new_fn) (void) = NULL; - -- if (cib_library == NULL) { -- cib_library = dlopen(CIB_LIBRARY, RTLD_LAZY); -- } -- if (cib_library && cib_new_fn == NULL) { -- cib_new_fn = dlsym(cib_library, "cib_new"); -- } -- if (cib_library && cib_err_fn == NULL) { -- cib_err_fn = dlsym(cib_library, "pcmk_strerror"); -+ if (cib_new_fn == NULL) { -+ cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE); - } -+ - if (cib_new_fn != NULL) { -- cib = (*cib_new_fn) (); -+ cib_api = (*cib_new_fn) (); - } - -- if (cib == NULL) { -+ if (cib_api == NULL) { - crm_err("No connection to the CIB"); - return; - } - - do { - sleep(retries); -- rc = cib->cmds->signon(cib, CRM_SYSTEM_CRMD, cib_command); -+ rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_CRMD, cib_command); - } while (rc == -ENOTCONN && ++retries < 5); - - if (rc != pcmk_ok) { -- crm_err("Could not connect to the CIB service: %d %p", rc, cib_err_fn); -+ crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc); - - } else if (pcmk_ok != -- cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { -+ cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { - crm_err("Could not set CIB notification callback"); - - } else { -- rc = cib->cmds->query(cib, NULL, NULL, cib_scope_local); -- cib->cmds->register_callback(cib, rc, 120, FALSE, NULL, "init_cib_cache_cb", -- init_cib_cache_cb); -- cib->cmds->set_connection_dnotify(cib, cib_connection_destroy); -+ rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); -+ cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", -+ init_cib_cache_cb); -+ cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); - crm_notice("Watching for stonith topology changes"); - } - } -@@ -904,7 +984,7 @@ main(int argc, char **argv) - - printf(" \n"); - printf -- (" The priority of the stonith resource. The lower the number, the higher the priority.\n"); -+ (" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n"); - printf(" \n"); - printf(" \n"); - -@@ -1011,11 +1091,10 @@ main(int argc, char **argv) - - if (crm_cluster_connect(&cluster) == FALSE) { - crm_crit("Cannot sign in to the cluster... 
terminating"); -- crm_exit(100); -- } else { -- stonith_our_uname = cluster.uname; -+ crm_exit(DAEMON_RESPAWN_STOP); - } - stonith_our_uname = cluster.uname; -+ stonith_our_uuid = cluster.uuid; - - if (no_cib_connect == FALSE) { - setup_cib(); -@@ -1031,11 +1110,8 @@ main(int argc, char **argv) - - topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); - -- ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, &ipc_callbacks); -- if (ipcs == NULL) { -- crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -- } -+ stonith_ipc_server_init(&ipcs, &ipc_callbacks); -+ - #if SUPPORT_STONITH_CONFIG - if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { - standalone_cfg_commit(); -diff --git a/fencing/regression.py.in b/fencing/regression.py.in -index 851ae17..6b203a2 100644 ---- a/fencing/regression.py.in -+++ b/fencing/regression.py.in -@@ -618,7 +618,7 @@ class Tests: - test = self.new_test("%s_monitor_timeout" % test_type["prefix"], - "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") -- test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 23) -+ test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 195) - test.add_stonith_log_pattern("Attempt 2 to execute") - - # Verify monitor occurs for duration of timeout period on failure, but stops at max retries -@@ -626,7 +626,7 @@ class Tests: - test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], - "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") -- test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15", 23) -+ test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15",195) - test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times") - - # simple register test -@@ -753,7 +753,7 @@ class Tests: - test = self.new_test("cpg_unfence_on_target_2", - "Verify failure unfencing with on_target = true", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) -- test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 194) -+ test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 143) - test.add_stonith_log_pattern("(on) to be executed on the target node") - - -diff --git a/fencing/remote.c b/fencing/remote.c -index 61ea0c2..656d9c9 100644 ---- a/fencing/remote.c -+++ b/fencing/remote.c -@@ -76,6 +76,7 @@ free_remote_query(gpointer data) - - crm_trace("Free'ing query result from %s", query->host); - free(query->host); -+ g_list_free_full(query->device_list, free); - g_hash_table_destroy(query->custom_action_timeouts); - g_hash_table_destroy(query->verified_devices); - free(query); -@@ -487,6 +488,24 @@ static uint32_t fencing_active_peers(void) - return count; - } - -+int -+stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) -+{ -+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); -+ -+ op->state = st_done; -+ op->completed = time(NULL); -+ op->delegate = strdup("a human"); -+ -+ crm_notice("Injecting manual confirmation that %s is safely off/down", -+ crm_element_value(dev, F_STONITH_TARGET)); -+ -+ remote_op_done(op, msg, pcmk_ok, FALSE); -+ -+ /* 
Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */ -+ return -EINPROGRESS; -+} -+ - /*! - * \internal - * \brief Create a new remote stonith op -@@ -533,11 +552,13 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) - - g_hash_table_replace(remote_op_list, op->id, op); - CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL); -+ crm_trace("Created %s", op->id); - - op->state = st_query; - op->replies_expected = fencing_active_peers(); - op->action = crm_element_value_copy(dev, F_STONITH_ACTION); - op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); -+ op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ - op->created = time(NULL); - - if (op->originator == NULL) { -@@ -555,6 +576,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) - op->target = crm_element_value_copy(dev, F_STONITH_TARGET); - op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ - crm_element_value_int(request, F_STONITH_CALLOPTS, (int *)&(op->call_options)); -+ crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid)); - - crm_trace("%s new stonith op: %s - %s of %s for %s", - (peer -@@ -584,6 +606,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) - remote_fencing_op_t * - initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack) - { -+ int query_timeout = 0; - xmlNode *query = NULL; - const char *client_id = NULL; - remote_fencing_op_t *op = NULL; -@@ -597,7 +620,12 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma - CRM_LOG_ASSERT(client_id != NULL); - op = create_remote_stonith_op(client_id, request, FALSE); - op->owner = TRUE; -- -+ if (manual_ack) { -+ crm_notice("Initiating manual confirmation for %s: %s", -+ op->target, op->id); -+ return op; -+ } -+ - CRM_CHECK(op->action, return NULL); - - if (stonith_topology_next(op) != pcmk_ok) { -@@ -621,16 +649,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma - op->id, op->state); - } - -- query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0); -- -- if (!manual_ack) { -- int query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; -- -- op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); -- -- } else { -- crm_xml_add(query, F_STONITH_DEVICE, "manual_ack"); -- } -+ query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0); - - crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); - crm_xml_add(query, F_STONITH_TARGET, op->target); -@@ -641,8 +660,11 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma - crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); - - send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); -- - free_xml(query); -+ -+ query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; -+ op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); -+ - return op; - } - -@@ -854,7 +876,7 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) - } - - /* The client is connected to another node, relay this update to them */ -- update = stonith_create_op(0, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); -+ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); - crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); - crm_xml_add(update, F_STONITH_CLIENTID, client_id); 
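
Note: initiate_remote_stonith_op() above now broadcasts the query first and then always arms the query timer; only the manual-ack path returns early. The timer itself is the stock GLib one-shot pattern. A minimal, self-contained sketch of that pattern, with an illustrative multiplier value and callback name (the real remote_op_query_timeout() takes the op as its user data):

    #include <glib.h>
    #include <stdio.h>

    /* Illustrative value; the daemon's real multiplier lives in the fencing code. */
    #define TIMEOUT_MULTIPLY_FACTOR 1.2

    static gboolean
    query_timeout_cb(gpointer user_data)
    {
        printf("query window expired\n");
        g_main_loop_quit((GMainLoop *) user_data);
        return FALSE;               /* one-shot: FALSE removes the timer source */
    }

    int
    main(void)
    {
        GMainLoop *loop = g_main_loop_new(NULL, FALSE);
        int base_timeout = 2;       /* seconds, like op->base_timeout */
        guint ms = (guint) (1000 * base_timeout * TIMEOUT_MULTIPLY_FACTOR);

        g_timeout_add(ms, query_timeout_cb, loop);
        g_main_loop_run(loop);      /* returns once the callback quits the loop */
        g_main_loop_unref(loop);
        return 0;
    }
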
- crm_xml_add(update, F_STONITH_CALLID, call_id); -@@ -878,7 +900,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) - const char *device = NULL; - int timeout = op->base_timeout; - -- crm_trace("State for %s.%.8s: %d", op->target, op->client_name, op->id, op->state); -+ crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state); - if (peer == NULL && !is_set(op->call_options, st_opt_topology)) { - peer = stonith_choose_peer(op); - } -@@ -905,7 +927,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) - - if (peer) { - int timeout_one = 0; -- xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0); -+ xmlNode *query = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); - - crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); - crm_xml_add(query, F_STONITH_TARGET, op->target); -@@ -951,6 +973,13 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) - CRM_LOG_ASSERT(op->state < st_done); - remote_op_timeout(op); - -+ } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { -+ crm_info("None of the %d peers are capable of terminating %s for %s (%d)", -+ op->replies, op->target, op->client_name, op->state); -+ -+ op->state = st_failed; -+ remote_op_done(op, NULL, -EHOSTUNREACH, FALSE); -+ - } else if (device) { - crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s", - op->target, device, op->client_name, op->id); -@@ -1112,17 +1141,24 @@ process_remote_stonith_query(xmlNode * msg) - * query results. */ - if (op->state == st_query && all_topology_devices_found(op)) { - /* All the query results are in for the topology, start the fencing ops. */ -+ crm_trace("All topology devices found"); - call_remote_stonith(op, result); -+ -+ } else if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) { -+ crm_info("All topology queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies); -+ call_remote_stonith(op, NULL); - } -+ - } else if (op->state == st_query) { - /* We have a result for a non-topology fencing op that looks promising, - * go ahead and start fencing before query timeout */ - if (host_is_target == FALSE && g_hash_table_size(result->verified_devices)) { - /* we have a verified device living on a peer that is not the target */ -+ crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices)); - call_remote_stonith(op, result); - - } else if (safe_str_eq(op->action, "on")) { -- /* unfencing. */ -+ crm_trace("Unfencing %s", op->target); - call_remote_stonith(op, result); - - } else if(op->replies >= op->replies_expected || op->replies >= active) { -@@ -1182,15 +1218,15 @@ process_remote_stonith_exec(xmlNode * msg) - - if (op->devices && device && safe_str_neq(op->devices->data, device)) { - crm_err -- ("Received outdated reply for device %s to %s node %s. Operation already timed out at remote level.", -- device, op->action, op->target); -+ ("Received outdated reply for device %s (instead of %s) to %s node %s. Operation already timed out at remote level.", -+ device, op->devices->data, op->action, op->target); - return rc; - } - - if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) { - crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)", - op->action, op->target, op->client_name, op->id, op->originator, -- rc == pcmk_ok ? 
"passed" : "failed", rc); -+ pcmk_strerror(rc), rc); - if (rc == pcmk_ok) { - op->state = st_done; - } else { -@@ -1212,7 +1248,7 @@ process_remote_stonith_exec(xmlNode * msg) - - crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)", - device, op->target, op->client_name, op->originator, -- rc == pcmk_ok ? "passed" : "failed", rc); -+ pcmk_strerror(rc), rc); - - /* We own the op, and it is complete. broadcast the result to all nodes - * and notify our local clients. */ -@@ -1282,8 +1318,10 @@ stonith_fence_history(xmlNode * msg, xmlNode ** output) - } - } - } -- *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); - -+ crm_trace("Looking for operations on %s in %p", target, remote_op_list); -+ -+ *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); - if (remote_op_list) { - GHashTableIter iter; - remote_fencing_op_t *op = NULL; -@@ -1297,11 +1335,13 @@ stonith_fence_history(xmlNode * msg, xmlNode ** output) - } - - rc = 0; -+ crm_trace("Attaching op %s", op->id); - entry = create_xml_node(*output, STONITH_OP_EXEC); - crm_xml_add(entry, F_STONITH_TARGET, op->target); - crm_xml_add(entry, F_STONITH_ACTION, op->action); - crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); - crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); -+ crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name); - crm_xml_add_int(entry, F_STONITH_DATE, op->completed); - crm_xml_add_int(entry, F_STONITH_STATE, op->state); - } -diff --git a/fencing/test.c b/fencing/test.c -index 5ae83f5..afedba8 100644 ---- a/fencing/test.c -+++ b/fencing/test.c -@@ -55,8 +55,8 @@ typedef void (*mainloop_test_iteration_cb) (int check_event); - mainloop_iter++; \ - mainloop_set_trigger(trig); \ - } else { \ -- crm_info("FAILURE = %s async_callback %d", __PRETTY_FUNCTION__, callback_rc); \ -- crm_exit(-1); \ -+ crm_err("FAILURE = %s async_callback %d", __PRETTY_FUNCTION__, callback_rc); \ -+ crm_exit(pcmk_err_generic); \ - } \ - callback_rc = 0; \ - -@@ -114,7 +114,7 @@ static void - st_callback(stonith_t * st, stonith_event_t * e) - { - if (st->state == stonith_disconnected) { -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - crm_notice("Operation %s requested by %s %s for peer %s. 
%s reported: %s (ref=%s)", -@@ -161,12 +161,12 @@ passive_test(void) - dispatch_helper(500); \ - } \ - if (rc != expected_rc) { \ -- crm_info("FAILURE - expected rc %d != %d(%s) for cmd - %s\n", expected_rc, rc, pcmk_strerror(rc), str); \ -- crm_exit(-1); \ -+ crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s\n", expected_rc, rc, pcmk_strerror(rc), str); \ -+ crm_exit(pcmk_err_generic); \ - } else if (expected_notifications) { \ -- crm_info("FAILURE - expected %d notifications, got only %d for cmd - %s\n", \ -+ crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s\n", \ - num_notifications, num_notifications - expected_notifications, str); \ -- crm_exit(-1); \ -+ crm_exit(pcmk_err_generic); \ - } else { \ - if (verbose) { \ - crm_info("SUCCESS - %s: %d", str, rc); \ -@@ -251,7 +251,7 @@ run_standard_test(void) - "Status false_1_node1", 1, 0); - - single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0), -- "Fence unknown-host (expected failure)", 0, -113); -+ "Fence unknown-host (expected failure)", 0, -19); - - single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0), - "Fence false_1_node1", 1, 0); -@@ -427,7 +427,7 @@ test_async_fence_timeout(int check_event) - int rc = 0; - - if (check_event) { -- if (callback_rc != -ETIME) { -+ if (callback_rc != -EHOSTUNREACH) { - mainloop_test_done(FALSE); - } else { - mainloop_test_done(TRUE); -@@ -533,7 +533,7 @@ iterate_mainloop_tests(gboolean event_ready) - if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) { - /* all tests ran, everything passed */ - crm_info("ALL MAINLOOP TESTS PASSED!"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - callbacks[mainloop_iter] (event_ready); -@@ -560,7 +560,7 @@ test_shutdown(int nsig) - } - - if (rc) { -- crm_exit(-1); -+ crm_exit(pcmk_err_generic); - } - } - -diff --git a/include/crm/cib.h b/include/crm/cib.h -index 48e10af..7a694ac 100644 ---- a/include/crm/cib.h -+++ b/include/crm/cib.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
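
Note: the fencing/test.c cleanups above trade magic exit numbers for symbolic codes (ENOTCONN, pcmk_err_generic). Further up, setup_cib() in fencing/main.c was rewritten around the shared find_library_function() helper; the underlying mechanism is plain POSIX dlopen()/dlsym(), resolving cib_new() out of CIB_LIBRARY (bumped to libcib.so.3 in the cib.h hunk here). A sketch of that loading pattern, with a hypothetical find_function() stand-in and glibc's libm.so.6 as the example library; link with -ldl:

    #include <dlfcn.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for find_library_function(): open the library
     * once, cache the handle, then resolve symbols on demand. */
    static void *
    find_function(void **handle, const char *lib, const char *fn)
    {
        void *ret = NULL;

        if (*handle == NULL) {
            *handle = dlopen(lib, RTLD_LAZY);
        }
        if (*handle) {
            ret = dlsym(*handle, fn);
        }
        if (ret == NULL) {
            fprintf(stderr, "Could not resolve %s from %s: %s\n",
                    fn, lib, dlerror());
        }
        return ret;
    }

    int
    main(void)
    {
        static void *libm = NULL;
        double (*cosine)(double) = NULL;

        /* The dlopen(3) idiom for storing a data pointer into a function pointer */
        *(void **) (&cosine) = find_function(&libm, "libm.so.6", "cos");

        if (cosine) {
            printf("cos(0) = %f\n", cosine(0.0));
        }
        return cosine ? EXIT_SUCCESS : EXIT_FAILURE;
    }
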
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -175,6 +175,6 @@ void remove_cib_op_callback(int call_id, gboolean all_callbacks); - } while(0) - # include - --# define CIB_LIBRARY "libcib.so.2" -+# define CIB_LIBRARY "libcib.so.3" - - #endif -diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h -index 0c7dfe6..94d86dc 100644 ---- a/include/crm/cib/internal.h -+++ b/include/crm/cib/internal.h -@@ -18,6 +18,7 @@ - #ifndef CIB_INTERNAL__H - # define CIB_INTERNAL__H - # include -+# include - - # define CIB_OP_SLAVE "cib_slave" - # define CIB_OP_SLAVEALL "cib_slave_all" -@@ -188,4 +189,5 @@ int cib_internal_op(cib_t * cib, const char *op, const char *host, - const char *section, xmlNode * data, - xmlNode ** output_data, int call_options, const char *user_name); - -+ - #endif -diff --git a/include/crm/cib/util.h b/include/crm/cib/util.h -index 03bf426..2a00937 100644 ---- a/include/crm/cib/util.h -+++ b/include/crm/cib/util.h -@@ -59,7 +59,7 @@ int delete_attr_delegate(cib_t * the_cib, int options, - const char *attr_id, const char *attr_name, - const char *attr_value, gboolean to_console, const char *user_name); - --int query_node_uuid(cib_t * the_cib, const char *uname, char **uuid); -+int query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node); - - int query_node_uname(cib_t * the_cib, const char *uuid, char **uname); - -diff --git a/include/crm/cluster.h b/include/crm/cluster.h -index 5be940a..cac863f 100644 ---- a/include/crm/cluster.h -+++ b/include/crm/cluster.h -@@ -73,8 +73,7 @@ typedef struct crm_peer_node_s { - - void crm_peer_init(void); - void crm_peer_destroy(void); --char *get_corosync_uuid(uint32_t id, const char *uname); --const char *get_node_uuid(uint32_t id, const char *uname); -+char *get_corosync_uuid(crm_node_t *peer); - int get_corosync_id(int id, const char *uuid); - - typedef struct crm_cluster_s { -@@ -132,7 +131,6 @@ gboolean crm_is_peer_active(const crm_node_t * node); - guint reap_crm_member(uint32_t id, const char *name); - int crm_terminate_member(int nodeid, const char *uname, void *unused); - int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection); --gboolean crm_get_cluster_name(char **cname); - - # if SUPPORT_HEARTBEAT - gboolean crm_is_heartbeat_peer_active(const crm_node_t * node); -@@ -143,14 +141,11 @@ extern int ais_fd_sync; - gboolean crm_is_corosync_peer_active(const crm_node_t * node); - gboolean send_ais_text(int class, const char *data, gboolean local, - crm_node_t * node, enum crm_ais_msg_types dest); --gboolean get_ais_nodeid(uint32_t * id, char **uname); - # endif - --void empty_uuid_cache(void); --const char *get_uuid(const char *uname); --const char *get_uname(const char *uuid); --void set_uuid(xmlNode * node, const char *attr, const char *uname); --void unget_uuid(const char *uname); -+const char *crm_peer_uuid(crm_node_t *node); -+const char *crm_peer_uname(const char *uuid); -+void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node); - - enum crm_status_type { - crm_status_uname, -@@ -182,7 +177,7 @@ gboolean is_openais_cluster(void); - gboolean is_classic_ais_cluster(void); - gboolean is_heartbeat_cluster(void); - --char *get_local_node_name(void); -+const char *get_local_node_name(void); - char *get_node_name(uint32_t nodeid); - - #endif -diff --git a/include/crm/cluster/internal.h 
b/include/crm/cluster/internal.h -index ff0fbf2..2fa8e08 100644 ---- a/include/crm/cluster/internal.h -+++ b/include/crm/cluster/internal.h -@@ -379,8 +379,6 @@ enum crm_quorum_source { - - enum crm_quorum_source get_quorum_source(void); - --void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); -- - void crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); - - crm_node_t *crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, -diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am -index 48f871d..5d61df7 100644 ---- a/include/crm/common/Makefile.am -+++ b/include/crm/common/Makefile.am -@@ -22,3 +22,6 @@ headerdir=$(pkgincludedir)/crm/common - - header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h - noinst_HEADERS = ipcs.h -+if BUILD_CIBSECRETS -+noinst_HEADERS += cib_secrets.h -+endif -diff --git a/include/crm/common/cib_secrets.h b/include/crm/common/cib_secrets.h -new file mode 100644 -index 0000000..566f445 ---- /dev/null -+++ b/include/crm/common/cib_secrets.h -@@ -0,0 +1,25 @@ -+/* -+ * cib_secrets.h -+ * -+ * Author: Dejan Muhamedagic -+ * Copyright (c) 2011 SUSE, Attachmate -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public -+ * License as published by the Free Software Foundation; either -+ * version 2 of the License, or (at your option) any later version. -+ * -+ * This software is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+/* -+ * load parameters from an ini file (cib_secrets.c) -+ */ -+int replace_secret_params(char * rsc_id, GHashTable * params); -diff --git a/include/crm/common/logging.h b/include/crm/common/logging.h -index bed6cd9..22b1ad7 100644 ---- a/include/crm/common/logging.h -+++ b/include/crm/common/logging.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
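
Note: the xml_log_options enum added to logging.h just below is a conventional bit-flag set: each option occupies a distinct bit, so callers OR options together and the consumer tests each bit independently, as the updated log_data_element() call does with xml_log_option_formatted. A small sketch of the idiom, with an illustrative log_xml() consumer:

    #include <stdio.h>

    enum xml_log_options {
        xml_log_option_filtered   = 0x001,
        xml_log_option_formatted  = 0x002,
        xml_log_option_diff_plus  = 0x010,
        xml_log_option_diff_minus = 0x020,
    };

    /* Illustrative consumer; the real one is log_data_element(). */
    static void
    log_xml(int options)
    {
        if (options & xml_log_option_formatted) {
            printf("pretty-printing the XML\n");
        }
        if (options & xml_log_option_diff_plus) {
            printf("prefixing added lines with '+'\n");
        }
    }

    int
    main(void)
    {
        /* OR flags together; each bit remains independently testable */
        log_xml(xml_log_option_formatted | xml_log_option_diff_plus);
        return 0;
    }
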
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -39,6 +39,16 @@ extern gboolean crm_config_error; - extern gboolean crm_config_warning; - extern unsigned int crm_trace_nonlog; - -+enum xml_log_options -+{ -+ xml_log_option_filtered = 0x001, -+ xml_log_option_formatted = 0x002, -+ xml_log_option_diff_plus = 0x010, -+ xml_log_option_diff_minus = 0x020, -+ xml_log_option_diff_short = 0x040, -+ xml_log_option_diff_all = 0x100, -+}; -+ - void crm_enable_blackbox(int nsig); - void crm_enable_blackbox_tracing(int nsig); - void crm_write_blackbox(int nsig, struct qb_log_callsite *callsite); -@@ -123,7 +133,7 @@ unsigned int get_crm_log_level(void); - xml_cs = qb_log_callsite_get(__func__, __FILE__, "xml-blog", level, __LINE__, 0); \ - } \ - if (crm_is_callsite_active(xml_cs, level, 0)) { \ -- log_data_element(level, __FILE__, __PRETTY_FUNCTION__, __LINE__, text, xml, 0, TRUE); \ -+ log_data_element(level, __FILE__, __PRETTY_FUNCTION__, __LINE__, text, xml, 1, xml_log_option_formatted); \ - } \ - } while(0) - -diff --git a/include/crm/common/mainloop.h b/include/crm/common/mainloop.h -index d103107..0941f1b 100644 ---- a/include/crm/common/mainloop.h -+++ b/include/crm/common/mainloop.h -@@ -30,6 +30,8 @@ typedef struct trigger_s crm_trigger_t; - typedef struct mainloop_io_s mainloop_io_t; - typedef struct mainloop_child_s mainloop_child_t; - -+void mainloop_cleanup(void); -+ - crm_trigger_t *mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), - gpointer userdata); - -@@ -79,22 +81,18 @@ void mainloop_del_fd(mainloop_io_t * client); - * Create a new tracked process - * To track a process group, use -pid - */ --void -- -- --mainloop_add_child(pid_t pid, -- int timeout, -- const char *desc, -- void *userdata, -- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode)); -- --void *mainloop_get_child_userdata(mainloop_child_t * child); --int -- mainloop_get_child_timeout(mainloop_child_t * child); -- --pid_t mainloop_get_child_pid(mainloop_child_t * child); --void -- mainloop_clear_child_userdata(mainloop_child_t * child); -+void mainloop_child_add(pid_t pid, -+ int timeout, -+ const char *desc, -+ void *userdata, -+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)); -+ -+void *mainloop_child_userdata(mainloop_child_t * child); -+int mainloop_child_timeout(mainloop_child_t * child); -+const char *mainloop_child_name(mainloop_child_t * child); -+ -+pid_t mainloop_child_pid(mainloop_child_t * child); -+void mainloop_clear_child_userdata(mainloop_child_t * child); - - # define G_PRIORITY_MEDIUM (G_PRIORITY_HIGH/2) - -diff --git a/include/crm/common/util.h b/include/crm/common/util.h -index 966785e..f0dca36 100644 ---- a/include/crm/common/util.h -+++ b/include/crm/common/util.h -@@ -113,6 +113,8 @@ gboolean did_rsc_op_fail(lrmd_event_data_t * event, int target_rc); - char *crm_md5sum(const char *buffer); - - char *crm_generate_uuid(void); -+ -+void crm_build_path(const char *path_c, mode_t mode); - int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid); - - int crm_exit(int rc); -diff --git a/include/crm/common/xml.h b/include/crm/common/xml.h -index d5147c8..9b2ced9 100644 ---- a/include/crm/common/xml.h -+++ b/include/crm/common/xml.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free 
software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -68,7 +68,7 @@ xmlDoc *getDocPtr(xmlNode * node); - * - * Copy all the attributes/properties from src into target. - * -- * Not recursive, does not return anything. -+ * Not recursive, does not return anything. - * - */ - void copy_in_properties(xmlNode * target, xmlNode * src); -@@ -108,7 +108,7 @@ const char *crm_xml_add_int(xmlNode * node, const char *name, int value); - void unlink_xml_node(xmlNode * node); - - /* -- * -+ * - */ - void purge_diff_markers(xmlNode * a_node); - -@@ -201,6 +201,8 @@ gboolean validate_xml_verbose(xmlNode * xml_blob); - int update_validation(xmlNode ** xml_blob, int *best, gboolean transform, gboolean to_logs); - int get_schema_version(const char *name); - const char *get_schema_name(int version); -+ -+void crm_xml_init(void); - void crm_xml_cleanup(void); - - static inline xmlNode * -@@ -210,7 +212,7 @@ __xml_first_child(xmlNode * parent) - - if (parent) { - child = parent->children; -- while (child && child->type != XML_ELEMENT_NODE) { -+ while (child && child->type == XML_TEXT_NODE) { - child = child->next; - } - } -@@ -222,7 +224,7 @@ __xml_next(xmlNode * child) - { - if (child) { - child = child->next; -- while (child && child->type != XML_ELEMENT_NODE) { -+ while (child && child->type == XML_TEXT_NODE) { - child = child->next; - } - } -@@ -238,6 +240,15 @@ xmlXPathObjectPtr xpath_search(xmlNode * xml_top, const char *path); - gboolean cli_config_update(xmlNode ** xml, int *best_version, gboolean to_logs); - xmlNode *expand_idref(xmlNode * input, xmlNode * top); - -+void freeXpathObject(xmlXPathObjectPtr xpathObj); - xmlNode *getXpathResult(xmlXPathObjectPtr xpathObj, int index); - -+static inline int numXpathResults(xmlXPathObjectPtr xpathObj) -+{ -+ if(xpathObj == NULL || xpathObj->nodesetval == NULL) { -+ return 0; -+ } -+ return xpathObj->nodesetval->nodeNr; -+} -+ - #endif -diff --git a/include/crm/crm.h b/include/crm/crm.h -index 5d69231..9c66563 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
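
Note: the __xml_first_child()/__xml_next() change above narrows the skip condition from "anything that is not an element" to "text nodes only", so comments and processing instructions are no longer hidden from iteration while the whitespace text nodes between elements still are. A standalone libxml2 sketch of the same traversal (list_children() is illustrative); build against libxml2, e.g. with the flags from xml2-config:

    #include <libxml/parser.h>
    #include <stdio.h>
    #include <string.h>

    /* Walk a node's children, skipping text nodes (typically inter-element
     * whitespace), exactly as the updated __xml_first_child()/__xml_next() do. */
    static void
    list_children(xmlNode *parent)
    {
        xmlNode *child = parent ? parent->children : NULL;

        for (; child != NULL; child = child->next) {
            if (child->type == XML_TEXT_NODE) {
                continue;
            }
            printf("child: %s (type %d)\n",
                   child->name ? (const char *) child->name : "(unnamed)",
                   (int) child->type);
        }
    }

    int
    main(void)
    {
        const char *xml = "<cib><configuration/><!-- note --><status/></cib>";
        xmlDoc *doc = xmlReadMemory(xml, (int) strlen(xml), NULL, NULL, 0);

        if (doc) {
            list_children(xmlDocGetRootElement(doc));   /* now lists the comment too */
            xmlFreeDoc(doc);
        }
        xmlCleanupParser();
        return 0;
    }
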
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -56,7 +56,7 @@ - - # define CRM_META "CRM_meta" - --extern const char *crm_system_name; -+extern char *crm_system_name; - - /* *INDENT-OFF* */ - -@@ -66,6 +66,8 @@ extern const char *crm_system_name; - # define MAX_IPC_FAIL 5 - # define MAX_IPC_DELAY 120 - -+# define DAEMON_RESPAWN_STOP 100 -+ - # define MSG_LOG 1 - # define DOT_FSA_ACTIONS 1 - # define DOT_ALL_FSA_INPUTS 1 -@@ -119,10 +121,11 @@ extern const char *crm_system_name; - # define CRM_OP_TETIMEOUT "te_timeout" - # define CRM_OP_TRANSITION "transition" - # define CRM_OP_REGISTER "register" -+# define CRM_OP_IPC_FWD "ipc_fwd" - # define CRM_OP_DEBUG_UP "debug_inc" - # define CRM_OP_DEBUG_DOWN "debug_dec" - # define CRM_OP_INVOKE_LRM "lrm_invoke" --# define CRM_OP_LRM_REFRESH "lrm_refresh" -+# define CRM_OP_LRM_REFRESH "lrm_refresh" /* Deprecated */ - # define CRM_OP_LRM_QUERY "lrm_query" - # define CRM_OP_LRM_DELETE "lrm_delete" - # define CRM_OP_LRM_FAIL "lrm_fail" -diff --git a/include/crm/error.h b/include/crm/error.h -index 2e8d6f5..1613d66 100644 ---- a/include/crm/error.h -+++ b/include/crm/error.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2012 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
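
Note: DAEMON_RESPAWN_STOP, defined as 100 in the crm.h hunk above, gives the daemons a single symbolic exit status meaning "do not respawn me"; the bare crm_exit(100) calls in fencing/main.c earlier in this patch are replaced with it. Because a parent only sees the low 8 bits of an exit status, such sentinel codes must stay below 256. A sketch of how a supervising process could distinguish the status with standard POSIX calls; whether pacemakerd performs exactly this check is outside this diff:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #define DAEMON_RESPAWN_STOP 100     /* value from the crm.h hunk above */

    int
    main(void)
    {
        pid_t pid = fork();

        if (pid == 0) {
            exit(DAEMON_RESPAWN_STOP);  /* child: ask not to be restarted */

        } else if (pid > 0) {
            int status = 0;

            waitpid(pid, &status, 0);
            if (WIFEXITED(status) && WEXITSTATUS(status) == DAEMON_RESPAWN_STOP) {
                printf("child asked not to be respawned\n");
            } else {
                printf("restart the child\n");
            }
        }
        return 0;
    }
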
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -39,17 +39,21 @@ - } while(0) - - # define pcmk_ok 0 --# define PCMK_ERROR_OFFSET 900 /* Replacements on non-linux systems, see include/portability.h */ --# define PCMK_CUSTOM_OFFSET 1000 /* Purely custom codes */ --# define pcmk_err_generic 1001 --# define pcmk_err_no_quorum 1002 --# define pcmk_err_dtd_validation 1003 --# define pcmk_err_transform_failed 1004 --# define pcmk_err_old_data 1005 --# define pcmk_err_diff_failed 1006 --# define pcmk_err_diff_resync 1007 -+# define PCMK_ERROR_OFFSET 190 /* Replacements on non-linux systems, see include/portability.h */ -+# define PCMK_CUSTOM_OFFSET 200 /* Purely custom codes */ -+# define pcmk_err_generic 201 -+# define pcmk_err_no_quorum 202 -+# define pcmk_err_dtd_validation 203 -+# define pcmk_err_transform_failed 204 -+# define pcmk_err_old_data 205 -+# define pcmk_err_diff_failed 206 -+# define pcmk_err_diff_resync 207 -+# define pcmk_err_cib_modified 208 -+# define pcmk_err_cib_backup 209 -+# define pcmk_err_cib_save 210 - - const char *pcmk_strerror(int rc); -+const char *pcmk_errorname(int rc); - const char *bz2_strerror(int rc); - - #endif -diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h -index 0d40729..f627373 100644 ---- a/include/crm/lrmd.h -+++ b/include/crm/lrmd.h -@@ -33,16 +33,19 @@ typedef struct lrmd_key_value_s { - struct lrmd_key_value_s *next; - } lrmd_key_value_t; - -+#define LRMD_PROTOCOL_VERSION "1.0" - - /* *INDENT-OFF* */ - #define DEFAULT_REMOTE_KEY_LOCATION "/etc/pacemaker/authkey" - #define ALT_REMOTE_KEY_LOCATION "/etc/corosync/authkey" --#define DEFAULT_REMOTE_PORT 1984 -+#define DEFAULT_REMOTE_PORT 3121 - #define DEFAULT_REMOTE_USERNAME "lrmd" - - #define F_LRMD_OPERATION "lrmd_op" - #define F_LRMD_CLIENTNAME "lrmd_clientname" -+#define F_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider" - #define F_LRMD_CLIENTID "lrmd_clientid" -+#define F_LRMD_PROTOCOL_VERSION "lrmd_protocol_version" - #define F_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type" - #define F_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id" - #define F_LRMD_CALLBACK_TOKEN "lrmd_async_id" -@@ -83,9 +86,19 @@ typedef struct lrmd_key_value_s { - #define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata" - #define LRMD_OP_POKE "lrmd_rsc_poke" - -+#define F_LRMD_IPC_OP "lrmd_ipc_op" -+#define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server" -+#define F_LRMD_IPC_SESSION "lrmd_ipc_session" -+#define F_LRMD_IPC_PROXY_NODE "lrmd_ipc_proxy_node" -+#define F_LRMD_IPC_USER "lrmd_ipc_user" -+#define F_LRMD_IPC_MSG "lrmd_ipc_msg" -+#define F_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id" -+#define F_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags" -+ - #define T_LRMD "lrmd" - #define T_LRMD_REPLY "lrmd_reply" - #define T_LRMD_NOTIFY "lrmd_notify" -+#define T_LRMD_IPC_PROXY "lrmd_ipc_proxy" - /* *INDENT-ON* */ - - /*! -diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h -index 57ec9bd..8575163 100644 ---- a/include/crm/msg_xml.h -+++ b/include/crm/msg_xml.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
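
Note: a plausible motivation for renumbering the pcmk_err_* constants above from the 900/1000 range down to 190/200 is that these codes double as process exit statuses, and exit statuses are truncated to 8 bits, so the old values were reported to the parent as unrelated small numbers. A tiny demonstration of the truncation:

    #include <stdio.h>

    int
    main(void)
    {
        /* exit() statuses reach the parent modulo 256 */
        int old_codes[] = { 900, 1001, 1007 };   /* old offset and pcmk_err_* values */
        int i;

        for (i = 0; i < 3; i++) {
            printf("exit(%d) is observed as %d\n", old_codes[i], old_codes[i] & 0xFF);
        }
        return 0;
    }
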
-- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -256,6 +256,11 @@ - # define XML_LRM_ATTR_OP_RESTART "op-force-restart" - # define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest" - -+# define XML_RSC_OP_LAST_CHANGE "last-rc-change" -+# define XML_RSC_OP_LAST_RUN "last-run" -+# define XML_RSC_OP_T_EXEC "exec-time" -+# define XML_RSC_OP_T_QUEUE "queue-time" -+ - # define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source" - # define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target" - -diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h -index f7da80e..164ef8c 100644 ---- a/include/crm/pengine/internal.h -+++ b/include/crm/pengine/internal.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -101,10 +101,12 @@ void common_free(resource_t * rsc); - extern pe_working_set_t *pe_dataset; - - extern node_t *node_copy(node_t * this_node); --extern time_t get_timet_now(pe_working_set_t * data_set); --extern int get_failcount(node_t * node, resource_t * rsc, int *last_failure, -+extern time_t get_effective_time(pe_working_set_t * data_set); -+extern int get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, - pe_working_set_t * data_set); --extern int get_failcount_all(node_t * node, resource_t * rsc, int *last_failure, -+extern int get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, -+ bool effective, pe_working_set_t * data_set); -+extern int get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, - pe_working_set_t * data_set); - - /* Binary like operators for lists of nodes */ -@@ -134,9 +136,6 @@ extern GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) - extern void pe_free_shallow(GListPtr alist); - extern void pe_free_shallow_adv(GListPtr alist, gboolean with_data); - --/* For creating the transition graph */ --extern xmlNode *action2xml(action_t * action, gboolean as_input); -- - /* Printing functions for debug */ - extern void print_node(const char *pre_text, node_t * node, gboolean details); - -diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h -index 95130c7..bd0a9ba 100644 ---- a/include/crm/pengine/status.h -+++ b/include/crm/pengine/status.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it 
under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -58,19 +58,21 @@ enum pe_find { - # define pe_flag_is_managed_default 0x00000004ULL - # define pe_flag_maintenance_mode 0x00000008ULL - --# define pe_flag_stonith_enabled 0x00000010ULL -+# define pe_flag_stonith_enabled 0x00000010ULL - # define pe_flag_have_stonith_resource 0x00000020ULL - - # define pe_flag_stop_rsc_orphans 0x00000100ULL - # define pe_flag_stop_action_orphans 0x00000200ULL --# define pe_flag_stop_everything 0x00000400ULL -+# define pe_flag_stop_everything 0x00000400ULL - - # define pe_flag_start_failure_fatal 0x00001000ULL - # define pe_flag_remove_after_stop 0x00002000ULL - --# define pe_flag_startup_probes 0x00010000ULL -+# define pe_flag_startup_probes 0x00010000ULL - # define pe_flag_have_status 0x00020000ULL --# define pe_flag_have_remote_nodes 0x00040000ULL -+# define pe_flag_have_remote_nodes 0x00040000ULL -+ -+# define pe_flag_quick_location 0x00100000ULL - - typedef struct pe_working_set_s { - xmlNode *input; -@@ -114,6 +116,7 @@ typedef struct pe_working_set_s { - xmlNode *graph; - - GHashTable *template_rsc_sets; -+ const char *localhost; - - } pe_working_set_t; - -@@ -341,6 +344,7 @@ struct action_wrapper_s { - action_t *action; - }; - -+const char *rsc_printable_id(resource_t *rsc); - gboolean cluster_status(pe_working_set_t * data_set); - void set_working_set_defaults(pe_working_set_t * data_set); - void cleanup_calculations(pe_working_set_t * data_set); -diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h -index 355d1ae..35f6e5a 100644 ---- a/include/crm/stonith-ng.h -+++ b/include/crm/stonith-ng.h -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
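
Note: the stonith-ng.h hunk just below adds a client field to stonith_history_t, which is a plain singly linked list chained through next. Walking it is the usual pointer iteration; a sketch with a pared-down, hypothetical stand-in for the struct:

    #include <stdio.h>

    /* Pared-down stand-in for stonith_history_t; only the fields used here. */
    typedef struct history_s {
        const char *target;
        const char *client;             /* the field added in the hunk below */
        struct history_s *next;
    } history_t;

    int
    main(void)
    {
        history_t second = { "node2", "crmd.4321", NULL };
        history_t first = { "node1", "stonith_admin.1234", &second };
        history_t *h;

        for (h = &first; h != NULL; h = h->next) {
            printf("fenced %s on behalf of %s\n", h->target, h->client);
        }
        return 0;
    }
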
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -86,6 +86,7 @@ typedef struct stonith_history_s { - int state; - - struct stonith_history_s *next; -+ char *client; - } stonith_history_t; - - typedef struct stonith_s stonith_t; -diff --git a/include/crm_internal.h b/include/crm_internal.h -index 6215a3a..62eb385 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -31,6 +31,7 @@ - - # include - # include -+# include - - /* Dynamic loading of libraries */ - void *find_library_function(void **handle, const char *lib, const char *fn, int fatal); -@@ -38,7 +39,7 @@ void *convert_const_pointer(const void *ptr); - - /* For ACLs */ - char *uid2username(uid_t uid); --void determine_request_user(char *user, xmlNode * request, const char *field); -+void determine_request_user(const char *user, xmlNode * request, const char *field); - - # if ENABLE_ACL - # include -@@ -309,4 +310,22 @@ typedef struct { - - # endif - -+void -+attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); -+void -+stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); -+ -+qb_ipcs_service_t * -+crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb); -+ -+void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, -+ qb_ipcs_service_t **ipcs_rw, -+ qb_ipcs_service_t **ipcs_shm, -+ struct qb_ipcs_service_handlers *ro_cb, -+ struct qb_ipcs_service_handlers *rw_cb); -+ -+void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, -+ qb_ipcs_service_t *ipcs_rw, -+ qb_ipcs_service_t *ipcs_shm); -+ - #endif /* CRM_INTERNAL__H */ -diff --git a/include/portability.h b/include/portability.h -index 681ddeb..b0f9f1c 100644 ---- a/include/portability.h -+++ b/include/portability.h -@@ -10,12 +10,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
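
Note: the portability.h hunks just below replace the configure-time NEED_G_HASH_ITER / NEED_G_LIST_FREE_FULL switches with GLIB_CHECK_VERSION(), so the fallbacks track the glib headers actually being compiled against. The general shape of such a shim, mirroring the g_list_free_full() fallback from the diff:

    #include <glib.h>

    #if !GLIB_CHECK_VERSION(2, 28, 0)
    /* Fallback for glib < 2.28, where g_list_free_full() does not exist:
     * free every payload, then the list cells themselves. */
    static inline void
    g_list_free_full(GList *list, GDestroyNotify free_func)
    {
        g_list_foreach(list, (GFunc) free_func, NULL);
        g_list_free(list);
    }
    #endif

    int
    main(void)
    {
        GList *l = NULL;

        l = g_list_prepend(l, g_strdup("one"));
        l = g_list_prepend(l, g_strdup("two"));
        g_list_free_full(l, g_free);    /* native on new glib, shim on old */
        return 0;
    }
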
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -74,9 +74,9 @@ char *strndup(const char *str, size_t len); - # define USE_GNU - # endif - --# if NEED_G_HASH_ITER -+# include -+# if !GLIB_CHECK_VERSION(2,14,0) - --# include - typedef struct fake_ghi { - GHashTable *hash; - int nth; /* current index over the iteration */ -@@ -102,6 +102,9 @@ g_hash_table_get_values(GHashTable * hash_table) - g_hash_table_foreach(hash_table, g_hash_prepend_value, &values); - return values; - } -+# endif -+ -+# if !GLIB_CHECK_VERSION(2,16,0) - - static inline gboolean - g_hash_table_nth_data(gpointer key, gpointer value, gpointer user_data) -@@ -116,7 +119,6 @@ g_hash_table_nth_data(gpointer key, gpointer value, gpointer user_data) - return FALSE; - } - --/* Since: 2.16 */ - static inline void - g_hash_table_iter_init(GHashTableIter * iter, GHashTable * hash_table) - { -@@ -146,7 +148,6 @@ g_hash_table_iter_next(GHashTableIter * iter, gpointer * key, gpointer * value) - return found; - } - --/* Since: 2.16 */ - static inline void - g_hash_table_iter_remove(GHashTableIter * iter) - { -@@ -154,7 +155,6 @@ g_hash_table_iter_remove(GHashTableIter * iter) - iter->nth--; /* Or zero to be safe? */ - } - --/* Since: 2.16 */ - static inline int - g_strcmp0(const char *str1, const char *str2) - { -@@ -166,8 +166,7 @@ g_strcmp0(const char *str1, const char *str2) - } - # endif /* !HAVE_LIBGLIB_2_0 */ - --# ifdef NEED_G_LIST_FREE_FULL --# include -+# if !GLIB_CHECK_VERSION(2,28,0) - # include - /* Since: 2.28 */ - static inline void -@@ -180,27 +179,27 @@ g_list_free_full(GList * list, GDestroyNotify free_func) - - /* Replacement error codes for non-linux */ - # ifndef ENOTUNIQ --# define ENOTUNIQ 900 -+# define ENOTUNIQ 190 - # endif - - # ifndef ECOMM --# define ECOMM 901 -+# define ECOMM 191 - # endif - - # ifndef ELIBACC --# define ELIBACC 902 -+# define ELIBACC 192 - # endif - - # ifndef EREMOTEIO --# define EREMOTEIO 903 -+# define EREMOTEIO 193 - # endif - - # ifndef EUNATCH --# define EUNATCH 904 -+# define EUNATCH 194 - # endif - - # ifndef ENOKEY --# define ENOKEY 905 -+# define ENOKEY 195 - # endif - - /* -diff --git a/lib/ais/plugin.c b/lib/ais/plugin.c -index 01f11eb..1e372de 100644 ---- a/lib/ais/plugin.c -+++ b/lib/ais/plugin.c -@@ -1,16 +1,16 @@ - /* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USAA -@@ -140,6 +140,7 @@ void pcmk_quorum(void *conn, ais_void_ptr * msg); - void pcmk_cluster_id_swab(void *msg); - void pcmk_cluster_id_callback(ais_void_ptr * message, unsigned int nodeid); - void ais_remove_peer(char *node_id); -+void ais_remove_peer_by_name(const char *node_name); - - static uint32_t - get_process_list(void) -@@ -203,14 +204,14 @@ struct corosync_service_engine pcmk_service_handler = { - .name = (char *)"Pacemaker Cluster Manager "PACKAGE_VERSION, - .id = PCMK_SERVICE_ID, - .private_data_size = 0, -- .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, -+ .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, - .allow_inquorate = CS_LIB_ALLOW_INQUORATE, - .lib_init_fn = pcmk_ipc_connect, - .lib_exit_fn = pcmk_ipc_exit, - .exec_init_fn = pcmk_startup, - .exec_exit_fn = pcmk_shutdown, - .config_init_fn = pcmk_config_init, -- .priority = 50, -+ .priority = 50, - .lib_engine = pcmk_lib_service, - .lib_engine_count = sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler), - .exec_engine = pcmk_exec_service, -@@ -492,7 +493,7 @@ pcmk_wait_dispatch(void *arg) - } - - /* Broadcast the fact that one of our processes died -- * -+ * - * Try to get some logging of the cause out first though - * because we're probably about to get fenced - * -@@ -674,13 +675,13 @@ pcmk_startup(struct corosync_api_v1 *init_with) - } - - /* -- static void ais_print_node(const char *prefix, struct totem_ip_address *host) -+ static void ais_print_node(const char *prefix, struct totem_ip_address *host) - { - int len = 0; - char *buffer = NULL; - - ais_malloc0(buffer, INET6_ADDRSTRLEN+1); -- -+ - inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); - - len = strlen(buffer); -@@ -1026,7 +1027,7 @@ pcmk_ipc(void *conn, ais_void_ptr * msg) - transient = FALSE; - } - #if 0 -- /* If this check fails, the order of pcmk_children probably -+ /* If this check fails, the order of pcmk_children probably - * doesn't match that of the crm_ais_msg_types enum - */ - AIS_CHECK(transient || mutable->sender.pid == pcmk_children[type].pid, -@@ -1493,6 +1494,7 @@ route_ais_message(const AIS_Message * msg, gboolean local_origin) - if (mutable->host.local) { - void *conn = NULL; - const char *lookup = NULL; -+ int children_index = 0; - - if (dest == crm_msg_ais) { - process_ais_message(mutable); -@@ -1530,10 +1532,15 @@ route_ais_message(const AIS_Message * msg, gboolean local_origin) - } - - lookup = msg_type2text(dest); -- conn = pcmk_children[dest].async_conn; - -- /* the cluster fails in weird and wonderfully obscure ways when this is not true */ -- AIS_ASSERT(ais_str_eq(lookup, pcmk_children[dest].name)); -+ if (dest == crm_msg_pe && ais_str_eq(pcmk_children[7].name, lookup)) { -+ children_index = 7; -+ -+ } else { -+ children_index = dest; -+ } -+ -+ conn = pcmk_children[children_index].async_conn; - - if (mutable->header.id == service_id) { - mutable->header.id = 0; /* reset this back to zero for IPC messages */ -@@ -1712,6 +1719,42 @@ ais_remove_peer(char *node_id) - } - } - -+void -+ais_remove_peer_by_name(const char *node_name) -+{ -+ GHashTableIter iter; -+ gpointer key = 0; -+ crm_node_t *node = NULL; -+ GList *node_list = NULL; -+ -+ g_hash_table_iter_init(&iter, membership_list); -+ -+ while (g_hash_table_iter_next(&iter, &key, (void **)&node)) { -+ if 
(ais_str_eq(node_name, node->uname)) { -+ uint32_t node_id = GPOINTER_TO_UINT(key); -+ char *node_id_s = NULL; -+ -+ ais_malloc0(node_id_s, 32); -+ snprintf(node_id_s, 31, "%u", node_id); -+ node_list = g_list_append(node_list, node_id_s); -+ } -+ } -+ -+ if (node_list) { -+ GList *gIter = NULL; -+ -+ for (gIter = node_list; gIter != NULL; gIter = gIter->next) { -+ char *node_id_s = gIter->data; -+ -+ ais_remove_peer(node_id_s); -+ } -+ g_list_free_full(node_list, free); -+ -+ } else { -+ ais_warn("Peer %s is unkown", node_name); -+ } -+} -+ - gboolean - process_ais_message(const AIS_Message * msg) - { -@@ -1728,7 +1771,7 @@ process_ais_message(const AIS_Message * msg) - if (data && len > 12 && strncmp("remove-peer:", data, 12) == 0) { - char *node = data + 12; - -- ais_remove_peer(node); -+ ais_remove_peer_by_name(node); - } - - ais_free(data); -diff --git a/lib/ais/utils.c b/lib/ais/utils.c -index a9774ad..465e381 100644 ---- a/lib/ais/utils.c -+++ b/lib/ais/utils.c -@@ -1,16 +1,16 @@ - /* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -63,7 +63,7 @@ log_ais_message(int level, const AIS_Message * msg) - } - - /* --static gboolean ghash_find_by_uname(gpointer key, gpointer value, gpointer user_data) -+static gboolean ghash_find_by_uname(gpointer key, gpointer value, gpointer user_data) - { - crm_node_t *node = value; - int id = GPOINTER_TO_INT(user_data); -@@ -140,6 +140,7 @@ spawn_child(crm_child_t * child) - { - int lpc = 0; - uid_t uid = 0; -+ gid_t gid = 0; - struct rlimit oflimits; - gboolean use_valgrind = FALSE; - gboolean use_callgrind = FALSE; -@@ -174,10 +175,11 @@ spawn_child(crm_child_t * child) - } - - if (child->uid) { -- if (pcmk_user_lookup(child->uid, &uid, NULL) < 0) { -+ if (pcmk_user_lookup(child->uid, &uid, &gid) < 0) { - ais_err("Invalid uid (%s) specified for %s", child->uid, child->name); - return FALSE; - } -+ ais_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); - } - - child->pid = fork(); -@@ -206,22 +208,8 @@ spawn_child(crm_child_t * child) - } - opts_default[0] = ais_strdup(child->command);; - --#if 0 -- /* Dont set the group for now - it prevents connection to the cluster */ -- if (gid && setgid(gid) < 0) { -- ais_perror("Could not set group to %d", gid); -- } --#endif -- -- if (uid) { -- struct passwd *pwent = getpwuid(uid); -- -- if (pwent == NULL) { -- ais_perror("Cannot get password entry of uid: %d", uid); -- -- } else if (initgroups(pwent->pw_name, pwent->pw_gid) < 0) { -- ais_perror("Cannot initalize groups for %s (uid=%d)", pwent->pw_name, uid); -- } -+ if (uid && initgroups(child->uid, gid) < 0) { -+ ais_perror("Cannot initalize groups for %s", child->uid); - } - - if (uid && setuid(uid) < 0) { -diff --git a/lib/cib/Makefile.am b/lib/cib/Makefile.am -index dacac2b..13cd596 
100644 ---- a/lib/cib/Makefile.am -+++ b/lib/cib/Makefile.am -@@ -32,9 +32,8 @@ if ENABLE_ACL - libcib_la_SOURCES += cib_acl.c - endif - --libcib_la_LDFLAGS = -version-info 2:1:0 $(top_builddir)/lib/common/libcrmcommon.la $(CRYPTOLIB) \ -- $(top_builddir)/lib/pengine/libpe_rules.la -- -+libcib_la_LDFLAGS = -version-info 3:1:0 -L$(top_builddir)/lib/pengine/.libs -+libcib_la_LIBADD = $(CRYPTOLIB) $(top_builddir)/lib/pengine/libpe_rules.la $(top_builddir)/lib/common/libcrmcommon.la - libcib_la_CFLAGS = -I$(top_srcdir) - - clean-generic: -diff --git a/lib/cib/cib_acl.c b/lib/cib/cib_acl.c -index 72dd6f2..1000345 100644 ---- a/lib/cib/cib_acl.c -+++ b/lib/cib/cib_acl.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2009 Yan Gao -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -518,6 +518,7 @@ search_xml_children(GListPtr * children, xmlNode * root, - static int - search_xpath_objects(GListPtr * objects, xmlNode * xml_obj, const char *xpath) - { -+ int lpc = 0, max = 0; - int match_found = 0; - xmlXPathObjectPtr xpathObj = NULL; - -@@ -526,28 +527,24 @@ search_xpath_objects(GListPtr * objects, xmlNode * xml_obj, const char *xpath) - } - - xpathObj = xpath_search(xml_obj, xpath); -+ max = numXpathResults(xpathObj); - -- if (xpathObj == NULL || xpathObj->nodesetval == NULL || xpathObj->nodesetval->nodeNr < 1) { -+ if (max <= 0) { - crm_debug("No match for %s in %s", xpath, xmlGetNodePath(xml_obj)); -+ } - -- } else if (xpathObj->nodesetval->nodeNr > 0) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -- -- for (lpc = 0; lpc < max; lpc++) { -- xmlNode *match = getXpathResult(xpathObj, lpc); -- -- if (match == NULL) { -- continue; -- } -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *match = getXpathResult(xpathObj, lpc); - -- *objects = g_list_append(*objects, match); -- match_found++; -+ if (match == NULL) { -+ continue; - } -- } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ *objects = g_list_append(*objects, match); -+ match_found++; - } -+ -+ freeXpathObject(xpathObj); - return match_found; - } - -diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c -index 5604d20..4af077c 100644 ---- a/lib/cib/cib_attrs.c -+++ b/lib/cib/cib_attrs.c -@@ -138,7 +138,7 @@ find_nvpair_attr_delegate(cib_t * the_cib, const char *attr, const char *section - if (xml_has_children(xml_search)) { - xmlNode *child = NULL; - -- rc = -EINVAL; -+ rc = -ENOTUNIQ; - attr_msg(LOG_WARNING, "Multiple attributes match name=%s", attr_name); - - for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { -@@ -180,7 +180,7 @@ update_attr_delegate(cib_t * the_cib, int call_options, - CRM_CHECK(attr_name != NULL || attr_id != NULL, return -EINVAL); - - rc = find_nvpair_attr_delegate(the_cib, XML_ATTR_ID, section, node_uuid, set_type, set_name, -- attr_id, attr_name, FALSE, &local_attr_id, user_name); 
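
Note: get_remote_node_uuid() in the next hunk builds its XPath strings with calloc() plus sprintf(); the length arithmetic is safe because %s is the only conversion, but snprintf() makes the bound explicit. A sketch of the same construction using the patch's REMOTE_NODE_XPATH template (build_xpath() is illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define REMOTE_NODE_XPATH "//nvpair[@name='remote-node'][@value='%s']"

    /* Illustrative helper; it over-allocates by the two bytes of the "%s"
     * that gets substituted away, which is harmless. */
    static char *
    build_xpath(const char *uname)
    {
        size_t len = strlen(REMOTE_NODE_XPATH) + strlen(uname) + 1;
        char *xpath = malloc(len);

        if (xpath) {
            snprintf(xpath, len, REMOTE_NODE_XPATH, uname);
        }
        return xpath;
    }

    int
    main(void)
    {
        char *xpath = build_xpath("remote1");

        if (xpath) {
            printf("%s\n", xpath);
            free(xpath);
        }
        return 0;
    }
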
-+ attr_id, attr_name, to_console, &local_attr_id, user_name); - if (rc == pcmk_ok) { - attr_id = local_attr_id; - goto do_modify; -@@ -196,6 +196,7 @@ update_attr_delegate(cib_t * the_cib, int call_options, - const char *node_type = NULL; - xmlNode *cib_top = NULL; - -+ crm_trace("%s does not exist, create it", attr_name); - rc = cib_internal_op(the_cib, CIB_OP_QUERY, NULL, "/cib", NULL, &cib_top, - cib_sync_call | cib_scope_local | cib_xpath | cib_no_children, - user_name); -@@ -413,8 +414,43 @@ delete_attr_delegate(cib_t * the_cib, int options, - return rc; - } - --int --query_node_uuid(cib_t * the_cib, const char *uname, char **uuid) -+static int -+get_remote_node_uuid(cib_t * the_cib, const char *uname, char **uuid) -+{ -+#define REMOTE_NODE_XPATH "//nvpair[@name='remote-node'][@value='%s']" -+#define REMOTE_NODE_XPATH2 "//primitive[@type='remote'][@provider='pacemaker'][@id='%s']" -+ int rc = pcmk_ok; -+ char *xpath_string = NULL; -+ size_t len = strlen(REMOTE_NODE_XPATH) + strlen(uname) + 1; -+ xmlNode *xml_search = NULL; -+ -+ xpath_string = calloc(1, len); -+ sprintf(xpath_string, REMOTE_NODE_XPATH, uname); -+ rc = cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, &xml_search, -+ cib_sync_call | cib_scope_local | cib_xpath, NULL); -+ free(xpath_string); -+ free(xml_search); -+ -+ if (rc != pcmk_ok) { -+ len = strlen(REMOTE_NODE_XPATH2) + strlen(uname) + 1; -+ xpath_string = calloc(1, len); -+ sprintf(xpath_string, REMOTE_NODE_XPATH2, uname); -+ rc = cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, &xml_search, -+ cib_sync_call | cib_scope_local | cib_xpath, NULL); -+ -+ free(xpath_string); -+ free(xml_search); -+ } -+ -+ if (rc == pcmk_ok) { -+ *uuid = strdup(uname); -+ } -+ -+ return rc; -+} -+ -+static int -+get_cluster_node_uuid(cib_t * the_cib, const char *uname, char **uuid) - { - int rc = pcmk_ok; - xmlNode *a_child = NULL; -@@ -422,9 +458,6 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid) - xmlNode *fragment = NULL; - const char *child_name = NULL; - -- CRM_ASSERT(uname != NULL); -- CRM_ASSERT(uuid != NULL); -- - rc = the_cib->cmds->query(the_cib, XML_CIB_TAG_NODES, &fragment, - cib_sync_call | cib_scope_local); - if (rc != pcmk_ok) { -@@ -453,13 +486,36 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid) - } - } - -+ free_xml(fragment); -+ return rc; -+} -+ -+int -+query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node) -+{ -+ int rc = pcmk_ok; -+ -+ CRM_ASSERT(uname != NULL); -+ CRM_ASSERT(uuid != NULL); -+ -+ rc = get_cluster_node_uuid(the_cib, uname, uuid); -+ if (rc != pcmk_ok) { -+ crm_debug("%s is not a cluster node, checking to see if remote-node", uname); -+ rc = get_remote_node_uuid(the_cib, uname, uuid); -+ if (rc != pcmk_ok) { -+ crm_debug("%s is not a remote node either", uname); -+ -+ } else if (is_remote_node) { -+ *is_remote_node = TRUE; -+ } -+ } -+ - if (rc != pcmk_ok) { - crm_debug("Could not map name=%s to a UUID: %s\n", uname, pcmk_strerror(rc)); - } else { - crm_info("Mapped %s to %s", uname, *uuid); - } - -- free_xml(fragment); - return rc; - } - -diff --git a/lib/cib/cib_client.c b/lib/cib/cib_client.c -index 51b589d..d1eaf87 100644 ---- a/lib/cib/cib_client.c -+++ b/lib/cib/cib_client.c -@@ -396,7 +396,10 @@ cib_new_variant(void) - void - cib_delete(cib_t * cib) - { -- GList *list = cib->notify_list; -+ GList *list = NULL; -+ if(cib) { -+ list = cib->notify_list; -+ } - - while (list != NULL) { - cib_notify_client_t *client = g_list_nth_data(list, 0); -@@ 
-405,10 +408,14 @@ cib_delete(cib_t * cib) - free(client); - } - -- g_hash_table_destroy(cib_op_callback_table); -- cib_op_callback_table = NULL; -- cib->cmds->free(cib); -- cib = NULL; -+ if(cib_op_callback_table) { -+ g_hash_table_destroy(cib_op_callback_table); -+ cib_op_callback_table = NULL; -+ } -+ -+ if(cib) { -+ cib->cmds->free(cib); -+ } - } - - int -diff --git a/lib/cib/cib_native.c b/lib/cib/cib_native.c -index 1366b4f..9553ba2 100644 ---- a/lib/cib/cib_native.c -+++ b/lib/cib/cib_native.c -@@ -385,8 +385,8 @@ cib_native_perform_op_delegate(cib_t * cib, const char *op, const char *host, co - free_xml(op_msg); - - if (rc < 0) { -- crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, -- cib->call_timeout, rc); -+ crm_err("Couldn't perform %s operation (timeout=%ds): %s (%d)", op, -+ cib->call_timeout, pcmk_strerror(rc), rc); - rc = -ECOMM; - goto done; - } -diff --git a/lib/cib/cib_ops.c b/lib/cib/cib_ops.c -index 03521da..ceed536 100644 ---- a/lib/cib/cib_ops.c -+++ b/lib/cib/cib_ops.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -295,8 +295,16 @@ cib_process_delete(const char *op, int options, const char *section, xmlNode * r - } - - obj_root = get_object_root(section, *result_cib); -- if (replace_xml_child(NULL, obj_root, input, TRUE) == FALSE) { -- crm_trace("No matching object to delete"); -+ if(safe_str_eq(crm_element_name(input), section)) { -+ xmlNode *child = NULL; -+ for(child = __xml_first_child(input); child; child = __xml_next(child)) { -+ if (replace_xml_child(NULL, obj_root, child, TRUE) == FALSE) { -+ crm_trace("No matching object to delete: %s=%s", child->name, ID(child)); -+ } -+ } -+ -+ } else if (replace_xml_child(NULL, obj_root, input, TRUE) == FALSE) { -+ crm_trace("No matching object to delete: %s=%s", input->name, ID(input)); - } - - return pcmk_ok; -@@ -614,9 +622,9 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else { - apply_diff = FALSE; -- log_level = LOG_ERR; -+ log_level = LOG_NOTICE; - reason = "+ and - versions in the diff did not change"; -- log_cib_diff(LOG_ERR, input, __FUNCTION__); -+ log_cib_diff(LOG_NOTICE, input, __FUNCTION__); - } - } - -@@ -628,7 +636,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else if (apply_diff && diff_del_admin_epoch < this_admin_epoch) { - apply_diff = FALSE; -- log_level = LOG_WARNING; -+ log_level = LOG_NOTICE; - reason = "current \"" XML_ATTR_GENERATION_ADMIN "\" is greater than required"; - - } else if (apply_diff && diff_del_epoch > this_epoch) { -@@ -639,7 +647,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else if (apply_diff && diff_del_epoch < this_epoch) { - apply_diff = 
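
Note: the cib_ops.c hunks below, like the cib_acl.c hunk earlier, convert open-coded checks of xpathObj->nodesetval into the new numXpathResults()/freeXpathObject() helpers, where numXpathResults() returns 0 for a NULL object or node set. A standalone sketch of the resulting idiom against plain libxml2, with num_results() reimplementing numXpathResults() from xml.h and xmlXPathFreeObject() standing in for freeXpathObject():

    #include <libxml/parser.h>
    #include <libxml/xpath.h>
    #include <stdio.h>
    #include <string.h>

    /* Same NULL-tolerant count as the new numXpathResults() in xml.h. */
    static int
    num_results(xmlXPathObjectPtr obj)
    {
        if (obj == NULL || obj->nodesetval == NULL) {
            return 0;
        }
        return obj->nodesetval->nodeNr;
    }

    int
    main(void)
    {
        const char *xml = "<cib><configuration><nvpair id='a'/>"
                          "<nvpair id='b'/></configuration></cib>";
        xmlDoc *doc = xmlReadMemory(xml, (int) strlen(xml), NULL, NULL, 0);
        xmlXPathContext *ctx = xmlXPathNewContext(doc);
        xmlXPathObject *obj = xmlXPathEvalExpression(BAD_CAST "//nvpair", ctx);
        int max = num_results(obj);     /* 0 on NULL or an empty node set */
        int lpc;

        for (lpc = 0; lpc < max; lpc++) {
            xmlNode *match = obj->nodesetval->nodeTab[lpc];
            xmlChar *id = xmlGetProp(match, BAD_CAST "id");

            if (id) {
                printf("match: %s\n", (const char *) id);
                xmlFree(id);
            }
        }
        xmlXPathFreeObject(obj);        /* the counterpart of freeXpathObject() */
        xmlXPathFreeContext(ctx);
        xmlFreeDoc(doc);
        return 0;
    }
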
FALSE; -- log_level = LOG_WARNING; -+ log_level = LOG_NOTICE; - reason = "current \"" XML_ATTR_GENERATION "\" is greater than required"; - - } else if (apply_diff && diff_del_updates > this_updates) { -@@ -650,7 +658,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - - } else if (apply_diff && diff_del_updates < this_updates) { - apply_diff = FALSE; -- log_level = LOG_WARNING; -+ log_level = LOG_NOTICE; - reason = "current \"" XML_ATTR_NUMUPDATES "\" is greater than required"; - } - -@@ -658,7 +666,7 @@ cib_process_diff(const char *op, int options, const char *section, xmlNode * req - free_xml(*result_cib); - *result_cib = NULL; - if (apply_xml_diff(existing_cib, input, result_cib) == FALSE) { -- log_level = LOG_NOTICE; -+ log_level = LOG_WARNING; - reason = "Failed application of an update diff"; - - if (options & cib_force_diff) { -@@ -775,6 +783,7 @@ apply_cib_diff(xmlNode * old, xmlNode * diff, xmlNode ** new) - gboolean - cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff) - { -+ int lpc = 0, max = 0; - gboolean config_changes = FALSE; - xmlXPathObject *xpathObj = NULL; - -@@ -789,13 +798,11 @@ cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff) - } - - xpathObj = xpath_search(*diff, "//" XML_CIB_TAG_CONFIGURATION); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - config_changes = TRUE; - goto done; -- -- } else if (xpathObj) { -- xmlXPathFreeObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* - * Do not check XML_TAG_DIFF_ADDED "//" XML_TAG_CIB -@@ -803,44 +810,40 @@ cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff) - * every time if the checked value existed - */ - xpathObj = xpath_search(*diff, "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_CIB); -- if (xpathObj) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ max = numXpathResults(xpathObj); - -- for (lpc = 0; lpc < max; lpc++) { -- xmlNode *top = getXpathResult(xpathObj, lpc); -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *top = getXpathResult(xpathObj, lpc); - -- if (crm_element_value(top, XML_ATTR_GENERATION) != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, XML_ATTR_GENERATION_ADMIN) != NULL) { -- config_changes = TRUE; -- goto done; -- } -+ if (crm_element_value(top, XML_ATTR_GENERATION) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, XML_ATTR_GENERATION_ADMIN) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } - -- if (crm_element_value(top, XML_ATTR_VALIDATION) != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, XML_ATTR_CRM_VERSION) != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, "remote-clear-port") != NULL) { -- config_changes = TRUE; -- goto done; -- } -- if (crm_element_value(top, "remote-tls-port") != NULL) { -- config_changes = TRUE; -- goto done; -- } -+ if (crm_element_value(top, XML_ATTR_VALIDATION) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, XML_ATTR_CRM_VERSION) != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, "remote-clear-port") != NULL) { -+ config_changes = TRUE; -+ goto done; -+ } -+ if (crm_element_value(top, "remote-tls-port") != NULL) { -+ config_changes = TRUE; -+ goto done; - } - } - - done: -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - return config_changes; - } - -@@ -874,9 +877,7 @@ 
cib_process_xpath(const char *op, int options, const char *section, xmlNode * re - xpathObj = xpath_search(*result_cib, section); - } - -- if (xpathObj != NULL && xpathObj->nodesetval != NULL) { -- max = xpathObj->nodesetval->nodeNr; -- } -+ max = numXpathResults(xpathObj); - - if (max < 1 && safe_str_eq(op, CIB_OP_DELETE)) { - crm_debug("%s was already removed", section); -@@ -951,7 +952,10 @@ cib_process_xpath(const char *op, int options, const char *section, xmlNode * re - - path_len += extra; - new_path = malloc(path_len + 1); -- if (id) { -+ if(new_path == NULL) { -+ break; -+ -+ } else if (id) { - snprintf(new_path, path_len + 1, "/%s[@id='%s']%s", parent->name, id, - path ? path : ""); - } else { -@@ -991,10 +995,7 @@ cib_process_xpath(const char *op, int options, const char *section, xmlNode * re - } - } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -- -+ freeXpathObject(xpathObj); - return rc; - } - -diff --git a/lib/cib/cib_remote.c b/lib/cib/cib_remote.c -index 223695b..8847422 100644 ---- a/lib/cib/cib_remote.c -+++ b/lib/cib/cib_remote.c -@@ -218,9 +218,10 @@ cib_tls_signon(cib_t * cib, crm_remote_t * connection, gboolean event_channel) - connection->tls_session = NULL; - #endif - sock = crm_remote_tcp_connect(private->server, private->port); -- if (sock <= 0) { -+ if (sock < 0) { - crm_perror(LOG_ERR, "remote tcp connection to %s:%d failed", private->server, - private->port); -+ return -ENOTCONN; - } - - connection->tcp_socket = sock; -diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c -index aa3e08b..6353d1d 100644 ---- a/lib/cib/cib_utils.c -+++ b/lib/cib/cib_utils.c -@@ -5,12 +5,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -497,7 +497,7 @@ cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_quer - /* The diff calculation in cib_config_changed() accounts for 25% of the - * CIB's total CPU usage on the DC - * -- * RNG validation on the otherhand, accounts for only 9%... -+ * RNG validation on the otherhand, accounts for only 9%... 
- */ - *config_changed = cib_config_changed(current_cib, scratch, &local_diff); - -@@ -537,7 +537,7 @@ cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_quer - - static filter_t filter[] = { - { 0, XML_ATTR_ORIGIN }, -- { 0, XML_CIB_ATTR_WRITTEN }, -+ { 0, XML_CIB_ATTR_WRITTEN }, - { 0, XML_ATTR_UPDATE_ORIG }, - { 0, XML_ATTR_UPDATE_CLIENT }, - { 0, XML_ATTR_UPDATE_USER }, -@@ -881,13 +881,11 @@ cib_internal_config_changed(xmlNode * diff) - } - - xpathObj = xpath_search(diff, config_xpath); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - changed = TRUE; - } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - - return changed; - } -diff --git a/lib/cluster/Makefile.am b/lib/cluster/Makefile.am -index a97cfee..a5a70ff 100644 ---- a/lib/cluster/Makefile.am -+++ b/lib/cluster/Makefile.am -@@ -28,7 +28,7 @@ header_HEADERS = - lib_LTLIBRARIES = libcrmcluster.la - - libcrmcluster_la_SOURCES = cluster.c membership.c --libcrmcluster_la_LDFLAGS = -version-info 3:0:1 $(CLUSTERLIBS) -+libcrmcluster_la_LDFLAGS = -version-info 4:0:0 $(CLUSTERLIBS) - libcrmcluster_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la - libcrmcluster_la_DEPENDENCIES = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la - -diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c -index e5b85ea..bee991c 100644 ---- a/lib/cluster/cluster.c -+++ b/lib/cluster/cluster.c -@@ -40,11 +40,8 @@ CRM_TRACE_INIT_DATA(cluster); - void *hb_library = NULL; - #endif - --static GHashTable *crm_uuid_cache = NULL; --static GHashTable *crm_uname_cache = NULL; -- - static char * --get_heartbeat_uuid(uint32_t unused, const char *uname) -+get_heartbeat_uuid(const char *uname) - { - char *uuid_calc = NULL; - -@@ -55,6 +52,8 @@ get_heartbeat_uuid(uint32_t unused, const char *uname) - if (heartbeat_cluster == NULL) { - crm_warn("No connection to heartbeat, using uuid=uname"); - return NULL; -+ } else if(uname == NULL) { -+ return NULL; - } - - if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) == -@@ -104,87 +103,62 @@ get_corosync_id(int id, const char *uuid) - } - - char * --get_corosync_uuid(uint32_t id, const char *uname) -+get_corosync_uuid(crm_node_t *node) - { -- if (!uname_is_uuid() && is_corosync_cluster()) { -- if (id <= 0) { -- /* Try the membership cache... 
*/ -- crm_node_t *node = g_hash_table_lookup(crm_peer_cache, uname); -- -- if (node != NULL) { -- id = node->id; -- } -- } -+ if(node == NULL) { -+ return NULL; - -- if (id > 0) { -+ } else if (!uname_is_uuid() && is_corosync_cluster()) { -+ if (node->id > 0) { - int len = 32; - char *buffer = NULL; - - buffer = calloc(1, (len + 1)); - if (buffer != NULL) { -- snprintf(buffer, len, "%u", id); -+ snprintf(buffer, len, "%u", node->id); - } - - return buffer; - - } else { -- crm_warn("Node %s is not yet known by corosync", uname); -+ crm_info("Node %s is not yet known by corosync", node->uname); - } - -- } else if (uname != NULL) { -- return strdup(uname); -+ } else if (node->uname != NULL) { -+ return strdup(node->uname); - } - - return NULL; - } - --void --set_node_uuid(const char *uname, const char *uuid) --{ -- CRM_CHECK(uuid != NULL, return); -- CRM_CHECK(uname != NULL, return); -- -- if (crm_uuid_cache == NULL) { -- crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, -- g_hash_destroy_str, g_hash_destroy_str); -- } -- -- g_hash_table_insert(crm_uuid_cache, strdup(uname), strdup(uuid)); --} -- - const char * --get_node_uuid(uint32_t id, const char *uname) -+crm_peer_uuid(crm_node_t *peer) - { - char *uuid = NULL; - enum cluster_type_e type = get_cluster_type(); - -- if (crm_uuid_cache == NULL) { -- crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, -- g_hash_destroy_str, g_hash_destroy_str); -- } -- - /* avoid blocking heartbeat calls where possible */ -- if (uname) { -- uuid = g_hash_table_lookup(crm_uuid_cache, uname); -- } -- if (uuid != NULL) { -- return uuid; -+ if(peer == NULL) { -+ return NULL; -+ -+ } else if (peer->uuid) { -+ return peer->uuid; - } - - switch (type) { - case pcmk_cluster_corosync: -- uuid = get_corosync_uuid(id, uname); -+ uuid = get_corosync_uuid(peer); - break; - - case pcmk_cluster_cman: - case pcmk_cluster_classic_ais: -- if (uname) { -- uuid = strdup(uname); -+ if (peer->uname) { -+ uuid = strdup(peer->uname); - } - break; - - case pcmk_cluster_heartbeat: -- uuid = get_heartbeat_uuid(id, uname); -+ uuid = get_heartbeat_uuid(peer->uname); - break; - - case pcmk_cluster_unknown: -@@ -193,18 +167,8 @@ get_node_uuid(uint32_t id, const char *uname) - break; - } - -- if (uuid == NULL) { -- return NULL; -- } -- -- if (uname) { -- g_hash_table_insert(crm_uuid_cache, strdup(uname), uuid); -- return g_hash_table_lookup(crm_uuid_cache, uname); -- } -- -- /* Memory leak! */ -- CRM_LOG_ASSERT(uuid != NULL); -- return uuid; -+ peer->uuid = uuid; -+ return peer->uuid; - } - - gboolean -@@ -321,73 +285,15 @@ send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode - return FALSE; - } - --void --empty_uuid_cache(void) --{ -- if (crm_uuid_cache != NULL) { -- g_hash_table_destroy(crm_uuid_cache); -- crm_uuid_cache = NULL; -- } --} -- --void --unget_uuid(const char *uname) --{ -- if (crm_uuid_cache == NULL) { -- return; -- } -- g_hash_table_remove(crm_uuid_cache, uname); --} -- - const char * --get_uuid(const char *uname) --{ -- return get_node_uuid(0, uname); --} -- --char * - get_local_node_name(void) - { -- char *name = NULL; -- enum cluster_type_e stack = get_cluster_type(); -- -- switch (stack) { -- --#if SUPPORT_CMAN -- case pcmk_cluster_cman: -- name = cman_node_name(0 /* AKA. 
CMAN_NODEID_US */ ); -- break; --#endif -+ static char *name = NULL; - --#if SUPPORT_COROSYNC --# if !SUPPORT_PLUGIN -- case pcmk_cluster_corosync: -- name = corosync_node_name(0, 0); -- break; --# endif --#endif -- case pcmk_cluster_heartbeat: -- case pcmk_cluster_classic_ais: -- break; -- default: -- crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); -- } -- -- if (name == NULL) { -- struct utsname res; -- int rc = uname(&res); -- -- if (rc == 0) { -- crm_notice("Defaulting to uname -n for the local %s node name", -- name_for_cluster_type(stack)); -- name = strdup(res.nodename); -- } -- } -- -- if (name == NULL) { -- crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack)); -- crm_exit(100); -+ if(name) { -+ return name; - } -+ name = get_node_name(0); - return name; - } - -@@ -395,15 +301,18 @@ char * - get_node_name(uint32_t nodeid) - { - char *name = NULL; -+ bool do_uname = FALSE; - enum cluster_type_e stack = get_cluster_type(); - - switch (stack) { - case pcmk_cluster_heartbeat: -+ do_uname = TRUE; - break; - - #if SUPPORT_PLUGIN - case pcmk_cluster_classic_ais: - name = classic_node_name(nodeid); -+ do_uname = TRUE; - break; - #else - # if SUPPORT_COROSYNC -@@ -416,6 +325,7 @@ get_node_name(uint32_t nodeid) - #if SUPPORT_CMAN - case pcmk_cluster_cman: - name = cman_node_name(nodeid); -+ do_uname = TRUE; - break; - #endif - -@@ -423,6 +333,22 @@ get_node_name(uint32_t nodeid) - crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); - } - -+ if(name == NULL && nodeid == 0 && do_uname) { -+ struct utsname res; -+ int rc = uname(&res); -+ -+ if (rc == 0) { -+ crm_notice("Defaulting to uname -n for the local %s node name", -+ name_for_cluster_type(stack)); -+ name = strdup(res.nodename); -+ } -+ -+ if (name == NULL) { -+ crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack)); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+ } -+ - if (name == NULL) { - crm_notice("Could not obtain a node name for %s nodeid %u", - name_for_cluster_type(stack), nodeid); -@@ -432,36 +358,43 @@ get_node_name(uint32_t nodeid) - - /* Only used by update_failcount() in te_utils.c */ - const char * --get_uname(const char *uuid) -+crm_peer_uname(const char *uuid) - { -- char *uname = NULL; -- -- if (crm_uname_cache == NULL) { -- crm_uname_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, -- g_hash_destroy_str, g_hash_destroy_str); -- } -+ GHashTableIter iter; -+ crm_node_t *node = NULL; - - CRM_CHECK(uuid != NULL, return NULL); - - /* avoid blocking calls where possible */ -- uname = g_hash_table_lookup(crm_uname_cache, uuid); -- if (uname != NULL) { -- crm_trace("%s = %s (cached)", uuid, uname); -- return uname; -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->uuid && strcasecmp(node->uuid, uuid) == 0) { -+ if(node->uname) { -+ return node->uname; -+ } -+ break; -+ } - } -+ - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { -- if (!uname_is_uuid() && is_corosync_cluster()) { -+ if (uname_is_uuid() == FALSE && is_corosync_cluster()) { - uint32_t id = crm_int_helper(uuid, NULL); -- crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); - -- if (node && node->uname) { -- uname = strdup(node->uname); -- } -+ node = crm_get_peer(id, NULL); - - } else { -- uname = strdup(uuid); -+ node = crm_get_peer(0, uuid); -+ } -+ -+ if (node) { -+ crm_info("Setting uuid for node %s[%u] to '%s'", node->uname, node->id, 
uuid); -+ node->uuid = strdup(uuid); -+ if(node->uname) { -+ return node->uname; -+ } - } -+ return NULL; - } - #endif - -@@ -470,34 +403,41 @@ get_uname(const char *uuid) - if (heartbeat_cluster != NULL) { - cl_uuid_t uuid_raw; - char *uuid_copy = strdup(uuid); -+ char *uname = malloc(MAX_NAME); - - cl_uuid_parse(uuid_copy, &uuid_raw); -- uname = malloc(MAX_NAME); - - if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname, - MAX_NAME) == HA_FAIL) { - crm_err("Could not calculate uname for %s", uuid); -- free(uuid_copy); -- free(uname); -- uname = NULL; -+ } else { -+ node = crm_get_peer(0, uname); -+ } -+ -+ free(uuid_copy); -+ free(uname); -+ } -+ -+ if (node) { -+ crm_info("Setting uuid for node %s to '%s'", node->uname, uuid); -+ node->uuid = strdup(uuid); -+ if(node->uname) { -+ return node->uname; - } - } -+ return NULL; - } - #endif - -- if (uname) { -- crm_trace("Storing %s = %s", uuid, uname); -- g_hash_table_insert(crm_uname_cache, strdup(uuid), uname); -- } -- return uname; -+ return NULL; - } - - void --set_uuid(xmlNode * node, const char *attr, const char *uname) -+set_uuid(xmlNode *xml, const char *attr, crm_node_t *node) - { -- const char *uuid_calc = get_uuid(uname); -+ const char *uuid_calc = crm_peer_uuid(node); - -- crm_xml_add(node, attr, uuid_calc); -+ crm_xml_add(xml, attr, uuid_calc); - return; - } - -@@ -568,7 +508,7 @@ get_cluster_type(void) - - hb = (*new_cluster) ("heartbeat"); - -- crm_debug("Signing in with Heartbeat"); -+ crm_debug("Testing with Heartbeat"); - if (hb->llc_ops->signon(hb, crm_system_name) == HA_OK) { - hb->llc_ops->signoff(hb, FALSE); - -@@ -582,6 +522,7 @@ get_cluster_type(void) - #if SUPPORT_COROSYNC - /* If nothing is defined in the environment, try corosync (if supported) */ - if(cluster == NULL) { -+ crm_debug("Testing with Corosync"); - cluster_type = find_corosync_variant(); - if (cluster_type != pcmk_cluster_unknown) { - detected = TRUE; -@@ -615,6 +556,7 @@ get_cluster_type(void) - - } else { - cluster_type = pcmk_cluster_invalid; -+ goto done; /* Keep the compiler happy when no stacks are supported */ - } - - done: -@@ -624,7 +566,7 @@ get_cluster_type(void) - } else if (cluster_type == pcmk_cluster_invalid) { - crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.", - cluster); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - - } else { - crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type)); -diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c -index 02428cb..f9bcea6 100644 ---- a/lib/cluster/corosync.c -+++ b/lib/cluster/corosync.c -@@ -52,10 +52,6 @@ quorum_handle_t pcmk_quorum_handle = 0; - - gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; - --static char *pcmk_uname = NULL; --static int pcmk_uname_len = 0; --static uint32_t pcmk_nodeid = 0; -- - #define cs_repeat(counter, max, code) do { \ - code; \ - if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ -@@ -67,6 +63,40 @@ static uint32_t pcmk_nodeid = 0; - } \ - } while(counter < max) - -+static uint32_t get_local_nodeid(cpg_handle_t handle) -+{ -+ int rc = CS_OK; -+ int retries = 0; -+ static uint32_t local_nodeid = 0; -+ cpg_handle_t local_handle = handle; -+ cpg_callbacks_t cb = { }; -+ -+ if(local_nodeid != 0) { -+ return local_nodeid; -+ } -+ -+ if(handle == 0) { -+ crm_trace("Creating connection"); -+ cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); -+ } -+ -+ if (rc == CS_OK) { -+ 
retries = 0; -+ crm_trace("Performing lookup"); -+ cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); -+ } -+ -+ if (rc != CS_OK) { -+ crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); -+ } -+ if(handle == 0) { -+ crm_trace("Closing connection"); -+ cpg_finalize(local_handle); -+ } -+ crm_debug("Local nodeid is %u", local_nodeid); -+ return local_nodeid; -+} -+ - /* - * CFG functionality stolen from node_name() in corosync-quorumtool.c - * This resolves the first address assigned to a node and returns the name or IP address. -@@ -78,30 +108,11 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) - int rc = CS_OK; - int retries = 0; - char *name = NULL; -- - cmap_handle_t local_handle = 0; - - /* nodeid == 0 == CMAN_NODEID_US */ -- if (nodeid == 0 && pcmk_nodeid) { -- nodeid = pcmk_nodeid; -- -- } else if (nodeid == 0) { -- /* Look it up */ -- int rc = -1; -- int retries = 0; -- cpg_handle_t handle = 0; -- cpg_callbacks_t cb = { }; -- -- cs_repeat(retries, 5, rc = cpg_initialize(&handle, &cb)); -- if (rc == CS_OK) { -- retries = 0; -- cs_repeat(retries, 5, rc = cpg_local_get(handle, &pcmk_nodeid)); -- } -- -- if (rc != CS_OK) { -- crm_err("Could not get local node id from the CPG API: %d", rc); -- } -- cpg_finalize(handle); -+ if (nodeid == 0) { -+ nodeid = get_local_nodeid(0); - } - - if (cmap_handle == 0 && local_handle == 0) { -@@ -221,19 +232,6 @@ text2msg_type(const char *text) - return type; - } - --static char *ais_cluster_name = NULL; -- --gboolean --crm_get_cluster_name(char **cname) --{ -- CRM_CHECK(cname != NULL, return FALSE); -- if (ais_cluster_name) { -- *cname = strdup(ais_cluster_name); -- return TRUE; -- } -- return FALSE; --} -- - GListPtr cs_message_queue = NULL; - int cs_message_timer = 0; - -@@ -247,6 +245,7 @@ crm_cs_flush_cb(gpointer data) - return FALSE; - } - -+#define CS_SEND_MAX 200 - static ssize_t - crm_cs_flush(void) - { -@@ -256,16 +255,25 @@ crm_cs_flush(void) - static unsigned int last_sent = 0; - - if (pcmk_cpg_handle == 0) { -+ crm_trace("Connection is dead"); - return pcmk_ok; -+ } -+ -+ queue_len = g_list_length(cs_message_queue); -+ if ((queue_len % 1000) == 0 && queue_len > 1) { -+ crm_err("CPG queue has grown to %d", queue_len); -+ -+ } else if (queue_len == CS_SEND_MAX) { -+ crm_warn("CPG queue has grown to %d", queue_len); -+ } - -- } else if (cs_message_timer) { -+ if (cs_message_timer) { - /* There is already a timer, wait until it goes off */ - crm_trace("Timer active %d", cs_message_timer); - return pcmk_ok; - } - -- queue_len = g_list_length(cs_message_queue); -- while (cs_message_queue && sent < 100) { -+ while (cs_message_queue && sent < CS_SEND_MAX) { - AIS_Message *header = NULL; - struct iovec *iov = cs_message_queue->data; - -@@ -294,19 +302,20 @@ crm_cs_flush(void) - - queue_len -= sent; - if (sent > 1 || cs_message_queue) { -- crm_info("Sent %d CPG messages (%d remaining, last=%u): %s", -- sent, queue_len, last_sent, ais_error2text(rc)); -+ crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); - } else { -- crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s", -- sent, queue_len, last_sent, ais_error2text(rc)); -+ crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); - } - - if (cs_message_queue) { -- if (queue_len % 100 == 0 && queue_len > 99) { -- crm_err("CPG queue has grown to %d", queue_len); -+ 
uint32_t delay_ms = 100; -+ if(rc != CS_OK) { -+ /* Proportionally more if sending failed but cap at 1s */ -+ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); - } -- -- cs_message_timer = g_timeout_add(1000 + 100 * queue_len, crm_cs_flush_cb, NULL); -+ cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, NULL); - } - - return rc; -@@ -318,6 +327,8 @@ send_ais_text(int class, const char *data, - { - static int msg_id = 0; - static int local_pid = 0; -+ static int local_name_len = 0; -+ static const char *local_name = NULL; - - char *target = NULL; - struct iovec *iov; -@@ -330,6 +341,13 @@ send_ais_text(int class, const char *data, - - CRM_CHECK(dest != crm_msg_ais, return FALSE); - -+ if(local_name == NULL) { -+ local_name = get_local_node_name(); -+ } -+ if(local_name_len == 0 && local_name) { -+ local_name_len = strlen(local_name); -+ } -+ - if (data == NULL) { - data = ""; - } -@@ -368,9 +386,9 @@ send_ais_text(int class, const char *data, - ais_msg->sender.id = 0; - ais_msg->sender.type = sender; - ais_msg->sender.pid = local_pid; -- ais_msg->sender.size = pcmk_uname_len; -+ ais_msg->sender.size = local_name_len; - memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); -+ memcpy(ais_msg->sender.uname, local_name, ais_msg->sender.size); - - ais_msg->size = 1 + strlen(data); - ais_msg->header.size = sizeof(AIS_Message) + ais_msg->size; -@@ -571,16 +589,20 @@ pcmk_cpg_deliver(cpg_handle_t handle, - uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { - AIS_Message *ais_msg = (AIS_Message *) msg; -+ uint32_t local_nodeid = get_local_nodeid(handle); -+ const char *local_name = get_local_node_name(); - - if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { - crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); - return; - -- } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { -+ } else if (ais_msg->host.id != 0 && (local_nodeid != ais_msg->host.id)) { - /* Not for us */ -+ crm_trace("Not for us: %u != %u", ais_msg->host.id, local_nodeid); - return; -- } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) { -+ } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, local_name)) { - /* Not for us */ -+ crm_trace("Not for us: %s != %s", ais_msg->host.uname, local_name); - return; - } - -@@ -615,6 +637,7 @@ pcmk_cpg_membership(cpg_handle_t handle, - int i; - gboolean found = FALSE; - static int counter = 0; -+ uint32_t local_nodeid = get_local_nodeid(handle); - - for (i = 0; i < left_list_entries; i++) { - crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); -@@ -631,14 +654,38 @@ pcmk_cpg_membership(cpg_handle_t handle, - crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); - - crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid); -+ -+ /* Anyone that is sending us CPG messages must also be a _CPG_ member. -+ * But its _not_ safe to assume its in the quorum membership. 
-+ * We may have just found out its dead and are processing the last couple of messages it sent -+ */ - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- if (pcmk_nodeid == member_list[i].nodeid) { -+ if(peer && crm_is_peer_active(peer) == FALSE) { -+ time_t now = time(NULL); -+ -+ /* Co-opt the otherwise unused votes field */ -+ if(peer->votes == 0) { -+ peer->votes = now; -+ -+ } else if(now > (60 + peer->votes)) { -+ /* On the otherhand, if we're still getting messages, at a certain point -+ * we need to acknowledge our internal cache is probably wrong -+ * -+ * Set the threshold to 1 minute -+ */ -+ crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id); -+ crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); -+ peer->votes = 0; -+ } -+ } -+ -+ if (local_nodeid == member_list[i].nodeid) { - found = TRUE; - } - } - - if (!found) { -- crm_err("We're not part of CPG group %s anymore!", groupName->value); -+ crm_err("We're not part of CPG group '%s' anymore!", groupName->value); - cpg_evicted = TRUE; - } - -@@ -657,6 +704,7 @@ init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char - int rc = -1; - int fd = 0; - int retries = 0; -+ uint32_t id = 0; - crm_node_t *peer = NULL; - - struct mainloop_fd_callbacks cpg_fd_callbacks = { -@@ -674,11 +722,13 @@ init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char - goto bail; - } - -- retries = 0; -- cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); -- if (rc != CS_OK) { -+ id = get_local_nodeid(pcmk_cpg_handle); -+ if (id == 0) { - crm_err("Could not get local node id from the CPG API"); - goto bail; -+ -+ } else if(nodeid) { -+ *nodeid = id; - } - - retries = 0; -@@ -702,7 +752,7 @@ init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char - return FALSE; - } - -- peer = crm_get_peer(pcmk_nodeid, pcmk_uname); -+ peer = crm_get_peer(id, NULL); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); - return TRUE; - } -@@ -722,32 +772,13 @@ pcmk_quorum_dispatch(gpointer user_data) - } - - static void --corosync_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data) --{ -- int *seq = user_data; -- crm_node_t *node = value; -- -- if (node->last_seen != *seq && node->state -- && crm_str_eq(CRM_NODE_LOST, node->state, TRUE) == FALSE) { -- crm_notice("Node %d/%s was not seen in the previous transition", node->id, node->uname); -- crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0); -- } --} -- --static void --corosync_mark_node_unseen(gpointer key, gpointer value, gpointer user_data) --{ -- crm_node_t *node = value; -- -- node->last_seen = 0; --} -- --static void - pcmk_quorum_notification(quorum_handle_t handle, - uint32_t quorate, - uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) - { - int i; -+ GHashTableIter iter; -+ crm_node_t *node = NULL; - static gboolean init_phase = TRUE; - - if (quorate != crm_have_quorum) { -@@ -766,14 +797,17 @@ pcmk_quorum_notification(quorum_handle_t handle, - } - - init_phase = FALSE; -- g_hash_table_foreach(crm_peer_cache, corosync_mark_node_unseen, NULL); -+ -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ node->last_seen = 0; -+ } - - for (i = 0; i < view_list_entries; i++) { - uint32_t id = view_list[i]; - char *name = NULL; -- crm_node_t *node = NULL; - -- crm_debug("Member[%d] %d ", i, id); -+ 
crm_debug("Member[%d] %u ", i, id); - - node = crm_get_peer(id, NULL); - if (node->uname == NULL) { -@@ -787,7 +821,14 @@ pcmk_quorum_notification(quorum_handle_t handle, - } - - crm_trace("Reaping unseen nodes..."); -- g_hash_table_foreach(crm_peer_cache, corosync_mark_unseen_peer_dead, &ring_id); -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if (node->last_seen != ring_id && node->state) { -+ crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0); -+ } else if (node->last_seen != ring_id) { -+ crm_info("State of node %s[%u] is still unknown", node->uname, node->id); -+ } -+ } - - if (quorum_app_callback) { - quorum_app_callback(ring_id, quorate); -@@ -887,6 +928,8 @@ init_cs_connection(crm_cluster_t * cluster) - gboolean - init_cs_connection_once(crm_cluster_t * cluster) - { -+ const char *uuid = NULL; -+ crm_node_t *peer = NULL; - enum cluster_type_e stack = get_cluster_type(); - - crm_peer_init(); -@@ -897,23 +940,30 @@ init_cs_connection_once(crm_cluster_t * cluster) - return FALSE; - } - -- if (init_cpg_connection(cluster->cs_dispatch, cluster->destroy, &pcmk_nodeid) == FALSE) { -+ if (init_cpg_connection(cluster->cs_dispatch, cluster->destroy, NULL) == FALSE) { - return FALSE; - } -- pcmk_uname = get_local_node_name(); - crm_info("Connection to '%s': established", name_for_cluster_type(stack)); - -- CRM_ASSERT(pcmk_uname != NULL); -- pcmk_uname_len = strlen(pcmk_uname); -+ cluster->nodeid = get_local_nodeid(0); -+ if(cluster->nodeid == 0) { -+ crm_err("Could not establish local nodeid"); -+ return FALSE; -+ } - -- if (pcmk_nodeid != 0) { -- /* Ensure the local node always exists */ -- crm_get_peer(pcmk_nodeid, pcmk_uname); -+ cluster->uname = get_node_name(0); -+ if(cluster->uname == NULL) { -+ crm_err("Could not establish local node name"); -+ return FALSE; - } - -- cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); -- cluster->uname = strdup(pcmk_uname); -- cluster->nodeid = pcmk_nodeid; -+ /* Ensure the local node always exists */ -+ peer = crm_get_peer(cluster->nodeid, cluster->uname); -+ uuid = get_corosync_uuid(peer); -+ -+ if(uuid) { -+ cluster->uuid = strdup(uuid); -+ } - - return TRUE; - } -@@ -1069,12 +1119,18 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml - - name = corosync_node_name(cmap_handle, nodeid); - if (name != NULL) { -- crm_node_t *node = g_hash_table_lookup(crm_peer_cache, name); -- -- if (node && node->id != nodeid) { -- crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, -- nodeid, name); -- crm_exit(100); -+ GHashTableIter iter; -+ crm_node_t *node = NULL; -+ -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->uname && strcasecmp(node->uname, name) == 0) { -+ if (node && node->id && node->id != nodeid) { -+ crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, -+ nodeid, name); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+ } - } - } - -diff --git a/lib/cluster/heartbeat.c b/lib/cluster/heartbeat.c -index 2dda61b..a801c8e 100644 ---- a/lib/cluster/heartbeat.c -+++ b/lib/cluster/heartbeat.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any 
later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -110,7 +110,7 @@ convert_ha_field(xmlNode * parent, void *msg_v, int lpc) - memset(uncompressed, 0, size); - used = size - 1; /* always leave room for a trailing '\0' - * BZ2_bzBuffToBuffDecompress wont say anything if -- * the uncompressed data is exactly 'size' bytes -+ * the uncompressed data is exactly 'size' bytes - */ - - rc = BZ2_bzBuffToBuffDecompress(uncompressed, &used, compressed, orig_len, 1, 0); -@@ -300,7 +300,7 @@ convert_xml_child(HA_Message * msg, xmlNode * xml) - ); - crm_debug("rc=%d, used=%d", rc, used); - if (rc != BZ_OK) { -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - crm_debug("Original %s, decompressed %s", buffer, uncompressed); - free(uncompressed); -@@ -368,8 +368,11 @@ crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state - const char *uuid = NULL; - - CRM_CHECK(oc->m_array[offset].node_uname != NULL, return NULL); -- uuid = get_uuid(oc->m_array[offset].node_uname); -- peer = crm_update_peer(__FUNCTION__, oc->m_array[offset].node_id, -+ -+ peer = crm_get_peer(0, oc->m_array[offset].node_uname); -+ uuid = crm_peer_uuid(peer); -+ -+ crm_update_peer(__FUNCTION__, oc->m_array[offset].node_id, - oc->m_array[offset].node_born_on, seq, -1, 0, - uuid, oc->m_array[offset].node_uname, NULL, state); - -@@ -402,10 +405,13 @@ send_ha_message(ll_cluster_t * hb_conn, xmlNode * xml, const char *node, gboolea - all_is_good = FALSE; - - } else if (node != NULL) { -- if (hb_conn->llc_ops->send_ordered_nodemsg(hb_conn, msg, node) != HA_OK) { -+ char *host_lowercase = g_ascii_strdown(node, -1); -+ -+ if (hb_conn->llc_ops->send_ordered_nodemsg(hb_conn, msg, host_lowercase) != HA_OK) { - all_is_good = FALSE; - crm_err("Send failed"); - } -+ free(host_lowercase); - - } else if (force_ordered) { - if (hb_conn->llc_ops->send_ordered_clustermsg(hb_conn, msg) != HA_OK) { -@@ -482,6 +488,7 @@ ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data) - gboolean - register_heartbeat_conn(crm_cluster_t * cluster) - { -+ crm_node_t *peer = NULL; - const char *const_uuid = NULL; - const char *const_uname = NULL; - -@@ -516,7 +523,9 @@ register_heartbeat_conn(crm_cluster_t * cluster) - const_uname = cluster->hb_conn->llc_ops->get_mynodeid(cluster->hb_conn); - CRM_CHECK(const_uname != NULL, return FALSE); - -- const_uuid = get_uuid(const_uname); -+ peer = crm_get_peer(0, const_uname); -+ const_uuid = crm_peer_uuid(peer); -+ - CRM_CHECK(const_uuid != NULL, return FALSE); - - crm_info("Hostname: %s", const_uname); -@@ -578,6 +587,7 @@ heartbeat_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xm - - do { - xmlNode *node = NULL; -+ crm_node_t *peer = NULL; - const char *ha_node_type = NULL; - const char *ha_node_uuid = NULL; - -@@ -592,7 +602,9 @@ heartbeat_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xm - continue; - } - -- ha_node_uuid = get_uuid(ha_node); -+ peer = crm_get_peer(0, ha_node); -+ ha_node_uuid = crm_peer_uuid(peer); -+ - if (ha_node_uuid == NULL) { - crm_warn("Node %s: no uuid found", ha_node); 
- continue; -diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c -index 05394d9..14749e4 100644 ---- a/lib/cluster/legacy.c -+++ b/lib/cluster/legacy.c -@@ -48,9 +48,6 @@ struct cpg_name pcmk_cpg_group = { - cman_handle_t pcmk_cman_handle = NULL; - #endif - --static char *pcmk_uname = NULL; --static int pcmk_uname_len = 0; --static uint32_t pcmk_nodeid = 0; - int ais_membership_timer = 0; - gboolean ais_membership_force = FALSE; - int ais_dispatch(gpointer user_data); -@@ -101,7 +98,7 @@ text2msg_type(const char *text) - */ - int scan_rc = sscanf(text, "%d", &type); - -- if (scan_rc != 1) { -+ if (scan_rc != 1 || type <= crm_msg_stonith_ng) { - /* Ensure its sane */ - type = crm_msg_none; - } -@@ -140,10 +137,9 @@ int ais_fd_async = -1; /* never send messages via this channel */ - void *ais_ipc_ctx = NULL; - - hdb_handle_t ais_ipc_handle = 0; --static char *ais_cluster_name = NULL; - --gboolean --get_ais_nodeid(uint32_t * id, char **uname) -+static gboolean -+get_ais_details(uint32_t * id, char **uname) - { - struct iovec iov; - int retries = 0; -@@ -151,6 +147,15 @@ get_ais_nodeid(uint32_t * id, char **uname) - cs_ipc_header_response_t header; - struct crm_ais_nodeid_resp_s answer; - -+ static uint32_t local_id = 0; -+ static char *local_uname = NULL; -+ -+ if(local_id) { -+ if(id) *id = local_id; -+ if(uname) *uname = strdup(local_uname); -+ return TRUE; -+ } -+ - header.error = CS_OK; - header.id = crm_class_nodeid; - header.size = sizeof(cs_ipc_header_response_t); -@@ -190,47 +195,225 @@ get_ais_nodeid(uint32_t * id, char **uname) - - crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); - -- *id = answer.id; -- *uname = strdup(answer.uname); -- ais_cluster_name = strdup(answer.cname); -+ local_id = answer.id; -+ local_uname = strdup(answer.uname); - -+ if(id) *id = local_id; -+ if(uname) *uname = strdup(local_uname); - return TRUE; - } - --gboolean --crm_get_cluster_name(char **cname) -+static uint32_t get_local_nodeid(cpg_handle_t handle) - { -- CRM_CHECK(cname != NULL, return FALSE); -- if (ais_cluster_name) { -- *cname = strdup(ais_cluster_name); -- return TRUE; -+ int rc = CS_OK; -+ int retries = 0; -+ static uint32_t local_nodeid = 0; -+ cpg_handle_t local_handle = handle; -+ cpg_callbacks_t cb = { }; -+ -+ if(local_nodeid != 0) { -+ return local_nodeid; -+ } -+ -+#if 0 -+ /* Should not be necessary */ -+ if(get_cluster_type() == pcmk_cluster_classic_ais) { -+ get_ais_details(&local_nodeid, NULL); -+ goto done; -+ } -+#endif -+ -+ if(local_handle == 0) { -+ crm_trace("Creating connection"); -+ cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); -+ } -+ -+ if (rc == CS_OK) { -+ retries = 0; -+ crm_trace("Performing lookup"); -+ cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); - } -+ -+ if (rc != CS_OK) { -+ crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); -+ } -+ -+ if(handle != local_handle) { -+ crm_trace("Closing connection %u", local_handle); -+ cpg_finalize(local_handle); -+ } -+ -+ crm_debug("Local nodeid is %u", local_nodeid); -+ return local_nodeid; -+} -+ -+GListPtr cs_message_queue = NULL; -+int cs_message_timer = 0; -+ -+static ssize_t crm_cs_flush(void); -+ -+static gboolean -+crm_cs_flush_cb(gpointer data) -+{ -+ cs_message_timer = 0; -+ crm_cs_flush(); - return FALSE; - } - -+#define CS_SEND_MAX 200 -+static ssize_t -+crm_cs_flush(void) -+{ -+ int sent = 0; -+ ssize_t rc = 0; -+ int queue_len = 0; -+ static unsigned int last_sent = 0; -+ -+ if 
(pcmk_cpg_handle == 0) { -+ crm_trace("Connection is dead"); -+ return pcmk_ok; -+ } -+ -+ queue_len = g_list_length(cs_message_queue); -+ if ((queue_len % 1000) == 0 && queue_len > 1) { -+ crm_err("CPG queue has grown to %d", queue_len); -+ -+ } else if (queue_len == CS_SEND_MAX) { -+ crm_warn("CPG queue has grown to %d", queue_len); -+ } -+ -+ if (cs_message_timer) { -+ /* There is already a timer, wait until it goes off */ -+ crm_trace("Timer active %d", cs_message_timer); -+ return pcmk_ok; -+ } -+ -+ while (cs_message_queue && sent < CS_SEND_MAX) { -+ AIS_Message *header = NULL; -+ struct iovec *iov = cs_message_queue->data; -+ -+ errno = 0; -+ rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, iov, 1); -+ -+ if (rc != CS_OK) { -+ break; -+ } -+ -+ sent++; -+ header = iov->iov_base; -+ last_sent = header->id; -+ if (header->compressed_size) { -+ crm_trace("CPG message %d (%d compressed bytes) sent", -+ header->id, header->compressed_size); -+ } else { -+ crm_trace("CPG message %d (%d bytes) sent: %.200s", -+ header->id, header->size, header->data); -+ } -+ -+ cs_message_queue = g_list_remove(cs_message_queue, iov); -+ free(iov[0].iov_base); -+ free(iov); -+ } -+ -+ queue_len -= sent; -+ if (sent > 1 || cs_message_queue) { -+ crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); -+ } else { -+ crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%d)", -+ sent, queue_len, last_sent, ais_error2text(rc), rc); -+ } -+ -+ if (cs_message_queue) { -+ uint32_t delay_ms = 100; -+ if(rc != CS_OK) { -+ /* Proportionally more if sending failed but cap at 1s */ -+ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); -+ } -+ cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, NULL); -+ } -+ -+ return rc; -+} -+ -+static bool -+send_plugin_text(int class, struct iovec *iov) -+{ -+ int rc = CS_OK; -+ int retries = 0; -+ int buf_len = sizeof(cs_ipc_header_response_t); -+ char *buf = malloc(buf_len); -+ AIS_Message *ais_msg = (AIS_Message*)iov[0].iov_base; -+ cs_ipc_header_response_t *header = (cs_ipc_header_response_t *) buf; -+ -+ /* There are only 6 handlers registered to crm_lib_service in plugin.c */ -+ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); -+ return FALSE); -+ -+ do { -+ if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { -+ retries++; -+ crm_info("Peer overloaded or membership in flux:" -+ " Re-sending message (Attempt %d of 20)", retries); -+ sleep(retries); /* Proportional back off */ -+ } -+ -+ errno = 0; -+ rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, iov, 1, buf, buf_len); -+ -+ } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); -+ -+ if (rc == CS_OK) { -+ CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), -+ crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", -+ header->id, header->size, class, header->error)); -+ -+ CRM_ASSERT(buf_len >= header->size); -+ CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, -+ crm_err("Bad response id (%d) for request (%d)", header->id, -+ ais_msg->header.id)); -+ CRM_CHECK(header->error == CS_OK, rc = header->error); -+ -+ } else { -+ crm_perror(LOG_ERR, "Sending plugin message %d FAILED: %s (%d)", -+ ais_msg->id, ais_error2text(rc), rc); -+ } -+ -+ free(iov[0].iov_base); -+ free(iov); -+ free(buf); -+ -+ return (rc == CS_OK); -+} -+ - gboolean - send_ais_text(int class, const char *data, - gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) - { - static int msg_id = 0; - 
static int local_pid = 0; -- enum cluster_type_e cluster_type = get_cluster_type(); -+ static int local_name_len = 0; -+ static const char *local_name = NULL; - -- int retries = 0; -- int rc = CS_OK; -- int buf_len = sizeof(cs_ipc_header_response_t); -- -- char *buf = NULL; -- struct iovec iov; -- const char *transport = "pcmk"; -- cs_ipc_header_response_t *header = NULL; -+ char *target = NULL; -+ struct iovec *iov; - AIS_Message *ais_msg = NULL; -+ enum cluster_type_e cluster_type = get_cluster_type(); - enum crm_ais_msg_types sender = text2msg_type(crm_system_name); - - /* There are only 6 handlers registered to crm_lib_service in plugin.c */ - CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); - return FALSE); - -+ CRM_CHECK(dest != crm_msg_ais, return FALSE); -+ -+ if(local_name == NULL) { -+ local_name = get_local_node_name(); -+ } -+ if(local_name_len == 0 && local_name) { -+ local_name_len = strlen(local_name); -+ } -+ - if (data == NULL) { - data = ""; - } -@@ -254,140 +437,80 @@ send_ais_text(int class, const char *data, - - if (node) { - if (node->uname) { -+ target = strdup(node->uname); - ais_msg->host.size = strlen(node->uname); - memset(ais_msg->host.uname, 0, MAX_NAME); - memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size); -+ } else { -+ target = g_strdup_printf("%u", node->id); - } - ais_msg->host.id = node->id; -+ } else { -+ target = strdup("all"); - } - - ais_msg->sender.id = 0; - ais_msg->sender.type = sender; - ais_msg->sender.pid = local_pid; -- ais_msg->sender.size = pcmk_uname_len; -+ ais_msg->sender.size = local_name_len; - memset(ais_msg->sender.uname, 0, MAX_NAME); -- memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); -+ memcpy(ais_msg->sender.uname, local_name, ais_msg->sender.size); - - ais_msg->size = 1 + strlen(data); -+ ais_msg->header.size = sizeof(AIS_Message) + ais_msg->size; - - if (ais_msg->size < CRM_BZ2_THRESHOLD) { -- failback: -- ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); -+ ais_msg = realloc(ais_msg, ais_msg->header.size); - memcpy(ais_msg->data, data, ais_msg->size); - - } else { - char *compressed = NULL; -+ unsigned int new_size = 0; - char *uncompressed = strdup(data); -- unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ - -- crm_trace("Compressing message payload"); -- compressed = malloc(len); -+ if (crm_compress_string(uncompressed, ais_msg->size, 0, &compressed, &new_size)) { - -- rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, -- 0, CRM_BZ2_WORK); -+ ais_msg->header.size = sizeof(AIS_Message) + new_size + 1; -+ ais_msg = realloc(ais_msg, ais_msg->header.size); -+ memcpy(ais_msg->data, compressed, new_size); -+ ais_msg->data[new_size] = 0; - -- free(uncompressed); -+ ais_msg->is_compressed = TRUE; -+ ais_msg->compressed_size = new_size; - -- if (rc != BZ_OK) { -- crm_err("Compression failed: %d", rc); -- free(compressed); -- goto failback; -+ } else { -+ ais_msg = realloc(ais_msg, ais_msg->header.size); -+ memcpy(ais_msg->data, data, ais_msg->size); - } - -- ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1); -- memcpy(ais_msg->data, compressed, len); -- ais_msg->data[len] = 0; -+ free(uncompressed); - free(compressed); -- -- ais_msg->is_compressed = TRUE; -- ais_msg->compressed_size = len; -- -- crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); - } - -- ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); -- -- crm_trace("Sending%s message %d to %s.%s (data=%d, 
total=%d)", -- ais_msg->is_compressed ? " compressed" : "", -- ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), -- ais_data_len(ais_msg), ais_msg->header.size); -- -- iov.iov_base = ais_msg; -- iov.iov_len = ais_msg->header.size; -- buf = realloc(buf, buf_len); -- -- do { -- if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { -- retries++; -- crm_info("Peer overloaded or membership in flux:" -- " Re-sending message (Attempt %d of 20)", retries); -- sleep(retries); /* Proportional back off */ -- } -- -- errno = 0; -- switch (cluster_type) { -- case pcmk_cluster_corosync: -- CRM_ASSERT(FALSE /*Not supported here */ ); -- break; -- -- case pcmk_cluster_classic_ais: -- rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); -- header = (cs_ipc_header_response_t *) buf; -- if (rc == CS_OK) { -- CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), -- crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", -- header->id, header->size, class, header->error)); -- -- CRM_ASSERT(buf_len >= header->size); -- CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, -- crm_err("Bad response id (%d) for request (%d)", header->id, -- ais_msg->header.id)); -- CRM_CHECK(header->error == CS_OK, rc = header->error); -- } -- break; -- -- case pcmk_cluster_cman: -- transport = "cpg"; -- CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; -- goto bail); -- rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); -- if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { -- cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; -- int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); -- -- if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { -- crm_warn("Connection overloaded, cannot send messages"); -- goto bail; -- -- } else if (rc2 != CS_OK) { -- crm_warn("Could not determin the connection state: %s (%d)", -- ais_error2text(rc2), rc2); -- goto bail; -- } -- } -- break; -- -- case pcmk_cluster_unknown: -- case pcmk_cluster_invalid: -- case pcmk_cluster_heartbeat: -- CRM_ASSERT(is_openais_cluster()); -- break; -- } -- -- } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); -- -- bail: -- if (rc != CS_OK) { -- crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", -- ais_msg->id, transport, rc, ais_error2text(rc)); -+ iov = calloc(1, sizeof(struct iovec)); -+ iov->iov_base = ais_msg; -+ iov->iov_len = ais_msg->header.size; - -+ if (ais_msg->compressed_size) { -+ crm_trace("Queueing %s message %u to %s (%d compressed bytes)", -+ cluster_type == pcmk_cluster_classic_ais?"plugin":"CPG", -+ ais_msg->id, target, ais_msg->compressed_size); - } else { -- crm_trace("Message %d: sent", ais_msg->id); -+ crm_trace("Queueing %s message %u to %s (%d bytes)", -+ cluster_type == pcmk_cluster_classic_ais?"plugin":"CPG", -+ ais_msg->id, target, ais_msg->size); - } - -- free(buf); -- free(ais_msg); -- return (rc == CS_OK); -+ /* The plugin is the only time we dont use CPG messaging */ -+ if(cluster_type == pcmk_cluster_classic_ais) { -+ return send_plugin_text(class, iov); -+ } -+ -+ cs_message_queue = g_list_append(cs_message_queue, iov); -+ crm_cs_flush(); -+ -+ free(target); -+ return TRUE; - } - - gboolean -@@ -427,6 +550,7 @@ terminate_cs_connection(void) - if (pcmk_cpg_handle) { - crm_info("Disconnecting CPG"); - if (cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group) == CS_OK) { -+ crm_info("Destroying CPG"); - cpg_finalize(pcmk_cpg_handle); - } - pcmk_cpg_handle = 0; -@@ -441,6 +565,7 @@ 
terminate_cs_connection(void) - if (pcmk_cman_handle) { - crm_info("Disconnecting cman"); - if (cman_stop_notification(pcmk_cman_handle) >= 0) { -+ crm_info("Destroying cman"); - cman_finish(pcmk_cman_handle); - } - -@@ -635,7 +760,7 @@ ais_destroy(gpointer user_data) - { - crm_err("AIS connection terminated"); - ais_fd_sync = -1; -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - # if SUPPORT_CMAN -@@ -744,13 +869,6 @@ init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (* - goto cman_bail; - } - -- rc = cman_get_cluster(pcmk_cman_handle, &cluster); -- if (rc < 0) { -- crm_err("Couldn't query cman cluster details: %d %d", rc, errno); -- goto cman_bail; -- } -- ais_cluster_name = strdup(cluster.ci_name); -- - rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); - if (rc < 0) { - crm_err("Couldn't register for cman notifications: %d %d", rc, errno); -@@ -772,7 +890,7 @@ init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (* - } - # else - crm_err("cman qorum is not supported in this build"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - # endif - return TRUE; - } -@@ -806,15 +924,18 @@ pcmk_cpg_deliver(cpg_handle_t handle, - uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) - { - AIS_Message *ais_msg = (AIS_Message *) msg; -+ uint32_t local_nodeid = get_local_nodeid(handle); -+ const char *local_name = get_local_node_name(); - - if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { - crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); - return; - -- } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { -+ } else if (ais_msg->host.id != 0 && (local_nodeid != ais_msg->host.id)) { - /* Not for us */ - return; -- } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) { -+ -+ } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, local_name)) { - /* Not for us */ - return; - } -@@ -850,6 +971,7 @@ pcmk_cpg_membership(cpg_handle_t handle, - int i; - gboolean found = FALSE; - static int counter = 0; -+ uint32_t local_nodeid = get_local_nodeid(handle); - - for (i = 0; i < left_list_entries; i++) { - crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); -@@ -867,7 +989,7 @@ pcmk_cpg_membership(cpg_handle_t handle, - - crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); -- if (pcmk_nodeid == member_list[i].nodeid) { -+ if (local_nodeid == member_list[i].nodeid) { - found = TRUE; - } - } -@@ -938,12 +1060,12 @@ init_cpg_connection(crm_cluster_t * cluster) - return FALSE; - } - -- peer = crm_get_peer(cluster->nodeid, pcmk_uname); -+ peer = crm_get_peer(cluster->nodeid, NULL); - crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); - - # else - crm_err("The Corosync CPG API is not supported in this build"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - # endif - return TRUE; - } -@@ -953,7 +1075,7 @@ init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), - void (*destroy) (gpointer)) - { - crm_err("The Corosync quorum API is not supported in this build"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - return TRUE; - } - -@@ -963,7 +1085,7 @@ init_cs_connection_classic(crm_cluster_t * cluster) - int rc; - int pid = 0; - char *pid_s = NULL; -- struct utsname name; -+ const char *name = NULL; - - struct mainloop_fd_callbacks ais_fd_callbacks = { - 
.dispatch = ais_dispatch, -@@ -1007,22 +1129,18 @@ init_cs_connection_classic(crm_cluster_t * cluster) - send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); - free(pid_s); - -- if (uname(&name) < 0) { -- crm_perror(LOG_ERR, "Could not determin the current host"); -- crm_exit(100); -- } -+ cluster->nodeid = get_local_nodeid(0); - -- get_ais_nodeid(&pcmk_nodeid, &pcmk_uname); -- if (safe_str_neq(name.nodename, pcmk_uname)) { -- crm_crit("Node name mismatch! Corosync supplied %s, our lookup returned %s", -- pcmk_uname, name.nodename); -+ name = get_local_node_name(); -+ get_ais_details(NULL, &(cluster->uname)); -+ if (safe_str_neq(name, cluster->uname)) { -+ crm_crit("Node name mismatch! Corosync supplied %s but our lookup returned %s", -+ cluster->uname, name); - crm_notice - ("Node name mismatches usually occur when assigned automatically by DHCP servers"); -- crm_notice("If this node was part of the cluster with a different name," -- " you will need to remove the old entry with crm_node --remove"); -+ crm_exit(ENOTUNIQ); - } -- -- cluster->nodeid = pcmk_nodeid; -+ - - return TRUE; - } -@@ -1080,10 +1198,9 @@ init_cs_connection(crm_cluster_t * cluster) - int rc = init_cs_connection_once(cluster); - - retries++; -- - switch (rc) { - case CS_OK: -- if (getenv("HA_mcp")) { -+ if (getenv("HA_mcp") && get_cluster_type() != pcmk_cluster_cman) { - xmlNode *poke = create_xml_node(NULL, "poke"); - mainloop_io_t *ipc = - mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_MEDIUM, 0, -@@ -1144,6 +1261,8 @@ extern int set_cluster_type(enum cluster_type_e type); - gboolean - init_cs_connection_once(crm_cluster_t * cluster) - { -+ const char *uuid = NULL; -+ crm_node_t *peer = NULL; - enum cluster_type_e stack = get_cluster_type(); - - crm_peer_init(); -@@ -1159,7 +1278,7 @@ init_cs_connection_once(crm_cluster_t * cluster) - if (init_cpg_connection(cluster) == FALSE) { - return FALSE; - } -- pcmk_uname = cman_node_name(0 /* CMAN_NODEID_US */ ); -+ cluster->uname = cman_node_name(0 /* CMAN_NODEID_US */ ); - break; - case pcmk_cluster_heartbeat: - crm_info("Could not find an active corosync based cluster"); -@@ -1173,17 +1292,25 @@ init_cs_connection_once(crm_cluster_t * cluster) - - crm_info("Connection to '%s': established", name_for_cluster_type(stack)); - -- CRM_ASSERT(pcmk_uname != NULL); -- pcmk_uname_len = strlen(pcmk_uname); -+ cluster->nodeid = get_local_nodeid(0); -+ if(cluster->nodeid == 0) { -+ crm_err("Could not establish local nodeid"); -+ return FALSE; -+ } - -- pcmk_nodeid = cluster->nodeid; -- if (pcmk_nodeid != 0) { -- /* Ensure the local node always exists */ -- crm_get_peer(pcmk_nodeid, pcmk_uname); -+ cluster->uname = get_node_name(0); -+ if(cluster->uname == NULL) { -+ crm_err("Could not establish local node name"); -+ return FALSE; - } - -- cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); -- cluster->uname = strdup(pcmk_uname); -+ /* Ensure the local node always exists */ -+ peer = crm_get_peer(cluster->nodeid, cluster->uname); -+ uuid = get_corosync_uuid(peer); -+ -+ if(uuid) { -+ cluster->uuid = strdup(uuid); -+ } - - return TRUE; - } -diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c -index b9ca70c..add069c 100644 ---- a/lib/cluster/membership.c -+++ b/lib/cluster/membership.c -@@ -32,7 +32,6 @@ - #include - #include - --GHashTable *crm_peer_id_cache = NULL; - GHashTable *crm_peer_cache = NULL; - unsigned long long crm_peer_seq = 0; - gboolean crm_have_quorum = FALSE; -@@ -40,6 +39,9 @@ gboolean crm_have_quorum = FALSE; - gboolean - 
crm_is_peer_active(const crm_node_t * node) - { -+ if(node == NULL) { -+ return FALSE; -+ } - #if SUPPORT_COROSYNC - if (is_openais_cluster()) { - return crm_is_corosync_peer_active(node); -@@ -80,41 +82,24 @@ guint - reap_crm_member(uint32_t id, const char *name) - { - int matches = 0; -- crm_node_t *node = NULL; -+ crm_node_t search; - -- if (crm_peer_cache == NULL || crm_peer_id_cache == NULL) { -+ if (crm_peer_cache == NULL) { - crm_trace("Nothing to do, cache not initialized"); - return 0; - } - -- if (name) { -- node = g_hash_table_lookup(crm_peer_cache, name); -- } -- -- if (node == NULL && id > 0) { -- node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); -- } -- -- if (node == NULL) { -- crm_info("Peer %u/%s cannot be purged: does not exist", id, name); -- return 0; -- } -- -- if (crm_is_peer_active(node)) { -- crm_warn("Peer %u/%s cannot be purged: still active", id, name); -+ search.id = id; -+ search.uname = strdup(name); -+ matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); -+ if(matches) { -+ crm_notice("Purged %d peers with id=%u and/or uname=%s from the membership cache", matches, id, name); - - } else { -- if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { -- crm_notice("Purged dead peer %u/%s from the uuid cache", id, name); -- -- } else if (id) { -- crm_warn("Peer %u/%s was not found in the ID cache", id, name); -- } -- -- matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); -- crm_notice("Purged %d dead peers with id=%u from the membership cache", matches, id); -+ crm_info("No peers with id=%u and/or uname=%s exist", id, name); - } - -+ free(search.uname); - return matches; - } - -@@ -151,6 +136,7 @@ destroy_crm_node(gpointer data) - free(node->uname); - free(node->state); - free(node->uuid); -+ free(node->expected); - free(node); - } - -@@ -166,11 +152,7 @@ crm_peer_init(void) - - crm_peer_destroy(); - if (crm_peer_cache == NULL) { -- crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node); -- } -- -- if (crm_peer_id_cache == NULL) { -- crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); -+ crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_crm_node); - } - } - -@@ -178,14 +160,10 @@ void - crm_peer_destroy(void) - { - if (crm_peer_cache != NULL) { -+ crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); - g_hash_table_destroy(crm_peer_cache); - crm_peer_cache = NULL; - } -- -- if (crm_peer_id_cache != NULL) { -- g_hash_table_destroy(crm_peer_id_cache); -- crm_peer_id_cache = NULL; -- } - } - - void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; -@@ -200,73 +178,114 @@ crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, co - crm_node_t * - crm_get_peer(unsigned int id, const char *uname) - { -+ GHashTableIter iter; - crm_node_t *node = NULL; -+ crm_node_t *by_id = NULL; -+ crm_node_t *by_name = NULL; - - CRM_ASSERT(id > 0 || uname != NULL); - - crm_peer_init(); - -- if (node == NULL && uname != NULL) { -- node = g_hash_table_lookup(crm_peer_cache, uname); -+ if (uname != NULL) { -+ g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->uname && strcasecmp(node->uname, uname) == 0) { -+ crm_trace("Name match: %s", node->uname); -+ by_name = node; -+ break; -+ } -+ } -+ } -+ -+ if (id > 0) { -+ 
g_hash_table_iter_init(&iter, crm_peer_cache); -+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { -+ if(node->id == id) { -+ crm_trace("ID match: %u", node->id); -+ by_id = node; -+ break; -+ } -+ } - } - -- if (node == NULL && id > 0) { -- node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); -+ node = by_id; /* Good default */ -+ if(by_id == by_name) { -+ /* Nothing to do if they match (both NULL counts) */ -+ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); -+ -+ } else if(by_name) { -+ crm_trace("Only one: %p for %u/%s", by_name, id, uname); - -- if (node && node->uname && uname) { -- crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); -+ if(id && by_name->id) { -+ crm_crit("Node %u and %u share the same name '%s'", -+ id, by_name->id, uname); -+ node = NULL; /* Create a new one */ - -- /* NOTE: Calling crm_new_peer() means the entry in -- * crm_peer_id_cache will point to the new entity -- * -- * TO-DO: Replace the old uname instead? -- */ -- node = NULL; -+ } else { -+ node = by_name; - } -+ -+ } else if(by_id) { -+ crm_trace("Only one: %p for %u/%s", by_id, id, uname); -+ -+ if(uname && by_id->uname) { -+ crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", -+ uname, by_id->uname, id, uname); -+ } -+ -+ } else if(uname && by_id->uname) { -+ crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id); -+ -+ } else if(id && by_name->id) { -+ crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); -+ -+ } else { -+ /* Simple merge */ -+ -+ /* Only corosync based clusters use nodeid's -+ * The functions that call crm_update_peer_state() only know nodeid so 'by_id' is authorative when merging -+ * Same for crm_update_peer_proc() -+ */ -+ -+ crm_info("Merging %p into %p", by_name, by_id); -+ g_hash_table_remove(crm_peer_cache, by_name); - } - - if (node == NULL) { -- crm_debug("Creating entry for node %s/%u", uname, id); -+ char *uniqueid = crm_generate_uuid(); - - node = calloc(1, sizeof(crm_node_t)); - CRM_ASSERT(node); -+ -+ crm_info("Created entry %s/%p for node %s/%u (%d total)", -+ uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); -+ g_hash_table_replace(crm_peer_cache, uniqueid, node); - } - -- if (id > 0 && node->id != id) { -- crm_node_t *old = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); -+ if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { -+ crm_info("Node %u is now known as %s", id, uname); -+ } - -+ if(id > 0 && node->id == 0) { - node->id = id; -- crm_info("Node %s now has id: %u", crm_str(uname), id); -- if (old && old->state) { -- /* Only corosync based clusters use nodeid's -- * The functions that call crm_update_peer_state() only know nodeid so 'old' is authorative when merging -- * Same for crm_update_peer_proc() -- */ -- crm_update_peer_state(__FUNCTION__, node, old->state, 0); -- crm_update_peer_proc(__FUNCTION__, node, old->processes, NULL); -- } -- g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); - } - -- if (uname && node->uname == NULL) { -+ if(uname && node->uname == NULL) { - node->uname = strdup(uname); -- if (node->id) { -- crm_info("Node %u is now known as %s", node->id, uname); -- } -- g_hash_table_replace(crm_peer_cache, node->uname, node); - if (crm_status_callback) { - crm_status_callback(crm_status_uname, node, NULL); - } - } - -- if (node && node->uname && node->uuid == NULL) { -- const char *uuid = 
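
The crm_get_peer() rewrite above folds the separate name and id caches into one table, so a single lookup can now hit two different entries that must be reconciled. A condensed sketch of the decision table, with a hypothetical peer struct and the logging omitted:

    #include <stddef.h>

    struct peer {
        unsigned int id;    /* 0 when unknown */
        const char *uname;  /* NULL when unknown */
    };

    /* Returns the entry to keep; sets *merge when by_name should be folded
     * into by_id (the nodeid is authoritative on corosync clusters). */
    static struct peer *
    reconcile(struct peer *by_id, struct peer *by_name, unsigned int id, int *merge)
    {
        *merge = 0;
        if (by_id == by_name) {
            return by_id;    /* consistent; both NULL means "create a new entry" */
        }
        if (by_id == NULL) {
            if (id != 0 && by_name->id != 0) {
                return NULL; /* two nodeids share one name: create a new entry */
            }
            return by_name;
        }
        if (by_name == NULL) {
            return by_id;    /* two names share one nodeid: the id entry wins */
        }
        *merge = 1;          /* disjoint entries for the same node */
        return by_id;
    }
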
get_node_uuid(id, node->uname); -+ if(node->uuid == NULL) { -+ const char *uuid = crm_peer_uuid(node); - - if (uuid) { -- node->uuid = strdup(uuid); -- crm_info("Node %u has uuid %s", id, node->uuid); -+ crm_info("Node %u has uuid %s", id, uuid); -+ - } else { -- crm_warn("Cannot obtain a UUID for node %d/%s", id, node->uname); -+ crm_info("Cannot obtain a UUID for node %d/%s", id, node->uname); - } - } - -@@ -292,7 +311,7 @@ crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t see - if (node->uuid == NULL) { - if (is_openais_cluster()) { - /* Yes, overrule whatever was passed in */ -- node->uuid = get_corosync_uuid(id, uname); -+ crm_peer_uuid(node); - - } else if (uuid != NULL) { - node->uuid = strdup(uuid); -@@ -342,38 +361,6 @@ crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t see - } - - void --crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) --{ -- enum crm_join_phase last = 0; -- -- if(node == NULL) { -- crm_err("%s: Could not set join to %d for NULL", source, phase); -- return; -- } -- -- last = node->join; -- -- if(phase == last) { -- crm_trace("%s: Node %s[%u] - join phase still %u", -- source, node->uname, node->id, last); -- -- } else if (phase <= crm_join_none) { -- node->join = phase; -- crm_info("%s: Node %s[%u] - join phase %u -> %u", -- source, node->uname, node->id, last, phase); -- -- } else if(phase == last + 1) { -- node->join = phase; -- crm_info("%s: Node %s[%u] - join phase %u -> %u", -- source, node->uname, node->id, last, phase); -- } else { -- crm_err("%s: Node %s[%u] - join phase cannot transition from %u to %u", -- source, node->uname, node->id, last, phase); -- -- } --} -- --void - crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) - { - uint32_t last = 0; -@@ -470,7 +457,7 @@ crm_update_peer_state(const char *source, crm_node_t * node, const char *state, - } - - if (changed) { -- crm_notice("%s: Node %s[%u] - state is now %s", source, node->uname, node->id, state); -+ crm_notice("%s: Node %s[%u] - state is now %s (was %s)", source, node->uname, node->id, state, last); - if (crm_status_callback) { - crm_status_callback(crm_status_nstate, node, last); - } -diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am -index 318d1fa..87c3f1f 100644 ---- a/lib/common/Makefile.am -+++ b/lib/common/Makefile.am -@@ -33,8 +33,11 @@ lib_LTLIBRARIES = libcrmcommon.la - CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC - - libcrmcommon_la_SOURCES = ipc.c utils.c xml.c iso8601.c remote.c mainloop.c logging.c -+if BUILD_CIBSECRETS -+libcrmcommon_la_SOURCES += cib_secrets.c -+endif - --libcrmcommon_la_LDFLAGS = -version-info 4:0:1 -+libcrmcommon_la_LDFLAGS = -version-info 5:0:2 - libcrmcommon_la_LIBADD = -ldl $(GNUTLSLIBS) - libcrmcommon_la_SOURCES += $(top_builddir)/lib/gnu/md5.c - -diff --git a/lib/common/cib_secrets.c b/lib/common/cib_secrets.c -new file mode 100644 -index 0000000..d1b60d3 ---- /dev/null -+++ b/lib/common/cib_secrets.c -@@ -0,0 +1,222 @@ -+/* -+ * cib_secrets.c -+ * -+ * Author: Dejan Muhamedagic -+ * Copyright (c) 2011 SUSE, Attachmate -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public -+ * License as published by the Free Software Foundation; either -+ * version 2 of the License, or (at your option) any later version. 
-+ * -+ * This software is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+ -+static int do_replace_secret_params(char *rsc_id, GHashTable *params, gboolean from_legacy_dir); -+static int is_magic_value(char *p); -+static int check_md5_hash(char *hash, char *value); -+static void add_secret_params(gpointer key, gpointer value, gpointer user_data); -+static char *read_local_file(char *local_file); -+ -+#define MAX_VALUE_LEN 255 -+#define MAGIC "lrm://" -+ -+static int -+is_magic_value(char *p) -+{ -+ return !strcmp(p, MAGIC); -+} -+ -+static int -+check_md5_hash(char *hash, char *value) -+{ -+ int rc = FALSE; -+ char *hash2 = NULL; -+ -+ hash2 = crm_md5sum(value); -+ crm_debug("hash: %s, calculated hash: %s", hash, hash2); -+ if (safe_str_eq(hash, hash2)) { -+ rc = TRUE; -+ } -+ -+ free(hash2); -+ return rc; -+} -+ -+static char * -+read_local_file(char *local_file) -+{ -+ FILE *fp = fopen(local_file, "r"); -+ char buf[MAX_VALUE_LEN+1]; -+ char *p; -+ -+ if (!fp) { -+ if (errno != ENOENT) { -+ crm_perror(LOG_ERR, "cannot open %s" , local_file); -+ } -+ return NULL; -+ } -+ -+ if (!fgets(buf, MAX_VALUE_LEN, fp)) { -+ crm_perror(LOG_ERR, "cannot read %s", local_file); -+ return NULL; -+ } -+ -+ /* strip white space */ -+ for (p = buf+strlen(buf)-1; p >= buf && isspace(*p); p--) -+ ; -+ *(p+1) = '\0'; -+ return g_strdup(buf); -+} -+ -+/* -+ * returns 0 on success or no replacements necessary -+ * returns -1 if replacement failed for whatever reasone -+ */ -+ -+int -+replace_secret_params(char *rsc_id, GHashTable *params) -+{ -+ if (do_replace_secret_params(rsc_id, params, FALSE) < 0 -+ && do_replace_secret_params(rsc_id, params, TRUE) < 0) { -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int -+do_replace_secret_params(char *rsc_id, GHashTable *params, gboolean from_legacy_dir) -+{ -+ char local_file[FILENAME_MAX+1], *start_pname; -+ char hash_file[FILENAME_MAX+1], *hash; -+ GList *secret_params = NULL, *l; -+ char *key, *pvalue, *secret_value; -+ int rc = 0; -+ const char *dir_prefix = NULL; -+ -+ if (params == NULL) { -+ return 0; -+ } -+ -+ if (from_legacy_dir) { -+ dir_prefix = LRM_LEGACY_CIBSECRETS_DIR; -+ -+ } else { -+ dir_prefix = LRM_CIBSECRETS_DIR; -+ } -+ -+ /* secret_params could be cached with the resource; -+ * there are also parameters sent with operations -+ * which cannot be cached -+ */ -+ g_hash_table_foreach(params, add_secret_params, &secret_params); -+ if (!secret_params) { /* none found? 
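
check_md5_hash() above validates each secret against its on-disk ".sign" sidecar via crm_md5sum(). A stand-alone equivalent using GLib's checksum API in place of that project-internal helper (an assumption, chosen because this code already links GLib):

    #include <glib.h>

    static gboolean
    secret_matches(const char *value, const char *expected_md5)
    {
        /* g_compute_checksum_for_string() stands in for crm_md5sum() */
        gchar *sum = g_compute_checksum_for_string(G_CHECKSUM_MD5, value, -1);
        gboolean ok = (g_strcmp0(sum, expected_md5) == 0);

        g_free(sum);
        return ok;
    }
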
*/ -+ return 0; -+ } -+ -+ crm_debug("replace secret parameters for resource %s", rsc_id); -+ -+ if (snprintf(local_file, FILENAME_MAX, -+ "%s/%s/", dir_prefix, rsc_id) > FILENAME_MAX) { -+ crm_err("filename size exceeded for resource %s", rsc_id); -+ return -1; -+ } -+ start_pname = local_file + strlen(local_file); -+ -+ for (l = g_list_first(secret_params); l; l = g_list_next(l)) { -+ key = (char *)(l->data); -+ pvalue = g_hash_table_lookup(params, key); -+ if (!pvalue) { /* this cannot really happen */ -+ crm_err("odd, no parameter %s for rsc %s found now", key, rsc_id); -+ continue; -+ } -+ -+ if ((strlen(key) + strlen(local_file)) >= FILENAME_MAX-2) { -+ crm_err("%d: parameter name %s too big", key); -+ rc = -1; -+ continue; -+ } -+ -+ strcpy(start_pname, key); -+ secret_value = read_local_file(local_file); -+ if (!secret_value) { -+ if (from_legacy_dir == FALSE) { -+ crm_debug("secret for rsc %s parameter %s not found in %s. " -+ "will try "LRM_LEGACY_CIBSECRETS_DIR, rsc_id, key, dir_prefix); -+ -+ } else { -+ crm_err("secret for rsc %s parameter %s not found in %s", -+ rsc_id, key, dir_prefix); -+ } -+ rc = -1; -+ continue; -+ } -+ -+ strcpy(hash_file, local_file); -+ if (strlen(hash_file) + 5 > FILENAME_MAX) { -+ crm_err("cannot build such a long name " -+ "for the sign file: %s.sign", hash_file); -+ g_free(secret_value); -+ rc = -1; -+ continue; -+ -+ } else { -+ strncat(hash_file, ".sign", 5); -+ hash = read_local_file(hash_file); -+ if (hash == NULL) { -+ crm_err("md5 sum for rsc %s parameter %s " -+ "cannot be read from %s", rsc_id, key, hash_file); -+ g_free(secret_value); -+ rc = -1; -+ continue; -+ -+ } else if (!check_md5_hash(hash, secret_value)) { -+ crm_err("md5 sum for rsc %s parameter %s " -+ "does not match", rsc_id, key); -+ g_free(secret_value); -+ g_free(hash); -+ rc = -1; -+ continue; -+ } -+ g_free(hash); -+ } -+ g_hash_table_replace(params, g_strdup(key), secret_value); -+ } -+ g_list_free(secret_params); -+ return rc; -+} -+ -+static void -+add_secret_params(gpointer key, gpointer value, gpointer user_data) -+{ -+ GList **lp = (GList **)user_data; -+ -+ if (is_magic_value((char *)value)) { -+ *lp = g_list_append(*lp, (char *)key); -+ } -+} -diff --git a/lib/common/ipc.c b/lib/common/ipc.c -index 88a73c0..2cd42bf 100644 ---- a/lib/common/ipc.c -+++ b/lib/common/ipc.c -@@ -147,16 +147,21 @@ create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const - const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE); - - if (type == NULL) { -- crm_err("Cannot create new_message," " no message type in original message"); -+ crm_err("Cannot create new_message, no message type in original message"); - CRM_ASSERT(type != NULL); - return NULL; - #if 0 - } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) { -- crm_err("Cannot create new_message," " original message was not a request"); -+ crm_err("Cannot create new_message, original message was not a request"); - return NULL; - #endif - } - reply = create_xml_node(NULL, __FUNCTION__); -+ if (reply == NULL) { -+ crm_err("Cannot create new_message, malloc failed"); -+ return NULL; -+ } -+ - crm_xml_add(reply, F_CRM_ORIGIN, origin); - crm_xml_add(reply, F_TYPE, T_CRM); - crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET); -@@ -243,19 +248,22 @@ crm_client_init(void) - void - crm_client_cleanup(void) - { -- if (client_connections == NULL) { -+ if (client_connections != NULL) { - int active = g_hash_table_size(client_connections); - - if (active) { - crm_err("Exiting with %d active 
connections", active); - } -- g_hash_table_destroy(client_connections); -+ g_hash_table_destroy(client_connections); client_connections = NULL; - } - } - - crm_client_t * --crm_client_new(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+crm_client_new(qb_ipcs_connection_t * c, uid_t uid_client, gid_t gid_client) - { -+ static uid_t uid_server = 0; -+ static gid_t gid_cluster = 0; -+ - crm_client_t *client = NULL; - - CRM_LOG_ASSERT(c); -@@ -263,6 +271,29 @@ crm_client_new(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) - return NULL; - } - -+ if (gid_cluster == 0) { -+ uid_server = getuid(); -+ if(crm_user_lookup(CRM_DAEMON_USER, NULL, &gid_cluster) < 0) { -+ static bool have_error = FALSE; -+ if(have_error == FALSE) { -+ crm_warn("Could not find group for user %s", CRM_DAEMON_USER); -+ have_error = TRUE; -+ } -+ } -+ } -+ -+ if(gid_cluster != 0 && gid_client != 0) { -+ uid_t best_uid = -1; /* Passing -1 to chown(2) means don't change */ -+ -+ if(uid_client == 0 || uid_server == 0) { /* Someone is priveliged, but the other may not be */ -+ best_uid = QB_MAX(uid_client, uid_server); -+ crm_trace("Allowing user %u to clean up after disconnect", best_uid); -+ } -+ -+ crm_trace("Giving access to group %u", gid_cluster); -+ qb_ipcs_connection_auth_set(c, best_uid, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); -+ } -+ - crm_client_init(); - - client = calloc(1, sizeof(crm_client_t)); -@@ -273,19 +304,10 @@ crm_client_new(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) - - client->id = crm_generate_uuid(); - -- crm_info("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid, gid, client->pid, client->id); -+ crm_info("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id); - - #if ENABLE_ACL -- { -- struct group *crm_grp = NULL; -- -- crm_grp = getgrnam(CRM_DAEMON_GROUP); -- if (crm_grp) { -- qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, -- S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); -- } -- client->user = uid2username(uid); -- } -+ client->user = uid2username(uid_client); - #endif - - g_hash_table_insert(client_connections, c, client); -@@ -358,7 +380,7 @@ crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t - struct crm_ipc_response_header *header = data; - - if (id) { -- *id = ((struct qb_ipc_request_header *)data)->id; -+ *id = ((struct qb_ipc_response_header *)data)->id; - } - if (flags) { - *flags = header->flags; -@@ -481,6 +503,7 @@ crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result) - - CRM_ASSERT(result != NULL); - -+ *result = NULL; - iov = calloc(2, sizeof(struct iovec)); - - crm_ipc_init(); -@@ -583,7 +606,7 @@ crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_server_flags f - crm_trace("Response %d sent, %d bytes to %p[%d]", header->qb.id, rc, c->ipcs, c->pid); - } - -- if (header->flags & crm_ipc_server_free) { -+ if (flags & crm_ipc_server_free) { - free(iov[0].iov_base); - free(iov[1].iov_base); - free(iov); -@@ -608,13 +631,20 @@ crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message, - enum crm_ipc_server_flags flags) - { - struct iovec *iov = NULL; -- ssize_t rc = crm_ipc_prepare(request, message, &iov); -+ ssize_t rc = 0; - -+ if(c == NULL) { -+ return -EDESTADDRREQ; -+ } -+ -+ rc = crm_ipc_prepare(request, message, &iov); - if (rc > 0) { - rc = crm_ipcs_sendv(c, iov, flags | crm_ipc_server_free); - - } else { -- crm_notice("Message to %p[%d] failed: %s (%d)", c->ipcs, c->pid, pcmk_strerror(rc), rc); -+ free(iov); -+ crm_notice("Message to 
%p[%d] failed: %s (%d)", -+ c->ipcs, c->pid, pcmk_strerror(rc), rc); - } - - return rc; -@@ -820,7 +850,7 @@ crm_ipc_decompress(crm_ipc_t * client) - if (rc != BZ_OK) { - crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc); - free(uncompressed); -- return -EREMOTEIO; -+ return -EILSEQ; - } - - CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max); -@@ -975,7 +1005,6 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - struct iovec *iov; - static uint32_t id = 0; - struct crm_ipc_response_header *header; -- char *buffer = NULL; - - crm_ipc_init(); - -@@ -995,8 +1024,7 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - if (rc < 0) { - crm_warn("Sending to %s (%p) is disabled until pending reply is recieved", client->name, - client->ipc); -- free(buffer); -- return -EREMOTEIO; -+ return -EALREADY; - - } else { - crm_notice("Lost reply from %s (%p) finally arrived, sending re-enabled", client->name, -@@ -1017,21 +1045,21 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - ms_timeout = 5000; - } - -- crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg: %.200s...", -- client->name, header->qb.id, header->qb.size, ms_timeout, buffer); -+ crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...", -+ client->name, header->qb.id, header->qb.size, ms_timeout); - - if (ms_timeout > 0) { - - rc = internal_ipc_send_request(client, iov, ms_timeout); - - if (rc <= 0) { -- crm_trace("Failed to send from client %s request %d with %u bytes: %.200s...", -- client->name, header->qb.id, header->qb.size, buffer); -+ crm_trace("Failed to send from client %s request %d with %u bytes...", -+ client->name, header->qb.id, header->qb.size); - goto send_cleanup; - - } else if (is_not_set(flags, crm_ipc_client_response)) { -- crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes: %.200s...", -- header->qb.id, client->name, header->qb.size, buffer); -+ crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes...", -+ header->qb.id, client->name, header->qb.size); - - goto send_cleanup; - } -@@ -1073,16 +1101,16 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in - } else if (rc == -ETIMEDOUT) { - crm_warn("Request %d to %s (%p) failed: %s (%ld) after %dms", - header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc, ms_timeout); -- crm_info("Request was %.120s", buffer); - crm_write_blackbox(0, NULL); - - } else if (rc <= 0) { - crm_warn("Request %d to %s (%p) failed: %s (%ld)", - header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc); -- crm_info("Request was %.120s", buffer); - } - -- free(buffer); -+ free(header); -+ free(iov[1].iov_base); -+ free(iov); - return rc; - } - -diff --git a/lib/common/logging.c b/lib/common/logging.c -index c3bce72..f4fa627 100644 ---- a/lib/common/logging.c -+++ b/lib/common/logging.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
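
The crm_ipc_decompress() change above reclassifies a corrupt bzip2 payload as -EILSEQ (illegal byte sequence) instead of -EREMOTEIO: the transport delivered the bytes, the content itself was bad. A sketch of that mapping around bzlib's one-shot API:

    #include <bzlib.h>
    #include <errno.h>

    static int
    one_shot_decompress(char *dst, unsigned int *dst_len,
                        char *src, unsigned int src_len)
    {
        int rc = BZ2_bzBuffToBuffDecompress(dst, dst_len, src, src_len, 0, 0);

        return (rc == BZ_OK) ? 0 : -EILSEQ;  /* bad payload, not a bad link */
    }
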
See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -42,6 +42,7 @@ - #include - #include - -+unsigned int crm_log_priority = LOG_NOTICE; - unsigned int crm_log_level = LOG_INFO; - static gboolean crm_tracing_enabled(void); - unsigned int crm_trace_nonlog = 0; -@@ -221,9 +222,8 @@ crm_add_logfile(const char *filename) - return FALSE; /* Nothing to do */ - } - -+ /* Check the parent directory */ - filename_cp = strdup(filename); -- -- /* Check the parent directory and attempt to open */ - parent_dir = dirname(filename_cp); - rc = stat(parent_dir, &parent); - -@@ -231,27 +231,19 @@ crm_add_logfile(const char *filename) - crm_err("Directory '%s' does not exist: logging to '%s' is disabled", parent_dir, filename); - free(filename_cp); - return FALSE; -+ } -+ free(filename_cp); - -- } else if (parent.st_uid == geteuid() && (parent.st_mode & (S_IRUSR | S_IWUSR))) { -- /* all good - user */ -- logfile = fopen(filename, "a"); -- -- } else if (parent.st_gid == getegid() && (parent.st_mode & S_IXGRP)) { -- /* all good - group */ -- logfile = fopen(filename, "a"); -- -- } else { -- crm_err -- ("We (uid=%u, gid=%u) do not have permission to access '%s': logging to '%s' is disabled", -- geteuid(), getegid(), parent_dir, filename); -- free(filename_cp); -+ errno = 0; -+ logfile = fopen(filename, "a"); -+ if(logfile == NULL) { -+ crm_err("%s (%d): Logging to '%s' as uid=%u, gid=%u is disabled", -+ pcmk_strerror(errno), errno, filename, geteuid(), getegid()); - return FALSE; - } -- free(filename_cp); -- filename_cp = NULL; - - /* Check/Set permissions if we're root */ -- if (logfile && geteuid() == 0) { -+ if (geteuid() == 0) { - struct stat st; - uid_t pcmk_uid = 0; - gid_t pcmk_gid = 0; -@@ -265,13 +257,14 @@ crm_add_logfile(const char *filename) - return FALSE; - } - -- crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid); -- if (st.st_gid != pcmk_gid) { -- /* Wrong group */ -- fix = TRUE; -- } else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) { -- /* Not read/writable by the correct group */ -- fix = TRUE; -+ if(crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) == 0) { -+ if (st.st_gid != pcmk_gid) { -+ /* Wrong group */ -+ fix = TRUE; -+ } else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) { -+ /* Not read/writable by the correct group */ -+ fix = TRUE; -+ } - } - - if (fix) { -@@ -293,11 +286,9 @@ crm_add_logfile(const char *filename) - } - } - } -- if (logfile) { -- fclose(logfile); -- } - -- /* Now open with libqb */ -+ /* Close and reopen with libqb */ -+ fclose(logfile); - fd = qb_log_file_open(filename); - - if (fd < 0) { -@@ -320,7 +311,11 @@ static char *blackbox_file_prefix = NULL; - static void - blackbox_logger(int32_t t, struct qb_log_callsite *cs, time_t timestamp, const char *msg) - { -- crm_write_blackbox(0, cs); -+ if(cs && cs->priority < LOG_ERR) { -+ crm_write_blackbox(SIGTRAP, cs); /* Bypass the over-dumping logic */ -+ } else { -+ crm_write_blackbox(0, cs); -+ } - } - - void -@@ -339,10 +334,16 @@ crm_enable_blackbox(int nsig) - qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */ - - crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix); -+ -+ /* Save to disk on abnormal termination */ - crm_signal(SIGSEGV, crm_trigger_blackbox); -+ crm_signal(SIGABRT, 
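
The crm_add_logfile() rewrite above drops the stat()-based permission guessing and simply attempts the open, reporting errno when it fails. The resulting pattern in isolation:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static FILE *
    open_logfile(const char *path)
    {
        FILE *fp;

        errno = 0;
        fp = fopen(path, "a");  /* let the kernel decide about permissions */
        if (fp == NULL) {
            fprintf(stderr, "%s (%d): logging to '%s' is disabled\n",
                    strerror(errno), errno, path);
        }
        return fp;
    }
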
crm_trigger_blackbox); -+ crm_signal(SIGILL, crm_trigger_blackbox); -+ crm_signal(SIGBUS, crm_trigger_blackbox); -+ - crm_update_callsites(); - -- /* Original meanings from signal(7) -+ /* Original meanings from signal(7) - * - * Signal Value Action Comment - * SIGTRAP 5 Core Trace/breakpoint trap -@@ -353,8 +354,8 @@ crm_enable_blackbox(int nsig) - - blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); - qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); -- crm_info("Trigger: %d is %d %d", blackbox_trigger, -- qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); -+ crm_trace("Trigger: %d is %d %d", blackbox_trigger, -+ qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); - - crm_update_callsites(); - } -@@ -375,7 +376,6 @@ crm_write_blackbox(int nsig, struct qb_log_callsite *cs) - - switch (nsig) { - case 0: -- case SIGABRT: - case SIGTRAP: - /* The graceful case - such as assertion failure or user request */ - -@@ -461,7 +461,7 @@ crm_log_filter_source(int source, const char *trace_files, const char *trace_fns - } - - } else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */ -- if (cs->priority <= LOG_NOTICE && cs->priority <= crm_log_level) { -+ if (cs->priority <= crm_log_priority && cs->priority <= crm_log_level) { - qb_bit_set(cs->targets, source); - } - /* Log file tracing options... */ -@@ -584,6 +584,34 @@ crm_tracing_enabled(void) - return FALSE; - } - -+static int -+crm_priority2int(const char *name) -+{ -+ struct syslog_names { -+ const char *name; -+ int priority; -+ }; -+ static struct syslog_names p_names[] = { -+ {"emerg", LOG_EMERG}, -+ {"alert", LOG_ALERT}, -+ {"crit", LOG_CRIT}, -+ {"error", LOG_ERR}, -+ {"warning", LOG_WARNING}, -+ {"notice", LOG_NOTICE}, -+ {"info", LOG_INFO}, -+ {"debug", LOG_DEBUG}, -+ {NULL, -1} -+ }; -+ int lpc; -+ -+ for (lpc = 0; name != NULL && p_names[lpc].name != NULL; lpc++) { -+ if (crm_str_eq(p_names[lpc].name, name, TRUE)) { -+ return p_names[lpc].priority; -+ } -+ } -+ return crm_log_priority; -+} -+ - gboolean - crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - int argc, char **argv, gboolean quiet) -@@ -616,18 +644,23 @@ crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - } - - if (entity) { -- crm_system_name = entity; -+ free(crm_system_name); -+ crm_system_name = strdup(entity); - - } else if (argc > 0 && argv != NULL) { - char *mutable = strdup(argv[0]); -+ char *modified = basename(mutable); - -- crm_system_name = basename(mutable); -- if (strstr(crm_system_name, "lt-") == crm_system_name) { -- crm_system_name += 3; -+ if (strstr(modified, "lt-") == modified) { -+ modified += 3; - } - -+ free(crm_system_name); -+ crm_system_name = strdup(modified); -+ free(mutable); -+ - } else if (crm_system_name == NULL) { -- crm_system_name = "Unknown"; -+ crm_system_name = strdup("Unknown"); - } - - setenv("PCMK_service", crm_system_name, 1); -@@ -642,6 +675,8 @@ crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - to_stderr = TRUE; - } - -+ crm_log_priority = crm_priority2int(daemon_option("logpriority")); -+ - crm_log_level = level; - qb_log_init(crm_system_name, qb_log_facility2int(facility), level); - qb_log_tags_stringify_fn_set(crm_quark_to_string); -@@ -729,6 +764,7 @@ crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, - mainloop_add_signal(SIGUSR1, crm_enable_blackbox); - } - -+ crm_xml_init(); /* Sets buffer allocation 
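
crm_log_init() above now heap-allocates crm_system_name so that crm_exit() can free it, deriving the value from argv[0] and skipping libtool's "lt-" wrapper prefix. A sketch of that derivation:

    #include <libgen.h>
    #include <stdlib.h>
    #include <string.h>

    static char *
    derive_system_name(const char *argv0)
    {
        char *scratch = strdup(argv0);
        char *base = basename(scratch);  /* may point into 'scratch' */
        char *name;

        if (strncmp(base, "lt-", 3) == 0) {
            base += 3;                   /* strip the libtool wrapper prefix */
        }
        name = strdup(base);             /* own the result... */
        free(scratch);                   /* ...so the scratch copy can go */
        return name;
    }
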
strategy */ - return TRUE; - } - -@@ -822,6 +858,148 @@ crm_log_args(int argc, char **argv) - } - - const char * -+pcmk_errorname(int rc) -+{ -+ int error = ABS(rc); -+ -+ switch (error) { -+ case E2BIG: return "E2BIG"; -+ case EACCES: return "EACCES"; -+ case EADDRINUSE: return "EADDRINUSE"; -+ case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; -+ case EAFNOSUPPORT: return "EAFNOSUPPORT"; -+ case EAGAIN: return "EAGAIN"; -+ case EALREADY: return "EALREADY"; -+ case EBADE: return "EBADE"; -+ case EBADF: return "EBADF"; -+ case EBADFD: return "EBADFD"; -+ case EBADMSG: return "EBADMSG"; -+ case EBADR: return "EBADR"; -+ case EBADRQC: return "EBADRQC"; -+ case EBADSLT: return "EBADSLT"; -+ case EBUSY: return "EBUSY"; -+ case ECANCELED: return "ECANCELED"; -+ case ECHILD: return "ECHILD"; -+ case ECHRNG: return "ECHRNG"; -+ case ECOMM: return "ECOMM"; -+ case ECONNABORTED: return "ECONNABORTED"; -+ case ECONNREFUSED: return "ECONNREFUSED"; -+ case ECONNRESET: return "ECONNRESET"; -+ /* case EDEADLK: return "EDEADLK"; */ -+ case EDEADLOCK: return "EDEADLOCK"; -+ case EDESTADDRREQ: return "EDESTADDRREQ"; -+ case EDOM: return "EDOM"; -+ case EDQUOT: return "EDQUOT"; -+ case EEXIST: return "EEXIST"; -+ case EFAULT: return "EFAULT"; -+ case EFBIG: return "EFBIG"; -+ case EHOSTDOWN: return "EHOSTDOWN"; -+ case EHOSTUNREACH: return "EHOSTUNREACH"; -+ case EIDRM: return "EIDRM"; -+ case EILSEQ: return "EILSEQ"; -+ case EINPROGRESS: return "EINPROGRESS"; -+ case EINTR: return "EINTR"; -+ case EINVAL: return "EINVAL"; -+ case EIO: return "EIO"; -+ case EISCONN: return "EISCONN"; -+ case EISDIR: return "EISDIR"; -+ case EISNAM: return "EISNAM"; -+ case EKEYEXPIRED: return "EKEYEXPIRED"; -+ case EKEYREJECTED: return "EKEYREJECTED"; -+ case EKEYREVOKED: return "EKEYREVOKED"; -+ case EL2HLT: return "EL2HLT"; -+ case EL2NSYNC: return "EL2NSYNC"; -+ case EL3HLT: return "EL3HLT"; -+ case EL3RST: return "EL3RST"; -+ case ELIBACC: return "ELIBACC"; -+ case ELIBBAD: return "ELIBBAD"; -+ case ELIBMAX: return "ELIBMAX"; -+ case ELIBSCN: return "ELIBSCN"; -+ case ELIBEXEC: return "ELIBEXEC"; -+ case ELOOP: return "ELOOP"; -+ case EMEDIUMTYPE: return "EMEDIUMTYPE"; -+ case EMFILE: return "EMFILE"; -+ case EMLINK: return "EMLINK"; -+ case EMSGSIZE: return "EMSGSIZE"; -+ case EMULTIHOP: return "EMULTIHOP"; -+ case ENAMETOOLONG: return "ENAMETOOLONG"; -+ case ENETDOWN: return "ENETDOWN"; -+ case ENETRESET: return "ENETRESET"; -+ case ENETUNREACH: return "ENETUNREACH"; -+ case ENFILE: return "ENFILE"; -+ case ENOBUFS: return "ENOBUFS"; -+ case ENODATA: return "ENODATA"; -+ case ENODEV: return "ENODEV"; -+ case ENOENT: return "ENOENT"; -+ case ENOEXEC: return "ENOEXEC"; -+ case ENOKEY: return "ENOKEY"; -+ case ENOLCK: return "ENOLCK"; -+ case ENOLINK: return "ENOLINK"; -+ case ENOMEDIUM: return "ENOMEDIUM"; -+ case ENOMEM: return "ENOMEM"; -+ case ENOMSG: return "ENOMSG"; -+ case ENONET: return "ENONET"; -+ case ENOPKG: return "ENOPKG"; -+ case ENOPROTOOPT: return "ENOPROTOOPT"; -+ case ENOSPC: return "ENOSPC"; -+ case ENOSR: return "ENOSR"; -+ case ENOSTR: return "ENOSTR"; -+ case ENOSYS: return "ENOSYS"; -+ case ENOTBLK: return "ENOTBLK"; -+ case ENOTCONN: return "ENOTCONN"; -+ case ENOTDIR: return "ENOTDIR"; -+ case ENOTEMPTY: return "ENOTEMPTY"; -+ case ENOTSOCK: return "ENOTSOCK"; -+ /* case ENOTSUP: return "ENOTSUP"; */ -+ case ENOTTY: return "ENOTTY"; -+ case ENOTUNIQ: return "ENOTUNIQ"; -+ case ENXIO: return "ENXIO"; -+ case EOPNOTSUPP: return "EOPNOTSUPP"; -+ case EOVERFLOW: return "EOVERFLOW"; -+ case EPERM: return 
"EPERM"; -+ case EPFNOSUPPORT: return "EPFNOSUPPORT"; -+ case EPIPE: return "EPIPE"; -+ case EPROTO: return "EPROTO"; -+ case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; -+ case EPROTOTYPE: return "EPROTOTYPE"; -+ case ERANGE: return "ERANGE"; -+ case EREMCHG: return "EREMCHG"; -+ case EREMOTE: return "EREMOTE"; -+ case EREMOTEIO: return "EREMOTEIO"; -+ case ERESTART: return "ERESTART"; -+ case EROFS: return "EROFS"; -+ case ESHUTDOWN: return "ESHUTDOWN"; -+ case ESPIPE: return "ESPIPE"; -+ case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; -+ case ESRCH: return "ESRCH"; -+ case ESTALE: return "ESTALE"; -+ case ESTRPIPE: return "ESTRPIPE"; -+ case ETIME: return "ETIME"; -+ case ETIMEDOUT: return "ETIMEDOUT"; -+ case ETXTBSY: return "ETXTBSY"; -+ case EUCLEAN: return "EUCLEAN"; -+ case EUNATCH: return "EUNATCH"; -+ case EUSERS: return "EUSERS"; -+ /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ -+ case EXDEV: return "EXDEV"; -+ case EXFULL: return "EXFULL"; -+ -+ case pcmk_err_generic: return "pcmk_err_generic"; -+ case pcmk_err_no_quorum: return "pcmk_err_no_quorum"; -+ case pcmk_err_dtd_validation: return "pcmk_err_dtd_validation"; -+ case pcmk_err_transform_failed: return "pcmk_err_transform_failed"; -+ case pcmk_err_old_data: return "pcmk_err_old_data"; -+ case pcmk_err_diff_failed: return "pcmk_err_diff_failed"; -+ case pcmk_err_diff_resync: return "pcmk_err_diff_resync"; -+ case pcmk_err_cib_modified: return "pcmk_err_cib_modified"; -+ case pcmk_err_cib_backup: return "pcmk_err_cib_backup"; -+ case pcmk_err_cib_save: return "pcmk_err_cib_save"; -+ } -+ return "Unknown"; -+} -+ -+ -+const char * - pcmk_strerror(int rc) - { - int error = rc; -@@ -851,6 +1029,12 @@ pcmk_strerror(int rc) - return "Application of an update diff failed"; - case pcmk_err_diff_resync: - return "Application of an update diff failed, requesting a full refresh"; -+ case pcmk_err_cib_modified: -+ return "The on-disk configuration was manually modified"; -+ case pcmk_err_cib_backup: -+ return "Could not archive the previous configuration"; -+ case pcmk_err_cib_save: -+ return "Could not save the new configuration to disk"; - - /* The following cases will only be hit on systems for which they are non-standard */ - /* coverity[dead_error_condition] False positive on non-Linux */ -diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c -index c038174..e95d1d8 100644 ---- a/lib/common/mainloop.c -+++ b/lib/common/mainloop.c -@@ -42,7 +42,7 @@ struct mainloop_child_s { - void *privatedata; - - /* Called when a process dies */ -- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode); -+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); - }; - - struct trigger_s { -@@ -112,11 +112,59 @@ crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) - return rc; - } - -+static void -+crm_trigger_finalize(GSource * source) -+{ -+ crm_trace("Trigger %p destroyed", source); -+} -+ -+#if 0 -+struct _GSourceCopy -+{ -+ gpointer callback_data; -+ GSourceCallbackFuncs *callback_funcs; -+ -+ const GSourceFuncs *source_funcs; -+ guint ref_count; -+ -+ GMainContext *context; -+ -+ gint priority; -+ guint flags; -+ guint source_id; -+ -+ GSList *poll_fds; -+ -+ GSource *prev; -+ GSource *next; -+ -+ char *name; -+ -+ void *priv; -+}; -+ -+static int -+g_source_refcount(GSource * source) -+{ -+ /* Duplicating the contents of private header files is a necessary evil */ -+ if (source) { -+ struct _GSourceCopy *evil = (struct _GSourceCopy*)source; -+ return 
evil->ref_count; -+ } -+ return 0; -+} -+#else -+static int g_source_refcount(GSource * source) -+{ -+ return 0; -+} -+#endif -+ - static GSourceFuncs crm_trigger_funcs = { - crm_trigger_prepare, - crm_trigger_check, - crm_trigger_dispatch, -- NULL -+ crm_trigger_finalize, - }; - - static crm_trigger_t * -@@ -138,7 +186,10 @@ mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer - g_source_set_priority(source, priority); - g_source_set_can_recurse(source, FALSE); - -+ crm_trace("Setup %p with ref-count=%u", source, g_source_refcount(source)); - trigger->id = g_source_attach(source, NULL); -+ crm_trace("Attached %p with ref-count=%u", source, g_source_refcount(source)); -+ - return trigger; - } - -@@ -169,17 +220,35 @@ mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointe - void - mainloop_set_trigger(crm_trigger_t * source) - { -- source->trigger = TRUE; -+ if(source) { -+ source->trigger = TRUE; -+ } - } - - gboolean - mainloop_destroy_trigger(crm_trigger_t * source) - { -- source->trigger = FALSE; -- if (source->id > 0) { -- g_source_remove(source->id); -- source->id = 0; -+ GSource *gs = NULL; -+ -+ if(source == NULL) { -+ return TRUE; -+ } -+ -+ gs = (GSource *)source; -+ -+ if(g_source_refcount(gs) > 2) { -+ crm_info("Trigger %p is still referenced %u times", gs, g_source_refcount(gs)); - } -+ -+ g_source_destroy(gs); /* Remove from mainloop, ref_count-- */ -+ g_source_unref(gs); /* The caller no longer carries a reference to source -+ * -+ * At this point the source should be free'd, -+ * unless we're currently processing said -+ * source, in which case mainloop holds an -+ * additional reference and it will be free'd -+ * once our processing completes -+ */ - return TRUE; - } - -@@ -197,7 +266,9 @@ crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) - { - crm_signal_t *sig = (crm_signal_t *) source; - -- crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal)); -+ if(sig->signal != SIGCHLD) { -+ crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal)); -+ } - - sig->trigger.trigger = FALSE; - if (sig->handler) { -@@ -218,7 +289,7 @@ static GSourceFuncs crm_signal_funcs = { - crm_trigger_prepare, - crm_trigger_check, - crm_signal_dispatch, -- NULL -+ crm_trigger_finalize, - }; - - gboolean -@@ -321,6 +392,7 @@ mainloop_destroy_signal(int sig) - return TRUE; - } - -+ crm_trace("Destroying signal %d", sig); - tmp = crm_signals[sig]; - crm_signals[sig] = NULL; - mainloop_destroy_trigger((crm_trigger_t *) tmp); -@@ -329,6 +401,14 @@ mainloop_destroy_signal(int sig) - - static qb_array_t *gio_map = NULL; - -+void -+mainloop_cleanup(void) -+{ -+ if(gio_map) { -+ qb_array_free(gio_map); -+ } -+} -+ - /* - * libqb... - */ -@@ -591,7 +671,7 @@ mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data) - } while (keep && rc > 0 && --max > 0); - - } else { -- crm_trace("New message from %s[%p]", client->name, client); -+ crm_trace("New message from %s[%p] %u", client->name, client, condition); - if (client->dispatch_fn_io) { - if (client->dispatch_fn_io(client->userdata) < 0) { - crm_trace("Connection to %s no longer required", client->name); -@@ -651,29 +731,37 @@ static void - mainloop_gio_destroy(gpointer c) - { - mainloop_io_t *client = c; -+ char *c_name = strdup(client->name); - - /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c - * client->channel will still have ref_count > 0... 
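
mainloop_destroy_trigger() above pairs g_source_destroy() with g_source_unref(): the first detaches the source from the main loop, the second drops the caller's reference, and GLib frees the source once any in-flight dispatch of it completes. The ownership rule in isolation:

    #include <glib.h>

    static void
    drop_source(GSource *src)
    {
        if (src == NULL) {
            return;
        }
        g_source_destroy(src);  /* remove from the mainloop (its reference) */
        g_source_unref(src);    /* drop ours; freed once no dispatch is running */
    }
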
should be == 1 - */ -- crm_trace("Destroying client %s[%p] %d", client->name, c, mainloop_gio_refcount(client)); -+ crm_trace("Destroying client %s[%p] %d", c_name, c, mainloop_gio_refcount(client)); - - if (client->ipc) { - crm_ipc_close(client->ipc); - } - - if (client->destroy_fn) { -- client->destroy_fn(client->userdata); -+ void (*destroy_fn) (gpointer userdata) = client->destroy_fn; -+ -+ client->destroy_fn = NULL; -+ destroy_fn(client->userdata); - } - - if (client->ipc) { -- crm_ipc_destroy(client->ipc); -+ crm_ipc_t *ipc = client->ipc; -+ -+ client->ipc = NULL; -+ crm_ipc_destroy(ipc); - } - -- crm_trace("Destroyed client %s[%p] %d", client->name, c, mainloop_gio_refcount(client)); -- free(client->name); -+ crm_trace("Destroyed client %s[%p] %d", c_name, c, mainloop_gio_refcount(client)); - -- memset(client, 0, sizeof(mainloop_io_t)); /* A bit of pointless paranoia */ -+ free(client->name); client->name = NULL; - free(client); -+ -+ free(c_name); - } - - mainloop_io_t * -@@ -770,19 +858,25 @@ mainloop_del_fd(mainloop_io_t * client) - } - - pid_t --mainloop_get_child_pid(mainloop_child_t * child) -+mainloop_child_pid(mainloop_child_t * child) - { - return child->pid; - } - -+const char * -+mainloop_child_name(mainloop_child_t * child) -+{ -+ return child->desc; -+} -+ - int --mainloop_get_child_timeout(mainloop_child_t * child) -+mainloop_child_timeout(mainloop_child_t * child) - { - return child->timeout; - } - - void * --mainloop_get_child_userdata(mainloop_child_t * child) -+mainloop_child_userdata(mainloop_child_t * child) - { - return child->privatedata; - } -@@ -819,70 +913,108 @@ child_timeout_callback(gpointer p) - return FALSE; - } - -+static GListPtr child_list = NULL; -+ - static void --mainloop_child_destroy(mainloop_child_t * child) -+child_death_dispatch(int signal) - { -- if (child->timerid != 0) { -- crm_trace("Removing timer %d", child->timerid); -- g_source_remove(child->timerid); -- child->timerid = 0; -- } -+ GListPtr iter = child_list; - -- free(child->desc); -- g_free(child); --} -+ while(iter) { -+ int rc = 0; -+ int core = 0; -+ int signo = 0; -+ int status = 0; -+ int exitcode = 0; - --static void --child_death_dispatch(GPid pid, gint status, gpointer user_data) --{ -- int signo = 0; -- int exitcode = 0; -- mainloop_child_t *child = user_data; -+ GListPtr saved = NULL; -+ mainloop_child_t *child = iter->data; - -- crm_trace("Managed process %d exited: %p", pid, child); -+ rc = waitpid(child->pid, &status, WNOHANG); -+ if(rc == 0) { -+ iter = iter->next; -+ continue; - -- if (WIFEXITED(status)) { -- exitcode = WEXITSTATUS(status); -- crm_trace("Managed process %d (%s) exited with rc=%d", pid, child->desc, exitcode); -+ } else if(rc != child->pid) { -+ signo = signal; -+ exitcode = 1; -+ status = 1; -+ crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid); - -- } else if (WIFSIGNALED(status)) { -- signo = WTERMSIG(status); -- crm_trace("Managed process %d (%s) exited with signal=%d", pid, child->desc, signo); -- } -+ } else { -+ crm_trace("Managed process %d exited: %p", child->pid, child); -+ -+ if (WIFEXITED(status)) { -+ exitcode = WEXITSTATUS(status); -+ crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode); -+ -+ } else if (WIFSIGNALED(status)) { -+ signo = WTERMSIG(status); -+ crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo); -+ } - #ifdef WCOREDUMP -- if (WCOREDUMP(status)) { -- crm_err("Managed process %d (%s) dumped core", pid, child->desc); -- } -+ if 
(WCOREDUMP(status)) { -+ core = 1; -+ crm_err("Managed process %d (%s) dumped core", child->pid, child->desc); -+ } - #endif -+ } - -- if (child->callback) { -- child->callback(child, status, signo, exitcode); -- } -- crm_trace("Removed process entry for %d", pid); -+ if (child->callback) { -+ child->callback(child, child->pid, core, signo, exitcode); -+ } -+ -+ crm_trace("Removing process entry %p for %d", child, child->pid); -+ -+ saved = iter; -+ iter = iter->next; -+ -+ child_list = g_list_remove_link(child_list, saved); -+ g_list_free(saved); - -- mainloop_child_destroy(child); -- return; -+ if (child->timerid != 0) { -+ crm_trace("Removing timer %d", child->timerid); -+ g_source_remove(child->timerid); -+ child->timerid = 0; -+ } -+ free(child->desc); -+ free(child); -+ } - } - - /* Create/Log a new tracked process - * To track a process group, use -pid - */ - void --mainloop_add_child(pid_t pid, int timeout, const char *desc, void *privatedata, -- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode)) -+mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata, -+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) - { -+ static bool need_init = TRUE; - mainloop_child_t *child = g_new(mainloop_child_t, 1); - - child->pid = pid; - child->timerid = 0; - child->timeout = FALSE; -- child->desc = strdup(desc); - child->privatedata = privatedata; - child->callback = callback; - -+ if(desc) { -+ child->desc = strdup(desc); -+ } -+ - if (timeout) { - child->timerid = g_timeout_add(timeout, child_timeout_callback, child); - } - -- child->watchid = g_child_watch_add(pid, child_death_dispatch, child); -+ child_list = g_list_append(child_list, child); -+ -+ if(need_init) { -+ need_init = FALSE; -+ -+ /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ -+ mainloop_add_signal(SIGCHLD, child_death_dispatch); -+ -+ /* In case they terminated before the signal handler was installed */ -+ child_death_dispatch(SIGCHLD); -+ } - } -diff --git a/lib/common/remote.c b/lib/common/remote.c -index ef198e3..8b00f16 100644 ---- a/lib/common/remote.c -+++ b/lib/common/remote.c -@@ -692,7 +692,7 @@ check_connect_finished(gpointer userdata) - if ((time(NULL) - cb_data->start) < (cb_data->timeout / 1000)) { - goto reschedule; - } -- crm_err("fd %d: timeout during select", sock); -+ crm_debug("fd %d: timeout during select", sock); - rc = -ETIMEDOUT; - goto dispatch_done; - } else { -@@ -728,11 +728,11 @@ check_connect_finished(gpointer userdata) - } else { - close(sock); - } -- free(cb_data); - - if (cb_data->callback) { - cb_data->callback(cb_data->userdata, rc); - } -+ free(cb_data); - return FALSE; - - reschedule: -@@ -748,7 +748,7 @@ internal_tcp_connect_async(int sock, - { - int rc = 0; - int flag = 0; -- int interval = 1000; -+ int interval = 500; - struct tcp_async_cb_data *cb_data = NULL; - - if ((flag = fcntl(sock, F_GETFL)) >= 0) { -@@ -821,12 +821,12 @@ int - crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - void *userdata, void (*callback) (void *userdata, int sock)) - { -- struct addrinfo *res; -- struct addrinfo *rp; -+ struct addrinfo *res = NULL; -+ struct addrinfo *rp = NULL; - struct addrinfo hints; - const char *server = host; - int ret_ga; -- int sock; -+ int sock = -1; - - /* getaddrinfo */ - memset(&hints, 0, sizeof(struct addrinfo)); -@@ -843,7 +843,7 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - - if (!res || !res->ai_addr) { - 
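
mainloop_child_add() above deliberately avoids g_child_watch_add(), which depends on GLib's helper thread, and instead reaps children from a SIGCHLD handler with waitpid(WNOHANG); dispatching once at registration covers children that exited before the handler was installed. A minimal sketch of the polling loop over tracked pids:

    #include <stddef.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    struct tracked { pid_t pid; struct tracked *next; };

    static void
    reap(struct tracked *head)
    {
        struct tracked *t;

        for (t = head; t != NULL; t = t->next) {
            int status = 0;
            pid_t rc = waitpid(t->pid, &status, WNOHANG);

            if (rc == 0) {
                continue;                    /* still running */
            }
            if (rc == t->pid && WIFEXITED(status)) {
                /* exit code: WEXITSTATUS(status) */
            } else if (rc == t->pid && WIFSIGNALED(status)) {
                /* terminated by: WTERMSIG(status) */
            }
            /* the caller unlinks 't' and fires its callback here */
        }
    }
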
crm_err("getaddrinfo failed"); -- return -1; -+ goto async_cleanup; - } - - for (rp = res; rp != NULL; rp = rp->ai_next) { -@@ -879,7 +879,8 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - if (callback) { - if (internal_tcp_connect_async - (sock, rp->ai_addr, rp->ai_addrlen, timeout, userdata, callback) == 0) { -- return 0; /* Success for now, we'll hear back later in the callback */ -+ sock = 0; -+ goto async_cleanup; /* Success for now, we'll hear back later in the callback */ - } - - } else { -@@ -891,8 +892,12 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ - close(sock); - sock = -1; - } -- freeaddrinfo(res); - -+async_cleanup: -+ -+ if (res) { -+ freeaddrinfo(res); -+ } - return sock; - } - -diff --git a/lib/common/utils.c b/lib/common/utils.c -index 503abce..adf0a6b 100644 ---- a/lib/common/utils.c -+++ b/lib/common/utils.c -@@ -47,6 +47,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -71,26 +72,44 @@ CRM_TRACE_INIT_DATA(common); - - gboolean crm_config_error = FALSE; - gboolean crm_config_warning = FALSE; --const char *crm_system_name = "unknown"; -+char *crm_system_name = NULL; - - int node_score_red = 0; - int node_score_green = 0; - int node_score_yellow = 0; - int node_score_infinity = INFINITY; - -+static struct crm_option *crm_long_options = NULL; -+static const char *crm_app_description = NULL; -+static char *crm_short_options = NULL; -+static const char *crm_app_usage = NULL; -+ - int - crm_exit(int rc) - { -- crm_trace("exit %d", rc); -+ mainloop_cleanup(); -+ - #if HAVE_LIBXML2 -+ crm_trace("cleaning up libxml"); - crm_xml_cleanup(); - #endif -+ -+ crm_trace("exit %d", rc); - qb_log_fini(); -- exit(rc); -- return rc; /* Can never happen, but allows return crm_exit(rc) -- * where "return rc" was used previously -- * - which keeps compilers happy. -- */ -+ -+ free(crm_short_options); -+ free(crm_system_name); -+ -+ exit(ABS(rc)); /* Always exit with a positive value so that it can be passed to crm_error -+ * -+ * Otherwise the system wraps it around and people -+ * have to jump through hoops figuring out what the -+ * error was -+ */ -+ return rc; /* Can never happen, but allows return crm_exit(rc) -+ * where "return rc" was used previously - which -+ * keeps compilers happy. 
-+ */ - } - - gboolean -@@ -315,9 +334,11 @@ crm_concat(const char *prefix, const char *suffix, char join) - CRM_ASSERT(suffix != NULL); - len = strlen(prefix) + strlen(suffix) + 2; - -- new_str = calloc(1, (len)); -- sprintf(new_str, "%s%c%s", prefix, join, suffix); -- new_str[len - 1] = 0; -+ new_str = malloc(len); -+ if(new_str) { -+ sprintf(new_str, "%s%c%s", prefix, join, suffix); -+ new_str[len - 1] = 0; -+ } - return new_str; - } - -@@ -336,7 +357,7 @@ crm_itoa(int an_int) - int len = 32; - char *buffer = NULL; - -- buffer = calloc(1, (len + 1)); -+ buffer = malloc(len + 1); - if (buffer != NULL) { - snprintf(buffer, len, "%d", an_int); - } -@@ -344,6 +365,29 @@ crm_itoa(int an_int) - return buffer; - } - -+void -+crm_build_path(const char *path_c, mode_t mode) -+{ -+ int offset = 1, len = 0; -+ char *path = strdup(path_c); -+ -+ CRM_CHECK(path != NULL, return); -+ for (len = strlen(path); offset < len; offset++) { -+ if (path[offset] == '/') { -+ path[offset] = 0; -+ if (mkdir(path, mode) < 0 && errno != EEXIST) { -+ crm_perror(LOG_ERR, "Could not create directory '%s'", path); -+ break; -+ } -+ path[offset] = '/'; -+ } -+ } -+ if (mkdir(path, mode) < 0 && errno != EEXIST) { -+ crm_perror(LOG_ERR, "Could not create directory '%s'", path); -+ } -+ free(path); -+} -+ - int - crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) - { -@@ -693,7 +737,7 @@ generate_op_key(const char *rsc_id, const char *op_type, int interval) - - len += strlen(op_type); - len += strlen(rsc_id); -- op_id = calloc(1, len); -+ op_id = malloc(len); - CRM_CHECK(op_id != NULL, return NULL); - sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval); - return op_id; -@@ -785,7 +829,7 @@ generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_ - len += strlen(op_type); - len += strlen(rsc_id); - len += strlen(notify_type); -- op_id = calloc(1, len); -+ op_id = malloc(len); - if (op_id != NULL) { - sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type); - } -@@ -802,7 +846,7 @@ generate_transition_magic_v202(const char *transition_key, int op_status) - - len += strlen(transition_key); - -- fail_state = calloc(1, len); -+ fail_state = malloc(len); - if (fail_state != NULL) { - snprintf(fail_state, len, "%d:%s", op_status, transition_key); - } -@@ -819,7 +863,7 @@ generate_transition_magic(const char *transition_key, int op_status, int op_rc) - - len += strlen(transition_key); - -- fail_state = calloc(1, len); -+ fail_state = malloc(len); - if (fail_state != NULL) { - snprintf(fail_state, len, "%d:%d;%s", op_status, op_rc, transition_key); - } -@@ -864,9 +908,9 @@ generate_transition_key(int transition_id, int action_id, int target_rc, const c - - len += strlen(node); - -- fail_state = calloc(1, len); -+ fail_state = malloc(len); - if (fail_state != NULL) { -- snprintf(fail_state, len, "%d:%d:%d:%s", action_id, transition_id, target_rc, node); -+ snprintf(fail_state, len, "%d:%d:%d:%-*s", action_id, transition_id, target_rc, 36, node); - } - return fail_state; - } -@@ -1025,11 +1069,13 @@ filter_reload_parameters(xmlNode * param_set, const char *restart_string) - name = NULL; - len = strlen(prop_name) + 3; - -- name = calloc(1, len); -- sprintf(name, " %s ", prop_name); -- name[len - 1] = 0; -+ name = malloc(len); -+ if(name) { -+ sprintf(name, " %s ", prop_name); -+ name[len - 1] = 0; -+ match = strstr(restart_string, name); -+ } - -- match = strstr(restart_string, name); - if (match == NULL) { - crm_trace("%s not found in %s", prop_name, restart_string); - 
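
The crm_exit() rewrite above always hands exit(3) a positive value because wait(2) exposes only the low eight bits of the status, so a negative errno-style rc would surface as an unrelated-looking number. Concretely:

    #include <stdlib.h>

    static void
    exit_with(int rc)
    {
        /* exit(-107) would be reported to the parent as 149 (-107 & 0xFF);
         * exit(107) keeps the code recognizable, e.g. for crm_error. */
        exit(abs(rc));
    }
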
xml_remove_prop(param_set, prop_name); -@@ -1075,7 +1121,7 @@ crm_abort(const char *file, const char *function, int line, - default: /* Parent */ - crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s", - function, pid, file, line, assert_condition); -- crm_write_blackbox(SIGABRT, NULL); -+ crm_write_blackbox(SIGTRAP, NULL); - - do { - rc = waitpid(pid, &status, 0); -@@ -1099,9 +1145,13 @@ generate_series_filename(const char *directory, const char *series, int sequence - CRM_CHECK(directory != NULL, return NULL); - CRM_CHECK(series != NULL, return NULL); - -+#if !HAVE_BZLIB_H -+ bzip = FALSE; -+#endif -+ - len += strlen(directory); - len += strlen(series); -- filename = calloc(1, len); -+ filename = malloc(len); - CRM_CHECK(filename != NULL, return NULL); - - if (bzip) { -@@ -1127,7 +1177,7 @@ get_last_sequence(const char *directory, const char *series) - - len += strlen(directory); - len += strlen(series); -- series_file = calloc(1, len); -+ series_file = malloc(len); - CRM_CHECK(series_file != NULL, return 0); - sprintf(series_file, "%s/%s.last", directory, series); - -@@ -1193,21 +1243,23 @@ write_last_sequence(const char *directory, const char *series, int sequence, int - - len += strlen(directory); - len += strlen(series); -- series_file = calloc(1, len); -- sprintf(series_file, "%s/%s.last", directory, series); -+ series_file = malloc(len); - -- file_strm = fopen(series_file, "w"); -- if (file_strm == NULL) { -- crm_err("Cannout open series file %s for writing", series_file); -- goto bail; -+ if(series_file) { -+ sprintf(series_file, "%s/%s.last", directory, series); -+ file_strm = fopen(series_file, "w"); - } - -- rc = fprintf(file_strm, "%d", sequence); -- if (rc < 0) { -- crm_perror(LOG_ERR, "Cannot write to series file %s", series_file); -+ if (file_strm != NULL) { -+ rc = fprintf(file_strm, "%d", sequence); -+ if (rc < 0) { -+ crm_perror(LOG_ERR, "Cannot write to series file %s", series_file); -+ } -+ -+ } else { -+ crm_err("Cannout open series file %s for writing", series_file); - } - -- bail: - if (file_strm != NULL) { - fflush(file_strm); - fclose(file_strm); -@@ -1308,15 +1360,22 @@ crm_pidfile_inuse(const char *filename, long mypid) - } - if (read(fd, buf, sizeof(buf)) > 0) { - if (sscanf(buf, "%lu", &pid) > 0) { -+ crm_trace("Got pid %lu from %s\n", pid, filename); - if (pid <= 1) { - /* Invalid pid */ - rc = -ENOENT; -+ unlink(filename); - - } else if (mypid && pid == mypid) { - /* In use by us */ - rc = pcmk_ok; - -- } else if (mypid && pid != mypid && crm_pid_active(pid)) { -+ } else if (crm_pid_active(pid) == FALSE) { -+ /* Contains a stale value */ -+ unlink(filename); -+ rc = -ENOENT; -+ -+ } else if (mypid && pid != mypid) { - /* locked by existing process - give up */ - rc = -EEXIST; - } -@@ -1337,9 +1396,12 @@ crm_lock_pidfile(const char *filename) - mypid = (unsigned long)getpid(); - - rc = crm_pidfile_inuse(filename, 0); -- if (rc != pcmk_ok && rc != -ENOENT) { -+ if (rc == -ENOENT) { -+ /* exists but the process is not active */ -+ -+ } else if (rc != pcmk_ok) { - /* locked by existing process - give up */ -- return -1; -+ return rc; - } - - if ((fd = open(filename, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { -@@ -1354,7 +1416,6 @@ crm_lock_pidfile(const char *filename) - if (rc != LOCKSTRLEN) { - crm_perror(LOG_ERR, "Incomplete write to %s", filename); - return -errno; -- - } - - return crm_pidfile_inuse(filename, mypid); -@@ -1363,6 +1424,7 @@ crm_lock_pidfile(const char *filename) - void - crm_make_daemon(const char *name, gboolean daemonize, 
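
crm_pidfile_inuse() above now unlinks a pidfile whose recorded pid is no longer alive, so a stale file cannot block startup. The usual liveness probe behind a helper like crm_pid_active() is kill(pid, 0); a sketch:

    #include <errno.h>
    #include <signal.h>
    #include <sys/types.h>

    static int
    pid_alive(pid_t pid)
    {
        if (kill(pid, 0) == 0) {
            return 1;             /* exists, and we may signal it */
        }
        return (errno == EPERM);  /* exists, but owned by someone else */
    }
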
const char *pidfile) - { -+ int rc; - long pid; - const char *devnull = "/dev/null"; - -@@ -1370,22 +1432,30 @@ crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) - return; - } - -+ /* Check before we even try... */ -+ rc = crm_pidfile_inuse(pidfile, 1); -+ if(rc < pcmk_ok && rc != -ENOENT) { -+ pid = crm_read_pidfile(pidfile); -+ crm_err("%s: already running [pid %ld in %s]", name, pid, pidfile); -+ printf("%s: already running [pid %ld in %s]\n", name, pid, pidfile); -+ crm_exit(rc); -+ } -+ - pid = fork(); - if (pid < 0) { - fprintf(stderr, "%s: could not start daemon\n", name); - crm_perror(LOG_ERR, "fork"); -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - - } else if (pid > 0) { -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - -- if (crm_lock_pidfile(pidfile) < 0) { -- pid = crm_read_pidfile(pidfile); -- if (crm_pid_active(pid) > 0) { -- crm_warn("%s: already running [pid %ld] (%s).\n", name, pid, pidfile); -- crm_exit(EX_OK); -- } -+ rc = crm_lock_pidfile(pidfile); -+ if(rc < pcmk_ok) { -+ crm_err("Could not lock '%s' for %s: %s (%d)", pidfile, name, pcmk_strerror(rc), rc); -+ printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_strerror(rc), rc); -+ crm_exit(rc); - } - - umask(S_IWGRP | S_IWOTH | S_IROTH); -@@ -1527,11 +1597,6 @@ crm_meta_value(GHashTable * hash, const char *field) - return value; - } - --static struct crm_option *crm_long_options = NULL; --static const char *crm_app_description = NULL; --static const char *crm_short_options = NULL; --static const char *crm_app_usage = NULL; -- - static struct option * - crm_create_long_opts(struct crm_option *long_options) - { -@@ -1585,7 +1650,7 @@ crm_set_options(const char *short_options, const char *app_usage, struct crm_opt - const char *app_desc) - { - if (short_options) { -- crm_short_options = short_options; -+ crm_short_options = strdup(short_options); - - } else if (long_options) { - int lpc = 0; -@@ -1742,12 +1807,68 @@ crm_help(char cmd, int exit_code) - } - } - -+void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, -+ qb_ipcs_service_t **ipcs_rw, -+ qb_ipcs_service_t **ipcs_shm, -+ struct qb_ipcs_service_handlers *ro_cb, -+ struct qb_ipcs_service_handlers *rw_cb) -+{ -+ *ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, ro_cb); -+ *ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, rw_cb); -+ *ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, rw_cb); -+ -+ if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { -+ crm_err("Failed to create cib servers: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ -+void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, -+ qb_ipcs_service_t *ipcs_rw, -+ qb_ipcs_service_t *ipcs_shm) -+{ -+ qb_ipcs_destroy(ipcs_ro); -+ qb_ipcs_destroy(ipcs_rw); -+ qb_ipcs_destroy(ipcs_shm); -+} -+ -+qb_ipcs_service_t * -+crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb) -+{ -+ return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); -+} -+ -+void -+attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) -+{ -+ *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); -+ -+ if (*ipcs == NULL) { -+ crm_err("Failed to create attrd servers: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ -+void -+stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct 
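/*
 * Illustrative sketch: the order of operations crm_make_daemon() follows
 * after the hunk above -- check the pidfile before forking, exit the parent,
 * then take the lock in the child and propagate real error codes instead of
 * EX_* constants. Generic analogue only; no Pacemaker API is used.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

static void make_daemon(void)
{
    pid_t pid;

    /* 1. Refuse early if a live instance already owns the pidfile
     *    (see the pidfile_inuse() sketch above). */

    pid = fork();
    if (pid < 0) {
        perror("fork");                   /* could not start daemon */
        exit(EXIT_FAILURE);
    } else if (pid > 0) {
        exit(EXIT_SUCCESS);               /* parent: the child carries on */
    }

    /* 2. Child: take the pidfile lock; on failure, log both to the log and
     *    to stdout, then exit with the lock's error code. */

    umask(S_IWGRP | S_IWOTH | S_IROTH);
}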
qb_ipcs_service_handlers *cb) -+{ -+ *ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb); -+ -+ if (*ipcs == NULL) { -+ crm_err("Failed to create stonith-ng servers: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ - int - attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const char *name, - const char *value, const char *section, const char *set, const char *dampen, - const char *user_name) - { -- int rc = 0; -+ int rc = -ENOTCONN; - int max = 5; - enum crm_ipc_flags flags = crm_ipc_client_none; - xmlNode *update = create_xml_node(NULL, __FUNCTION__); -@@ -1822,7 +1943,7 @@ attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const cha - } else if (rc > 0) { - break; - -- } else if (rc == -EAGAIN || rc == -EREMOTEIO) { -+ } else if (rc == -EAGAIN || rc == -EALREADY) { - sleep(5 - max); - max--; - -@@ -1837,6 +1958,8 @@ attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const cha - free_xml(update); - if (rc > 0) { - crm_debug("Sent update: %s=%s for %s", name, value, host ? host : "localhost"); -+ rc = pcmk_ok; -+ - } else { - crm_debug("Could not send update %s=%s for %s: %s (%d)", name, value, - host ? host : "localhost", pcmk_strerror(rc), rc); -@@ -2032,11 +2155,21 @@ create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char *ca - op->t_run, op->t_rcchange, op->exec_time, op->queue_time); - - if (op->interval == 0) { -- crm_xml_add_int(xml_op, "last-run", op->t_run); -+ /* The values are the same for non-recurring ops */ -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run); -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); -+ -+ } else if(op->t_rcchange) { -+ /* last-run is not accurate for recurring ops */ -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange); -+ -+ } else { -+ /* ...but is better than nothing otherwise */ -+ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); - } -- crm_xml_add_int(xml_op, "last-rc-change", op->t_rcchange); -- crm_xml_add_int(xml_op, "exec-time", op->exec_time); -- crm_xml_add_int(xml_op, "queue-time", op->queue_time); -+ -+ crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); -+ crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); - } - } - -@@ -2081,7 +2214,7 @@ uid2username(uid_t uid) - } - - void --determine_request_user(char *user, xmlNode * request, const char *field) -+determine_request_user(const char *user, xmlNode * request, const char *field) - { - /* Get our internal validation out of the way first */ - CRM_CHECK(user != NULL && request != NULL && field != NULL, return); -@@ -2140,7 +2273,7 @@ find_library_function(void **handle, const char *lib, const char *fn, gboolean f - if (!(*handle)) { - crm_err("%sCould not open %s: %s", fatal ? "Fatal: " : "", lib, dlerror()); - if (fatal) { -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - return NULL; - } -@@ -2149,7 +2282,7 @@ find_library_function(void **handle, const char *lib, const char *fn, gboolean f - if ((error = dlerror()) != NULL) { - crm_err("%sCould not find %s in %s: %s", fatal ? 
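/*
 * Illustrative sketch: the retry loop attrd_update_delegate() uses above --
 * start from -ENOTCONN, retry only on transient errors (-EAGAIN/-EALREADY)
 * with a growing delay, and map any delivered reply to success. send_update()
 * is a hypothetical stand-in for the crm_ipc_send() call.
 */
#include <errno.h>
#include <unistd.h>

static int attempts = 0;

static int send_update(void)
{
    /* stand-in: fail transiently twice, then succeed */
    return (++attempts < 3) ? -EAGAIN : 1;
}

static int send_with_retry(void)
{
    int rc = -ENOTCONN;
    int max = 5;

    while (max > 0) {
        rc = send_update();
        if (rc > 0) {
            return 0;                     /* delivered: report success */
        }
        if (rc == -EAGAIN || rc == -EALREADY) {
            sleep((unsigned) (5 - max)); /* back off: 0s, 1s, 2s, ... */
            max--;
        } else {
            break;                        /* hard failure: give up */
        }
    }
    return rc;
}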
"Fatal: " : "", fn, lib, error); - if (fatal) { -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - } - -@@ -2202,18 +2335,27 @@ crm_generate_uuid(void) - char * - crm_md5sum(const char *buffer) - { -- int lpc = 0; -+ int lpc = 0, len = 0; - char *digest = NULL; - unsigned char raw_digest[MD5_DIGEST_SIZE]; - -- crm_trace("Beginning digest"); -+ if(buffer != NULL) { -+ len = strlen(buffer); -+ } -+ -+ crm_trace("Beginning digest of %d bytes", len); - digest = malloc(2 * MD5_DIGEST_SIZE + 1); -- md5_buffer(buffer, strlen(buffer), raw_digest); -- for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { -- sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); -+ if(digest) { -+ md5_buffer(buffer, len, raw_digest); -+ for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { -+ sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); -+ } -+ digest[(2 * MD5_DIGEST_SIZE)] = 0; -+ crm_trace("Digest %s.", digest); -+ -+ } else { -+ crm_err("Could not create digest"); - } -- digest[(2 * MD5_DIGEST_SIZE)] = 0; -- crm_trace("Digest %s\n", digest); - return digest; - } - -@@ -2233,7 +2375,10 @@ crm_compress_string(const char *data, int length, int max, char **result, unsign - max = (length * 1.1) + 600; /* recomended size */ - } - -+#ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &before_t); -+#endif -+ - /* coverity[returned_null] Ignore */ - compressed = malloc(max); - -@@ -2249,7 +2394,10 @@ crm_compress_string(const char *data, int length, int max, char **result, unsign - return FALSE; - } - -+#ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &after_t); -+#endif -+ - crm_info("Compressed %d bytes into %d (ratio %d:1) in %dms", - length, *result_len, length / (*result_len), - (after_t.tv_sec - before_t.tv_sec) * 1000 + (after_t.tv_nsec - -diff --git a/lib/common/xml.c b/lib/common/xml.c -index 9832321..aea1ed5 100644 ---- a/lib/common/xml.c -+++ b/lib/common/xml.c -@@ -89,15 +89,6 @@ typedef struct { - } filter_t; - - /* *INDENT-OFF* */ --enum xml_log_options --{ -- xml_log_option_filtered = 0x001, -- xml_log_option_formatted = 0x002, -- xml_log_option_diff_plus = 0x010, -- xml_log_option_diff_minus = 0x020, -- xml_log_option_diff_short = 0x040, -- xml_log_option_diff_all = 0x100, --}; - - struct schema_s known_schemas[] = { - /* 0 */ { 0, NULL, NULL, NULL, 1 }, -@@ -125,8 +116,10 @@ static int max_schemas = DIMOF(known_schemas) - 2; /* skip back past 'none' - #define CHUNK_SIZE 1024 - - #define buffer_print(buffer, max, offset, fmt, args...) 
do { \ -- int rc; \ -- rc = snprintf((buffer) + (offset), (max) - (offset), fmt, ##args); \ -+ int rc = (max); \ -+ if(buffer) { \ -+ rc = snprintf((buffer) + (offset), (max) - (offset), fmt, ##args); \ -+ } \ - if(rc < 0) { \ - crm_perror(LOG_ERR, "snprintf failed"); \ - (buffer)[(offset)] = 0; \ -@@ -146,7 +139,7 @@ insert_prefix(int options, char **buffer, int *offset, int *max, int depth) - if (options & xml_log_option_formatted) { - size_t spaces = 2 * depth; - -- if (spaces >= ((*max) - (*offset))) { -+ if ((*buffer) == NULL || spaces >= ((*max) - (*offset))) { - (*max) = QB_MAX(CHUNK_SIZE, (*max) * 2); - (*buffer) = realloc((*buffer), (*max) + 1); - } -@@ -177,7 +170,6 @@ gboolean can_prune_leaf(xmlNode * xml_node); - void diff_filter_context(int context, int upper_bound, int lower_bound, - xmlNode * xml_node, xmlNode * parent); - int in_upper_context(int depth, int context, xmlNode * xml_node); --int write_file(const char *string, const char *filename); - int add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as_diff); - - static inline const char * -@@ -516,44 +508,6 @@ static void - crm_xml_err(void *ctx, const char *msg, ...) - G_GNUC_PRINTF(2, 3); - --int --write_file(const char *string, const char *filename) --{ -- int rc = 0; -- FILE *file_output_strm = NULL; -- -- CRM_CHECK(filename != NULL, return -1); -- -- if (string == NULL) { -- crm_err("Cannot write NULL to %s", filename); -- return -1; -- } -- -- file_output_strm = fopen(filename, "w"); -- if (file_output_strm == NULL) { -- crm_perror(LOG_ERR, "Cannot open %s for writing", filename); -- return -1; -- } -- -- rc = fprintf(file_output_strm, "%s", string); -- if (rc < 0) { -- crm_perror(LOG_ERR, "Cannot write output to %s", filename); -- } -- -- if (fflush(file_output_strm) != 0) { -- crm_perror(LOG_ERR, "fflush for %s failed:", filename); -- rc = -1; -- } -- -- if (fsync(fileno(file_output_strm)) < 0) { -- crm_perror(LOG_ERR, "fsync for %s failed:", filename); -- rc = -1; -- } -- -- fclose(file_output_strm); -- return rc; --} -- - static void - crm_xml_err(void *ctx, const char *msg, ...) 
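/*
 * Illustrative sketch: the pattern behind the reworked buffer_print()/
 * insert_prefix() pair -- append with vsnprintf() and grow the buffer by
 * doubling whenever the remaining space (or the buffer itself) is missing.
 * This is a generic analogue, not the macro's exact expansion.
 */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static void buf_printf(char **buf, int *offset, int *max, const char *fmt, ...)
{
    for (;;) {
        va_list ap;
        int rc;

        va_start(ap, fmt);
        rc = vsnprintf(*buf ? *buf + *offset : NULL,
                       *buf ? (size_t) (*max - *offset) : 0, fmt, ap);
        va_end(ap);

        if (rc >= 0 && *buf != NULL && rc < *max - *offset) {
            *offset += rc;                /* it fit: done */
            return;
        }

        /* grow: at least one chunk, otherwise double (cf. CHUNK_SIZE) */
        {
            int new_max = (*max < 1024) ? 1024 : (*max * 2);
            char *tmp = realloc(*buf, (size_t) new_max + 1);

            if (tmp == NULL) {
                return;                   /* out of memory: give up quietly */
            }
            *buf = tmp;
            *max = new_max;
        }
    }
}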
- { -@@ -644,9 +598,13 @@ string2xml(const char *input) - - } else { - int len = strlen(input); -+ int lpc = 0; -+ -+ while(lpc < len) { -+ crm_warn("Parse error[+%.3d]: %.80s", lpc, input+lpc); -+ lpc += 80; -+ } - -- crm_warn("String start: %.50s", input); -- crm_warn("String start+%d: %s", len - 50, input + len - 50); - crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, "String parsing error", TRUE, TRUE); - } - } -@@ -836,9 +794,7 @@ static int - write_xml_stream(xmlNode * xml_node, const char *filename, FILE * stream, gboolean compress) - { - int res = 0; -- time_t now; - char *buffer = NULL; -- char *now_str = NULL; - unsigned int out = 0; - static mode_t cib_mode = S_IRUSR | S_IWUSR; - -@@ -851,15 +807,20 @@ write_xml_stream(xmlNode * xml_node, const char *filename, FILE * stream, gboole - return -1; - } - -- /* establish the correct permissions */ -- fchmod(fileno(stream), cib_mode); - - crm_log_xml_trace(xml_node, "Writing out"); - -- now = time(NULL); -- now_str = ctime(&now); -- now_str[24] = EOS; /* replace the newline */ -- crm_xml_add(xml_node, XML_CIB_ATTR_WRITTEN, now_str); -+ if(strstr(filename, "cib") != NULL) { -+ /* Only CIB's need this field written */ -+ time_t now = time(NULL); -+ char *now_str = ctime(&now); -+ -+ now_str[24] = EOS; /* replace the newline */ -+ crm_xml_add(xml_node, XML_CIB_ATTR_WRITTEN, now_str); -+ -+ /* establish the correct permissions */ -+ fchmod(fileno(stream), cib_mode); -+ } - - buffer = dump_xml_formatted(xml_node); - CRM_CHECK(buffer != NULL && strlen(buffer) > 0, crm_log_xml_warn(xml_node, "dump:failed"); -@@ -936,7 +897,9 @@ write_xml_fd(xmlNode * xml_node, const char *filename, int fd, gboolean compress - int - write_xml_file(xmlNode * xml_node, const char *filename, gboolean compress) - { -- FILE *stream = fopen(filename, "w"); -+ FILE *stream = NULL; -+ -+ stream = fopen(filename, "w"); - - return write_xml_stream(xml_node, filename, stream, compress); - } -@@ -1000,6 +963,8 @@ crm_xml_escape(const char *text) - - for (index = 0; index < length; index++) { - switch (copy[index]) { -+ case 0: -+ break; - case '<': - copy = crm_xml_escape_shuffle(copy, index, &length, "<"); - changes++; -@@ -1020,6 +985,35 @@ crm_xml_escape(const char *text) - copy = crm_xml_escape_shuffle(copy, index, &length, "&"); - changes++; - break; -+ case '\t': -+ /* Might as well just expand to a few spaces... */ -+ copy = crm_xml_escape_shuffle(copy, index, &length, " "); -+ changes++; -+ break; -+ case '\n': -+ /* crm_trace("Convert: \\%.3o", copy[index]); */ -+ copy = crm_xml_escape_shuffle(copy, index, &length, "\\n"); -+ changes++; -+ break; -+ case '\r': -+ copy = crm_xml_escape_shuffle(copy, index, &length, "\\r"); -+ changes++; -+ break; -+ /* For debugging... 
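/*
 * Illustrative sketch: the chunked parse-error logging string2xml() gains
 * above -- walk the offending input in 80-byte windows so the whole string
 * reaches the log, not just its first and last 50 bytes.
 */
#include <stdio.h>
#include <string.h>

static void log_parse_error(const char *input)
{
    int len = (int) strlen(input);
    int lpc = 0;

    while (lpc < len) {
        /* "+%.3d" tags each line with its offset into the input */
        fprintf(stderr, "Parse error[+%.3d]: %.80s\n", lpc, input + lpc);
        lpc += 80;
    }
}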
-+ case '\\': -+ crm_trace("Passthrough: \\%c", copy[index+1]); -+ break; -+ */ -+ default: -+ /* Check for and replace non-printing characters with their octal equivalent */ -+ if(copy[index] < ' ' || copy[index] > '~') { -+ char *replace = g_strdup_printf("\\%.3o", copy[index]); -+ -+ /* crm_trace("Convert to octal: \\%.3o", copy[index]); */ -+ copy = crm_xml_escape_shuffle(copy, index, &length, replace); -+ free(replace); -+ changes++; -+ } - } - } - -@@ -1033,12 +1027,13 @@ static inline void - dump_xml_attr(xmlAttrPtr attr, int options, char **buffer, int *offset, int *max) - { - char *p_value = NULL; -- const char *p_name = (const char *)attr->name; -+ const char *p_name = NULL; - - if (attr == NULL || attr->children == NULL) { - return; - } - -+ p_name = (const char *)attr->name; - p_value = crm_xml_escape((const char *)attr->children->content); - buffer_print(*buffer, *max, *offset, " %s=\"%s\"", p_name, p_value); - free(p_value); -@@ -1065,8 +1060,8 @@ log_data_element(int log_level, const char *file, const char *function, int line - - /* Since we use the same file and line, to avoid confusing libqb, we need to use the same format strings */ - if (data == NULL) { -- do_crm_log_alias(log_level, file, function, line, "%s%s", prefix, -- ": No data to dump as XML"); -+ do_crm_log_alias(log_level, file, function, line, "%s: %s", prefix, -+ "No data to dump as XML"); - return; - - } else if (is_set(options, xml_log_option_diff_short) -@@ -1098,7 +1093,14 @@ log_data_element(int log_level, const char *file, const char *function, int line - } - - insert_prefix(options, &buffer, &offset, &max, depth); -- buffer_print(buffer, max, offset, "<%s", name); -+ if(data->type == XML_COMMENT_NODE) { -+ buffer_print(buffer, max, offset, ""); -+ -+ } else { -+ buffer_print(buffer, max, offset, "<%s", name); -+ } - - hidden = crm_element_value(data, "hidden"); - for (pIter = crm_first_attr(data); pIter != NULL; pIter = pIter->next) { -@@ -1128,9 +1130,9 @@ log_data_element(int log_level, const char *file, const char *function, int line - buffer_print(buffer, max, offset, "/>"); - } - -- do_crm_log_alias(log_level, file, function, line, "%s%s", prefix, buffer); -+ do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); - -- if (data->children) { -+ if (data->children && data->type != XML_COMMENT_NODE) { - offset = 0; - max = 0; - free(buffer); -@@ -1143,7 +1145,7 @@ log_data_element(int log_level, const char *file, const char *function, int line - insert_prefix(options, &buffer, &offset, &max, depth); - buffer_print(buffer, max, offset, "", name); - -- do_crm_log_alias(log_level, file, function, line, "%s%s", prefix, buffer); -+ do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); - } - - free(prefix_m); -@@ -1180,7 +1182,10 @@ dump_filtered_xml(xmlNode * data, int options, char **buffer, int *offset, int * - } - - static void --dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) -+dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth); -+ -+static void -+dump_xml_element(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) - { - const char *name = NULL; - -@@ -1208,13 +1213,73 @@ dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int - dump_filtered_xml(data, options, buffer, offset, max); - - } else { --#if 1 - xmlAttrPtr xIter = NULL; - - for (xIter = crm_first_attr(data); xIter != NULL; xIter = xIter->next) { - dump_xml_attr(xIter, options, 
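/*
 * Illustrative sketch: the escaping rules crm_xml_escape() applies after the
 * hunk above -- newlines and carriage returns become "\n"/"\r", tabs become
 * spaces, and any other byte outside printable ASCII is emitted as a
 * three-digit octal escape so control characters survive the XML layer.
 */
#include <stdio.h>

static void print_escaped(const char *text)
{
    const unsigned char *p = (const unsigned char *) text;

    for (; *p != '\0'; p++) {
        if (*p == '\n') {
            fputs("\\n", stdout);
        } else if (*p == '\r') {
            fputs("\\r", stdout);
        } else if (*p == '\t') {
            fputs("   ", stdout);         /* expanded to spaces, as above */
        } else if (*p < ' ' || *p > '~') {
            printf("\\%.3o", *p);         /* octal escape for the rest */
        } else {
            putchar(*p);
        }
    }
}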
buffer, offset, max); - } --#else -+ } -+ -+ if (data->children == NULL) { -+ buffer_print(*buffer, *max, *offset, "/>"); -+ -+ } else { -+ buffer_print(*buffer, *max, *offset, ">"); -+ } -+ -+ if (options & xml_log_option_formatted) { -+ buffer_print(*buffer, *max, *offset, "\n"); -+ } -+ -+ if (data->children) { -+ xmlNode *xChild = NULL; -+ -+ for (xChild = __xml_first_child(data); xChild != NULL; xChild = __xml_next(xChild)) { -+ dump_xml(xChild, options, buffer, offset, max, depth + 1); -+ } -+ -+ insert_prefix(options, buffer, offset, max, depth); -+ buffer_print(*buffer, *max, *offset, "", name); -+ -+ if (options & xml_log_option_formatted) { -+ buffer_print(*buffer, *max, *offset, "\n"); -+ } -+ } -+} -+ -+static void -+dump_xml_comment(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) -+{ -+ CRM_ASSERT(max != NULL); -+ CRM_ASSERT(offset != NULL); -+ CRM_ASSERT(buffer != NULL); -+ -+ if (data == NULL) { -+ crm_trace("Nothing to dump"); -+ return; -+ } -+ -+ if (*buffer == NULL) { -+ *offset = 0; -+ *max = 0; -+ } -+ -+ insert_prefix(options, buffer, offset, max, depth); -+ -+ buffer_print(*buffer, *max, *offset, ""); -+ -+ if (options & xml_log_option_formatted) { -+ buffer_print(*buffer, *max, *offset, "\n"); -+ } -+} -+ -+static void -+dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) -+{ -+#if 0 -+ if (is_not_set(options, xml_log_option_filtered)) { - /* Turning this code on also changes the PE tests for some reason - * (not just newlines). Figure out why before considering to - * enable this permanently. -@@ -1263,34 +1328,46 @@ dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int - - xmlBufferFree(xml_buffer); - return; --#endif -- } -- -- if (data->children == NULL) { -- buffer_print(*buffer, *max, *offset, "/>"); -- -- } else { -- buffer_print(*buffer, *max, *offset, ">"); - } -+#endif - -- if (options & xml_log_option_formatted) { -- buffer_print(*buffer, *max, *offset, "\n"); -+ switch(data->type) { -+ case XML_ELEMENT_NODE: -+ /* Handle below */ -+ dump_xml_element(data, options, buffer, offset, max, depth); -+ break; -+ case XML_TEXT_NODE: -+ /* Ignore */ -+ return; -+ case XML_COMMENT_NODE: -+ dump_xml_comment(data, options, buffer, offset, max, depth); -+ break; -+ default: -+ crm_warn("Unhandled type: %d", data->type); -+ return; -+ -+ /* -+ XML_ATTRIBUTE_NODE = 2 -+ XML_CDATA_SECTION_NODE = 4 -+ XML_ENTITY_REF_NODE = 5 -+ XML_ENTITY_NODE = 6 -+ XML_PI_NODE = 7 -+ XML_DOCUMENT_NODE = 9 -+ XML_DOCUMENT_TYPE_NODE = 10 -+ XML_DOCUMENT_FRAG_NODE = 11 -+ XML_NOTATION_NODE = 12 -+ XML_HTML_DOCUMENT_NODE = 13 -+ XML_DTD_NODE = 14 -+ XML_ELEMENT_DECL = 15 -+ XML_ATTRIBUTE_DECL = 16 -+ XML_ENTITY_DECL = 17 -+ XML_NAMESPACE_DECL = 18 -+ XML_XINCLUDE_START = 19 -+ XML_XINCLUDE_END = 20 -+ XML_DOCB_DOCUMENT_NODE = 21 -+ */ - } - -- if (data->children) { -- xmlNode *xChild = NULL; -- -- for (xChild = __xml_first_child(data); xChild != NULL; xChild = __xml_next(xChild)) { -- dump_xml(xChild, options, buffer, offset, max, depth + 1); -- } -- -- insert_prefix(options, buffer, offset, max, depth); -- buffer_print(*buffer, *max, *offset, "", name); -- -- if (options & xml_log_option_formatted) { -- buffer_print(*buffer, *max, *offset, "\n"); -- } -- } - } - - static void -@@ -1446,9 +1523,6 @@ static void - save_xml_to_file(xmlNode * xml, const char *desc, const char *filename) - { - char *f = NULL; -- FILE *st = NULL; -- xmlDoc *doc = getDocPtr(xml); -- xmlBuffer *xml_buffer = 
xmlBufferCreate(); - - if (filename == NULL) { - char *uuid = crm_generate_uuid(); -@@ -1459,17 +1533,7 @@ save_xml_to_file(xmlNode * xml, const char *desc, const char *filename) - } - - crm_info("Saving %s to %s", desc, filename); -- xmlNodeDump(xml_buffer, doc, xml, 0, FALSE); -- -- st = fopen(filename, "w"); -- if (st) { -- fprintf(st, "%s", xml_buffer->content); -- /* fflush(st); */ -- /* fsync(fileno(st)); */ -- fclose(st); -- } -- -- xmlBufferFree(xml_buffer); -+ write_xml_file(xml, filename, FALSE); - g_free(f); - } - -@@ -2293,7 +2357,7 @@ calculate_xml_digest_v2(xmlNode * source, gboolean do_filter) - - static struct qb_log_callsite *digest_cs = NULL; - -- crm_trace("Begin digest"); -+ crm_trace("Begin digest %s", do_filter?"filtered":""); - if (do_filter && BEST_EFFORT_STATUS) { - /* Exclude the status calculation from the digest - * -@@ -2385,34 +2449,33 @@ validate_with_dtd(xmlDocPtr doc, gboolean to_logs, const char *dtd_file) - CRM_CHECK(dtd_file != NULL, return FALSE); - - dtd = xmlParseDTD(NULL, (const xmlChar *)dtd_file); -- CRM_CHECK(dtd != NULL, crm_err("Could not find/parse %s", dtd_file); -- goto cleanup); -- -- cvp = xmlNewValidCtxt(); -- CRM_CHECK(cvp != NULL, goto cleanup); -- -- if (to_logs) { -- cvp->userData = (void *)LOG_ERR; -- cvp->error = (xmlValidityErrorFunc) xml_log; -- cvp->warning = (xmlValidityWarningFunc) xml_log; -- } else { -- cvp->userData = (void *)stderr; -- cvp->error = (xmlValidityErrorFunc) fprintf; -- cvp->warning = (xmlValidityWarningFunc) fprintf; -+ if(dtd == NULL) { -+ crm_err("Could not locate/parse DTD: %s", dtd_file); -+ return TRUE; - } - -- if (!xmlValidateDtd(cvp, doc, dtd)) { -- valid = FALSE; -- } -+ cvp = xmlNewValidCtxt(); -+ if(cvp) { -+ if (to_logs) { -+ cvp->userData = (void *)LOG_ERR; -+ cvp->error = (xmlValidityErrorFunc) xml_log; -+ cvp->warning = (xmlValidityWarningFunc) xml_log; -+ } else { -+ cvp->userData = (void *)stderr; -+ cvp->error = (xmlValidityErrorFunc) fprintf; -+ cvp->warning = (xmlValidityWarningFunc) fprintf; -+ } - -- cleanup: -- if (cvp) { -+ if (!xmlValidateDtd(cvp, doc, dtd)) { -+ valid = FALSE; -+ } - xmlFreeValidCtxt(cvp); -- } -- if (dtd) { -- xmlFreeDtd(dtd); -+ -+ } else { -+ crm_err("Internal error: No valid context"); - } - -+ xmlFreeDtd(dtd); - return valid; - } - -@@ -2546,6 +2609,22 @@ validate_with_relaxng(xmlDocPtr doc, gboolean to_logs, const char *relaxng_file, - } - - void -+crm_xml_init(void) -+{ -+ static bool init = TRUE; -+ -+ if(init) { -+ init = FALSE; -+ /* The default allocator XML_BUFFER_ALLOC_EXACT does far too many -+ * realloc()s and it can take upwards of 18 seconds (yes, seconds) -+ * to dump a 28kb tree which XML_BUFFER_ALLOC_DOUBLEIT can do in -+ * less than 1 second. -+ */ -+ xmlSetBufferAllocationScheme(XML_BUFFER_ALLOC_DOUBLEIT); -+ } -+} -+ -+void - crm_xml_cleanup(void) - { - int lpc = 0; -@@ -2873,42 +2952,69 @@ update_validation(xmlNode ** xml_blob, int *best, gboolean transform, gboolean t - return rc; - } - -+/* -+ * From xpath2.c -+ * -+ * All the elements returned by an XPath query are pointers to -+ * elements from the tree *except* namespace nodes where the XPath -+ * semantic is different from the implementation in libxml2 tree. -+ * As a result when a returned node set is freed when -+ * xmlXPathFreeObject() is called, that routine must check the -+ * element type. But node from the returned set may have been removed -+ * by xmlNodeSetContent() resulting in access to freed data. 
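/*
 * Illustrative sketch: the run-once initialisation crm_xml_init() adds above.
 * Switching libxml2 from the default exact-size buffer allocator to
 * XML_BUFFER_ALLOC_DOUBLEIT trades a little memory for far fewer realloc()
 * calls when dumping large trees (seconds down to sub-second, per the
 * comment in the hunk).
 */
#include <libxml/tree.h>
#include <stdbool.h>

static void xml_init_once(void)
{
    static bool initialized = false;

    if (!initialized) {
        initialized = true;
        xmlSetBufferAllocationScheme(XML_BUFFER_ALLOC_DOUBLEIT);
    }
}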
-+ * -+ * This can be exercised by running -+ * valgrind xpath2 test3.xml '//discarded' discarded -+ * -+ * There is 2 ways around it: -+ * - make a copy of the pointers to the nodes from the result set -+ * then call xmlXPathFreeObject() and then modify the nodes -+ * or -+ * - remove the references from the node set, if they are not -+ namespace nodes, before calling xmlXPathFreeObject(). -+ */ -+void -+freeXpathObject(xmlXPathObjectPtr xpathObj) -+{ -+ int lpc, max = numXpathResults(xpathObj); -+ -+ if(xpathObj == NULL) { -+ return; -+ } -+ -+ for(lpc = 0; lpc < max; lpc++) { -+ if (xpathObj->nodesetval->nodeTab[lpc] && xpathObj->nodesetval->nodeTab[lpc]->type != XML_NAMESPACE_DECL) { -+ xpathObj->nodesetval->nodeTab[lpc] = NULL; -+ } -+ } -+ -+ /* _Now_ its safe to free it */ -+ xmlXPathFreeObject(xpathObj); -+} -+ - xmlNode * - getXpathResult(xmlXPathObjectPtr xpathObj, int index) - { - xmlNode *match = NULL; -+ int max = numXpathResults(xpathObj); - - CRM_CHECK(index >= 0, return NULL); - CRM_CHECK(xpathObj != NULL, return NULL); - -- if (index >= xpathObj->nodesetval->nodeNr) { -- crm_err("Requested index %d of only %d items", index, xpathObj->nodesetval->nodeNr); -+ if (index >= max) { -+ crm_err("Requested index %d of only %d items", index, max); -+ return NULL; -+ -+ } else if(xpathObj->nodesetval->nodeTab[index] == NULL) { -+ /* Previously requested */ - return NULL; - } - - match = xpathObj->nodesetval->nodeTab[index]; - CRM_CHECK(match != NULL, return NULL); - -- /* -- * From xpath2.c -- * -- * All the elements returned by an XPath query are pointers to -- * elements from the tree *except* namespace nodes where the XPath -- * semantic is different from the implementation in libxml2 tree. -- * As a result when a returned node set is freed when -- * xmlXPathFreeObject() is called, that routine must check the -- * element type. But node from the returned set may have been removed -- * by xmlNodeSetContent() resulting in access to freed data. -- * This can be exercised by running -- * valgrind xpath2 test3.xml '//discarded' discarded -- * There is 2 ways around it: -- * - make a copy of the pointers to the nodes from the result set -- * then call xmlXPathFreeObject() and then modify the nodes -- * or -- * - remove the reference to the modified nodes from the node set -- * as they are processed, if they are not namespace nodes. 
-- */ - if (xpathObj->nodesetval->nodeTab[index]->type != XML_NAMESPACE_DECL) { -+ /* See the comment for freeXpathObject() */ - xpathObj->nodesetval->nodeTab[index] = NULL; - } - -@@ -3090,6 +3196,7 @@ get_xpath_object_relative(const char *xpath, xmlNode * xml_obj, int error_level) - xmlNode * - get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) - { -+ int max; - xmlNode *result = NULL; - xmlXPathObjectPtr xpathObj = NULL; - char *nodePath = NULL; -@@ -3101,12 +3208,14 @@ get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) - - xpathObj = xpath_search(xml_obj, xpath); - nodePath = (char *)xmlGetNodePath(xml_obj); -- if (xpathObj == NULL || xpathObj->nodesetval == NULL || xpathObj->nodesetval->nodeNr < 1) { -+ max = numXpathResults(xpathObj); -+ -+ if (max < 1) { - do_crm_log(error_level, "No match for %s in %s", xpath, crm_str(nodePath)); - crm_log_xml_explicit(xml_obj, "Unexpected Input"); - -- } else if (xpathObj->nodesetval->nodeNr > 1) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ } else if (max > 1) { -+ int lpc = 0; - - do_crm_log(error_level, "Too many matches for %s in %s", xpath, crm_str(nodePath)); - -@@ -3125,9 +3234,7 @@ get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) - result = getXpathResult(xpathObj, 0); - } - -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -- } -+ freeXpathObject(xpathObj); - free(nodePath); - - return result; -diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am -index 30215b8..3297961 100644 ---- a/lib/fencing/Makefile.am -+++ b/lib/fencing/Makefile.am -@@ -25,7 +25,7 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - lib_LTLIBRARIES = libstonithd.la - - libstonithd_la_SOURCES = st_client.c --libstonithd_la_LDFLAGS = -version-info 2:1:0 -+libstonithd_la_LDFLAGS = -version-info 3:0:1 - libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la - - AM_CFLAGS = $(INCLUDES) -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index d464708..c87f2d5 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -604,6 +604,9 @@ read_output(int fd) - more = read(fd, buffer, READ_MAX - 1); - - if (more > 0) { -+ buffer[more] = 0; /* Make sure its nul-terminated for logging -+ * 'more' is always less than our buffer size -+ */ - crm_trace("Got %d more bytes: %.200s...", more, buffer); - output = realloc(output, len + more + 1); - snprintf(output + len, more + 1, "%s", buffer); -@@ -635,10 +638,9 @@ update_remaining_timeout(stonith_action_t * action) - } - - static void --stonith_action_async_done(GPid pid, gint status, gpointer user_data) -+stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- int rc = -pcmk_err_generic; -- stonith_action_t *action = user_data; -+ stonith_action_t *action = mainloop_child_userdata(p); - - if (action->timer_sigterm > 0) { - g_source_remove(action->timer_sigterm); -@@ -648,26 +650,25 @@ stonith_action_async_done(GPid pid, gint status, gpointer user_data) - } - - if (action->last_timeout_signo) { -- rc = -ETIME; -+ action->rc = -ETIME; - crm_notice("Child process %d performing action '%s' timed out with signal %d", - pid, action->action, action->last_timeout_signo); -- } else if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); - -- rc = -ECONNABORTED; -+ } else if (signo) { -+ action->rc = -ECONNABORTED; - crm_notice("Child process %d performing action '%s' timed out with signal %d", - pid, action->action, signo); -- } else if 
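/*
 * Illustrative sketch: the calling convention the new freeXpathObject()/
 * getXpathResult() pair establishes above, shown with raw libxml2 calls.
 * Consumed (non-namespace) entries are NULLed out of the node set before
 * xmlXPathFreeObject() runs, so nodes modified by the caller are never
 * touched again during the free.
 */
#include <libxml/xpath.h>

static void visit_matches(xmlXPathObjectPtr xpathObj)
{
    int lpc;
    int max = (xpathObj != NULL && xpathObj->nodesetval != NULL)
              ? xpathObj->nodesetval->nodeNr : 0;

    for (lpc = 0; lpc < max; lpc++) {
        xmlNode *match = xpathObj->nodesetval->nodeTab[lpc];

        if (match != NULL && match->type != XML_NAMESPACE_DECL) {
            /* ... use or modify 'match' here ... */
            xpathObj->nodesetval->nodeTab[lpc] = NULL;  /* detach first */
        }
    }
    xmlXPathFreeObject(xpathObj);         /* now safe to free */
}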
(WIFEXITED(status)) { -- rc = WEXITSTATUS(status); -+ -+ } else { -+ action->rc = exitcode; - crm_debug("Child process %d performing action '%s' exited with rc %d", -- pid, action->action, rc); -+ pid, action->action, exitcode); - } - -- action->rc = rc; - action->output = read_output(action->fd_stdout); - - if (action->rc != pcmk_ok && update_remaining_timeout(action)) { -- rc = internal_stonith_action_execute(action); -+ int rc = internal_stonith_action_execute(action); - if (rc == pcmk_ok) { - return; - } -@@ -778,17 +779,17 @@ internal_stonith_action_execute(stonith_action_t * action) - if (total != len) { - crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len); - if (ret >= 0) { -- rc = -EREMOTEIO; -+ rc = -ECOMM; - } - goto fail; - } - -- close(p_write_fd); -+ close(p_write_fd); p_write_fd = -1; - - /* async */ - if (action->async) { - action->fd_stdout = p_read_fd; -- g_child_watch_add(pid, stonith_action_async_done, action); -+ mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done); - crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid, - action->remaining_timeout); - action->last_timeout_signo = 0; -@@ -821,12 +822,18 @@ internal_stonith_action_execute(stonith_action_t * action) - } - - if (timeout == 0) { -- int killrc = kill(pid, 9 /*SIGKILL*/); -+ int killrc = kill(pid, SIGKILL); - - if (killrc && errno != ESRCH) { - crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno); - } -- p = waitpid(pid, &status, WNOHANG); -+ /* -+ * From sigprocmask(2): -+ * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. -+ * -+ * This makes it safe to skip WNOHANG here -+ */ -+ p = waitpid(pid, &status, 0); - } - - if (p <= 0) { -@@ -1077,13 +1084,15 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a - xmlXPathObject *xpathObj = NULL; - - xpathObj = xpath_search(xml, "//actions"); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - actions = getXpathResult(xpathObj, 0); - } - -+ freeXpathObject(xpathObj); -+ - /* Now fudge the metadata so that the start/stop actions appear */ - xpathObj = xpath_search(xml, "//action[@name='stop']"); -- if (xpathObj == NULL || xpathObj->nodesetval->nodeNr <= 0) { -+ if (numXpathResults(xpathObj) <= 0) { - xmlNode *tmp = NULL; - - tmp = create_xml_node(actions, "action"); -@@ -1095,15 +1104,18 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a - crm_xml_add(tmp, "timeout", "20s"); - } - -+ freeXpathObject(xpathObj); -+ - /* Now fudge the metadata so that the port isn't required in the configuration */ - xpathObj = xpath_search(xml, "//parameter[@name='port']"); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -+ if (numXpathResults(xpathObj) > 0) { - /* We'll fill this in */ - xmlNode *tmp = getXpathResult(xpathObj, 0); - - crm_xml_add(tmp, "required", "0"); - } - -+ freeXpathObject(xpathObj); - free(buffer); - buffer = dump_xml_formatted(xml); - free_xml(xml); -@@ -1232,7 +1244,7 @@ stonith_api_query(stonith_t * stonith, int call_options, const char *target, - - xpathObj = xpath_search(output, "//@agent"); - if (xpathObj) { -- max = xpathObj->nodesetval->nodeNr; -+ max = numXpathResults(xpathObj); - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); -@@ -1242,6 +1254,8 @@ stonith_api_query(stonith_t * stonith, int call_options, const char *target, - crm_info("%s[%d] = %s", 
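/*
 * Illustrative sketch: why the hunk above can drop WNOHANG -- SIGKILL cannot
 * be blocked, handled, or ignored, so once kill() succeeds the child is
 * guaranteed to die and a blocking waitpid() cannot hang.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>

static void reap_after_kill(pid_t pid)
{
    int status = 0;

    if (kill(pid, SIGKILL) < 0 && errno != ESRCH) {
        perror("kill(SIGKILL)");
        return;
    }
    /* blocking reap is safe here: the child is already gone or dying */
    if (waitpid(pid, &status, 0) == pid && WIFSIGNALED(status)) {
        fprintf(stderr, "child %d killed by signal %d\n",
                (int) pid, WTERMSIG(status));
    }
}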
"//@agent", lpc, xmlGetNodePath(match)); - *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); - } -+ -+ freeXpathObject(xpathObj); - } - - free_xml(output); -@@ -1366,6 +1380,7 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node, - kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); - kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); - kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); -+ kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); - crm_element_value_int(op, F_STONITH_DATE, &kvp->completed); - crm_element_value_int(op, F_STONITH_STATE, &kvp->state); - -@@ -1519,6 +1534,7 @@ stonith_api_signoff(stonith_t * stonith) - crm_ipc_destroy(ipc); - } - -+ free(native->token); native->token = NULL; - stonith->state = stonith_disconnected; - return pcmk_ok; - } -@@ -2201,18 +2217,28 @@ stonith_api_free(stonith_t * stonith) - { - int rc = pcmk_ok; - -+ crm_trace("Destroying %p", stonith); -+ - if (stonith->state != stonith_disconnected) { -+ crm_trace("Disconnecting %p first", stonith); - rc = stonith->cmds->disconnect(stonith); - } - - if (stonith->state == stonith_disconnected) { - stonith_private_t *private = stonith->private; - -+ crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); - g_hash_table_destroy(private->stonith_op_callback_table); -- free(private->token); -+ -+ crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); -+ g_list_free_full(private->notify_list, free); -+ - free(stonith->private); - free(stonith->cmds); - free(stonith); -+ -+ } else { -+ crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); - } - - return rc; -@@ -2221,18 +2247,10 @@ stonith_api_free(stonith_t * stonith) - void - stonith_api_delete(stonith_t * stonith) - { -- stonith_private_t *private = stonith->private; -- GList *list = private->notify_list; -- -- while (list != NULL) { -- stonith_notify_client_t *client = g_list_nth_data(list, 0); -- -- list = g_list_remove(list, client); -- free(client); -+ crm_trace("Destroying %p", stonith); -+ if(stonith) { -+ stonith->cmds->free(stonith); - } -- -- stonith->cmds->free(stonith); -- stonith = NULL; - } - - stonith_t * -diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am -index d727f80..38ff108 100644 ---- a/lib/lrmd/Makefile.am -+++ b/lib/lrmd/Makefile.am -@@ -25,7 +25,7 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - lib_LTLIBRARIES = liblrmd.la - - liblrmd_la_SOURCES = lrmd_client.c --liblrmd_la_LDFLAGS = -version-info 1:0:0 -+liblrmd_la_LDFLAGS = -version-info 2:1:1 - liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/fencing/libstonithd.la -diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c -index d7cbbc0..5fd9efb 100644 ---- a/lib/lrmd/lrmd_client.c -+++ b/lib/lrmd/lrmd_client.c -@@ -58,10 +58,15 @@ static stonith_t *stonith_api = NULL; - static int lrmd_api_disconnect(lrmd_t * lrmd); - static int lrmd_api_is_connected(lrmd_t * lrmd); - -+/* IPC proxy functions */ -+int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); -+static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); -+void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); -+ - #ifdef HAVE_GNUTLS_GNUTLS_H - # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ - 
gnutls_psk_client_credentials_t psk_cred_s; --int lrmd_tls_set_key(gnutls_datum_t * key, const char *location); -+int lrmd_tls_set_key(gnutls_datum_t * key); - static void lrmd_tls_disconnect(lrmd_t * lrmd); - static int global_remote_msg_id = 0; - int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); -@@ -92,6 +97,9 @@ typedef struct lrmd_private_s { - - lrmd_event_callback callback; - -+ /* Internal IPC proxy msg passing for remote guests */ -+ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); -+ void *proxy_callback_userdata; - } lrmd_private_t; - - static lrmd_list_t * -@@ -227,9 +235,16 @@ static int - lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) - { - const char *type; -+ const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); - lrmd_private_t *native = lrmd->private; - lrmd_event_data_t event = { 0, }; - -+ if (proxy_session != NULL) { -+ /* this is proxy business */ -+ lrmd_internal_proxy_dispatch(lrmd, msg); -+ return 1; -+ } -+ - if (!native->callback) { - /* no callback set */ - crm_trace("notify event received but client has not set callback"); -@@ -834,6 +849,12 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) - crm_xml_add(hello, F_TYPE, T_LRMD); - crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); - crm_xml_add(hello, F_LRMD_CLIENTNAME, name); -+ crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); -+ -+ /* advertise that we are a proxy provider */ -+ if (native->proxy_callback) { -+ crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); -+ } - - rc = lrmd_send_xml(lrmd, hello, -1, &reply); - -@@ -847,7 +868,14 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) - const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); - const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); - -- if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { -+ crm_element_value_int(reply, F_LRMD_RC, &rc); -+ -+ if (rc == -EPROTO) { -+ crm_err("LRMD protocol mismatch client version %s, server version %s", -+ LRMD_PROTOCOL_VERSION, crm_element_value(reply, F_LRMD_PROTOCOL_VERSION)); -+ crm_log_xml_err(reply, "Protocol Error"); -+ -+ } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { - crm_err("Invalid registration message: %s", msg_type); - crm_log_xml_err(reply, "Bad reply"); - rc = -EPROTO; -@@ -906,8 +934,8 @@ lrmd_ipc_connect(lrmd_t * lrmd, int *fd) - } - - #ifdef HAVE_GNUTLS_GNUTLS_H --int --lrmd_tls_set_key(gnutls_datum_t * key, const char *location) -+static int -+set_key(gnutls_datum_t * key, const char *location) - { - FILE *stream; - int read_len = 256; -@@ -917,6 +945,10 @@ lrmd_tls_set_key(gnutls_datum_t * key, const char *location) - static size_t key_cache_len = 0; - static time_t key_cache_updated; - -+ if (location == NULL) { -+ return -1; -+ } -+ - if (key_cache) { - time_t now = time(NULL); - -@@ -943,7 +975,7 @@ lrmd_tls_set_key(gnutls_datum_t * key, const char *location) - - key->data = gnutls_malloc(read_len); - while (!feof(stream)) { -- char next; -+ int next; - - if (cur_len == buf_len) { - buf_len = cur_len + read_len; -@@ -977,22 +1009,25 @@ lrmd_tls_set_key(gnutls_datum_t * key, const char *location) - return 0; - } - --static int --lrmd_tls_key_cb(gnutls_session_t session, char **username, gnutls_datum_t * key) -+int -+lrmd_tls_set_key(gnutls_datum_t * key) - { - int rc = 0; -+ const char *specific_location = getenv("PCMK_authkey_location"); - -- if (lrmd_tls_set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { -- rc = lrmd_tls_set_key(key, 
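/*
 * Illustrative sketch: the "char next" -> "int next" fix in set_key() above.
 * fgetc() returns an int precisely so that EOF (-1) is distinguishable from
 * every valid byte; storing the result in a char can turn byte 0xFF into a
 * false end-of-file, or never match EOF at all where char is unsigned.
 */
#include <stdio.h>

static long count_bytes(FILE *stream)
{
    long n = 0;
    int next;                             /* int, not char: must hold EOF */

    while ((next = fgetc(stream)) != EOF) {
        n++;
    }
    return n;
}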
ALT_REMOTE_KEY_LOCATION); -+ if (set_key(key, specific_location) == 0) { -+ crm_debug("Using custom authkey location %s", specific_location); -+ return 0; -+ } -+ -+ if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { -+ rc = set_key(key, ALT_REMOTE_KEY_LOCATION); - } - if (rc) { - crm_err("No lrmd remote key found"); - return -1; - } - -- *username = gnutls_malloc(strlen(DEFAULT_REMOTE_USERNAME) + 1); -- strcpy(*username, DEFAULT_REMOTE_USERNAME); -- - return rc; - } - -@@ -1034,6 +1069,7 @@ lrmd_tcp_connect_cb(void *userdata, int sock) - .destroy = lrmd_tls_connection_destroy, - }; - int rc = sock; -+ gnutls_datum_t psk_key = { NULL, 0 }; - - if (rc < 0) { - lrmd_tls_connection_destroy(lrmd); -@@ -1045,8 +1081,16 @@ lrmd_tcp_connect_cb(void *userdata, int sock) - /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon - * to avoid all blocking code in the client. */ - native->sock = sock; -+ -+ if (lrmd_tls_set_key(&psk_key) != 0) { -+ lrmd_tls_connection_destroy(lrmd); -+ return; -+ } -+ - gnutls_psk_allocate_client_credentials(&native->psk_cred_c); -- gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); -+ gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); -+ gnutls_free(psk_key.data); -+ - native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); - - if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { -@@ -1099,19 +1143,28 @@ lrmd_tls_connect(lrmd_t * lrmd, int *fd) - - lrmd_private_t *native = lrmd->private; - int sock; -+ gnutls_datum_t psk_key = { NULL, 0 }; - - lrmd_gnutls_global_init(); - - sock = crm_remote_tcp_connect(native->server, native->port); -- if (sock <= 0) { -+ if (sock < 0) { - crm_warn("Could not establish remote lrmd connection to %s", native->server); - lrmd_tls_connection_destroy(lrmd); - return -ENOTCONN; - } - - native->sock = sock; -+ -+ if (lrmd_tls_set_key(&psk_key) != 0) { -+ lrmd_tls_connection_destroy(lrmd); -+ return -1; -+ } -+ - gnutls_psk_allocate_client_credentials(&native->psk_cred_c); -- gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); -+ gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); -+ gnutls_free(psk_key.data); -+ - native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); - - if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { -@@ -1400,6 +1453,38 @@ lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) - native->callback = callback; - } - -+void -+lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) -+{ -+ lrmd_private_t *native = lrmd->private; -+ -+ native->proxy_callback = callback; -+ native->proxy_callback_userdata = userdata; -+} -+ -+void -+lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) -+{ -+ lrmd_private_t *native = lrmd->private; -+ -+ if (native->proxy_callback) { -+ crm_log_xml_trace(msg, "PROXY_INBOUND"); -+ native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); -+ } -+} -+ -+int -+lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) -+{ -+ if (lrmd == NULL) { -+ return -ENOTCONN; -+ } -+ crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); -+ -+ crm_log_xml_trace(msg, "PROXY_OUTBOUND"); -+ return lrmd_send_xml_no_reply(lrmd, msg); -+} -+ - static int - 
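/*
 * Illustrative sketch: the credential setup the two hunks above converge on.
 * Reading the PSK once and handing it to GnuTLS with
 * gnutls_psk_set_client_credentials() replaces the old per-handshake key
 * callback. The username and key bytes are placeholders.
 */
#include <gnutls/gnutls.h>
#include <string.h>

static int setup_psk(gnutls_psk_client_credentials_t *cred,
                     const unsigned char *key_bytes, size_t key_len)
{
    gnutls_datum_t key = { NULL, 0 };
    int rc;

    key.data = gnutls_malloc(key_len);
    if (key.data == NULL) {
        return GNUTLS_E_MEMORY_ERROR;
    }
    memcpy(key.data, key_bytes, key_len);
    key.size = (unsigned int) key_len;

    gnutls_psk_allocate_client_credentials(cred);
    rc = gnutls_psk_set_client_credentials(*cred, "username", &key,
                                           GNUTLS_PSK_KEY_RAW);
    gnutls_free(key.data);                /* GnuTLS keeps its own copy */
    return rc;
}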
stonith_get_metadata(const char *provider, const char *type, char **output) - { -@@ -1412,40 +1497,36 @@ stonith_get_metadata(const char *provider, const char *type, char **output) - return rc; - } - --static int --lsb_get_metadata(const char *type, char **output) --{ -- - #define lsb_metadata_template \ --"\n"\ --"\n"\ --"\n"\ --" 1.0\n"\ --" \n"\ --" %s"\ --" \n"\ --" %s\n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" \n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" %s\n"\ --" \n"\ --"\n" -+ "\n" \ -+ "\n" \ -+ "\n" \ -+ " 1.0\n" \ -+ " \n" \ -+ " %s\n" \ -+ " \n" \ -+ " %s\n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " \n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " %s\n" \ -+ " \n" \ -+ "\n" - - #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" - #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" -@@ -1471,6 +1552,9 @@ lsb_get_metadata(const char *type, char **output) - continue; \ - } - -+static int -+lsb_get_metadata(const char *type, char **output) -+{ - char ra_pathname[PATH_MAX] = { 0, }; - FILE *fp; - GString *meta_data = NULL; -@@ -1486,11 +1570,15 @@ lsb_get_metadata(const char *type, char **output) - char *xml_l_dscrpt = NULL; - GString *l_dscrpt = NULL; - -- snprintf(ra_pathname, sizeof(ra_pathname), "%s%s%s", -- type[0] == '/' ? "" : LSB_ROOT_DIR, type[0] == '/' ? "" : "/", type); -+ if(type[0] == '/') { -+ snprintf(ra_pathname, sizeof(ra_pathname), "%s", type); -+ } else { -+ snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type); -+ } - -+ crm_trace("Looking into %s", ra_pathname); - if (!(fp = fopen(ra_pathname, "r"))) { -- return -EIO; -+ return -errno; - } - - /* Enter into the lsb-compliant comment block */ -@@ -1559,6 +1647,7 @@ lsb_get_metadata(const char *type, char **output) - *output = strdup(meta_data->str); - g_string_free(meta_data, TRUE); - -+ crm_trace("Created fake metadata: %d", strlen(*output)); - return pcmk_ok; - } - -@@ -1874,13 +1963,19 @@ lrmd_remote_api_new(const char *nodename, const char *server, int port) - lrmd_private_t *native = new_lrmd->private; - - if (!nodename && !server) { -+ lrmd_api_delete(new_lrmd); - return NULL; - } - - native->type = CRM_CLIENT_TLS; - native->remote_nodename = nodename ? strdup(nodename) : strdup(server); - native->server = server ? strdup(server) : strdup(nodename); -- native->port = port ? port : DEFAULT_REMOTE_PORT; -+ native->port = port; -+ if (native->port == 0) { -+ const char *remote_port_str = getenv("PCMK_remote_port"); -+ native->port = remote_port_str ? 
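/*
 * Illustrative sketch: the port-selection fallback added above -- an explicit
 * argument wins, then the PCMK_remote_port environment variable, then the
 * compiled-in default (3121 stands in for DEFAULT_REMOTE_PORT here).
 */
#include <stdlib.h>

#define DEFAULT_PORT 3121

static int choose_port(int requested)
{
    if (requested == 0) {
        const char *env = getenv("PCMK_remote_port");

        return (env != NULL) ? atoi(env) : DEFAULT_PORT;
    }
    return requested;
}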
atoi(remote_port_str) : DEFAULT_REMOTE_PORT; -+ } -+ - return new_lrmd; - #else - crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); -@@ -1906,6 +2001,12 @@ lrmd_api_delete(lrmd_t * lrmd) - free(native->remote_nodename); - free(native->remote); - } -+ -+ if (stonith_api) { -+ stonith_api->cmds->free(stonith_api); -+ stonith_api = NULL; -+ } -+ - free(lrmd->private); - free(lrmd); - } -diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am -index cd5e2b4..6c0bb32 100644 ---- a/lib/pengine/Makefile.am -+++ b/lib/pengine/Makefile.am -@@ -26,12 +26,13 @@ lib_LTLIBRARIES = libpe_rules.la libpe_status.la - ## SOURCES - noinst_HEADERS = unpack.h variant.h - --libpe_rules_la_LDFLAGS = -version-info 2:1:0 -+libpe_rules_la_LDFLAGS = -version-info 2:2:0 - libpe_rules_la_SOURCES = rules.c common.c -+libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la - --libpe_status_la_LDFLAGS = -version-info 4:1:0 -+libpe_status_la_LDFLAGS = -version-info 6:0:2 - libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c group.c clone.c rules.c common.c --libpe_status_la_LIBADD = @CURSESLIBS@ -+libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la - - clean-generic: - rm -f *.log *.debug *~ -diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c -index 2ff092e..38ff23a 100644 ---- a/lib/pengine/clone.c -+++ b/lib/pengine/clone.c -@@ -206,7 +206,7 @@ clone_unpack(resource_t * rsc, pe_working_set_t * data_set) - clone_data->ordered = TRUE; - } - if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) { -- crm_config_err("Anonymous clones (%s) may only support one copy" " per node", rsc->id); -+ crm_config_err("Anonymous clones (%s) may only support one copy per node", rsc->id); - clone_data->clone_node_max = 1; - } - -@@ -510,6 +510,32 @@ clone_print(resource_t * rsc, const char *pre_text, long options, void *print_da - list_text = NULL; - - /* Stopped */ -+ if(is_not_set(rsc->flags, pe_rsc_unique)) { -+ -+ GListPtr nIter; -+ GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); -+ -+ /* Custom stopped list for non-unique clones */ -+ free(stopped_list); stopped_list = NULL; -+ -+ if(g_list_length(list) == 0) { -+ /* Clusters with symmetrical=false haven't calculated allowed_nodes yet -+ * If we've not probed for them yet, the Stopped list will be empty -+ */ -+ list = g_hash_table_get_values(rsc->known_on); -+ } -+ -+ list = g_list_sort(list, sort_node_uname); -+ for (nIter = list; nIter != NULL; nIter = nIter->next) { -+ node_t *node = (node_t *)nIter->data; -+ -+ if(pe_find_node(rsc->running_on, node->details->uname) == NULL) { -+ stopped_list = add_list_element(stopped_list, node->details->uname); -+ } -+ } -+ g_list_free(list); -+ } -+ - short_print(stopped_list, child_text, "Stopped", options, print_data); - free(stopped_list); - -diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c -index b13d511..9110585 100644 ---- a/lib/pengine/complex.c -+++ b/lib/pengine/complex.c -@@ -22,7 +22,6 @@ - #include - #include - --extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); - void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length); - - resource_object_functions_t resource_class_functions[] = { -@@ -220,9 +219,9 @@ unpack_template(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * d - return FALSE; - } - -- cib_resources = get_object_root(XML_CIB_TAG_RESOURCES, data_set->input); -+ cib_resources = 
get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); - if (cib_resources == NULL) { -- pe_err("Cannot get the root of object '%s'", XML_CIB_TAG_RESOURCES); -+ pe_err("No resources configured"); - return FALSE; - } - -diff --git a/lib/pengine/native.c b/lib/pengine/native.c -index ad73f25..110c210 100644 ---- a/lib/pengine/native.c -+++ b/lib/pengine/native.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -34,7 +34,6 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) - GListPtr gIter = rsc->running_on; - - CRM_CHECK(node != NULL, return); -- - for (; gIter != NULL; gIter = gIter->next) { - node_t *a_node = (node_t *) gIter->data; - -@@ -44,7 +43,8 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) - } - } - -- pe_rsc_trace(rsc, "Adding %s to %s", rsc->id, node->details->uname); -+ pe_rsc_trace(rsc, "Adding %s to %s %s", rsc->id, node->details->uname, -+ is_set(rsc->flags, pe_rsc_managed)?"":"(unmanaged)"); - - rsc->running_on = g_list_append(rsc->running_on, node); - if (rsc->variant == pe_native) { -@@ -52,8 +52,16 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) - } - - if (is_not_set(rsc->flags, pe_rsc_managed)) { -+ resource_t *p = rsc->parent; -+ - pe_rsc_info(rsc, "resource %s isnt managed", rsc->id); - resource_location(rsc, node, INFINITY, "not_managed_default", data_set); -+ -+ while(p && node->details->online) { -+ /* add without the additional location constraint */ -+ p->running_on = g_list_append(p->running_on, node); -+ p = p->parent; -+ } - return; - } - -@@ -287,7 +295,7 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri - - /* resource information. */ - status_print("%sid); -+ status_print("id=\"%s\" ", rsc_printable_id(rsc)); - status_print("resource_agent=\"%s%s%s:%s\" ", - class, - prov ? "::" : "", prov ? 
prov : "", crm_element_value(rsc->xml, XML_ATTR_TYPE)); -@@ -329,12 +337,19 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri - } - } - -+ - void - native_print(resource_t * rsc, const char *pre_text, long options, void *print_data) - { - node_t *node = NULL; -- const char *prov = NULL; - const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -+ const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); -+ -+ int offset = 0; -+ char buffer[LINE_MAX]; -+ -+ CRM_ASSERT(rsc->variant == pe_native); -+ CRM_ASSERT(kind != NULL); - - if (rsc->meta) { - const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC); -@@ -353,13 +368,12 @@ native_print(resource_t * rsc, const char *pre_text, long options, void *print_d - return; - } - -- if (safe_str_eq(class, "ocf")) { -- prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -- } -- - if (rsc->running_on != NULL) { - node = rsc->running_on->data; - } -+ if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -+ node = NULL; -+ } - - if (options & pe_print_html) { - if (is_not_set(rsc->flags, pe_rsc_managed)) { -@@ -382,40 +396,51 @@ native_print(resource_t * rsc, const char *pre_text, long options, void *print_d - } - } - -+ if(pre_text) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text); -+ } -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(rsc)); -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class); -+ if (safe_str_eq(class, "ocf")) { -+ const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); -+ } -+ offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind); -+ if(is_set(rsc->flags, pe_rsc_orphan)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, " ORPHANED "); -+ } -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s ", role2text(rsc->role)); -+ if(node) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s ", node->details->uname); -+ } -+ if(is_not_set(rsc->flags, pe_rsc_managed)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "(unmanaged) "); -+ } -+ if(is_set(rsc->flags, pe_rsc_failed)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED "); -+ } -+ if(is_set(rsc->flags, pe_rsc_failure_ignored)) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "(failure ignored)"); -+ } -+ - if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- const char *desc = NULL; -- -- desc = crm_element_value(rsc->xml, XML_ATTR_DESC); -- status_print("%s%s\t(%s%s%s:%s%s):\t%s %s%s%s%s%s", -- pre_text ? pre_text : "", rsc->id, -- class, prov ? "::" : "", prov ? prov : "", -- crm_element_value(rsc->xml, XML_ATTR_TYPE), -- is_set(rsc->flags, pe_rsc_orphan) ? " ORPHANED" : "", -- (rsc->variant != pe_native) ? "" : role2text(rsc->role), -- is_set(rsc->flags, pe_rsc_managed) ? "" : "(unmanaged) ", -- is_set(rsc->flags, pe_rsc_failed) ? "FAILED " : "", -- is_set(rsc->flags, pe_rsc_failure_ignored) ? "(failure ignored) " : "", -- desc ? ": " : "", desc ? desc : ""); -+ const char *desc = crm_element_value(rsc->xml, XML_ATTR_DESC); -+ if(desc) { -+ offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", desc); -+ } -+ } - -- } else { -- status_print("%s%s\t(%s%s%s:%s):\t%s%s %s%s%s%s", -- pre_text ? pre_text : "", rsc->id, -- class, prov ? "::" : "", prov ? 
prov : "", -- crm_element_value(rsc->xml, XML_ATTR_TYPE), -- is_set(rsc->flags, pe_rsc_orphan) ? " ORPHANED " : "", -- (rsc->variant != pe_native) ? "" : role2text(rsc->role), -- (rsc->variant != pe_native) ? "" : node != NULL ? node->details->uname : "", -- is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)", -- is_set(rsc->flags, pe_rsc_failed) ? " FAILED" : "", -- is_set(rsc->flags, pe_rsc_failure_ignored) ? " (failure ignored)" : ""); -+ status_print("%s", buffer); - - #if CURSES_ENABLED -- if (options & pe_print_ncurses) { -- /* coverity[negative_returns] False positive */ -- move(-1, 0); -- } --#endif -+ if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -+ /* Done */ -+ -+ } else if (options & pe_print_ncurses) { -+ /* coverity[negative_returns] False positive */ -+ move(-1, 0); - } -+#endif - - if (options & pe_print_html) { - status_print(" </font> "); -diff --git a/lib/pengine/status.c b/lib/pengine/status.c -index f0449de..bb9dfcb 100644 ---- a/lib/pengine/status.c -+++ b/lib/pengine/status.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -29,14 +29,12 @@ - #include - #include - --extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); -- - #define MEMCHECK_STAGE_0 0 - - #define check_and_exit(stage) cleanup_calculations(data_set); \ - crm_mem_stats(NULL); \ - crm_err("Exiting: stage %d", stage); \ -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - - /* - * Unpack everything -@@ -52,11 +50,11 @@ extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); - gboolean - cluster_status(pe_working_set_t * data_set) - { -- xmlNode *config = get_object_root(XML_CIB_TAG_CRMCONFIG, data_set->input); -- xmlNode *cib_nodes = get_object_root(XML_CIB_TAG_NODES, data_set->input); -- xmlNode *cib_resources = get_object_root(XML_CIB_TAG_RESOURCES, data_set->input); -- xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); -- xmlNode *cib_domains = get_object_root(XML_CIB_TAG_DOMAINS, data_set->input); -+ xmlNode *config = get_xpath_object("//"XML_CIB_TAG_CRMCONFIG, data_set->input, LOG_TRACE); -+ xmlNode *cib_nodes = get_xpath_object("//"XML_CIB_TAG_NODES, data_set->input, LOG_TRACE); -+ xmlNode *cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); -+ xmlNode *cib_status = get_xpath_object("//"XML_CIB_TAG_STATUS, data_set->input, LOG_TRACE); -+ xmlNode *cib_domains = get_xpath_object("//"XML_CIB_TAG_DOMAINS, data_set->input, LOG_TRACE); - const char *value = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); - - crm_trace("Beginning unpack"); -@@ -73,7 +71,9 @@ cluster_status(pe_working_set_t * data_set) - data_set->now = crm_time_new(NULL); - } - -- if (data_set->input != NULL && 
crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { -+ if (data_set->dc_uuid == NULL -+ && data_set->input != NULL -+ && crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { - /* this should always be present */ - data_set->dc_uuid = crm_element_value_copy(data_set->input, XML_ATTR_DC_UUID); - } -@@ -83,21 +83,29 @@ cluster_status(pe_working_set_t * data_set) - set_bit(data_set->flags, pe_flag_have_quorum); - } - -- data_set->op_defaults = get_object_root(XML_CIB_TAG_OPCONFIG, data_set->input); -- data_set->rsc_defaults = get_object_root(XML_CIB_TAG_RSCCONFIG, data_set->input); -+ data_set->op_defaults = get_xpath_object("//"XML_CIB_TAG_OPCONFIG, data_set->input, LOG_TRACE); -+ data_set->rsc_defaults = get_xpath_object("//"XML_CIB_TAG_RSCCONFIG, data_set->input, LOG_TRACE); - - unpack_config(config, data_set); - -- if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE -- && data_set->no_quorum_policy != no_quorum_ignore) { -- crm_warn("We do not have quorum" " - fencing and resource management disabled"); -+ if (is_not_set(data_set->flags, pe_flag_quick_location) -+ && is_not_set(data_set->flags, pe_flag_have_quorum) -+ && data_set->no_quorum_policy != no_quorum_ignore) { -+ crm_warn("We do not have quorum - fencing and resource management disabled"); - } - - unpack_nodes(cib_nodes, data_set); - unpack_domains(cib_domains, data_set); -- unpack_remote_nodes(cib_resources, data_set); -+ -+ if(is_not_set(data_set->flags, pe_flag_quick_location)) { -+ unpack_remote_nodes(cib_resources, data_set); -+ } -+ - unpack_resources(cib_resources, data_set); -- unpack_status(cib_status, data_set); -+ -+ if(is_not_set(data_set->flags, pe_flag_quick_location)) { -+ unpack_status(cib_status, data_set); -+ } - - set_bit(data_set->flags, pe_flag_have_status); - return TRUE; -@@ -225,6 +233,7 @@ set_working_set_defaults(pe_working_set_t * data_set) - pe_dataset = data_set; - memset(data_set, 0, sizeof(pe_working_set_t)); - -+ data_set->dc_uuid = NULL; - data_set->order_id = 1; - data_set->action_id = 1; - data_set->no_quorum_policy = no_quorum_freeze; -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 4257579..81a4936 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -85,8 +85,9 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) - verify_pe_options(data_set->config_hash); - - set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); -- crm_info("Startup probes: %s", -- is_set(data_set->flags, pe_flag_startup_probes) ? 
"enabled" : "disabled (dangerous)"); -+ if(is_not_set(data_set->flags, pe_flag_startup_probes)) { -+ crm_info("Startup probes: disabled (dangerous)"); -+ } - - value = pe_pref(data_set->config_hash, "stonith-timeout"); - data_set->stonith_timeout = crm_get_msec(value); -@@ -190,7 +191,7 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) - node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); - node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); - -- crm_info("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", -+ crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", - pe_pref(data_set->config_hash, "node-health-red"), - pe_pref(data_set->config_hash, "node-health-yellow"), - pe_pref(data_set->config_hash, "node-health-green")); -@@ -292,6 +293,7 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_ - const char *remote_name = NULL; - const char *remote_server = NULL; - const char *remote_port = NULL; -+ const char *connect_timeout = "60s"; - char *tmp_id = NULL; - - for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next(attr_set)) { -@@ -309,6 +311,8 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_ - remote_server = value; - } else if (safe_str_eq(name, "remote-port")) { - remote_port = value; -+ } else if (safe_str_eq(name, "remote-connect-timeout")) { -+ connect_timeout = value; - } - } - } -@@ -369,6 +373,16 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_ - crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor"); - free(tmp_id); - -+ if (connect_timeout) { -+ attr = create_xml_node(xml_tmp, XML_ATTR_OP); -+ tmp_id = crm_concat(remote_name, "start-interval-0", '_'); -+ crm_xml_add(attr, XML_ATTR_ID, tmp_id); -+ crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout); -+ crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0"); -+ crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start"); -+ free(tmp_id); -+ } -+ - if (remote_port || remote_server) { - xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS); - tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_'); -@@ -459,6 +473,11 @@ unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) - } - } - -+ if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { -+ crm_info("Creating a fake local node"); -+ create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); -+ } -+ - return TRUE; - } - -@@ -478,7 +497,7 @@ unpack_domains(xmlNode * xml_domains, pe_working_set_t * data_set) - xmlNode *xml_node = NULL; - xmlNode *xml_domain = NULL; - -- crm_info("Unpacking domains"); -+ crm_debug("Unpacking domains"); - data_set->domains = - g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, - g_hash_destroy_node_list); -@@ -562,12 +581,6 @@ setup_container(resource_t * rsc, pe_working_set_t * data_set) - rsc->container = container; - container->fillers = g_list_append(container->fillers, rsc); - pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); -- if (rsc->is_remote_node) { -- node_t *node = g_hash_table_lookup(container->allowed_nodes, rsc->id); -- if (node) { -- node->weight = -INFINITY; -- } -- } - } else { - pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); - } -@@ -665,7 +678,8 @@ unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) - - data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); - -- 
if (is_set(data_set->flags, pe_flag_stonith_enabled) -+ if (is_not_set(data_set->flags, pe_flag_quick_location) -+ && is_set(data_set->flags, pe_flag_stonith_enabled) - && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { - crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); - crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); -@@ -1464,7 +1478,7 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa - } - - if (safe_str_neq(rsc_id, rsc->id)) { -- pe_rsc_info(rsc, "Internally renamed %s on %s to %s%s", -+ pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", - rsc_id, node->details->uname, rsc->id, - is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); - } -@@ -2021,7 +2035,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - int task_status_i = -2; - int actual_rc_i = 0; - int target_rc = -1; -- int last_failure = 0; -+ time_t last_failure = 0; - int clear_failcount = 0; - - action_t *action = NULL; -@@ -2044,6 +2058,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); - magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); - key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); -+ actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); - - crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); - -@@ -2064,8 +2079,8 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - if (rsc->failure_timeout > 0) { - int last_run = 0; - -- if (crm_element_value_int(xml_op, "last-rc-change", &last_run) == 0) { -- time_t now = get_timet_now(data_set); -+ if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { -+ time_t now = get_effective_time(data_set); - - if (now > (last_run + rsc->failure_timeout)) { - expired = TRUE; -@@ -2073,8 +2088,8 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - } - } - -- pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%s) on %s (role=%s)", -- id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); -+ pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%s, rc=%s) on %s (role=%s)", -+ id, task, task_id, task_status, actual_rc, node->details->uname, role2text(rsc->role)); - - interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); - interval = crm_parse_int(interval_s, "0"); -@@ -2089,7 +2104,6 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - node->details->uname, rsc->id); - } - -- actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); - CRM_CHECK(actual_rc != NULL, return FALSE); - actual_rc_i = crm_parse_int(actual_rc, NULL); - -@@ -2123,13 +2137,14 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - } - - if (expired) { -- int fc = get_failcount(node, rsc, &last_failure, data_set); -- -- if (rsc->failure_timeout > 0 && last_failure > 0 && fc == 0) { -- -- clear_failcount = 1; -- crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); -+ if (rsc->failure_timeout > 0) { -+ int fc = get_failcount_full(node, rsc, &last_failure, FALSE, data_set); -+ if(fc && get_failcount_full(node, rsc, &last_failure, TRUE, data_set) == 0) { -+ clear_failcount = 1; -+ crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); -+ } - } -+ - } else if (strstr(id, "last_failure") && - ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { - 
-@@ -2159,8 +2174,16 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - if (expired - && actual_rc_i != PCMK_EXECRA_NOT_RUNNING - && actual_rc_i != PCMK_EXECRA_RUNNING_MASTER && actual_rc_i != PCMK_EXECRA_OK) { -- crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", -- id, actual_rc_i, magic, node->details->uname); -+ if(interval == 0) { -+ crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ -+ } else if(node->details->online && node->details->unclean == FALSE) { -+ crm_notice("Re-initiated expired failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ -+ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "failure-timeout"); -+ } - goto done; - } - -@@ -2280,8 +2303,16 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - - action = custom_action(rsc, strdup(action_key), task, NULL, TRUE, FALSE, data_set); - if (expired) { -- crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", -- id, actual_rc_i, magic, node->details->uname); -+ if(interval == 0) { -+ crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ -+ } else if(node->details->online && node->details->unclean == FALSE) { -+ crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", -+ id, actual_rc_i, magic, node->details->uname); -+ /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ -+ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); -+ } - goto done; - - } else if ((action->on_fail == action_fail_ignore) || -@@ -2329,6 +2360,12 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - if (actual_rc_i == PCMK_EXECRA_NOT_RUNNING) { - clear_past_failure = TRUE; - -+ } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { -+ clear_past_failure = TRUE; -+ if (rsc->role < RSC_ROLE_STARTED) { -+ set_active(rsc); -+ } -+ - } else if (safe_str_eq(task, CRMD_ACTION_START)) { - rsc->role = RSC_ROLE_STARTED; - clear_past_failure = TRUE; -@@ -2436,7 +2473,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - } - - } else if (rsc->role < RSC_ROLE_STARTED) { -- /* start, migrate_to and migrate_from will land here */ -+ /* migrate_to and migrate_from will land here */ - pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); - set_active(rsc); - } -@@ -2444,7 +2481,6 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - /* clear any previous failure actions */ - if (clear_past_failure) { - switch (*on_fail) { -- case action_fail_block: - case action_fail_stop: - case action_fail_fence: - case action_fail_migrate: -@@ -2453,6 +2489,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - rsc->id, fail2text(*on_fail)); - break; - -+ case action_fail_block: - case action_fail_ignore: - case action_fail_recover: - *on_fail = action_fail_ignore; -@@ -2571,9 +2608,14 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, - * setting role=slave is not dangerous because no master will be - * promoted until the failed resource has been fully stopped - */ -- crm_warn("Forcing %s to stop after a failed demote action", rsc->id); - rsc->next_role = RSC_ROLE_STOPPED; -- rsc->role = RSC_ROLE_SLAVE; -+ if (action->on_fail == action_fail_block) { -+ rsc->role = RSC_ROLE_MASTER; -+ -+ } else { 
-+ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); -+ rsc->role = RSC_ROLE_SLAVE; -+ } - - } else if (compare_version("2.0", op_version) > 0 - && safe_str_eq(task, CRMD_ACTION_START)) { -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 57236ff..6bf67ad 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -385,7 +385,7 @@ custom_action(resource_t * rsc, char *key, const char *task, - Implied by calloc()... - action->actions_before = NULL; - action->actions_after = NULL; -- -+ - action->pseudo = FALSE; - action->dumped = FALSE; - action->processed = FALSE; -@@ -1281,9 +1281,10 @@ sort_op_by_callid(gconstpointer a, gconstpointer b) - int last_a = -1; - int last_b = -1; - -- crm_element_value_const_int(xml_a, "last-rc-change", &last_a); -- crm_element_value_const_int(xml_b, "last-rc-change", &last_b); -+ crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); -+ crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); - -+ crm_trace("rc-change: %d vs %d", last_a, last_b); - if (last_a >= 0 && last_a < last_b) { - sort_return(-1, "rc-change"); - -@@ -1347,25 +1348,18 @@ sort_op_by_callid(gconstpointer a, gconstpointer b) - } - - time_t --get_timet_now(pe_working_set_t * data_set) -+get_effective_time(pe_working_set_t * data_set) - { -- time_t now = 0; -- -- /* if (data_set && data_set->now) { */ -- /* now = data_set->now->tm_now; */ -- /* } */ -- -- if (now == 0) { -- /* eventually we should convert data_set->now into time_tm -- * for now, its only triggered by PE regression tests -- */ -- now = time(NULL); -- crm_crit("Defaulting to 'now'"); -- /* if (data_set && data_set->now) { */ -- /* data_set->now->tm_now = now; */ -- /* } */ -+ if(data_set) { -+ if (data_set->now == NULL) { -+ crm_trace("Recording a new 'now'"); -+ data_set->now = crm_time_new(NULL); -+ } -+ return crm_time_get_seconds_since_epoch(data_set->now); - } -- return now; -+ -+ crm_trace("Defaulting to 'now'"); -+ return time(NULL); - } - - struct fail_search { -@@ -1395,7 +1389,13 @@ get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data) - } - - int --get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set_t * data_set) -+get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) -+{ -+ return get_failcount_full(node, rsc, last_failure, TRUE, data_set); -+} -+ -+int -+get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, bool effective, pe_working_set_t * data_set) - { - char *key = NULL; - const char *value = NULL; -@@ -1429,9 +1429,32 @@ get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set - 
*last_failure = search.last; - } - -- if (search.count != 0 && search.last != 0 && rsc->failure_timeout) { -+ if(search.count && rsc->failure_timeout) { -+ /* Never time-out if blocking failures are configured */ -+ char *xml_name = clone_strip(rsc->id); -+ char *xpath = g_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name); -+ xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); -+ -+ free(xml_name); -+ free(xpath); -+ -+ if (numXpathResults(xpathObj) > 0) { -+ xmlNode *pref = getXpathResult(xpathObj, 0); -+ pe_warn("Setting %s.failure_timeout=%d in %s conflicts with on-fail=block: ignoring timeout", rsc->id, rsc->failure_timeout, ID(pref)); -+ rsc->failure_timeout = 0; -+#if 0 -+ /* A good idea? */ -+ } else if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { -+ /* In this case, stop.on-fail defaults to block in unpack_operation() */ -+ rsc->failure_timeout = 0; -+#endif -+ } -+ freeXpathObject(xpathObj); -+ } -+ -+ if (effective && search.count != 0 && search.last != 0 && rsc->failure_timeout) { - if (search.last > 0) { -- time_t now = get_timet_now(data_set); -+ time_t now = get_effective_time(data_set); - - if (now > (search.last + rsc->failure_timeout)) { - crm_debug("Failcount for %s on %s has expired (limit was %ds)", -@@ -1453,7 +1476,7 @@ get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set - - /* If it's a resource container, get its failcount plus all the failcounts of the resources within it */ - int --get_failcount_all(node_t * node, resource_t * rsc, int *last_failure, pe_working_set_t * data_set) -+get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) - { - int failcount_all = 0; - -@@ -1464,7 +1487,7 @@ get_failcount_all(node_t * node, resource_t * rsc, int *last_failure, pe_working - - for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { - resource_t *filler = (resource_t *) gIter->data; -- int filler_last_failure = 0; -+ time_t filler_last_failure = 0; - - failcount_all += get_failcount(node, filler, &filler_last_failure, data_set); - -@@ -1719,3 +1742,11 @@ rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, - - return data; - } -+ -+const char *rsc_printable_id(resource_t *rsc) -+{ -+ if (is_not_set(rsc->flags, pe_rsc_unique)) { -+ return ID(rsc->xml); -+ } -+ return rsc->id; -+} -diff --git a/lib/services/Makefile.am b/lib/services/Makefile.am -index 3ee3347..67d7237 100644 ---- a/lib/services/Makefile.am -+++ b/lib/services/Makefile.am -@@ -25,8 +25,8 @@ noinst_HEADERS = upstart.h systemd.h services_private.h - - libcrmservice_la_SOURCES = services.c services_linux.c - libcrmservice_la_LDFLAGS = -version-info 1:0:0 --libcrmservice_la_CFLAGS = $(GIO_CFLAGS) --libcrmservice_la_LIBADD = $(GIO_LIBS) -+libcrmservice_la_CFLAGS = $(GIO_CFLAGS) -DOCF_ROOT_DIR=\"@OCF_ROOT_DIR@\" -+libcrmservice_la_LIBADD = $(GIO_LIBS) $(top_builddir)/lib/common/libcrmcommon.la - - if BUILD_UPSTART - libcrmservice_la_SOURCES += upstart.c -diff --git a/lib/services/services.c b/lib/services/services.c -index 200fc3f..adfc508 100644 ---- a/lib/services/services.c -+++ b/lib/services/services.c -@@ -61,7 +61,7 @@ resources_action_create(const char *name, const char *standard, const char *prov - const char *agent, const char *action, int interval, int timeout, - GHashTable * params) - { -- svc_action_t *op; -+ svc_action_t *op = NULL; - - /* - * Do some up front sanity checks before we go off and -@@ -70,27 +70,27 @@ 
resources_action_create(const char *name, const char *standard, const char *prov - - if (crm_strlen_zero(name)) { - crm_err("A service or resource action must have a name."); -- return NULL; -+ goto return_error; - } - - if (crm_strlen_zero(standard)) { - crm_err("A service action must have a valid standard."); -- return NULL; -+ goto return_error; - } - - if (!strcasecmp(standard, "ocf") && crm_strlen_zero(provider)) { - crm_err("An OCF resource action must have a provider."); -- return NULL; -+ goto return_error; - } - - if (crm_strlen_zero(agent)) { - crm_err("A service or resource action must have an agent."); -- return NULL; -+ goto return_error; - } - - if (crm_strlen_zero(action)) { - crm_err("A service or resource action must specify an action."); -- return NULL; -+ goto return_error; - } - - if (safe_str_eq(action, "monitor") -@@ -163,8 +163,10 @@ resources_action_create(const char *name, const char *standard, const char *prov - if (strcasecmp(op->standard, "ocf") == 0) { - op->provider = strdup(provider); - op->params = params; -+ params = NULL; - - if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { -+ crm_err("Internal error: cannot create agent path"); - goto return_error; - } - op->opaque->args[0] = strdup(op->opaque->exec); -@@ -176,6 +178,7 @@ resources_action_create(const char *name, const char *standard, const char *prov - * of tacking on the LSB_ROOT_DIR path to the front */ - op->opaque->exec = strdup(op->agent); - } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { -+ crm_err("Internal error: cannot create agent path"); - goto return_error; - } - op->opaque->args[0] = strdup(op->opaque->exec); -@@ -206,6 +209,7 @@ resources_action_create(const char *name, const char *standard, const char *prov - op->opaque->exec = strdup(op->agent); - - } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { -+ crm_err("Internal error: cannot create agent path"); - goto return_error; - } - -@@ -253,9 +257,15 @@ resources_action_create(const char *name, const char *standard, const char *prov - op = NULL; - } - -+ if(params) { -+ g_hash_table_destroy(params); -+ } - return op; - - return_error: -+ if(params) { -+ g_hash_table_destroy(params); -+ } - services_action_free(op); - - return NULL; -@@ -311,6 +321,7 @@ services_action_free(svc_action_t * op) - free(op->opaque->args[i]); - } - -+ free(op->opaque); - free(op->rsc); - free(op->action); - -diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c -index 3402397..6192ccf 100644 ---- a/lib/services/services_linux.c -+++ b/lib/services/services_linux.c -@@ -30,6 +30,7 @@ - #include - #include - #include -+#include <sys/signalfd.h> - - #include "crm/crm.h" - #include "crm/common/mainloop.h" -@@ -37,6 +38,10 @@ - - #include "services_private.h" - -+#if SUPPORT_CIBSECRETS -+# include "crm/common/cib_secrets.h" -+#endif -+ - static inline void - set_fd_opts(int fd, int opts) - { -@@ -250,10 +255,9 @@ operation_finalize(svc_action_t * op) - } - - static void --operation_finished(mainloop_child_t * p, int status, int signo, int exitcode) -+operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- svc_action_t *op = mainloop_get_child_userdata(p); -- pid_t pid = mainloop_get_child_pid(p); -+ svc_action_t *op = mainloop_child_userdata(p); - char *prefix = g_strdup_printf("%s:%d", op->id, op->pid); - - mainloop_clear_child_userdata(p); -@@ -275,7 +279,7 @@ operation_finished(mainloop_child_t * p, int 
status, int signo, int exitcode) - } - - if (signo) { -- if (mainloop_get_child_timeout(p)) { -+ if (mainloop_child_timeout(p)) { - crm_warn("%s - timed out after %dms", prefix, op->timeout); - op->status = PCMK_LRM_OP_TIMEOUT; - op->rc = PCMK_OCF_TIMEOUT; -@@ -304,6 +308,8 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - int rc, lpc; - int stdout_fd[2]; - int stderr_fd[2]; -+ sigset_t mask; -+ sigset_t old_mask; - - if (pipe(stdout_fd) < 0) { - crm_err("pipe() failed"); -@@ -313,6 +319,16 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - crm_err("pipe() failed"); - } - -+ if (synchronous) { -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGCHLD); -+ sigemptyset(&old_mask); -+ -+ if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) { -+ crm_perror(LOG_ERR, "sigprocmask() failed"); -+ } -+ } -+ - op->pid = fork(); - switch (op->pid) { - case -1: -@@ -349,6 +365,20 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - close(lpc); - } - -+#if SUPPORT_CIBSECRETS -+ if (replace_secret_params(op->rsc, op->params) < 0) { -+ /* replacing secrets failed! */ -+ if (safe_str_eq(op->action,"stop")) { -+ /* don't fail on stop! */ -+ crm_info("proceeding with the stop operation for %s", op->rsc); -+ -+ } else { -+ crm_err("failed to get secrets for %s, " -+ "considering resource not configured", op->rsc); -+ _exit(PCMK_OCF_NOT_CONFIGURED); -+ } -+ } -+#endif - /* Setup environment correctly */ - add_OCF_env_vars(op); - -@@ -392,27 +422,101 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - - if (synchronous) { - int status = 0; -- int timeout = (1 + op->timeout) / 1000; -+ int timeout = op->timeout; -+ int sfd = -1; -+ time_t start = -1; -+ struct pollfd fds[3]; -+ int wait_rc = 0; -+ -+ sfd = signalfd(-1, &mask, 0); -+ if (sfd < 0) { -+ crm_perror(LOG_ERR, "signalfd() failed"); -+ } -+ -+ fds[0].fd = op->opaque->stdout_fd; -+ fds[0].events = POLLIN; -+ fds[0].revents = 0; -+ -+ fds[1].fd = op->opaque->stderr_fd; -+ fds[1].events = POLLIN; -+ fds[1].revents = 0; -+ -+ fds[2].fd = sfd; -+ fds[2].events = POLLIN; -+ fds[2].revents = 0; - - crm_trace("Waiting for %d", op->pid); -- while ((op->timeout < 0 || timeout > 0) && waitpid(op->pid, &status, WNOHANG) <= 0) { -- sleep(1); -- read_output(op->opaque->stdout_fd, op); -- read_output(op->opaque->stderr_fd, op); -- timeout--; -- } -+ start = time(NULL); -+ do { -+ int poll_rc = poll(fds, 3, timeout); -+ -+ if (poll_rc > 0) { -+ if (fds[0].revents & POLLIN) { -+ read_output(op->opaque->stdout_fd, op); -+ } -+ -+ if (fds[1].revents & POLLIN) { -+ read_output(op->opaque->stderr_fd, op); -+ } -+ -+ if (fds[2].revents & POLLIN) { -+ struct signalfd_siginfo fdsi; -+ ssize_t s; -+ -+ s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); -+ if (s != sizeof(struct signalfd_siginfo)) { -+ crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd); -+ -+ } else if (fdsi.ssi_signo == SIGCHLD) { -+ wait_rc = waitpid(op->pid, &status, WNOHANG); -+ -+ if (wait_rc < 0){ -+ crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid); -+ -+ } else if (wait_rc > 0) { -+ break; -+ } -+ } -+ } -+ -+ } else if (poll_rc == 0) { -+ timeout = 0; -+ break; -+ -+ } else if (poll_rc < 0) { -+ if (errno != EINTR) { -+ crm_perror(LOG_ERR, "poll() failed"); -+ break; -+ } -+ } -+ -+ timeout = op->timeout - (time(NULL) - start) * 1000; -+ -+ } while ((op->timeout < 0 || timeout > 0)); - - crm_trace("Child done: %d", op->pid); -- if (timeout == 0) { -- int killrc = kill(op->pid, 9 /*SIGKILL*/); -+ if (wait_rc <= 
0) { -+ int killrc = kill(op->pid, SIGKILL); - - op->rc = PCMK_OCF_UNKNOWN_ERROR; -- op->status = PCMK_LRM_OP_TIMEOUT; -- crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); -+ if (op->timeout > 0 && timeout <= 0) { -+ op->status = PCMK_LRM_OP_TIMEOUT; -+ crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); -+ -+ } else { -+ op->status = PCMK_LRM_OP_ERROR; -+ } - - if (killrc && errno != ESRCH) { - crm_err("kill(%d, KILL) failed: %d", op->pid, errno); - } -+ /* -+ * From sigprocmask(2): -+ * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. -+ * -+ * This makes it safe to skip WNOHANG here -+ */ -+ waitpid(op->pid, &status, 0); - - } else if (WIFEXITED(status)) { - op->status = PCMK_LRM_OP_DONE; -@@ -434,9 +538,19 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous) - read_output(op->opaque->stdout_fd, op); - read_output(op->opaque->stderr_fd, op); - -+ close(op->opaque->stdout_fd); -+ close(op->opaque->stderr_fd); -+ close(sfd); -+ -+ if (sigismember(&old_mask, SIGCHLD) == 0) { -+ if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { -+ crm_perror(LOG_ERR, "sigprocmask() to unblocked failed"); -+ } -+ } -+ - } else { - crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); -- mainloop_add_child(op->pid, op->timeout, op->id, op, operation_finished); -+ mainloop_child_add(op->pid, op->timeout, op->id, op, operation_finished); - - op->opaque->stdout_gsource = mainloop_add_fd(op->id, - G_PRIORITY_LOW, -diff --git a/lib/transition/Makefile.am b/lib/transition/Makefile.am -index 49c7113..da87e61 100644 ---- a/lib/transition/Makefile.am -+++ b/lib/transition/Makefile.am -@@ -27,8 +27,9 @@ lib_LTLIBRARIES = libtransitioner.la - noinst_HEADERS = - libtransitioner_la_SOURCES = unpack.c graph.c utils.c - --libtransitioner_la_LDFLAGS = -version-info 2:0:0 -+libtransitioner_la_LDFLAGS = -version-info 2:1:0 - libtransitioner_la_CFLAGS = -I$(top_builddir) -+libtransitioner_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la - - clean-generic: - rm -f *~ -diff --git a/lib/transition/unpack.c b/lib/transition/unpack.c -index 3187d21..90b7a96 100644 ---- a/lib/transition/unpack.c -+++ b/lib/transition/unpack.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -166,10 +166,10 @@ unpack_graph(xmlNode * xml_graph, const char *reference) - - - - rc = rc; - op->op_status = status; -+ op->t_run = time(NULL); -+ op->t_rcchange = op->t_run; - - op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, - g_hash_destroy_str, g_hash_destroy_str); -diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am -index 86f2a35..73f1d7e 100644 ---- a/lrmd/Makefile.am -+++ b/lrmd/Makefile.am -@@ -4,12 +4,12 @@ - # modify it under the terms of the GNU Lesser General Public - # License as published by the Free Software Foundation; either - # version 2.1 of the License, or (at your option) any later version. --# -+# - # This library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Lesser General Public License for more details. --# -+# - # You should have received a copy of the GNU Lesser General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -21,16 +21,38 @@ testdir = $(datadir)/$(PACKAGE)/tests/lrmd - test_SCRIPTS = regression.py - - lrmdlibdir = $(CRM_DAEMON_DIR) -- --## binary progs - lrmdlib_PROGRAMS = lrmd lrmd_test - --lrmd_SOURCES = main.c lrmd.c tls_backend.c -+initdir = $(INITDIR) -+init_SCRIPTS = pacemaker_remote -+sbin_PROGRAMS = pacemaker_remoted -+ -+if HAVE_SYSTEMD -+systemdunit_DATA = pacemaker_remote.service -+endif -+ -+if BUILD_HELP -+man8_MANS = $(sbin_PROGRAMS:%=%.8) -+endif -+ -+%.8: % -+ echo Creating $@ -+ chmod a+x $(top_builddir)/lrmd/$< -+ $(top_builddir)/lrmd/$< --help -+ $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/lrmd/$< -+ -+lrmd_SOURCES = main.c lrmd.c - lrmd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/lrmd/liblrmd.la \ - $(top_builddir)/lib/fencing/libstonithd.la - -+ -+pacemaker_remoted_SOURCES = main.c lrmd.c tls_backend.c ipc_proxy.c -+pacemaker_remoted_CFLAGS = -DSUPPORT_REMOTE -+pacemaker_remoted_LDADD = $(lrmd_LDADD) -+ -+ - lrmd_test_SOURCES = test.c - lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/lrmd/liblrmd.la \ -diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c -new file mode 100644 -index 0000000..bbf9b24 ---- /dev/null -+++ b/lrmd/ipc_proxy.c -@@ -0,0 +1,374 @@ -+/* -+ * Copyright (c) 2012 David Vossel -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ */ -+ -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static qb_ipcs_service_t *cib_ro = NULL; -+static qb_ipcs_service_t *cib_rw = NULL; -+static qb_ipcs_service_t *cib_shm = NULL; -+ -+static qb_ipcs_service_t *attrd_ipcs = NULL; -+static qb_ipcs_service_t *crmd_ipcs = NULL; -+static qb_ipcs_service_t *stonith_ipcs = NULL; -+ -+/* ipc providers == crmd clients connecting from cluster nodes */ -+GHashTable *ipc_providers; -+/* ipc clients == things like cibadmin, crm_resource, connecting locally */ -+GHashTable *ipc_clients; -+ -+static int32_t -+ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel) -+{ -+ void *key = NULL; -+ void *value = NULL; -+ crm_client_t *client; -+ crm_client_t *ipc_proxy = NULL; -+ GHashTableIter iter; -+ xmlNode *msg; -+ -+ crm_trace("Connection %p on channel %s", c, ipc_channel); -+ -+ if (g_hash_table_size(ipc_providers) == 0) { -+ crm_err("No ipc providers available for uid %d gid %d", uid, gid); -+ return -EREMOTEIO; -+ } -+ -+ g_hash_table_iter_init(&iter, ipc_providers); -+ if (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { -+ /* grab the first provider available, any provider in this -+ * table will work. Usually there will only be one. These are -+ * lrmd client connections originating for a cluster node's crmd. */ -+ ipc_proxy = value; -+ } else { -+ crm_err("No ipc providers available for uid %d gid %d", uid, gid); -+ return -EREMOTEIO; -+ } -+ -+ /* this new client is a local ipc client on a remote -+ * guest wanting to access the ipc on any available cluster nodes */ -+ client = crm_client_new(c, uid, gid); -+ if (client == NULL) { -+ return -EREMOTEIO; -+ } -+ -+ /* This ipc client is bound to a single ipc provider. 
If the -+ * provider goes away, this client is disconnected */ -+ client->userdata = strdup(ipc_proxy->id); -+ -+ g_hash_table_insert(ipc_clients, client->id, client); -+ -+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "new"); -+ crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ crm_debug("created new ipc proxy with session id %s", client->id); -+ return 0; -+} -+ -+static int32_t -+crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD); -+} -+ -+static int32_t -+attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, T_ATTRD); -+} -+ -+static int32_t -+stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, "stonith-ng"); -+} -+ -+static int32_t -+cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, cib_channel_rw); -+} -+ -+static int32_t -+cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -+{ -+ return ipc_proxy_accept(c, uid, gid, cib_channel_ro); -+} -+ -+static void -+ipc_proxy_created(qb_ipcs_connection_t * c) -+{ -+ crm_trace("Connection %p", c); -+} -+ -+void -+ipc_proxy_forward_client(crm_client_t *ipc_proxy, xmlNode *xml) -+{ -+ const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION); -+ const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP); -+ xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG); -+ crm_client_t *ipc_client = crm_client_get_by_id(session); -+ int rc = 0; -+ -+ if (ipc_client == NULL) { -+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, session); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ return; -+ } -+ -+ /* This is an event or response from the ipc provider -+ * going to the local ipc client. -+ * -+ * Looking at the chain of events. -+ * -+ * -----remote node----------------|---- cluster node ------ -+ * ipc_client <--1--> this code <--2--> crmd <----3----> ipc server -+ * -+ * This function is receiving a msg from connection 2 -+ * and forwarding it to connection 1. -+ */ -+ if (safe_str_eq(msg_type, "event")) { -+ rc = crm_ipcs_send(ipc_client, 0, msg, TRUE); -+ } else if (safe_str_eq(msg_type, "response")) { -+ int msg_id = 0; -+ crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id); -+ rc = crm_ipcs_send(ipc_client, msg_id, msg, FALSE); -+ } else if (safe_str_eq(msg_type, "destroy")) { -+ qb_ipcs_disconnect(ipc_client->ipcs); -+ } else { -+ crm_err("Unknown ipc proxy msg type %s" , msg_type); -+ } -+ -+ if (rc < 0) { -+ crm_warn("IPC Proxy send to ipc client %s failed, rc = %d", ipc_client->id, rc); -+ } -+} -+ -+static int32_t -+ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) -+{ -+ uint32_t id = 0; -+ uint32_t flags = 0; -+ crm_client_t *client = crm_client_get(c); -+ crm_client_t *ipc_proxy = crm_client_get_by_id(client->userdata); -+ xmlNode *request = NULL; -+ xmlNode *msg = NULL; -+ -+ if (!ipc_proxy) { -+ qb_ipcs_disconnect(client->ipcs); -+ return 0; -+ } -+ -+ /* This is a request from the local ipc client going -+ * to the ipc provider. -+ * -+ * Looking at the chain of events. 
-+ * -+ * -----remote node----------------|---- cluster node ------ -+ * ipc_client <--1--> this code <--2--> crmd <----3----> ipc server -+ * -+ * This function is receiving a request from connection -+ * 1 and forwarding it to connection 2. -+ */ -+ request = crm_ipcs_recv(client, data, size, &id, &flags); -+ -+ if (!request) { -+ return 0; -+ } -+ -+ CRM_CHECK(client != NULL, crm_err("Invalid client"); -+ return FALSE); -+ CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); -+ return FALSE); -+ -+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "request"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); -+ crm_xml_add(msg, F_LRMD_IPC_USER, client->user); -+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id); -+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags); -+ add_message_xml(msg, F_LRMD_IPC_MSG, request); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ -+ return 0; -+} -+ -+static int32_t -+ipc_proxy_closed(qb_ipcs_connection_t * c) -+{ -+ crm_client_t *client = crm_client_get(c); -+ crm_client_t *ipc_proxy = crm_client_get_by_id(client->userdata); -+ -+ crm_trace("Connection %p", c); -+ -+ if (ipc_proxy) { -+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); -+ crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); -+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); -+ lrmd_server_send_notify(ipc_proxy, msg); -+ free_xml(msg); -+ } -+ -+ g_hash_table_remove(ipc_clients, client->id); -+ -+ free(client->userdata); -+ client->userdata = NULL; -+ crm_client_destroy(client); -+ return 0; -+} -+ -+static void -+ipc_proxy_destroy(qb_ipcs_connection_t * c) -+{ -+ crm_trace("Connection %p", c); -+} -+ -+static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { -+ .connection_accept = crmd_proxy_accept, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { -+ .connection_accept = attrd_proxy_accept, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { -+ .connection_accept = stonith_proxy_accept, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { -+ .connection_accept = cib_proxy_accept_ro, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { -+ .connection_accept = cib_proxy_accept_rw, -+ .connection_created = ipc_proxy_created, -+ .msg_process = ipc_proxy_dispatch, -+ .connection_closed = ipc_proxy_closed, -+ .connection_destroyed = ipc_proxy_destroy -+}; -+ -+void -+ipc_proxy_add_provider(crm_client_t *ipc_proxy) -+{ -+ if (ipc_providers == NULL) { -+ return; -+ } -+ g_hash_table_insert(ipc_providers, ipc_proxy->id, ipc_proxy); -+} -+ -+void -+ipc_proxy_remove_provider(crm_client_t *ipc_proxy) -+{ -+ GHashTableIter iter; -+ crm_client_t *ipc_client = NULL; -+ char *key = NULL; -+ -+ if (ipc_providers == NULL) { -+ return; -+ } -+ -+ g_hash_table_remove(ipc_providers, 
ipc_proxy->id); -+ -+ g_hash_table_iter_init(&iter, ipc_clients); -+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) { -+ const char *proxy_id = ipc_client->userdata; -+ if (safe_str_eq(proxy_id, ipc_proxy->id)) { -+ crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.", -+ ipc_client->id, ipc_client->pid); -+ qb_ipcs_disconnect(ipc_client->ipcs); -+ } -+ } -+} -+ -+void -+ipc_proxy_init(void) -+{ -+ ipc_clients = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); -+ ipc_providers = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); -+ -+ cib_ipc_servers_init(&cib_ro, -+ &cib_rw, -+ &cib_shm, -+ &cib_proxy_callbacks_ro, -+ &cib_proxy_callbacks_rw); -+ -+ attrd_ipc_server_init(&attrd_ipcs, &attrd_proxy_callbacks); -+ stonith_ipc_server_init(&stonith_ipcs, &stonith_proxy_callbacks); -+ crmd_ipcs = crmd_ipc_server_init(&crmd_proxy_callbacks); -+ if (crmd_ipcs == NULL) { -+ crm_err("Failed to create crmd server: exiting and inhibiting respawn."); -+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); -+ crm_exit(DAEMON_RESPAWN_STOP); -+ } -+} -+ -+void -+ipc_proxy_cleanup(void) -+{ -+ if (ipc_providers) { -+ g_hash_table_destroy(ipc_providers); -+ } -+ if (ipc_clients) { -+ g_hash_table_destroy(ipc_clients); -+ } -+ cib_ipc_servers_destroy(cib_ro, cib_rw, cib_shm); -+ qb_ipcs_destroy(attrd_ipcs); -+ qb_ipcs_destroy(stonith_ipcs); -+ qb_ipcs_destroy(crmd_ipcs); -+ cib_ro = NULL; -+ cib_rw = NULL; -+ cib_shm = NULL; -+ ipc_providers = NULL; -+ ipc_clients = NULL; -+} -diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c -index 46c0eac..a4747cb 100644 ---- a/lrmd/lrmd.c -+++ b/lrmd/lrmd.c -@@ -5,12 +5,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. -- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -760,8 +760,8 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) - } - } - -- /* Stonith automatically registers devices from the CIB when changes occur, -- * but to avoid a possible race condition between stonith receiving the CIB update -+ /* Stonith automatically registers devices from the IPC when changes occur, -+ * but to avoid a possible race condition between stonith receiving the IPC update - * and the lrmd requesting that resource, the lrmd still registers the device as well. - * Stonith knows how to handle duplicate device registrations correctly. */ - rc = stonith_api->cmds->register_device(stonith_api, -@@ -870,10 +870,11 @@ lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) - - action->cb_data = cmd; - -- /* 'cmd' may not be valid after this point -+ /* 'cmd' may not be valid after this point if -+ * services_action_async() returned TRUE - * - * Upstart and systemd both synchronously determine monitor/status -- * results and call action_complete (which may free 'cmd') if necessary -+ * results and call action_complete (which may free 'cmd') if necessary. 
- */ - if (services_action_async(action, action_complete)) { - return TRUE; -@@ -996,11 +997,25 @@ static int - process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request) - { - xmlNode *reply = create_xml_node(NULL, "reply"); -+ const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); -+ const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); -+ -+ if (safe_str_neq(protocol_version, LRMD_PROTOCOL_VERSION)) { -+ crm_xml_add_int(reply, F_LRMD_RC, -EPROTO); -+ crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); -+ } - - crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); - crm_xml_add(reply, F_LRMD_CLIENTID, client->id); - lrmd_server_send_reply(client, id, reply); - -+ if (crm_is_true(is_ipc_provider)) { -+ /* this is a remote connection from a cluster nodes crmd */ -+#ifdef SUPPORT_REMOTE -+ ipc_proxy_add_provider(client); -+#endif -+ } -+ - free_xml(reply); - return pcmk_ok; - } -@@ -1250,12 +1265,16 @@ process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) - const char *op = crm_element_value(request, F_LRMD_OPERATION); - int do_reply = 0; - int do_notify = 0; -- int exit = 0; - - crm_trace("Processing %s operation from %s", op, client->id); - crm_element_value_int(request, F_LRMD_CALLID, &call_id); - -- if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { -+ if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { -+#ifdef SUPPORT_REMOTE -+ ipc_proxy_forward_client(client, request); -+#endif -+ do_reply = 1; -+ } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { - rc = process_lrmd_signon(client, id, request); - } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { - rc = process_lrmd_rsc_register(client, id, request); -@@ -1295,8 +1314,4 @@ process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) - if (do_notify) { - send_generic_notify(rc, request); - } -- -- if (exit) { -- lrmd_shutdown(0); -- } - } -diff --git a/lrmd/lrmd_private.h b/lrmd/lrmd_private.h -index 84805bf..f625293 100644 ---- a/lrmd/lrmd_private.h -+++ b/lrmd/lrmd_private.h -@@ -64,7 +64,7 @@ void lrmd_tls_server_destroy(void); - /* Hidden in lrmd client lib */ - extern int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, - const char *msg_type); --extern int lrmd_tls_set_key(gnutls_datum_t * key, const char *location); -+extern int lrmd_tls_set_key(gnutls_datum_t * key); - # endif - - int lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply); -@@ -92,4 +92,13 @@ stonith_t *get_stonith_connection(void); - */ - void stonith_connection_failed(void); - -+#ifdef SUPPORT_REMOTE -+void ipc_proxy_init(void); -+void ipc_proxy_cleanup(void); -+void ipc_proxy_add_provider(crm_client_t *client); -+void ipc_proxy_remove_provider(crm_client_t *client); -+void ipc_proxy_forward_client(crm_client_t *client, xmlNode *xml); - #endif -+ -+#endif -+ -diff --git a/lrmd/main.c b/lrmd/main.c -index 59ee22c..1020b98 100644 ---- a/lrmd/main.c -+++ b/lrmd/main.c -@@ -5,12 +5,12 @@ - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -- * -+ * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -31,11 +31,13 @@ - - #include - -+#if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE) -+# define ENABLE_PCMK_REMOTE -+#endif -+ - GMainLoop *mainloop = NULL; --qb_ipcs_service_t *ipcs = NULL; -+static qb_ipcs_service_t *ipcs = NULL; - stonith_t *stonith_api = NULL; --static gboolean enable_remote = FALSE; --static int remote_port = 0; - int lrmd_call_id = 0; - - static void -@@ -149,6 +151,9 @@ lrmd_ipc_closed(qb_ipcs_connection_t * c) - - crm_trace("Connection %p", c); - client_disconnect_cleanup(client->id); -+#ifdef ENABLE_PCMK_REMOTE -+ ipc_proxy_remove_provider(client); -+#endif - crm_client_destroy(client); - return 0; - } -@@ -175,7 +180,7 @@ lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply) - switch (client->kind) { - case CRM_CLIENT_IPC: - return crm_ipcs_send(client, id, reply, FALSE); --#ifdef HAVE_GNUTLS_GNUTLS_H -+#ifdef ENABLE_PCMK_REMOTE - case CRM_CLIENT_TLS: - return lrmd_tls_send_msg(client->remote, reply, id, "reply"); - #endif -@@ -196,7 +201,7 @@ lrmd_server_send_notify(crm_client_t * client, xmlNode * msg) - return -1; - } - return crm_ipcs_send(client, 0, msg, TRUE); --#ifdef HAVE_GNUTLS_GNUTLS_H -+#ifdef ENABLE_PCMK_REMOTE - case CRM_CLIENT_TLS: - if (client->remote == NULL) { - crm_trace("Asked to send event to disconnected remote client"); -@@ -217,7 +222,7 @@ lrmd_shutdown(int nsig) - if (ipcs) { - mainloop_del_ipc_server(ipcs); - } -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - /* *INDENT-OFF* */ -@@ -226,10 +231,11 @@ static struct crm_option long_options[] = { - {"help", 0, 0, '?', "\tThis text"}, - {"version", 0, 0, '$', "\tVersion information" }, - {"verbose", 0, 0, 'V', "\tIncrease debug output"}, -- {"tls_enable", 0, 0, 't', "\tEnable TLS connection."}, -- {"tls_port", 1, 0, 'p', "\tTLS port to listen to, defaults to 1984"}, - - {"logfile", 1, 0, 'l', "\tSend logs to the additional named logfile"}, -+ -+ /* For compatibility with the original lrmd */ -+ {"dummy", 0, 0, 'r', NULL, 1}, - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -241,9 +247,15 @@ main(int argc, char **argv) - int flag = 0; - int index = 0; - -- crm_log_init("lrmd", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); -+#ifdef ENABLE_PCMK_REMOTE -+ crm_log_init("pacemaker_remoted", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_set_options(NULL, "[options]", long_options, - "Daemon for controlling services confirming to different standards"); -+#else -+ crm_log_init("lrmd", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); -+ crm_set_options(NULL, "[options]", long_options, -+ "Pacemaker Remote daemon for extending pacemaker functionality to remote nodes."); -+#endif - - while (1) { - flag = crm_get_option(argc, argv, &index); -@@ -252,16 +264,13 @@ main(int argc, char **argv) - } - - switch (flag) { -+ case 'r': -+ break; - case 'l': - crm_add_logfile(optarg); - break; -- case 't': -- enable_remote = TRUE; -- break; -- case 'p': -- remote_port = atoi(optarg); - case 'V': -- set_crm_log_level(crm_log_level + 1); -+ crm_bump_log_level(argc, argv); - break; - case '?': - case '$': -@@ -273,28 +282,28 @@ main(int argc, char **argv) - } - } - -- if (enable_remote && !remote_port) { -- remote_port = DEFAULT_REMOTE_PORT; -- } -+ /* Used by RAs - Leave owned by root */ -+ crm_build_path(CRM_RSCTMP_DIR, 0755); - - rsc_list = 
g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc); - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); - if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - -- if (enable_remote) { --#ifdef HAVE_GNUTLS_GNUTLS_H -+#ifdef ENABLE_PCMK_REMOTE -+ { -+ const char *remote_port_str = getenv("PCMK_remote_port"); -+ int remote_port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; -+ - if (lrmd_init_remote_tls_server(remote_port) < 0) { -- crm_err("Failed to create TLS server: shutting down and inhibiting respawn"); -- crm_exit(100); -+ crm_err("Failed to create TLS server on port %d: shutting down and inhibiting respawn", remote_port); -+ crm_exit(DAEMON_RESPAWN_STOP); - } --#else -- crm_err("GNUTLS not enabled in this build, can not establish remote server"); -- crm_exit(100); --#endif -+ ipc_proxy_init(); - } -+#endif - - mainloop_add_signal(SIGTERM, lrmd_shutdown); - mainloop = g_main_new(FALSE); -@@ -302,12 +311,11 @@ main(int argc, char **argv) - g_main_run(mainloop); - - mainloop_del_ipc_server(ipcs); -- crm_client_cleanup(); -- if (enable_remote) { --#ifdef HAVE_GNUTLS_GNUTLS_H -- lrmd_tls_server_destroy(); -+#ifdef ENABLE_PCMK_REMOTE -+ lrmd_tls_server_destroy(); -+ ipc_proxy_cleanup(); - #endif -- } -+ crm_client_cleanup(); - - g_hash_table_destroy(rsc_list); - -diff --git a/lrmd/pacemaker_remote.in b/lrmd/pacemaker_remote.in -new file mode 100644 -index 0000000..ba89087 ---- /dev/null -+++ b/lrmd/pacemaker_remote.in -@@ -0,0 +1,155 @@ -+#!/bin/bash -+ -+# Authors: -+# Andrew Beekhof -+# -+# License: Revised BSD -+ -+# chkconfig: - 99 01 -+# description: Pacemaker Cluster Manager -+# processname: pacemaker_remoted -+# -+### BEGIN INIT INFO -+# Provides: pacemaker_remoted -+# Required-Start: $network -+# Should-Start: $syslog -+# Required-Stop: $network -+# Default-Start: -+# Default-Stop: -+# Short-Description: Starts and stops the Pacemaker remote agent for non-cluster nodes -+# Description: Starts and stops the Pacemaker remote agent for non-cluster nodes -+### END INIT INFO -+ -+desc="Pacemaker Remote Agent" -+prog="pacemaker_remoted" -+cman=0 -+ -+# set secure PATH -+PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@" -+ -+checkrc() { -+ if [ $? = 0 ]; then -+ success -+ else -+ failure -+ fi -+} -+ -+success() -+{ -+ echo -ne "[ OK ]\r" -+} -+ -+failure() -+{ -+ echo -ne "[FAILED]\r" -+} -+ -+status() -+{ -+ pid=$(pidof $1 2>/dev/null) -+ rtrn=$? -+ if [ $rtrn -ne 0 ]; then -+ echo "$1 is stopped" -+ else -+ echo "$1 (pid $pid) is running..." -+ fi -+ return $rtrn -+} -+ -+# rpm based distros -+if [ -d @sysconfdir@/sysconfig ]; then -+ [ -f @INITDIR@/functions ] && . @INITDIR@/functions -+ [ -f @sysconfdir@/sysconfig/pacemaker ] && . @sysconfdir@/sysconfig/pacemaker -+ [ -z "$LOCK_FILE" ] && LOCK_FILE="@localstatedir@/lock/subsys/pacemaker_remote" -+fi -+ -+# deb based distros -+if [ -d @sysconfdir@/default ]; then -+ [ -f @sysconfdir@/default/pacemaker ] && . @sysconfdir@/default/pacemaker -+ [ -z "$LOCK_FILE" ] && LOCK_FILE="@localstatedir@/lock/pacemaker_remote" -+fi -+ -+start() -+{ -+ echo -n "Starting $desc: " -+ -+ # most recent distributions use tmpfs for $@localstatedir@/run -+ # to avoid to clean it up on every boot. 
-+ # they also assume that init scripts will create -+ # required subdirectories for proper operations -+ mkdir -p @localstatedir@/run -+ -+ if status $prog > /dev/null 2>&1; then -+ success -+ else -+ $prog > /dev/null 2>&1 & -+ -+ # Time to connect to corosync and fail -+ sleep 5 -+ -+ if status $prog > /dev/null 2>&1; then -+ touch $LOCK_FILE -+ pidof $prog > @localstatedir@/run/$prog.pid -+ success -+ else -+ failure -+ rtrn=1 -+ fi -+ fi -+ echo -+} -+ -+stop() -+{ -+ if status $prog > /dev/null 2>&1; then -+ echo -n "Signaling $desc to terminate: " -+ kill -TERM $(pidof $prog) > /dev/null 2>&1 -+ success -+ echo -+ -+ echo -n "Waiting for $desc to unload:" -+ while status $prog > /dev/null 2>&1; do -+ sleep 1 -+ echo -n "." -+ done -+ fi -+ -+ rm -f $LOCK_FILE -+ rm -f @localstatedir@/run/$prog.pid -+ success -+ echo -+} -+ -+rtrn=0 -+ -+case "$1" in -+start) -+ start -+;; -+restart|reload|force-reload) -+ stop -+ start -+;; -+condrestart|try-restart) -+ if status $prog > /dev/null 2>&1; then -+ stop -+ start -+ rtrn=$? -+ fi -+;; -+status) -+ status $prog -+ rtrn=$? -+;; -+stop) -+ stop -+ rtrn=$? -+;; -+*) -+ echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" -+ rtrn=2 -+;; -+esac -+ -+exit $rtrn -diff --git a/lrmd/pacemaker_remote.service.in b/lrmd/pacemaker_remote.service.in -new file mode 100644 -index 0000000..f73e943 ---- /dev/null -+++ b/lrmd/pacemaker_remote.service.in -@@ -0,0 +1,22 @@ -+[Unit] -+Description=Pacemaker Remote Service -+After=network.target -+Requires=network.target -+ -+[Install] -+WantedBy=multi-user.target -+ -+[Service] -+Type=simple -+KillMode=process -+NotifyAccess=none -+SysVStartPriority=99 -+EnvironmentFile=-/etc/sysconfig/pacemaker -+ -+ExecStart=@sbindir@/pacemaker_remoted -+ -+TimeoutStopSec=30s -+TimeoutStartSec=30s -+ -+# Restart options include: no, on-success, on-failure, on-abort or always -+Restart=on-failure -diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in -index 7c33d9c..9efe3b8 100755 ---- a/lrmd/regression.py.in -+++ b/lrmd/regression.py.in -@@ -36,7 +36,12 @@ class Test: - self.name = name - self.description = description - self.cmds = [] -- self.daemon_location = "@CRM_DAEMON_DIR@/lrmd" -+ -+ if tls: -+ self.daemon_location = "/usr/sbin/pacemaker_remoted" -+ else: -+ self.daemon_location = "@CRM_DAEMON_DIR@/lrmd" -+ - self.test_tool_location = "@CRM_DAEMON_DIR@/lrmd_test" - self.verbose = verbose - self.tls = tls -@@ -72,15 +77,15 @@ class Test: - - def start_environment(self): - ### make sure we are in full control here ### -- cmd = shlex.split("killall -q -9 stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test") -+ cmd = shlex.split("killall -q -9 stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test pacemaker_remoted") - test = subprocess.Popen(cmd, stdout=subprocess.PIPE) - test.wait() - - additional_args = "" - -- self.stonith_process = subprocess.Popen(shlex.split("@CRM_DAEMON_DIR@/stonithd -s")) -- if self.tls: -- additional_args = additional_args + " -t " -+ if self.tls == 0: -+ self.stonith_process = subprocess.Popen(shlex.split("@CRM_DAEMON_DIR@/stonithd -s")) -+ - if self.verbose: - additional_args = additional_args + " -VVV " - -@@ -123,7 +128,7 @@ class Test: - self.__new_cmd(self.test_tool_location, args, 0, "", kill=killProc) - - def add_expected_fail_cmd(self, args): -- self.__new_cmd(self.test_tool_location, args, 255, "") -+ self.__new_cmd(self.test_tool_location, args, 1, "") - - def get_exitcode(self): - return self.result_exitcode -@@ -170,6 +175,12 @@ class Test: - def run(self): - 
res = 0 - i = 1 -+ -+ if self.tls and self.name.count("stonith") != 0: -+ self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) -+ print self.result_txt -+ return res -+ - self.start_environment() - - if self.verbose: -@@ -204,13 +215,13 @@ class Tests: - self.tests = [] - self.verbose = verbose - self.tls = tls; -- self.rsc_classes = output_from_command("crm_resource --list-standards") -+ self.rsc_classes = output_from_command("crm_resource --list-standards") - self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line -- print "Testing "+repr(self.rsc_classes) -+ self.need_authkey = 0 -+ if self.tls: -+ self.rsc_classes.remove("stonith") - -- if not os.path.isfile("/etc/pacemaker/authkey"): -- os.system("mkdir -p /etc/pacemaker") -- os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") -+ print "Testing "+repr(self.rsc_classes) - - self.common_cmds = { - "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc -t 3000 -C ocf -P pacemaker -T Dummy", -@@ -252,7 +263,7 @@ class Tests: - "upstart_cancel_line" : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", - "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", - -- "service_reg_line" : "-c register_rsc -r service_test_rsc -t 3000 -C service -T lrmd_dummy_daemon", -+ "service_reg_line" : "-c register_rsc -r service_test_rsc -t 3000 -C service -T LSBDummy", - "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", - "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" -t 3000", - "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", -@@ -265,7 +276,7 @@ class Tests: - "service_cancel_line" : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", - "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", - -- "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc -t 3000 -C lsb -T lrmd_dummy_daemon", -+ "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc -t 3000 -C lsb -T LSBDummy", - "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", - "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" -t 3000", - "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", -@@ -298,8 +309,14 @@ class Tests: - return test - - def setup_test_environment(self): -+ os.system("service pacemaker_remote stop") - self.cleanup_test_environment() - -+ if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): -+ self.need_authkey = 1 -+ os.system("mkdir -p /etc/pacemaker") -+ os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") -+ - ### Make fake systemd daemon and unit file ### - dummy_daemon = "#!/bin/bash\nwhile true\ndo\nsleep 5\ndone" - dummy_service_file = ("[Unit]\n" -@@ -370,14 +387,17 @@ if __name__ == "__main__": - os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor\n%s\nEND" % (dummy_fence_agent)) - os.system("chmod 711 /usr/sbin/fence_dummy_monitor") - -- os.system("cp /usr/share/pacemaker/tests/cts/LSBDummy /etc/init.d/lrmd_dummy_daemon") -+ os.system("cp /usr/share/pacemaker/tests/cts/LSBDummy /etc/init.d/LSBDummy") - os.system("mkdir -p @CRM_CORE_DIR@/root") - - os.system("systemctl 
daemon-reload") - - def cleanup_test_environment(self): -+ if self.need_authkey: -+ os.system("rm -f /etc/pacemaker/authkey") -+ - os.system("rm -f /lib/systemd/system/lrmd_dummy_daemon.service") -- os.system("rm -f /etc/init.d/lrmd_dummy_daemon") -+ os.system("rm -f /etc/init.d/LSBDummy") - os.system("rm -f /usr/sbin/lrmd_dummy_daemon") - os.system("rm -f /usr/sbin/fence_dummy_monitor") - os.system("rm -f /usr/sbin/fence_dummy_sleep") -@@ -505,7 +525,7 @@ if __name__ == "__main__": - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") - test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") - test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") -- test.add_cmd_and_kill("rm -f /var/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") -+ test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") - test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") -@@ -522,7 +542,7 @@ if __name__ == "__main__": - test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 -o " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") -- test.add_cmd_and_kill("rm -f /var/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") -+ test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") - test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") -@@ -651,6 +671,18 @@ if __name__ == "__main__": - - ### These are tests that target specific cases ### - def build_custom_tests(self): -+ -+ ### verify resource temporary folder is created and used by heartbeat agents. 
### -+ test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") -+ test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@") -+ test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " -+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" -t 3000") -+ test.add_cmd("-c exec -r test_rsc -a start -t 4000") -+ test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") -+ test.add_cmd("-c exec -r test_rsc -a stop -t 4000") -+ test.add_cmd("-c unregister_rsc -r test_rsc -t 3000 " -+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") -+ - ### start delay then stop test ### - test = self.new_test("start_delay", "Verify start delay works as expected.") - test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " -@@ -715,7 +747,7 @@ if __name__ == "__main__": - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 -n " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") -- # this will fail because the monitor notifications should only go to the original caller, which no longer exists. -+ # this will fail because the monitor notifications should only go to the original caller, which no longer exists. - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") - test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" ") - test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " -@@ -731,8 +763,8 @@ if __name__ == "__main__": - - ### get metadata ### - test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") -- test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"lrmd_dummy_daemon\"" -- ,"resource-agent name=\"lrmd_dummy_daemon\"") -+ test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"" -+ ,"resource-agent name='LSBDummy'") - - ### get stonith metadata ### - test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") -@@ -762,9 +794,9 @@ if __name__ == "__main__": - test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") - test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist -- test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### init.d ### -- test.add_cmd_check_stdout("-c list_agents -C lsb", "lrmd_dummy_daemon") -- test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") -+ test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### -+ test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") -+ test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") - test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist - - test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist -@@ -774,15 +806,15 @@ if __name__ == "__main__": - - if "systemd" in self.rsc_classes: - test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### systemd ### -- test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") -- test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist -+ test.add_cmd_check_stdout("-c list_agents 
-C service", "LSBDummy") -+ test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon") - test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist - - if "upstart" in self.rsc_classes: - test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### upstart ### -- test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") -- test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist -+ test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") -+ test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon") - test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist - -@@ -855,7 +887,7 @@ class TestOptions: - self.options['verbose'] = 0 - self.options['invalid-arg'] = "" - self.options['show-usage'] = 0 -- self.options['tls-backend'] = 0 -+ self.options['pacemaker-remote'] = 0 - - def build_options(self, argv): - args = argv[1:] -@@ -870,8 +902,8 @@ class TestOptions: - self.options['list-tests'] = 1 - elif args[i] == "-V" or args[i] == "--verbose": - self.options['verbose'] = 1 -- elif args[i] == "-S" or args[i] == "--tls-backend": -- self.options['tls-backend'] = 1 -+ elif args[i] == "-R" or args[i] == "--pacemaker-remote": -+ self.options['pacemaker-remote'] = 1 - elif args[i] == "-r" or args[i] == "--run-only": - self.options['run-only'] = args[i+1] - skip = 1 -@@ -887,7 +919,7 @@ class TestOptions: - print "\t [--list-tests | -l] Print out all registered tests." - print "\t [--run-only | -r 'testname'] Run a specific test" - print "\t [--verbose | -V] Verbose output" -- print "\t [--tls-backend | -S Use tls backend" -+ print "\t [--pacemaker-remote | -R Test pacemaker-remote binary instead of lrmd." 
- print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" - print "\n\tExample: Run only the test 'start_top'" - print "\t\t python ./regression.py --run-only start_stop" -@@ -899,7 +931,7 @@ def main(argv): - o = TestOptions() - o.build_options(argv) - -- tests = Tests(o.options['verbose'], o.options['tls-backend']) -+ tests = Tests(o.options['verbose'], o.options['pacemaker-remote']) - - tests.build_generic_tests() - tests.build_multi_rsc_tests() -diff --git a/lrmd/tls_backend.c b/lrmd/tls_backend.c -index 8ee16d4..780d17b 100644 ---- a/lrmd/tls_backend.c -+++ b/lrmd/tls_backend.c -@@ -137,6 +137,7 @@ lrmd_remote_client_destroy(gpointer user_data) - return; - } - -+ ipc_proxy_remove_provider(client); - client_disconnect_cleanup(client->id); - - crm_notice("LRMD client disconnecting remote client - name: %s id: %s", -@@ -249,17 +250,7 @@ lrmd_remote_connection_destroy(gpointer user_data) - static int - lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) - { -- int rc = 0; -- -- if (lrmd_tls_set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { -- rc = lrmd_tls_set_key(key, ALT_REMOTE_KEY_LOCATION); -- } -- if (rc) { -- crm_err("No lrmd remote key found"); -- return -1; -- } -- -- return rc; -+ return lrmd_tls_set_key(key); - } - - int -diff --git a/mcp/Makefile.am b/mcp/Makefile.am -index f3a0f86..73a71c4 100644 ---- a/mcp/Makefile.am -+++ b/mcp/Makefile.am -@@ -5,17 +5,17 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - # --MAINTAINERCLEANFILES = Makefile.in -+MAINTAINERCLEANFILES = Makefile.in - - if BUILD_CS_SUPPORT - INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -@@ -23,7 +23,7 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - - initdir = $(INITDIR) - init_SCRIPTS = pacemaker --sbin_PROGRAMS = pacemakerd -+sbin_PROGRAMS = pacemakerd - - if BUILD_HELP - man8_MANS = $(sbin_PROGRAMS:%=%.8) -@@ -35,7 +35,7 @@ endif - - ## SOURCES - --noinst_HEADERS = -+noinst_HEADERS = - - pacemakerd_SOURCES = pacemaker.c corosync.c - pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la -@@ -44,6 +44,7 @@ pacemakerd_LDFLAGS = $(CLUSTERLIBS) - %.8: % - echo Creating $@ - chmod a+x $(top_builddir)/mcp/$< -+ $(top_builddir)/mcp/$< --help - $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/mcp/$< - - clean-generic: -diff --git a/mcp/corosync.c b/mcp/corosync.c -index 28a7ff7..64d6eb5 100644 ---- a/mcp/corosync.c -+++ b/mcp/corosync.c -@@ -174,7 +174,7 @@ cpg_connection_destroy(gpointer user_data) - { - crm_err("Connection destroyed"); - cpg_handle = 0; -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - static void -@@ -527,7 +527,7 @@ read_config(void) - } else { - crm_err("We can only start Pacemaker from init if using version 1" - " of the Pacemaker plugin for Corosync. 
Terminating."); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - break; - } -@@ -581,6 +581,19 @@ read_config(void) - #if HAVE_CONFDB - confdb_finalize(config); - #elif HAVE_CMAP -+ if(local_handle){ -+ gid_t gid = 0; -+ if (crm_user_lookup(CRM_DAEMON_USER, NULL, &gid) < 0) { -+ crm_warn("No group found for user %s", CRM_DAEMON_USER); -+ -+ } else { -+ char key[PATH_MAX]; -+ snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); -+ rc = cmap_set_uint8(local_handle, key, 1); -+ crm_notice("Configured corosync to accept connections from group %u: %s (%d)", -+ gid, ais_error2text(rc), rc); -+ } -+ } - cmap_finalize(local_handle); - #endif - -@@ -624,6 +637,7 @@ read_config(void) - } - - set_daemon_option("logfacility", logging_syslog_facility); -+ setenv("HA_LOGFACILITY", logging_syslog_facility, 1); - - free(logging_debug); - free(logging_logfile); -diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c -index ea97851..bf538dd 100644 ---- a/mcp/pacemaker.c -+++ b/mcp/pacemaker.c -@@ -20,6 +20,7 @@ - #include - - #include -+#include - #include - #include - #include -@@ -38,7 +39,7 @@ GHashTable *peers = NULL; - - #define PCMK_PROCESS_CHECK_INTERVAL 5 - --char *local_name = NULL; -+const char *local_name = NULL; - uint32_t local_nodeid = 0; - crm_trigger_t *shutdown_trigger = NULL; - const char *pid_file = "/var/run/pacemaker.pid"; -@@ -160,27 +161,23 @@ pcmk_process_exit(pcmk_child_t * child) - } - - static void --pcmk_child_exit(GPid pid, gint status, gpointer user_data) -+pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -- int exitcode = 0; -- pcmk_child_t *child = user_data; -- -- if (WIFSIGNALED(status)) { -- int signo = WTERMSIG(status); -- int core = WCOREDUMP(status); -+ pcmk_child_t *child = mainloop_child_userdata(p); -+ const char *name = mainloop_child_name(p); - -+ if (signo) { - crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)", -- child->name, signo, child->pid, core); -+ name, signo, pid, core); - -- } else if (WIFEXITED(status)) { -- exitcode = WEXITSTATUS(status); -+ } else { - do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR, -- "Child process %s exited (pid=%d, rc=%d)", child->name, child->pid, exitcode); -+ "Child process %s (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); - } - - if (exitcode == 100) { - crm_warn("Pacemaker child process %s no longer wishes to be respawned. 
" -- "Shutting ourselves down.", child->name); -+ "Shutting ourselves down.", name); - child->respawn = FALSE; - fatal_error = TRUE; - pcmk_shutdown(15); -@@ -226,12 +223,14 @@ start_child(pcmk_child_t * child) - { - int lpc = 0; - uid_t uid = 0; -+ gid_t gid = 0; - struct rlimit oflimits; - gboolean use_valgrind = FALSE; - gboolean use_callgrind = FALSE; - const char *devnull = "/dev/null"; - const char *env_valgrind = getenv("PCMK_valgrind_enabled"); - const char *env_callgrind = getenv("PCMK_callgrind_enabled"); -+ enum cluster_type_e stack = get_cluster_type(); - - child->active_before_startup = FALSE; - -@@ -261,12 +260,20 @@ start_child(pcmk_child_t * child) - use_valgrind = FALSE; - } - -+ if (child->uid) { -+ if (crm_user_lookup(child->uid, &uid, &gid) < 0) { -+ crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); -+ return FALSE; -+ } -+ crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); -+ } -+ - child->pid = fork(); - CRM_ASSERT(child->pid != -1); - - if (child->pid > 0) { - /* parent */ -- g_child_watch_add(child->pid, pcmk_child_exit, child); -+ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); - - crm_info("Forked child %d for process %s%s", child->pid, child->name, - use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); -@@ -292,17 +299,21 @@ start_child(pcmk_child_t * child) - } - opts_default[0] = strdup(child->command);; - --#if 0 -- /* Dont set the group for now - it prevents connection to the cluster */ -- if (gid && setgid(gid) < 0) { -- crm_perror("Could not set group to %d", gid); -- } --#endif -+ if(gid) { -+ if(stack == pcmk_cluster_corosync) { -+ /* Drop root privileges completely -+ * -+ * We can do this because we set uidgid.gid.${gid}=1 -+ * via CMAP which allows these processes to connect to -+ * corosync -+ */ -+ if (setgid(gid) < 0) { -+ crm_perror(LOG_ERR, "Could not set group to %d", gid); -+ } - -- if (child->uid) { -- if (crm_user_lookup(child->uid, &uid, NULL) < 0) { -- crm_err("Invalid uid (%s) specified for %s", child->uid, child->name); -- return TRUE; -+ /* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */ -+ } else if (initgroups(child->uid, gid) < 0) { -+ crm_err("Cannot initalize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno); - } - } - -@@ -326,7 +337,7 @@ start_child(pcmk_child_t * child) - (void)execvp(child->command, opts_default); - } - crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - return TRUE; /* never reached */ - } -@@ -403,7 +414,7 @@ pcmk_shutdown_worker(gpointer user_data) - - if (fatal_error) { - crm_notice("Attempting to inhibit respawning after fatal error"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - return TRUE; -@@ -418,29 +429,6 @@ pcmk_shutdown(int nsig) - mainloop_set_trigger(shutdown_trigger); - } - --static void --build_path(const char *path_c, mode_t mode) --{ -- int offset = 1, len = 0; -- char *path = strdup(path_c); -- -- CRM_CHECK(path != NULL, return); -- for (len = strlen(path); offset < len; offset++) { -- if (path[offset] == '/') { -- path[offset] = 0; -- if (mkdir(path, mode) < 0 && errno != EEXIST) { -- crm_perror(LOG_ERR, "Could not create directory '%s'", path); -- break; -- } -- path[offset] = '/'; -- } -- } -- if (mkdir(path, mode) < 0 && errno != EEXIST) { -- crm_perror(LOG_ERR, "Could not create directory '%s'", path); -- } -- free(path); --} -- - static int32_t - 
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) - { -@@ -641,8 +629,8 @@ static struct crm_option long_options[] = { - {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"}, - - {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, -- {"foreground", 0, 0, 'f', "\tRun in the foreground instead of as a daemon"}, -- {"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"}, -+ {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"}, -+ {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"}, - - {NULL, 0, 0, 0} - }; -@@ -809,7 +797,6 @@ main(int argc, char **argv) - const char *facility = daemon_option("logfacility"); - - setenv("LC_ALL", "C", 1); -- setenv("HA_LOGFACILITY", facility, 1); - setenv("HA_LOGD", "no", 1); - - set_daemon_option("mcp", "true"); -@@ -844,9 +831,9 @@ main(int argc, char **argv) - shutdown = TRUE; - break; - case 'F': -- printf("Pacemaker %s (Build: %s)\n Supporting: %s\n", VERSION, BUILD_VERSION, -- CRM_FEATURES); -- crm_exit(0); -+ printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", VERSION, BUILD_VERSION, -+ CRM_FEATURE_SET, CRM_FEATURES); -+ crm_exit(pcmk_ok); - default: - printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); - ++argerr; -@@ -882,13 +869,13 @@ main(int argc, char **argv) - } - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - } else if (crm_ipc_connected(old_instance)) { - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); - crm_err("Pacemaker is already active, aborting startup"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - crm_ipc_close(old_instance); -@@ -896,7 +883,7 @@ main(int argc, char **argv) - - if (read_config() == FALSE) { - crm_notice("Could not obtain corosync config data, exiting"); -- crm_exit(1); -+ crm_exit(ENODATA); - } - - crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES); -@@ -932,33 +919,33 @@ main(int argc, char **argv) - - if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { - crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); -- crm_exit(1); -+ crm_exit(ENOKEY); - } - - mkdir(CRM_STATE_DIR, 0750); - mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); - - /* Used by stonithd */ -- build_path(HA_STATE_DIR "/heartbeat", 0755); -+ crm_build_path(HA_STATE_DIR "/heartbeat", 0755); - mcp_chown(HA_STATE_DIR "/heartbeat", pcmk_uid, pcmk_gid); - - /* Used by RAs - Leave owned by root */ -- build_path(CRM_RSCTMP_DIR, 0755); -+ crm_build_path(CRM_RSCTMP_DIR, 0755); - - /* Used to store core files in */ -- build_path(CRM_CORE_DIR, 0755); -+ crm_build_path(CRM_CORE_DIR, 0775); - mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid); - - /* Used to store blackbox dumps in */ -- build_path(CRM_BLACKBOX_DIR, 0755); -+ crm_build_path(CRM_BLACKBOX_DIR, 0755); - mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid); - - /* Used to store policy engine inputs in */ -- build_path(PE_STATE_DIR, 0755); -+ crm_build_path(PE_STATE_DIR, 0755); - mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid); - - /* Used to store the cluster configuration */ -- build_path(CRM_CONFIG_DIR, 0755); -+ crm_build_path(CRM_CONFIG_DIR, 0755); - mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid); - - peers = g_hash_table_new(g_direct_hash, g_direct_equal); -@@ -966,17 +953,17 @@ main(int argc, char **argv) - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &ipc_callbacks); - if (ipcs == NULL) { - 
crm_err("Couldn't start IPC server"); -- crm_exit(1); -+ crm_exit(EIO); - } - - if (cluster_connect_cfg(&local_nodeid) == FALSE) { - crm_err("Couldn't connect to Corosync's CFG service"); -- crm_exit(1); -+ crm_exit(ENOPROTOOPT); - } - - if (cluster_connect_cpg() == FALSE) { - crm_err("Couldn't connect to Corosync's CPG service"); -- crm_exit(1); -+ crm_exit(ENOPROTOOPT); - } - - local_name = get_local_node_name(); -@@ -1005,5 +992,5 @@ main(int argc, char **argv) - - crm_info("Exiting %s", crm_system_name); - -- crm_exit(0); -+ crm_exit(pcmk_ok); - } -diff --git a/mcp/pacemaker.combined.upstart.in b/mcp/pacemaker.combined.upstart.in -new file mode 100644 -index 0000000..df055f7 ---- /dev/null -+++ b/mcp/pacemaker.combined.upstart.in -@@ -0,0 +1,57 @@ -+# pacemaker-corosync - High-Availability cluster -+# -+# Starts Corosync cluster engine and Pacemaker cluster manager. -+ -+kill timeout 3600 -+ -+env prog=pacemakerd -+env rpm_sysconf=@sysconfdir@/sysconfig/pacemaker -+env rpm_lockfile=@localstatedir@/lock/subsys/pacemaker -+env deb_sysconf=@sysconfdir@/default/pacemaker -+env deb_lockfile=@localstatedir@/lock/pacemaker -+ -+script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ exec $prog -+end script -+ -+pre-start script -+ # setup the software watchdog which corosync uses in post-stop. -+ # rewrite according to environment. -+ modprobe softdog soft_margin=60 -+ start corosync -+ -+ # if you use corosync-notifyd, uncomment the line below. -+ #start corosync-notifyd -+ -+ # give it time to fail. -+ sleep 2 -+ pidof corosync || { exit 1; } -+end script -+ -+post-start script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ touch $LOCK_FILE -+ pidof $prog > @localstatedir@/run/$prog.pid -+end script -+ -+post-stop script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ rm -f $LOCK_FILE -+ rm -f @localstatedir@/run/$prog.pid -+ -+ # when pacemakerd disappeared unexpectedly, a machine is rebooted -+ # by the watchdog of corosync. -+ pidof crmd && killall -q -9 corosync -+ stop corosync || true -+ -+ # if you use corosync-notifyd, uncomment the line below. -+ #stop corosync-notifyd || true -+end script -diff --git a/mcp/pacemaker.in b/mcp/pacemaker.in -index 0e613e3..a6647fe 100644 ---- a/mcp/pacemaker.in -+++ b/mcp/pacemaker.in -@@ -8,15 +8,15 @@ - - # chkconfig: - 99 01 - # description: Pacemaker Cluster Manager --# processname: pacemaker -+# processname: pacemakerd - # - ### BEGIN INIT INFO - # Provides: pacemaker - # Required-Start: $network corosync - # Should-Start: $syslog --# Required-Stop: $network --# Default-Start: --# Default-Stop: -+# Required-Stop: $network corosync -+# Default-Start: 2 3 4 5 -+# Default-Stop: 0 1 6 - # Short-Description: Starts and stops Pacemaker Cluster Manager. - # Description: Starts and stops Pacemaker Cluster Manager. 
- ### END INIT INFO -@@ -71,6 +71,11 @@ if [ -d @sysconfdir@/default ]; then - [ -z "$LOCK_FILE" ] && LOCK_FILE="@localstatedir@/lock/pacemaker" - fi - -+# Unless specified otherwise, assume cman is in use if cluster.conf exists -+if [ x = "x$PCMK_STACK" -a -f @sysconfdir@/cluster/cluster.conf ]; then -+ PCMK_STACK=cman -+fi -+ - start() - { - echo -n "Starting $desc: " -@@ -101,8 +106,21 @@ start() - echo - } - -+cman_pre_start() -+{ -+ pid=$(pidof corosync 2>/dev/null) -+ if [ $? -ne 0 ]; then -+ service cman start -+ fi -+} -+ - cman_pre_stop() - { -+ pid=$(pidof fenced 2>/dev/null) -+ if [ $? -ne 0 ]; then -+ : CMAN is not running, nothing to do here -+ return -+ fi - cname=`crm_node --name` - crm_attribute -N $cname -n standby -v true -l reboot - logger -t pacemaker -p daemon.notice "Waiting for shutdown of managed resources" -@@ -151,7 +169,7 @@ stop() - echo -n "." - done - else -- echo -n "$desc is already stopped, cleaning up any stale processes and files" -+ echo -n "$desc is already stopped" - fi - - rm -f $LOCK_FILE -@@ -166,7 +184,7 @@ rtrn=0 - case "$1" in - start) - # For consistency with stop -- [ -f @INITDIR@/cman ] && service cman start -+ [ "$PCMK_STACK" = cman ] && cman_pre_start - start - ;; - restart|reload|force-reload) -@@ -195,9 +213,9 @@ stop) - # 4. stop pacemaker - # 5. stop the rest of cman (so it doesn't end up half up/down) - # -- [ -f @INITDIR@/cman ] && cman_pre_stop -+ [ "$PCMK_STACK" = cman ] && cman_pre_stop - stop -- [ -f @INITDIR@/cman ] && service cman stop -+ [ "$PCMK_STACK" = cman ] && service cman stop - ;; - *) - echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" -diff --git a/mcp/pacemaker.sysconfig b/mcp/pacemaker.sysconfig -index d63604d..7f12111 100644 ---- a/mcp/pacemaker.sysconfig -+++ b/mcp/pacemaker.sysconfig -@@ -1,5 +1,9 @@ - # For non-systemd based systems, prefix export to each enabled line - -+# Turn on special handling for CMAN clusters in the init script -+# Without this, fenced (and by inference, cman) cannot reliably be made to shut down -+# PCMK_STACK=cman -+ - #==#==# Variables that control logging - - # Enable debug logging globally or per-subsystem -@@ -12,11 +16,16 @@ - # By default Pacemaker will inherit the logfile specified in corosync.conf - # PCMK_debugfile=/var/log/pacemaker.log - --# Specify an alternate syslog target for NOTICE (and higher) messages -+# Specify an alternate syslog target for NOTICE (and higher) messages - # Use 'none' to disable - not recommended - # The default value is 'daemon' - # PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7 - -+# Send all messages up-to-and-including the configured priority to syslog -+# A value of 'info' will be far too verbose for most installations and 'debug' is almost certain to send you blind -+# The default value is 'notice' -+# PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug -+ - # Log all messages from a comma-separated list of functions - # PCMK_trace_functions=function1,function2,function3 - -@@ -54,6 +63,13 @@ - # Mostly only useful for developer testing - # PCMK_schema_directory=/some/path - -+#==#==# Pacemaker Remote -+# Use a custom directory for finding the authkey. 
-+# PCMK_authkey_location=/etc/pacemaker/authkey -+# -+# Specify a custom port for Pacemaker Remote connections -+# PCMK_remote_port=3121 -+ - #==#==# IPC - - # Force use of a particular class of IPC connection -@@ -73,4 +89,4 @@ - # PCMK_valgrind_enabled=cib,crmd - # PCMK_callgrind_enabled=yes - # PCMK_callgrind_enabled=cib,crmd --# VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25 --log-file=/tmp/pacemaker-%p.valgrind" -+# VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all" -diff --git a/mcp/pacemaker.upstart.in b/mcp/pacemaker.upstart.in -new file mode 100644 -index 0000000..7c977dc ---- /dev/null -+++ b/mcp/pacemaker.upstart.in -@@ -0,0 +1,37 @@ -+# pacemaker - High-Availability cluster resource manager -+# -+# Starts pacemakerd -+ -+stop on runlevel [0123456] -+kill timeout 3600 -+respawn -+ -+env prog=pacemakerd -+env rpm_sysconf=@sysconfdir@/sysconfig/pacemaker -+env rpm_lockfile=@localstatedir@/lock/subsys/pacemaker -+env deb_sysconf=@sysconfdir@/default/pacemaker -+env deb_lockfile=@localstatedir@/lock/pacemaker -+ -+script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ exec $prog -+end script -+ -+post-start script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ touch $LOCK_FILE -+ pidof $prog > @localstatedir@/run/$prog.pid -+end script -+ -+post-stop script -+ [ -f "$rpm_sysconf" ] && . $rpm_sysconf -+ [ -f "$deb_sysconf" ] && . $deb_sysconf -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile" -+ [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile" -+ rm -f $LOCK_FILE -+ rm -f @localstatedir@/run/$prog.pid -+end script -diff --git a/pengine/Makefile.am b/pengine/Makefile.am -index aadbd61..7309f5b 100644 ---- a/pengine/Makefile.am -+++ b/pengine/Makefile.am -@@ -5,12 +5,12 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
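The PCMK_remote_port knob documented above is consumed by the lrmd/main.c hunk earlier in this patch, which reads it with getenv() and falls back to a built-in default. A minimal sketch of that lookup, assuming the 3121 default from the sysconfig comment and with slightly more defensive parsing than the original atoi() call:

    import os

    DEFAULT_REMOTE_PORT = 3121  # default documented in the sysconfig stub above

    def remote_port():
        # Same lookup as the lrmd/main.c hunk earlier in this patch:
        # honour PCMK_remote_port if set, otherwise use the default.
        # Unlike the C atoi() call, unparsable values also fall back.
        value = os.environ.get("PCMK_remote_port")
        try:
            return int(value) if value else DEFAULT_REMOTE_PORT
        except ValueError:
            return DEFAULT_REMOTE_PORT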
-@@ -27,7 +27,7 @@ PE_TESTS = $(wildcard test10/*.scores) - - testdir = $(datadir)/$(PACKAGE)/tests/pengine - test_SCRIPTS = regression.sh --test_DATA = regression.core.sh ptest.supp -+test_DATA = regression.core.sh - - test10dir = $(datadir)/$(PACKAGE)/tests/pengine/test10 - test10_DATA = $(PE_TESTS) $(PE_TESTS:%.scores=%.xml) $(PE_TESTS:%.scores=%.exp) $(PE_TESTS:%.scores=%.dot) $(PE_TESTS:%.scores=%.summary) $(wildcard test10/*.stderr) -@@ -35,7 +35,7 @@ test10_DATA = $(PE_TESTS) $(PE_TESTS:%.scores=%.xml) $(PE_TESTS:%.scores=%.exp) - COMMONLIBS = \ - $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/pengine/libpe_status.la \ -- libpengine.la $(CURSESLIBS) $(CLUSTERLIBS) -+ libpengine.la $(CURSESLIBS) $(CLUSTERLIBS) - - ## libraries - lib_LTLIBRARIES = libpengine.la -@@ -59,8 +59,8 @@ endif - noinst_HEADERS = allocate.h utils.h pengine.h - #utils.h pengine.h - --libpengine_la_LDFLAGS = -version-info 4:1:0 --# -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version -+libpengine_la_LDFLAGS = -version-info 6:0:2 -+# -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version - libpengine_la_SOURCES = pengine.c allocate.c utils.c constraints.c - libpengine_la_SOURCES += native.c group.c clone.c master.c graph.c - -diff --git a/pengine/allocate.c b/pengine/allocate.c -index 9f5e370..2f179cf 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -239,7 +239,7 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op - did_change = TRUE; - key = generate_op_key(rsc->id, task, interval); - crm_log_xml_info(digest_data->params_restart, "params:restart"); -- crm_info("Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", -+ pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", - key, active_node->details->uname, - crm_str(digest_restart), digest_data->digest_restart_calc, - op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); -@@ -254,7 +254,7 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op - did_change = TRUE; - crm_log_xml_info(digest_data->params_all, "params:reload"); - key = generate_op_key(rsc->id, task, interval); -- crm_info("Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s", -+ pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. 
now %s (reload:%s) %s", - key, active_node->details->uname, - crm_str(digest_all), digest_data->digest_all_calc, op_version, - crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); -@@ -319,8 +319,15 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - CRM_CHECK(node != NULL, return); - - if (is_set(rsc->flags, pe_rsc_orphan)) { -- pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); -- DeleteRsc(rsc, node, FALSE, data_set); -+ resource_t *parent = uber_parent(rsc); -+ if(parent == NULL -+ || parent->variant < pe_clone -+ || is_set(parent->flags, pe_rsc_unique)) { -+ pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); -+ DeleteRsc(rsc, node, FALSE, data_set); -+ } else { -+ pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); -+ } - return; - - } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { -@@ -1578,7 +1585,7 @@ apply_remote_node_ordering(pe_working_set_t *data_set) - - /* when the container representing a remote node fails, the stop - * action for all the resources living in that container is implied -- * by the container stopping. This is similar to how fencing operations -+ * by the container stopping. This is similar to how fencing operations - * work for cluster nodes. */ - pe_set_action_bit(action, pe_action_pseudo); - custom_action_order(container, -@@ -1863,7 +1870,7 @@ pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_worki - if (interval == NULL || safe_str_eq(interval, "0")) { - pe_rsc_trace(rsc, "Skipping %s: interval", mon->uuid); - continue; -- } else if (safe_str_eq(mon->task, "cancel")) { -+ } else if (safe_str_eq(mon->task, RSC_CANCEL)) { - pe_rsc_trace(rsc, "Skipping %s: cancel", mon->uuid); - continue; - } -diff --git a/pengine/constraints.c b/pengine/constraints.c -index 7bd917f..1f59230 100644 ---- a/pengine/constraints.c -+++ b/pengine/constraints.c -@@ -366,10 +366,10 @@ unpack_rsc_location(xmlNode * xml_obj, pe_working_set_t * data_set) - { - gboolean empty = TRUE; - rsc_to_node_t *location = NULL; -- const char *id_lh = crm_element_value(xml_obj, "rsc"); -+ const char *id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); - const char *id = crm_element_value(xml_obj, XML_ATTR_ID); - resource_t *rsc_lh = pe_find_resource(data_set->resources, id_lh); -- const char *node = crm_element_value(xml_obj, "node"); -+ const char *node = crm_element_value(xml_obj, XML_CIB_TAG_NODE); - const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); - const char *domain = crm_element_value(xml_obj, XML_CIB_TAG_DOMAIN); - const char *role = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); -@@ -1066,6 +1066,14 @@ order_rsc_sets(const char *id, xmlNode * set1, xmlNode * set2, enum pe_order_kin - action_2 = invert_action(action_2); - } - -+ if(safe_str_eq(RSC_STOP, action_1) || safe_str_eq(RSC_DEMOTE, action_1)) { -+ /* Assuming: A -> ( B || C) -> D -+ * The one-or-more logic only applies during the start/promote phase -+ * During shutdown neither B nor can shutdown until D is down, so simply turn require_all back on. 
-+ */ -+ require_all = TRUE; -+ } -+ - if (symmetrical == FALSE) { - flags = get_asymmetrical_flags(kind); - } else { -diff --git a/pengine/graph.c b/pengine/graph.c -index 1282330..2e44ce5 100644 ---- a/pengine/graph.c -+++ b/pengine/graph.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -333,6 +333,26 @@ graph_update_action(action_t * first, action_t * then, node_t * node, enum pe_ac - update_action_flags(first, pe_action_print_always); /* dont care about changed */ - } - -+ if ((type & pe_order_implies_then -+ || type & pe_order_implies_first -+ || type & pe_order_restart) -+ && first->rsc -+ && safe_str_eq(first->task, RSC_STOP) -+ && is_not_set(first->rsc->flags, pe_rsc_managed) -+ && is_set(first->rsc->flags, pe_rsc_block) -+ && is_not_set(first->flags, pe_action_runnable)) { -+ -+ if (update_action_flags(then, pe_action_runnable | pe_action_clear)) { -+ changed |= pe_graph_updated_then; -+ } -+ -+ if (changed) { -+ pe_rsc_trace(then->rsc, "unmanaged left: %s then %s: changed", first->uuid, then->uuid); -+ } else { -+ crm_trace("unmanaged left: %s then %s", first->uuid, then->uuid); -+ } -+ } -+ - if (processed == FALSE) { - crm_trace("Constraint 0x%.6x not applicable", type); - } -@@ -568,7 +588,7 @@ get_router_node(action_t *action) - * This means some actions will get routed through the cluster - * node the connection rsc began on, and others are routed through - * the cluster node the connection rsc ends up on. -- * -+ * - * 1. stop, demote, migrate actions of resources living in the remote - * node _MUST_ occur _BEFORE_ the connection can move (these actions - * are all required before the remote rsc stop action can occur.) 
In -@@ -595,8 +615,8 @@ get_router_node(action_t *action) - return router_node; - } - --xmlNode * --action2xml(action_t * action, gboolean as_input) -+static xmlNode * -+action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) - { - gboolean needs_node_info = TRUE; - xmlNode *action_xml = NULL; -@@ -752,7 +772,14 @@ action2xml(action_t * action, gboolean as_input) - crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); - - g_hash_table_foreach(action->extra, hash2field, args_xml); -- if (action->rsc != NULL) { -+ if (action->rsc != NULL && action->node) { -+ GHashTable *p = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); -+ -+ get_rsc_attributes(p, action->rsc, action->node, data_set); -+ g_hash_table_foreach(p, hash2smartfield, args_xml); -+ -+ g_hash_table_destroy(p); -+ } else if(action->rsc) { - g_hash_table_foreach(action->rsc->parameters, hash2smartfield, args_xml); - } - -@@ -800,7 +827,7 @@ should_dump_action(action_t * action) - * probe_complete from running (presumably because it is only - * partially up) - * -- * For these reasons we tolerate such perversions -+ * For these reasons we tolerate such perversions - */ - - for (lpc = action->actions_after; lpc != NULL; lpc = lpc->next) { -@@ -940,7 +967,7 @@ should_dump_input(int last_action, action_t * action, action_wrapper_t * wrapper - if (action->rsc && safe_str_eq(action->task, RSC_MIGRATE)) { - /* Remove the orders like : - * "load_stopped_node2" -> "rscA_migrate_to node1" -- * which were created from: pengine/native.c: MigrateRsc() -+ * which were created from: pengine/native.c: MigrateRsc() - * order_actions(other, then, other_w->type); - */ - wrapper->type = pe_order_none; -@@ -1045,7 +1072,7 @@ graph_element_from_action(action_t * action, pe_working_set_t * data_set) - crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); - } - -- xml_action = action2xml(action, FALSE); -+ xml_action = action2xml(action, FALSE, data_set); - add_node_nocopy(set, crm_element_name(xml_action), xml_action); - - action->actions_before = g_list_sort(action->actions_before, sort_action_id); -@@ -1063,7 +1090,7 @@ graph_element_from_action(action_t * action, pe_working_set_t * data_set) - last_action = wrapper->action->id; - input = create_xml_node(in, "trigger"); - -- xml_action = action2xml(wrapper->action, TRUE); -+ xml_action = action2xml(wrapper->action, TRUE, data_set); - add_node_nocopy(input, crm_element_name(xml_action), xml_action); - } - } -diff --git a/pengine/main.c b/pengine/main.c -index 8b7b5e4..397cc61 100644 ---- a/pengine/main.c -+++ b/pengine/main.c -@@ -172,7 +172,7 @@ main(int argc, char **argv) - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_SHM, &ipc_callbacks); - if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - /* Create the mainloop and run it... 
*/ -@@ -182,12 +182,12 @@ main(int argc, char **argv) - g_main_run(mainloop); - - crm_info("Exiting %s", crm_system_name); -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - void - pengine_shutdown(int nsig) - { - mainloop_del_ipc_server(ipcs); -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } -diff --git a/pengine/master.c b/pengine/master.c -index 78f2c93..23db809 100644 ---- a/pengine/master.c -+++ b/pengine/master.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -394,7 +394,7 @@ master_promotion_order(resource_t * rsc, pe_working_set_t * data_set) - } - - static gboolean --anonymous_known_on(resource_t * rsc, node_t * node) -+filter_anonymous_instance(resource_t * rsc, node_t * node) - { - GListPtr rIter = NULL; - char *key = clone_strip(rsc->id); -@@ -402,10 +402,35 @@ anonymous_known_on(resource_t * rsc, node_t * node) - - for (rIter = parent->children; rIter; rIter = rIter->next) { - resource_t *child = rIter->data; -+ resource_t *active = parent->fns->find_rsc(child, key, node, pe_find_clone|pe_find_current); - -- /* ->find_rsc() because we might be a cloned group -- * and knowing that other members of the group are -- * known here implies nothing -+ /* -+ * Look for an active instance on $node, if there is one, only it recieves the master score -+ * Use ->find_rsc() because we might be a cloned group -+ */ -+ if(rsc == active) { -+ pe_rsc_trace(rsc, "Found %s for %s active on %s: done", active->id, key, node->details->uname); -+ free(key); -+ return TRUE; -+ } else if(active) { -+ pe_rsc_trace(rsc, "Found %s for %s on %s: not %s", active->id, key, node->details->uname, rsc->id); -+ free(key); -+ return FALSE; -+ } else { -+ pe_rsc_trace(rsc, "%s on %s: not active", key, node->details->uname); -+ } -+ } -+ -+ for (rIter = parent->children; rIter; rIter = rIter->next) { -+ resource_t *child = rIter->data; -+ -+ /* -+ * We know its not running, but any score will still count if -+ * the instance has been probed on $node -+ * -+ * Again use ->find_rsc() because we might be a cloned group -+ * and knowing that other members of the group are known here -+ * implies nothing - */ - rsc = parent->fns->find_rsc(child, key, NULL, pe_find_clone); - pe_rsc_trace(rsc, "Checking %s for %s on %s", rsc->id, key, node->details->uname); -@@ -452,11 +477,11 @@ master_score(resource_t * rsc, node_t * node, int not_set_value) - node_t *match = pe_find_node_id(rsc->running_on, node->details->id); - node_t *known = pe_hash_table_lookup(rsc->known_on, node->details->id); - -- if (is_not_set(rsc->flags, pe_rsc_unique) && anonymous_known_on(rsc, node)) { -- pe_rsc_trace(rsc, "Anonymous clone %s is known on %s", rsc->id, node->details->uname); -+ if (is_not_set(rsc->flags, pe_rsc_unique) && filter_anonymous_instance(rsc, node)) { -+ 
pe_rsc_trace(rsc, "Anonymous clone %s is allowed on %s", rsc->id, node->details->uname); - - } else if (match == NULL && known == NULL) { -- pe_rsc_trace(rsc, "%s (aka. %s) is not known on %s - ignoring", rsc->id, -+ pe_rsc_trace(rsc, "%s (aka. %s) has been filtered on %s - ignoring", rsc->id, - rsc->clone_name, node->details->uname); - return score; - } -diff --git a/pengine/native.c b/pengine/native.c -index ac73ffa..45df696 100644 ---- a/pengine/native.c -+++ b/pengine/native.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -57,7 +57,7 @@ gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_s - - /* *INDENT-OFF* */ - enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { --/* Current State */ -+/* Current State */ - /* Next State: Unknown Stopped Started Slave Master */ - /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, - /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, -@@ -67,7 +67,7 @@ enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { - }; - - gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { --/* Current State */ -+/* Current State */ - /* Next State: Unknown Stopped Started Slave Master */ - /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, - /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, -@@ -1124,6 +1124,10 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) - pe_rsc_trace(rsc, "Recovering %s", rsc->id); - need_stop = TRUE; - -+ } else if (is_set(rsc->flags, pe_rsc_block)) { -+ pe_rsc_trace(rsc, "Block %s", rsc->id); -+ need_stop = TRUE; -+ - } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { - /* Recovery of a promoted resource */ - start = start_action(rsc, chosen, TRUE); -@@ -1148,7 +1152,8 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) - role = next_role; - } - -- while (rsc->role <= rsc->next_role && role != rsc->role) { -+ -+ while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { - next_role = rsc_state_matrix[role][rsc->role]; - pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), - rsc->id, need_stop ? 
" required" : ""); -@@ -1170,15 +1175,33 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) - role = next_role; - } - -- if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { -+ if(is_set(rsc->flags, pe_rsc_block)) { -+ pe_rsc_trace(rsc, "No monitor additional ops for blocked resource"); -+ -+ } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { -+ pe_rsc_trace(rsc, "Monitor ops for active resource"); - start = start_action(rsc, chosen, TRUE); - Recurring(rsc, start, chosen, data_set); - Recurring_Stopped(rsc, start, chosen, data_set); - } else { -+ pe_rsc_trace(rsc, "Monitor ops for in-active resource"); - Recurring_Stopped(rsc, NULL, NULL, data_set); - } - } - -+static void -+rsc_avoids_remote_nodes(resource_t *rsc) -+{ -+ GHashTableIter iter; -+ node_t *node = NULL; -+ g_hash_table_iter_init(&iter, rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -+ if (node->details->remote_rsc) { -+ node->weight = -INFINITY; -+ } -+ } -+} -+ - void - native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - { -@@ -1272,16 +1295,17 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - } - - if (rsc->is_remote_node || is_stonith) { -- GHashTableIter iter; -- node_t *node = NULL; -- g_hash_table_iter_init(&iter, rsc->allowed_nodes); -- while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { -- /* don't allow remote nodes to run stonith devices -- * or remote connection resources.*/ -- if (node->details->remote_rsc) { -- node->weight = -INFINITY; -- } -- } -+ /* don't allow remote nodes to run stonith devices -+ * or remote connection resources.*/ -+ rsc_avoids_remote_nodes(rsc); -+ } -+ -+ /* If this rsc is a remote connection resource associated -+ * with a container ( which will most likely be a virtual guest ) -+ * do not allow the container to live on any remote-nodes. -+ * remote-nodes managing nested remote-nodes should not be allowed. */ -+ if (rsc->is_remote_node && rsc->container) { -+ rsc_avoids_remote_nodes(rsc->container); - } - } - -@@ -1983,11 +2007,28 @@ LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) - GListPtr gIter = NULL; - - CRM_CHECK(current != NULL,); -+ -+ key = stop_key(rsc); - for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; -+ action_t *stop_op = NULL; -+ gboolean allowed = FALSE; -+ -+ possible_matches = find_actions(rsc->actions, key, node); -+ if (possible_matches) { -+ stop_op = possible_matches->data; -+ g_list_free(possible_matches); -+ } - -- log_change("Stop %s\t(%s)", rsc->id, node->details->uname); -+ if (stop_op && (stop_op->flags & pe_action_runnable)) { -+ allowed = TRUE; -+ } -+ -+ log_change("Stop %s\t(%s%s)", rsc->id, node->details->uname, -+ allowed ? 
"" : " - blocked"); - } -+ -+ free(key); - } - - if (moving) { -@@ -2175,10 +2216,6 @@ NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * da - gboolean - DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set) - { --#if DELETE_THEN_REFRESH -- action_t *delete = NULL; -- action_t *refresh = NULL; --#endif - if (is_set(rsc->flags, pe_rsc_failed)) { - pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); - return FALSE; -@@ -2195,11 +2232,7 @@ DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * - - crm_notice("Removing %s from %s", rsc->id, node->details->uname); - --#if DELETE_THEN_REFRESH -- delete = delete_action(rsc, node, optional); --#else - delete_action(rsc, node, optional); --#endif - - new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, - optional ? pe_order_implies_then : pe_order_optional, data_set); -@@ -2207,15 +2240,6 @@ DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * - new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, - optional ? pe_order_implies_then : pe_order_optional, data_set); - --#if DELETE_THEN_REFRESH -- refresh = custom_action(NULL, strdup(CRM_OP_LRM_REFRESH), CRM_OP_LRM_REFRESH, -- node, FALSE, TRUE, data_set); -- -- add_hash_param(refresh->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); -- -- order_actions(delete, refresh, pe_order_optional); --#endif -- - return TRUE; - } - -@@ -2541,7 +2565,7 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto - * - * The extra notification here changes - * + C.healthy depends on C.notify -- * into: -+ * into: - * + C.healthy depends on C.notify' - * + C.notify' depends on STONITH' - * thus breaking the loop -@@ -2560,7 +2584,7 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto - - /* From Bug #1601, successful fencing must be an input to a failed resources stop action. - -- However given group(rA, rB) running on nodeX and B.stop has failed, -+ However given group(rA, rB) running on nodeX and B.stop has failed, - A := stop healthy resource (rA.stop) - B := stop failed resource (pseudo operation B.stop) - C := stonith nodeX -@@ -2574,12 +2598,12 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto - (marked as a pseudo op depending on the STONITH). 
- - TODO: Break the "A requires B" dependency in update_action() and re-enable this block -- -+ - } else if(is_stonith == FALSE) { - crm_info("Moving healthy resource %s" - " off %s before fencing", - rsc->id, node->details->uname); -- -+ - * stop healthy resources before the - * stonith op - * -diff --git a/pengine/pengine.c b/pengine/pengine.c -index 97d68df..99a81c6 100644 ---- a/pengine/pengine.c -+++ b/pengine/pengine.c -@@ -184,6 +184,7 @@ process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender) - } - - if (is_repoke == FALSE && series_wrap != 0) { -+ unlink(filename); - write_xml_file(xml_data, filename, HAVE_BZLIB_H); - write_last_sequence(PE_STATE_DIR, series[series_id].name, seq + 1, series_wrap); - } else { -@@ -210,29 +211,37 @@ do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * n - set_working_set_defaults(data_set); - data_set->input = xml_input; - data_set->now = now; -- if (data_set->now == NULL) { -- data_set->now = crm_time_new(NULL); -- } -+ - } else { - crm_trace("Already have status - reusing"); - } - -+ if (data_set->now == NULL) { -+ data_set->now = crm_time_new(NULL); -+ } -+ - crm_trace("Calculate cluster status"); - stage0(data_set); - -- gIter = data_set->resources; -- for (; gIter != NULL; gIter = gIter->next) { -- resource_t *rsc = (resource_t *) gIter->data; -+ if(is_not_set(data_set->flags, pe_flag_quick_location)) { -+ gIter = data_set->resources; -+ for (; gIter != NULL; gIter = gIter->next) { -+ resource_t *rsc = (resource_t *) gIter->data; - -- if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { -- continue; -+ if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { -+ continue; -+ } -+ rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level); - } -- rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level); - } - - crm_trace("Applying placement constraints"); - stage2(data_set); - -+ if(is_set(data_set->flags, pe_flag_quick_location)){ -+ return NULL; -+ } -+ - crm_trace("Create internal constraints"); - stage3(data_set); - -diff --git a/pengine/ptest.supp b/pengine/ptest.supp -deleted file mode 100644 -index f4ef2cc..0000000 ---- a/pengine/ptest.supp -+++ /dev/null -@@ -1,33 +0,0 @@ --# Valgrind suppressions for PE testing --{ -- Valgrind bug -- Memcheck:Addr8 -- fun:__strspn_sse42 -- fun:crm_get_msec --} -- --{ -- Ignore crm_system_name -- Memcheck:Leak -- fun:malloc -- fun:crm_strdup_fn -- fun:crm_log_init_worker -- fun:crm_log_init -- fun:main --} -- --{ -- libqb fixed upstream 1 -- Memcheck:Leak -- fun:realloc -- fun:_grow_bin_array -- fun:_qb_array_grow --} -- --{ -- libqb fixed upstream 2 -- Memcheck:Leak -- fun:calloc -- fun:qb_log_dcs_get -- fun:_qb_log_callsite_get --} -diff --git a/pengine/regression.core.sh.in b/pengine/regression.core.sh.in -index 685200a..03a6993 100644 ---- a/pengine/regression.core.sh.in -+++ b/pengine/regression.core.sh.in -@@ -1,15 +1,15 @@ - # Copyright (C) 2004 Andrew Beekhof -- # -+ # - # This program is free software; you can redistribute it and/or - # modify it under the terms of the GNU General Public - # License as published by the Free Software Foundation; either - # version 2.1 of the License, or (at your option) any later version. -- # -+ # - # This software is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # General Public License for more details. 
-- # -+ # - # You should have received a copy of the GNU General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -@@ -20,7 +20,7 @@ num_failed=0 - num_tests=0 - force_local=0 - VALGRIND_CMD="" --diff_opts="--ignore-all-space -u -N" -+diff_opts="--ignore-all-space --ignore-blank-lines -u -N" - - test_home=`dirname $0` - test_name=`basename $0` -@@ -47,19 +47,20 @@ io_dir=$test_home/test10 - test_binary=@abs_top_builddir@/tools/crm_simulate - PCMK_schema_directory=@abs_top_builddir@/xml - -+ - failed=$test_home/.regression.failed.diff - single_test= - - while true ; do - case "$1" in - -V|--verbose) verbose=1; shift;; -- -v|--valgrind) -+ -v|--valgrind) - export G_SLICE=always-malloc -- VALGRIND_CMD="valgrind -q --log-file=%q{valgrind_output} --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=$test_home/ptest.supp" -+ VALGRIND_CMD="valgrind -q --gen-suppressions=all --log-file=%q{valgrind_output} --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=@datadir@/@PACKAGE@/tests/valgrind-pcmk.suppressions" - test_binary= - shift;; -- --valgrind-dhat) -- VALGRIND_CMD="valgrind --log-file=%q{valgrind_output} --show-top-n=100 --num-callers=4 --time-stamp=yes --trace-children=no --tool=exp-dhat --suppressions=$test_home/ptest.supp" -+ --valgrind-dhat) -+ VALGRIND_CMD="valgrind --log-file=%q{valgrind_output} --show-top-n=100 --num-callers=4 --time-stamp=yes --trace-children=no --tool=exp-dhat" - test_binary= - shift;; - --valgrind-skip-output) -@@ -67,6 +68,7 @@ while true ; do - shift;; - --run) single_test="$2"; shift; shift; break;; - -b|--binary) test_binary=$2; PCMK_schema_directory=""; shift; shift;; -+ -i|--io-dir) io_dir=$2; shift; shift;; - -?|--help) echo "$0 [--binary name] [--force-local]"; shift; exit 0;; - --) shift ; break ;; - "") break;; -@@ -243,7 +245,7 @@ function do_test { - cp "$dot_output" "$dot_expected" - cp "$score_output" "$scores" - cp "$summary_output" "$summary" -- info " Created expected outputs" -+ info " Created expected outputs" - fi - - diff $diff_opts $summary $summary_output >/dev/null -@@ -262,7 +264,7 @@ function do_test { - diff $diff_opts $dot_expected $dot_output 2>/dev/null >> $failed - echo "" >> $failed - did_fail=1 -- else -+ else - rm -f $dot_output - fi - -@@ -274,7 +276,7 @@ function do_test { - echo "" >> $failed - did_fail=1 - fi -- -+ - diff $diff_opts $scores $score_output >/dev/null - rc=$? - if [ $rc != 0 ]; then -@@ -312,5 +314,6 @@ else - failed=.single - > $failed - do_test $single_test "Single shot" $* -+ cat $failed - exit $? - fi -diff --git a/pengine/regression.sh b/pengine/regression.sh -index 447b563..87ef726 100755 ---- a/pengine/regression.sh -+++ b/pengine/regression.sh -@@ -1,24 +1,24 @@ - #!/bin/bash - - # Copyright (C) 2004 Andrew Beekhof -- # -+ # - # This program is free software; you can redistribute it and/or - # modify it under the terms of the GNU General Public - # License as published by the Free Software Foundation; either - # version 2 of the License, or (at your option) any later version. -- # -+ # - # This software is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # General Public License for more details. 
-- # -+ # - # You should have received a copy of the GNU General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - # - - core=`dirname $0` --. $core/regression.core.sh -+. $core/regression.core.sh || exit 1 - - create_mode="true" - info Generating test outputs for these tests... -@@ -121,6 +121,7 @@ do_test one-or-more-4 "D cannot start because of target-role" - do_test one-or-more-5 "Start A and F even though C and D are stopped" - do_test one-or-more-6 "Leave A running even though B is stopped" - do_test one-or-more-7 "Leave A running even though C is stopped" -+do_test bug-5140-require-all-false "Allow basegrp:0 to stop" - - echo "" - do_test order1 "Order start 1 " -@@ -180,6 +181,7 @@ do_test attrs5 "string: not_exists " - do_test attrs6 "is_dc: true " - do_test attrs7 "is_dc: false " - do_test attrs8 "score_attribute " -+do_test per-node-attrs "Per node resource parameters" - - echo "" - do_test mon-rsc-1 "Schedule Monitor - start" -@@ -198,6 +200,7 @@ do_test rec-rsc-6 "Resource Recover - multiple - restart" - do_test rec-rsc-7 "Resource Recover - multiple - stop " - do_test rec-rsc-8 "Resource Recover - multiple - block " - do_test rec-rsc-9 "Resource Recover - group/group" -+do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor" - - echo "" - do_test quorum-1 "No quorum - ignore" -@@ -274,8 +277,8 @@ echo "" - do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" - do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" - do_test clone-anon-failcount "Merge failcounts for anonymous clones" --do_test inc0 "Incarnation start" --do_test inc1 "Incarnation start order" -+do_test inc0 "Incarnation start" -+do_test inc1 "Incarnation start order" - do_test inc2 "Incarnation silent restart, stop, move" - do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" - do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" -@@ -285,8 +288,8 @@ do_test inc7 "Clone colocation" - do_test inc8 "Clone anti-colocation" - do_test inc9 "Non-unique clone" - do_test inc10 "Non-unique clone (stop)" --do_test inc11 "Primitive colocation with clones" --do_test inc12 "Clone shutdown" -+do_test inc11 "Primitive colocation with clones" -+do_test inc12 "Clone shutdown" - do_test cloned-group "Make sure only the correct number of cloned groups are started" - do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" - do_test clone-max-zero "Orphan processing with clone-max=0" -@@ -300,7 +303,7 @@ do_test clone-colocate-instance-1 "Colocation with a specific clone instance (ne - do_test clone-colocate-instance-2 "Colocation with a specific clone instance" - do_test clone-order-instance "Ordering with specific clone instances" - do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" --do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" -+do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" - do_test bug-lf-2544 "Balanced clone placement" - do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" - do_test bug-lf-2574 "Avoid clone shuffle" -@@ -323,7 +326,7 @@ do_test novell-239082 "Demote/Promote ordering" - do_test novell-239087 "Stable master placement" - do_test master-12 "Promotion based solely on rsc_location constraints" - do_test master-13 "Include 
preferences of colocated resources when placing master" --do_test master-demote "Ordering when actions depends on demoting a slave resource" -+do_test master-demote "Ordering when actions depends on demoting a slave resource" - do_test master-ordering "Prevent resources from starting that need a master" - do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" - do_test master-group "Promotion of cloned groups" -@@ -341,7 +344,7 @@ do_test master-promotion-constraint "Mandatory master colocation constraints" - do_test unmanaged-master "Ensure role is preserved for unmanaged resources" - do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" - do_test master-demote-2 "Demote does not clear past failure" --do_test master-move "Move master based on failure of colocated group" -+do_test master-move "Move master based on failure of colocated group" - do_test master-probed-score "Observe the promotion score of probed resources" - do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint" - do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" -@@ -349,6 +352,8 @@ do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted w - do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint" - do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion." - do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive" -+do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score" -+do_test master-demote-block "Block promotion if demote fails with on-fail=block" - - echo "" - do_test history-1 "Correctly parse stateful-1 resource state" -@@ -360,6 +365,10 @@ do_test managed-2 "Not managed - up " - do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" - do_test bug-5028-detach "Ensure detach still works" - do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" -+do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged " -+do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged " -+do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged " -+do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged " - - echo "" - do_test interleave-0 "Interleave (reference)" -@@ -433,6 +442,7 @@ do_test bug-5025-2 "Make sure clear failcount action isn't set when config does - do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" - do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed." 
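
The next regression.sh hunk adds failcount-block, which pins down that a failure recorded for an operation configured with on-fail=block must not be aged out by failure-timeout. Roughly, the rule being tested looks like this (a sketch with invented field names, not the policy engine's real data model):

    #include <stdio.h>
    #include <string.h>

    /* A remembered operation failure; on_fail mirrors the CIB's
     * on-fail attribute ("restart", "block", "stop", ...). */
    struct failed_op {
        const char *rsc;
        const char *on_fail;
        long age_s;          /* seconds since the failure occurred */
    };

    static int is_expired(const struct failed_op *f, long failure_timeout_s)
    {
        if (strcmp(f->on_fail, "block") == 0)
            return 0;        /* blocked failures are never auto-expired */
        return f->age_s > failure_timeout_s;
    }

    int main(void)
    {
        struct failed_op a = { "rsc1", "restart", 900 };
        struct failed_op b = { "rsc2", "block",   900 };
        printf("%s expired: %d\n", a.rsc, is_expired(&a, 600));  /* 1 */
        printf("%s expired: %d\n", b.rsc, is_expired(&b, 600));  /* 0 */
        return 0;
    }
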
- do_test failcount "Ensure failcounts are correctly expired" -+do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present" - do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart" - do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop" - do_test bug-5059 "No need to restart p_stateful1:*" -@@ -567,7 +577,7 @@ do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" - do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" - do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" - do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" -- -+ - echo"" - do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" - do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" -@@ -594,8 +604,8 @@ do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" - do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" - do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" - do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" -- --echo"" -+ -+echo"" - do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" - do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" - do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" -@@ -673,6 +683,7 @@ do_test container-group-4 "Container in group - reached migration-threshold" - echo "" - do_test whitebox-fail1 "Fail whitebox container rsc." - do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection." -+do_test whitebox-fail3 "Failed containers should not run nested on remote nodes." 
- do_test whitebox-start "Start whitebox container with resources assigned to it" - do_test whitebox-stop "Stop whitebox container with resources assigned to it" - do_test whitebox-move "Move whitebox container with resources assigned to it" -diff --git a/pengine/test10/1360.summary b/pengine/test10/1360.summary -index 04ec941..946c828 100644 ---- a/pengine/test10/1360.summary -+++ b/pengine/test10/1360.summary -@@ -6,6 +6,7 @@ Online: [ ssgtest1a ssgtest1b ] - VIP (ocf::testing:VIP-RIP.sh): Started ssgtest1a - Clone Set: dolly [dollies] - Started: [ ssgtest1a ] -+ Stopped: [ ssgtest1b ] - - Transition Summary: - * Move dollies:0 (Started ssgtest1a -> ssgtest1b) -@@ -26,4 +27,5 @@ Online: [ ssgtest1a ssgtest1b ] - VIP (ocf::testing:VIP-RIP.sh): Started ssgtest1a - Clone Set: dolly [dollies] - Started: [ ssgtest1b ] -+ Stopped: [ ssgtest1a ] - -diff --git a/pengine/test10/bug-1572-1.scores b/pengine/test10/bug-1572-1.scores -index 30d1aed..1aecc90 100644 ---- a/pengine/test10/bug-1572-1.scores -+++ b/pengine/test10/bug-1572-1.scores -@@ -2,7 +2,7 @@ Allocation scores: - clone_color: ms_drbd_7788 allocation score on arc-dknightlx: 0 - clone_color: ms_drbd_7788 allocation score on arc-tkincaidlx.wsicorp.com: 0 - clone_color: rsc_drbd_7788:0 allocation score on arc-dknightlx: 1 --clone_color: rsc_drbd_7788:0 allocation score on arc-tkincaidlx.wsicorp.com: 100 -+clone_color: rsc_drbd_7788:0 allocation score on arc-tkincaidlx.wsicorp.com: 0 - clone_color: rsc_drbd_7788:1 allocation score on arc-dknightlx: 0 - clone_color: rsc_drbd_7788:1 allocation score on arc-tkincaidlx.wsicorp.com: 101 - group_color: IPaddr_147_81_84_133 allocation score on arc-dknightlx: 0 -diff --git a/pengine/test10/bug-1572-1.summary b/pengine/test10/bug-1572-1.summary -index 7446e92..4280f7b 100644 ---- a/pengine/test10/bug-1572-1.summary -+++ b/pengine/test10/bug-1572-1.summary -@@ -76,7 +76,7 @@ Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] - - Master/Slave Set: ms_drbd_7788 [rsc_drbd_7788] - Masters: [ arc-tkincaidlx.wsicorp.com ] -- Stopped: [ rsc_drbd_7788:1 ] -+ Stopped: [ arc-dknightlx ] - Resource Group: grp_pgsql_mirror - fs_mirror (ocf::heartbeat:Filesystem): Started arc-tkincaidlx.wsicorp.com - pgsql_5555 (ocf::heartbeat:pgsql): Started arc-tkincaidlx.wsicorp.com -diff --git a/pengine/test10/bug-1572-2.summary b/pengine/test10/bug-1572-2.summary -index d93372d..6174027 100644 ---- a/pengine/test10/bug-1572-2.summary -+++ b/pengine/test10/bug-1572-2.summary -@@ -52,7 +52,7 @@ Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] - - Master/Slave Set: ms_drbd_7788 [rsc_drbd_7788] - Slaves: [ arc-tkincaidlx.wsicorp.com ] -- Stopped: [ rsc_drbd_7788:1 ] -+ Stopped: [ arc-dknightlx ] - Resource Group: grp_pgsql_mirror - fs_mirror (ocf::heartbeat:Filesystem): Stopped - pgsql_5555 (ocf::heartbeat:pgsql): Stopped -diff --git a/pengine/test10/bug-1765.scores b/pengine/test10/bug-1765.scores -index af90cc6..28f19e1 100644 ---- a/pengine/test10/bug-1765.scores -+++ b/pengine/test10/bug-1765.scores -@@ -1,9 +1,9 @@ - Allocation scores: - clone_color: drbd0:0 allocation score on sles236: 76 - clone_color: drbd0:0 allocation score on sles238: 75 --clone_color: drbd0:1 allocation score on sles236: 10 -+clone_color: drbd0:1 allocation score on sles236: 0 - clone_color: drbd0:1 allocation score on sles238: 5 --clone_color: drbd1:0 allocation score on sles236: 10 -+clone_color: drbd1:0 allocation score on sles236: 0 - clone_color: drbd1:0 allocation score on sles238: 76 - clone_color: drbd1:1 allocation score on sles236: 
76 - clone_color: drbd1:1 allocation score on sles238: 0 -@@ -21,7 +21,7 @@ native_color: drbd0:0 allocation score on sles236: 76 - native_color: drbd0:0 allocation score on sles238: 75 - native_color: drbd0:1 allocation score on sles236: -INFINITY - native_color: drbd0:1 allocation score on sles238: 5 --native_color: drbd1:0 allocation score on sles236: 10 -+native_color: drbd1:0 allocation score on sles236: 0 - native_color: drbd1:0 allocation score on sles238: 76 - native_color: drbd1:1 allocation score on sles236: 76 - native_color: drbd1:1 allocation score on sles238: -INFINITY -diff --git a/pengine/test10/bug-1765.summary b/pengine/test10/bug-1765.summary -index d3b8c41..593bac3 100644 ---- a/pengine/test10/bug-1765.summary -+++ b/pengine/test10/bug-1765.summary -@@ -4,7 +4,7 @@ Online: [ sles236 sles238 ] - - Master/Slave Set: ms-drbd0 [drbd0] - Masters: [ sles236 ] -- Stopped: [ drbd0:1 ] -+ Stopped: [ sles238 ] - Master/Slave Set: ms-drbd1 [drbd1] - Masters: [ sles236 ] - Slaves: [ sles238 ] -diff --git a/pengine/test10/bug-5014-CLONE-A-start-B-start.summary b/pengine/test10/bug-5014-CLONE-A-start-B-start.summary -index 9dad260..84a4e4a 100644 ---- a/pengine/test10/bug-5014-CLONE-A-start-B-start.summary -+++ b/pengine/test10/bug-5014-CLONE-A-start-B-start.summary -@@ -3,9 +3,9 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: clone1 [ClusterIP] -- Stopped: [ ClusterIP:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: clone2 [ClusterIP2] -- Stopped: [ ClusterIP2:0 ] -+ Stopped: [ fc16-builder ] - - Transition Summary: - * Start ClusterIP:0 (fc16-builder) -diff --git a/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary b/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary -index e9c18f9..1020124 100644 ---- a/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary -+++ b/pengine/test10/bug-5014-CLONE-A-stop-B-started.summary -@@ -20,7 +20,7 @@ Revised cluster status: - Online: [ fc16-builder ] - - Clone Set: clone1 [ClusterIP] -- Stopped: [ ClusterIP:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: clone2 [ClusterIP2] - Started: [ fc16-builder ] - -diff --git a/pengine/test10/bug-5025-1.summary b/pengine/test10/bug-5025-1.summary -index 6324f20..9f9baa2 100644 ---- a/pengine/test10/bug-5025-1.summary -+++ b/pengine/test10/bug-5025-1.summary -@@ -10,7 +10,7 @@ Transition Summary: - * Reload A (Started fc16-builder) - - Executing cluster transition: -- * Cluster action: clear_failcount on fc16-builder -+ * Cluster action: clear_failcount for A on fc16-builder - * Resource action: A reload on fc16-builder - * Resource action: A monitor=30000 on fc16-builder - -diff --git a/pengine/test10/bug-5025-3.summary b/pengine/test10/bug-5025-3.summary -index de1d654..0d843d2 100644 ---- a/pengine/test10/bug-5025-3.summary -+++ b/pengine/test10/bug-5025-3.summary -@@ -12,7 +12,7 @@ Transition Summary: - - Executing cluster transition: - * Resource action: A stop on fc16-builder -- * Cluster action: clear_failcount on fc16-builder -+ * Cluster action: clear_failcount for A on fc16-builder - * Resource action: A start on fc16-builder - * Resource action: A monitor=30000 on fc16-builder - * Pseudo action: all_stopped -diff --git a/pengine/test10/bug-5025-4.summary b/pengine/test10/bug-5025-4.summary -index daa6bea..f21a5e4 100644 ---- a/pengine/test10/bug-5025-4.summary -+++ b/pengine/test10/bug-5025-4.summary -@@ -10,7 +10,7 @@ Transition Summary: - - Executing cluster transition: - * Resource action: remote-node start on 18builder -- * Cluster action: clear_failcount on 18builder 
-+ * Cluster action: clear_failcount for remote-node on 18builder - * Resource action: remote-node monitor=30000 on 18builder - - Revised cluster status: -diff --git a/pengine/test10/bug-5028-bottom.dot b/pengine/test10/bug-5028-bottom.dot -index 93eef66..e25e097 100644 ---- a/pengine/test10/bug-5028-bottom.dot -+++ b/pengine/test10/bug-5028-bottom.dot -@@ -1,16 +1,10 @@ - digraph "g" { - "all_stopped" [ style=bold color="green" fontcolor="orange"] - "do_shutdown bl460g6a" [ style=dashed color="red" fontcolor="black"] --"dummy-g_running_0" [ style=dashed color="red" fontcolor="orange"] --"dummy-g_start_0" -> "dummy-g_running_0" [ style = dashed] --"dummy-g_start_0" [ style=bold color="green" fontcolor="orange"] - "dummy-g_stop_0" -> "dummy-g_stopped_0" [ style = dashed] - "dummy-g_stop_0" -> "dummy02_stop_0 bl460g6a" [ style = bold] - "dummy-g_stop_0" [ style=bold color="green" fontcolor="orange"] --"dummy-g_stopped_0" -> "dummy-g_start_0" [ style = dashed] - "dummy-g_stopped_0" [ style=dashed color="red" fontcolor="orange"] --"dummy02_start_0 " -> "dummy-g_running_0" [ style = dashed] --"dummy02_start_0 " [ style=dashed color="red" fontcolor="black"] - "dummy02_stop_0 bl460g6a" -> "all_stopped" [ style = bold] - "dummy02_stop_0 bl460g6a" -> "do_shutdown bl460g6a" [ style = dashed] - "dummy02_stop_0 bl460g6a" -> "dummy-g_stopped_0" [ style = dashed] -diff --git a/pengine/test10/bug-5028-bottom.exp b/pengine/test10/bug-5028-bottom.exp -index 1264da7..900c024 100644 ---- a/pengine/test10/bug-5028-bottom.exp -+++ b/pengine/test10/bug-5028-bottom.exp [three hunks (@@ -1,7 +1,7 @@, @@ -9,26 +9,18 @@, @@ -36,7 +28,7 @@) of transition-graph XML; the element text was lost in extraction and only bare -/+ markers survived] -diff --git a/pengine/test10/bug-5028-bottom.summary b/pengine/test10/bug-5028-bottom.summary -index 6e3d895..479fb18 100644 ---- a/pengine/test10/bug-5028-bottom.summary -+++ b/pengine/test10/bug-5028-bottom.summary -@@ -11,7 +11,6 @@ Transition Summary: - - Executing cluster transition: - * Pseudo action: dummy-g_stop_0 -- * Pseudo action: dummy-g_start_0 - * Resource action: dummy02 stop on bl460g6a - * Pseudo action: all_stopped - -diff --git a/pengine/test10/bug-5028-detach.exp b/pengine/test10/bug-5028-detach.exp -index b91b31d..1c3374b 100644 ---- a/pengine/test10/bug-5028-detach.exp -+++ b/pengine/test10/bug-5028-detach.exp [one hunk (@@ -1,7 +1,7 @@); XML element text lost in extraction] -diff --git a/pengine/test10/bug-5028.exp b/pengine/test10/bug-5028.exp -index d7b91d1..249c666 100644 ---- a/pengine/test10/bug-5028.exp -+++ b/pengine/test10/bug-5028.exp [two hunks (@@ -1,7 +1,7 @@ and @@ -9,7 +9,7 @@); XML element text lost in extraction] -diff --git a/pengine/test10/bug-5059.scores b/pengine/test10/bug-5059.scores -index 845f70a..d8295c6 100644 ---- a/pengine/test10/bug-5059.scores -+++ b/pengine/test10/bug-5059.scores -@@ -4,7 +4,7 @@ clone_color: c_dummy allocation score on gluster02.h: 0 - clone_color: c_dummy allocation score on gluster03.h: 0 - clone_color: c_dummy allocation score on gluster04.h: 0 - clone_color: g_stateful:0 allocation score on gluster01.h: 5 --clone_color: g_stateful:0 allocation score on gluster02.h: 10 -+clone_color: g_stateful:0 allocation score on gluster02.h: 0 - clone_color: g_stateful:0 allocation score on gluster03.h: 0 - clone_color: g_stateful:0 allocation score on gluster04.h: 0 - clone_color: g_stateful:1 allocation score on gluster01.h: 0 -diff --git a/pengine/test10/bug-5059.summary b/pengine/test10/bug-5059.summary -index b854e43..b93e4e6 100644 ----
a/pengine/test10/bug-5059.summary -+++ b/pengine/test10/bug-5059.summary -@@ -6,13 +6,15 @@ OFFLINE: [ gluster04.h ] - - Master/Slave Set: ms_stateful [g_stateful] - Resource Group: g_stateful:0 -- p_stateful1:0 (ocf::pacemaker:Stateful): Started gluster01.h -- p_stateful2:0 (ocf::pacemaker:Stateful): Stopped -+ p_stateful1 (ocf::pacemaker:Stateful): Started gluster01.h -+ p_stateful2 (ocf::pacemaker:Stateful): Stopped - Resource Group: g_stateful:1 -- p_stateful1:1 (ocf::pacemaker:Stateful): Started gluster02.h -- p_stateful2:1 (ocf::pacemaker:Stateful): Stopped -+ p_stateful1 (ocf::pacemaker:Stateful): Started gluster02.h -+ p_stateful2 (ocf::pacemaker:Stateful): Stopped -+ Stopped: [ gluster03.h gluster04.h ] - Clone Set: c_dummy [p_dummy1] - Started: [ gluster01.h gluster02.h ] -+ Stopped: [ gluster03.h gluster04.h ] - - Transition Summary: - * Promote p_stateful1:0 (Slave -> Master gluster01.h) -@@ -70,6 +72,8 @@ OFFLINE: [ gluster04.h ] - Master/Slave Set: ms_stateful [g_stateful] - Masters: [ gluster01.h ] - Slaves: [ gluster02.h ] -+ Stopped: [ gluster03.h gluster04.h ] - Clone Set: c_dummy [p_dummy1] - Started: [ gluster01.h gluster02.h ] -+ Stopped: [ gluster03.h gluster04.h ] - -diff --git a/pengine/test10/bug-5069-op-disabled.summary b/pengine/test10/bug-5069-op-disabled.summary -index c7c47b9..6524e8e 100644 ---- a/pengine/test10/bug-5069-op-disabled.summary -+++ b/pengine/test10/bug-5069-op-disabled.summary -@@ -8,7 +8,7 @@ OFFLINE: [ fc16-builder fc16-builder3 ] - Transition Summary: - - Executing cluster transition: -- * Cluster action: clear_failcount on fc16-builder2 -+ * Cluster action: clear_failcount for A on fc16-builder2 - * Resource action: A cancel=10000 on fc16-builder2 - - Revised cluster status: -diff --git a/pengine/test10/bug-5140-require-all-false.dot b/pengine/test10/bug-5140-require-all-false.dot -new file mode 100644 -index 0000000..8e7f299 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.dot -@@ -0,0 +1,25 @@ -+digraph "g" { -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"baseclone_stop_0" -> "baseclone_stopped_0" [ style = bold] -+"baseclone_stop_0" -> "basegrp:0_stop_0" [ style = bold] -+"baseclone_stop_0" [ style=bold color="green" fontcolor="orange"] -+"baseclone_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"basegrp:0_stop_0" -> "basegrp:0_stopped_0" [ style = bold] -+"basegrp:0_stop_0" -> "clvmd_stop_0 hex-2" [ style = bold] -+"basegrp:0_stop_0" -> "dlm_stop_0 hex-2" [ style = bold] -+"basegrp:0_stop_0" -> "o2cb_stop_0 hex-2" [ style = bold] -+"basegrp:0_stop_0" [ style=bold color="green" fontcolor="orange"] -+"basegrp:0_stopped_0" -> "baseclone_stopped_0" [ style = bold] -+"basegrp:0_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"clvmd_stop_0 hex-2" -> "all_stopped" [ style = bold] -+"clvmd_stop_0 hex-2" -> "basegrp:0_stopped_0" [ style = bold] -+"clvmd_stop_0 hex-2" -> "dlm_stop_0 hex-2" [ style = bold] -+"clvmd_stop_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"dlm_stop_0 hex-2" -> "all_stopped" [ style = bold] -+"dlm_stop_0 hex-2" -> "basegrp:0_stopped_0" [ style = bold] -+"dlm_stop_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"o2cb_stop_0 hex-2" -> "all_stopped" [ style = bold] -+"o2cb_stop_0 hex-2" -> "basegrp:0_stopped_0" [ style = bold] -+"o2cb_stop_0 hex-2" -> "clvmd_stop_0 hex-2" [ style = bold] -+"o2cb_stop_0 hex-2" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/bug-5140-require-all-false.exp 
b/pengine/test10/bug-5140-require-all-false.exp -new file mode 100644 -index 0000000..a2433e1 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.exp -@@ -0,0 +1,122 @@ [122 added lines of transition-graph XML; the element text was lost in extraction and only bare + markers survived] -diff --git a/pengine/test10/bug-5140-require-all-false.scores b/pengine/test10/bug-5140-require-all-false.scores -new file mode 100644 -index 0000000..0bbb768 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.scores -@@ -0,0 +1,275 @@ -+Allocation scores: -+clone_color: baseclone allocation score on hex-1: 0 -+clone_color: baseclone allocation score on hex-2: 0 -+clone_color: baseclone allocation score on hex-3: 0 -+clone_color: basegrp:0 allocation score on hex-1: 0 -+clone_color: basegrp:0 allocation score on hex-2: 0 -+clone_color: basegrp:0 allocation score on hex-3: 0 -+clone_color: basegrp:1 allocation score on hex-1: 0 -+clone_color: basegrp:1 allocation score on hex-2: 0 -+clone_color: basegrp:1 allocation score on hex-3: 0 -+clone_color: basegrp:2 allocation score on hex-1: 0 -+clone_color: basegrp:2 allocation score on hex-2: 0 -+clone_color: basegrp:2 allocation score on hex-3: 0 -+clone_color: clvmd:0 allocation score on hex-1: 0 -+clone_color: clvmd:0 allocation score on hex-2: 1 -+clone_color: clvmd:0 allocation score on hex-3: 0 -+clone_color: clvmd:1 allocation score on hex-1: 0 -+clone_color: clvmd:1 allocation score on hex-2: 0 -+clone_color: clvmd:1 allocation score on hex-3: 0 -+clone_color: clvmd:2 allocation score on hex-1: 0 -+clone_color: clvmd:2 allocation score on hex-2: 0 -+clone_color: clvmd:2 allocation score on hex-3: 0 -+clone_color: dlm:0 allocation score on hex-1: 0 -+clone_color: dlm:0 allocation score on hex-2: 1 -+clone_color: dlm:0 allocation score on hex-3: 0 -+clone_color: dlm:1 allocation score on hex-1: 0 -+clone_color: dlm:1 allocation score on hex-2: 0 -+clone_color: dlm:1 allocation score on hex-3: 0 -+clone_color: dlm:2 allocation score on hex-1: 0 -+clone_color: dlm:2 allocation score on hex-2: 0 -+clone_color: dlm:2 allocation score on hex-3: 0 -+clone_color: drbd-r0:0 allocation score on hex-1: 0 -+clone_color: drbd-r0:0 allocation score on hex-2: 0 -+clone_color: drbd-r0:0 allocation score on hex-3: -INFINITY -+clone_color: drbd-r0:1 allocation score on hex-1: 0 -+clone_color: drbd-r0:1 allocation score on hex-2: 0 -+clone_color: drbd-r0:1 allocation score on hex-3: -INFINITY -+clone_color: drbd-r1:0 allocation score on hex-1: -INFINITY -+clone_color: drbd-r1:0 allocation score on hex-2: 0 -+clone_color: drbd-r1:0 allocation score on hex-3: 0 -+clone_color: drbd-r1:1 allocation score on hex-1: -INFINITY -+clone_color: drbd-r1:1 allocation score on hex-2: 0 -+clone_color: drbd-r1:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:0 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:0 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:0 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-1:2 allocation score on hex-1: 0 -+clone_color: fs-ocfs-1:2 allocation score on hex-2: 0 -+clone_color: fs-ocfs-1:2 allocation
score on hex-3: 0 -+clone_color: fs-ocfs-2:0 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:0 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:0 allocation score on hex-3: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:1 allocation score on hex-3: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-1: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-2: 0 -+clone_color: fs-ocfs-2:2 allocation score on hex-3: 0 -+clone_color: fs2 allocation score on hex-1: 0 -+clone_color: fs2 allocation score on hex-2: 0 -+clone_color: fs2 allocation score on hex-3: 0 -+clone_color: ms-r0 allocation score on hex-1: 0 -+clone_color: ms-r0 allocation score on hex-2: 0 -+clone_color: ms-r0 allocation score on hex-3: -INFINITY -+clone_color: ms-r1 allocation score on hex-1: -INFINITY -+clone_color: ms-r1 allocation score on hex-2: 0 -+clone_color: ms-r1 allocation score on hex-3: 0 -+clone_color: o2cb:0 allocation score on hex-1: 0 -+clone_color: o2cb:0 allocation score on hex-2: 1 -+clone_color: o2cb:0 allocation score on hex-3: 0 -+clone_color: o2cb:1 allocation score on hex-1: 0 -+clone_color: o2cb:1 allocation score on hex-2: 0 -+clone_color: o2cb:1 allocation score on hex-3: 0 -+clone_color: o2cb:2 allocation score on hex-1: 0 -+clone_color: o2cb:2 allocation score on hex-2: 0 -+clone_color: o2cb:2 allocation score on hex-3: 0 -+clone_color: vg1:0 allocation score on hex-1: 0 -+clone_color: vg1:0 allocation score on hex-2: 0 -+clone_color: vg1:0 allocation score on hex-3: 0 -+clone_color: vg1:1 allocation score on hex-1: 0 -+clone_color: vg1:1 allocation score on hex-2: 0 -+clone_color: vg1:1 allocation score on hex-3: 0 -+clone_color: vg1:2 allocation score on hex-1: 0 -+clone_color: vg1:2 allocation score on hex-2: 0 -+clone_color: vg1:2 allocation score on hex-3: 0 -+drbd-r0:0 promotion score on none: 0 -+drbd-r0:1 promotion score on none: 0 -+drbd-r1:0 promotion score on none: 0 -+drbd-r1:1 promotion score on none: 0 -+group_color: basegrp:0 allocation score on hex-1: -INFINITY -+group_color: basegrp:0 allocation score on hex-2: -INFINITY -+group_color: basegrp:0 allocation score on hex-3: -INFINITY -+group_color: basegrp:1 allocation score on hex-1: -INFINITY -+group_color: basegrp:1 allocation score on hex-2: -INFINITY -+group_color: basegrp:1 allocation score on hex-3: -INFINITY -+group_color: basegrp:2 allocation score on hex-1: -INFINITY -+group_color: basegrp:2 allocation score on hex-2: -INFINITY -+group_color: basegrp:2 allocation score on hex-3: -INFINITY -+group_color: clvmd:0 allocation score on hex-1: -INFINITY -+group_color: clvmd:0 allocation score on hex-2: -INFINITY -+group_color: clvmd:0 allocation score on hex-3: -INFINITY -+group_color: clvmd:1 allocation score on hex-1: -INFINITY -+group_color: clvmd:1 allocation score on hex-2: -INFINITY -+group_color: clvmd:1 allocation score on hex-3: -INFINITY -+group_color: clvmd:2 allocation score on hex-1: -INFINITY -+group_color: clvmd:2 allocation score on hex-2: -INFINITY -+group_color: clvmd:2 allocation score on hex-3: -INFINITY -+group_color: dlm:0 allocation score on hex-1: -INFINITY -+group_color: dlm:0 allocation score on hex-2: -INFINITY -+group_color: dlm:0 allocation score on hex-3: -INFINITY -+group_color: dlm:1 allocation score on hex-1: -INFINITY -+group_color: dlm:1 allocation score on hex-2: -INFINITY -+group_color: dlm:1 allocation score on hex-3: -INFINITY -+group_color: dlm:2 allocation score on hex-1: -INFINITY 
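
A -INFINITY entry in these .scores fixtures is an absolute ban, not merely a very low preference: when the policy engine combines a resource's scores for a node, minus-infinity dominates any finite value instead of summing with it. A sketch of that saturating merge rule (the INF constant and function name are illustrative, not Pacemaker's actual definitions):

    #include <stdio.h>

    #define INF 1000000   /* illustrative stand-in for score INFINITY */

    static int merge_scores(int a, int b)
    {
        if (a == -INF || b == -INF)
            return -INF;               /* a hard ban always wins */
        if (a == INF || b == INF)
            return INF;
        long sum = (long)a + (long)b;
        if (sum >= INF)  return INF;   /* clamp instead of overflowing */
        if (sum <= -INF) return -INF;
        return (int)sum;
    }

    int main(void)
    {
        printf("%d\n", merge_scores(10001, 1));     /* finite values add */
        printf("%d\n", merge_scores(10001, -INF));  /* ban wins: -1000000 */
        return 0;
    }
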
-+group_color: dlm:2 allocation score on hex-2: -INFINITY -+group_color: dlm:2 allocation score on hex-3: -INFINITY -+group_color: dummy1 allocation score on hex-1: 0 -+group_color: dummy1 allocation score on hex-2: 0 -+group_color: dummy1 allocation score on hex-3: 0 -+group_color: dummy2 allocation score on hex-1: 0 -+group_color: dummy2 allocation score on hex-2: 0 -+group_color: dummy2 allocation score on hex-3: 0 -+group_color: fs-md0 allocation score on hex-1: 0 -+group_color: fs-md0 allocation score on hex-2: 0 -+group_color: fs-md0 allocation score on hex-3: 0 -+group_color: fs-ocfs-1:0 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:0 allocation score on hex-2: -INFINITY -+group_color: fs-ocfs-1:0 allocation score on hex-3: -INFINITY -+group_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:1 allocation score on hex-2: -INFINITY -+group_color: fs-ocfs-1:1 allocation score on hex-3: -INFINITY -+group_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY -+group_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY -+group_color: fs-ocfs-1:2 allocation score on hex-3: -INFINITY -+group_color: fs-r0 allocation score on hex-1: 0 -+group_color: fs-r0 allocation score on hex-2: -INFINITY -+group_color: fs-r0 allocation score on hex-3: 0 -+group_color: md0 allocation score on hex-1: 0 -+group_color: md0 allocation score on hex-2: 0 -+group_color: md0 allocation score on hex-3: 0 -+group_color: md0-group allocation score on hex-1: 0 -+group_color: md0-group allocation score on hex-2: 0 -+group_color: md0-group allocation score on hex-3: 0 -+group_color: o2cb:0 allocation score on hex-1: -INFINITY -+group_color: o2cb:0 allocation score on hex-2: -INFINITY -+group_color: o2cb:0 allocation score on hex-3: -INFINITY -+group_color: o2cb:1 allocation score on hex-1: -INFINITY -+group_color: o2cb:1 allocation score on hex-2: -INFINITY -+group_color: o2cb:1 allocation score on hex-3: -INFINITY -+group_color: o2cb:2 allocation score on hex-1: -INFINITY -+group_color: o2cb:2 allocation score on hex-2: -INFINITY -+group_color: o2cb:2 allocation score on hex-3: -INFINITY -+group_color: r0-group allocation score on hex-1: 0 -+group_color: r0-group allocation score on hex-2: 0 -+group_color: r0-group allocation score on hex-3: 0 -+group_color: vg-md0 allocation score on hex-1: 0 -+group_color: vg-md0 allocation score on hex-2: 0 -+group_color: vg-md0 allocation score on hex-3: 0 -+group_color: vg1:0 allocation score on hex-1: -INFINITY -+group_color: vg1:0 allocation score on hex-2: -INFINITY -+group_color: vg1:0 allocation score on hex-3: -INFINITY -+group_color: vg1:1 allocation score on hex-1: -INFINITY -+group_color: vg1:1 allocation score on hex-2: -INFINITY -+group_color: vg1:1 allocation score on hex-3: -INFINITY -+group_color: vg1:2 allocation score on hex-1: -INFINITY -+group_color: vg1:2 allocation score on hex-2: -INFINITY -+group_color: vg1:2 allocation score on hex-3: -INFINITY -+native_color: cluster-md0 allocation score on hex-1: 0 -+native_color: cluster-md0 allocation score on hex-2: 0 -+native_color: cluster-md0 allocation score on hex-3: 0 -+native_color: clvmd:0 allocation score on hex-1: -INFINITY -+native_color: clvmd:0 allocation score on hex-2: -INFINITY -+native_color: clvmd:0 allocation score on hex-3: -INFINITY -+native_color: clvmd:1 allocation score on hex-1: -INFINITY -+native_color: clvmd:1 allocation score on hex-2: -INFINITY -+native_color: clvmd:1 allocation score on hex-3: -INFINITY -+native_color: clvmd:2 allocation 
score on hex-1: -INFINITY -+native_color: clvmd:2 allocation score on hex-2: -INFINITY -+native_color: clvmd:2 allocation score on hex-3: -INFINITY -+native_color: dlm:0 allocation score on hex-1: -INFINITY -+native_color: dlm:0 allocation score on hex-2: -INFINITY -+native_color: dlm:0 allocation score on hex-3: -INFINITY -+native_color: dlm:1 allocation score on hex-1: -INFINITY -+native_color: dlm:1 allocation score on hex-2: -INFINITY -+native_color: dlm:1 allocation score on hex-3: -INFINITY -+native_color: dlm:2 allocation score on hex-1: -INFINITY -+native_color: dlm:2 allocation score on hex-2: -INFINITY -+native_color: dlm:2 allocation score on hex-3: -INFINITY -+native_color: drbd-r0:0 allocation score on hex-1: -INFINITY -+native_color: drbd-r0:0 allocation score on hex-2: -INFINITY -+native_color: drbd-r0:0 allocation score on hex-3: -INFINITY -+native_color: drbd-r0:1 allocation score on hex-1: -INFINITY -+native_color: drbd-r0:1 allocation score on hex-2: -INFINITY -+native_color: drbd-r0:1 allocation score on hex-3: -INFINITY -+native_color: drbd-r1:0 allocation score on hex-1: -INFINITY -+native_color: drbd-r1:0 allocation score on hex-2: -INFINITY -+native_color: drbd-r1:0 allocation score on hex-3: -INFINITY -+native_color: drbd-r1:1 allocation score on hex-1: -INFINITY -+native_color: drbd-r1:1 allocation score on hex-2: -INFINITY -+native_color: drbd-r1:1 allocation score on hex-3: -INFINITY -+native_color: dummy1 allocation score on hex-1: -INFINITY -+native_color: dummy1 allocation score on hex-2: -INFINITY -+native_color: dummy1 allocation score on hex-3: -INFINITY -+native_color: dummy2 allocation score on hex-1: -INFINITY -+native_color: dummy2 allocation score on hex-2: -INFINITY -+native_color: dummy2 allocation score on hex-3: -INFINITY -+native_color: dummy3 allocation score on hex-1: -INFINITY -+native_color: dummy3 allocation score on hex-2: -INFINITY -+native_color: dummy3 allocation score on hex-3: -INFINITY -+native_color: dummy4 allocation score on hex-1: -INFINITY -+native_color: dummy4 allocation score on hex-2: -INFINITY -+native_color: dummy4 allocation score on hex-3: -INFINITY -+native_color: dummy5 allocation score on hex-1: 0 -+native_color: dummy5 allocation score on hex-2: 0 -+native_color: dummy5 allocation score on hex-3: 0 -+native_color: dummy6 allocation score on hex-1: 0 -+native_color: dummy6 allocation score on hex-2: 0 -+native_color: dummy6 allocation score on hex-3: 0 -+native_color: fencing allocation score on hex-1: 0 -+native_color: fencing allocation score on hex-2: 0 -+native_color: fencing allocation score on hex-3: 0 -+native_color: fs-md0 allocation score on hex-1: -INFINITY -+native_color: fs-md0 allocation score on hex-2: -INFINITY -+native_color: fs-md0 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-1:0 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-1:0 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-1:0 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-1:1 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-1:1 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-1:2 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-2:0 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-2:0 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-2:0 
allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-2:1 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-2:1 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-2:1 allocation score on hex-3: -INFINITY -+native_color: fs-ocfs-2:2 allocation score on hex-1: -INFINITY -+native_color: fs-ocfs-2:2 allocation score on hex-2: -INFINITY -+native_color: fs-ocfs-2:2 allocation score on hex-3: -INFINITY -+native_color: fs-r0 allocation score on hex-1: -INFINITY -+native_color: fs-r0 allocation score on hex-2: -INFINITY -+native_color: fs-r0 allocation score on hex-3: -INFINITY -+native_color: fs-xfs-1 allocation score on hex-1: -INFINITY -+native_color: fs-xfs-1 allocation score on hex-2: -INFINITY -+native_color: fs-xfs-1 allocation score on hex-3: -INFINITY -+native_color: md0 allocation score on hex-1: 0 -+native_color: md0 allocation score on hex-2: 0 -+native_color: md0 allocation score on hex-3: 0 -+native_color: o2cb:0 allocation score on hex-1: -INFINITY -+native_color: o2cb:0 allocation score on hex-2: -INFINITY -+native_color: o2cb:0 allocation score on hex-3: -INFINITY -+native_color: o2cb:1 allocation score on hex-1: -INFINITY -+native_color: o2cb:1 allocation score on hex-2: -INFINITY -+native_color: o2cb:1 allocation score on hex-3: -INFINITY -+native_color: o2cb:2 allocation score on hex-1: -INFINITY -+native_color: o2cb:2 allocation score on hex-2: -INFINITY -+native_color: o2cb:2 allocation score on hex-3: -INFINITY -+native_color: vg-md0 allocation score on hex-1: -INFINITY -+native_color: vg-md0 allocation score on hex-2: -INFINITY -+native_color: vg-md0 allocation score on hex-3: -INFINITY -+native_color: vg1:0 allocation score on hex-1: -INFINITY -+native_color: vg1:0 allocation score on hex-2: -INFINITY -+native_color: vg1:0 allocation score on hex-3: -INFINITY -+native_color: vg1:1 allocation score on hex-1: -INFINITY -+native_color: vg1:1 allocation score on hex-2: -INFINITY -+native_color: vg1:1 allocation score on hex-3: -INFINITY -+native_color: vg1:2 allocation score on hex-1: -INFINITY -+native_color: vg1:2 allocation score on hex-2: -INFINITY -+native_color: vg1:2 allocation score on hex-3: -INFINITY -diff --git a/pengine/test10/bug-5140-require-all-false.summary b/pengine/test10/bug-5140-require-all-false.summary -new file mode 100644 -index 0000000..b9d38bb ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.summary -@@ -0,0 +1,80 @@ -+ -+Current cluster status: -+Node hex-1: standby -+Node hex-2: standby -+Node hex-3: OFFLINE (standby) -+ -+ fencing (stonith:external/sbd): Stopped -+ Clone Set: baseclone [basegrp] -+ Resource Group: basegrp:0 -+ dlm (ocf::pacemaker:controld): Started hex-2 -+ clvmd (ocf::lvm2:clvmd): Started hex-2 -+ o2cb (ocf::ocfs2:o2cb): Started hex-2 -+ vg1 (ocf::heartbeat:LVM): Stopped -+ fs-ocfs-1 (ocf::heartbeat:Filesystem): Stopped -+ Stopped: [ hex-1 hex-3 ] -+ fs-xfs-1 (ocf::heartbeat:Filesystem): Stopped -+ Clone Set: fs2 [fs-ocfs-2] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r0 [drbd-r0] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r1 [drbd-r1] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Resource Group: md0-group -+ md0 (ocf::heartbeat:Raid1): Stopped -+ vg-md0 (ocf::heartbeat:LVM): Stopped -+ fs-md0 (ocf::heartbeat:Filesystem): Stopped -+ dummy1 (ocf::heartbeat:Delay): Stopped -+ dummy3 (ocf::heartbeat:Delay): Stopped -+ dummy4 (ocf::heartbeat:Delay): Stopped -+ dummy5 (ocf::heartbeat:Delay): Stopped -+ dummy6 (ocf::heartbeat:Delay): Stopped -+ Resource Group: r0-group -+ 
fs-r0 (ocf::heartbeat:Filesystem): Stopped -+ dummy2 (ocf::heartbeat:Delay): Stopped -+ cluster-md0 (ocf::heartbeat:Raid1): Stopped -+ -+Transition Summary: -+ * Stop dlm:0 (hex-2) -+ * Stop clvmd:0 (hex-2) -+ * Stop o2cb:0 (hex-2) -+ -+Executing cluster transition: -+ * Pseudo action: baseclone_stop_0 -+ * Pseudo action: basegrp:0_stop_0 -+ * Resource action: o2cb stop on hex-2 -+ * Resource action: clvmd stop on hex-2 -+ * Resource action: dlm stop on hex-2 -+ * Pseudo action: all_stopped -+ * Pseudo action: basegrp:0_stopped_0 -+ * Pseudo action: baseclone_stopped_0 -+ -+Revised cluster status: -+Node hex-1: standby -+Node hex-2: standby -+Node hex-3: OFFLINE (standby) -+ -+ fencing (stonith:external/sbd): Stopped -+ Clone Set: baseclone [basegrp] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ fs-xfs-1 (ocf::heartbeat:Filesystem): Stopped -+ Clone Set: fs2 [fs-ocfs-2] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r0 [drbd-r0] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Master/Slave Set: ms-r1 [drbd-r1] -+ Stopped: [ hex-1 hex-2 hex-3 ] -+ Resource Group: md0-group -+ md0 (ocf::heartbeat:Raid1): Stopped -+ vg-md0 (ocf::heartbeat:LVM): Stopped -+ fs-md0 (ocf::heartbeat:Filesystem): Stopped -+ dummy1 (ocf::heartbeat:Delay): Stopped -+ dummy3 (ocf::heartbeat:Delay): Stopped -+ dummy4 (ocf::heartbeat:Delay): Stopped -+ dummy5 (ocf::heartbeat:Delay): Stopped -+ dummy6 (ocf::heartbeat:Delay): Stopped -+ Resource Group: r0-group -+ fs-r0 (ocf::heartbeat:Filesystem): Stopped -+ dummy2 (ocf::heartbeat:Delay): Stopped -+ cluster-md0 (ocf::heartbeat:Raid1): Stopped -+ -diff --git a/pengine/test10/bug-5140-require-all-false.xml b/pengine/test10/bug-5140-require-all-false.xml -new file mode 100644 -index 0000000..2db4935 ---- /dev/null -+++ b/pengine/test10/bug-5140-require-all-false.xml -@@ -0,0 +1,416 @@ [416 added lines of CIB XML; the element text was lost in extraction and only bare + markers survived] -diff --git a/pengine/test10/bug-5143-ms-shuffle.dot b/pengine/test10/bug-5143-ms-shuffle.dot -new file mode 100644 -index 0000000..c8da3a7 ---- /dev/null -+++ b/pengine/test10/bug-5143-ms-shuffle.dot -@@ -0,0 +1,33 @@ -+digraph "g" { -+"drbd-r1_monitor_29000 hex-2" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_monitor_31000 hex-3" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_post_notify_promoted_0 hex-2" -> "ms-r1_confirmed-post_notify_promoted_0" [ style
= bold] -+"drbd-r1_post_notify_promoted_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_post_notify_promoted_0 hex-3" -> "ms-r1_confirmed-post_notify_promoted_0" [ style = bold] -+"drbd-r1_post_notify_promoted_0 hex-3" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_pre_notify_promote_0 hex-2" -> "ms-r1_confirmed-pre_notify_promote_0" [ style = bold] -+"drbd-r1_pre_notify_promote_0 hex-2" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_pre_notify_promote_0 hex-3" -> "ms-r1_confirmed-pre_notify_promote_0" [ style = bold] -+"drbd-r1_pre_notify_promote_0 hex-3" [ style=bold color="green" fontcolor="black"] -+"drbd-r1_promote_0 hex-3" -> "drbd-r1_monitor_31000 hex-3" [ style = bold] -+"drbd-r1_promote_0 hex-3" -> "ms-r1_promoted_0" [ style = bold] -+"drbd-r1_promote_0 hex-3" [ style=bold color="green" fontcolor="black"] -+"ms-r1_confirmed-post_notify_promoted_0" -> "drbd-r1_monitor_29000 hex-2" [ style = bold] -+"ms-r1_confirmed-post_notify_promoted_0" -> "drbd-r1_monitor_31000 hex-3" [ style = bold] -+"ms-r1_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_confirmed-pre_notify_promote_0" -> "ms-r1_post_notify_promoted_0" [ style = bold] -+"ms-r1_confirmed-pre_notify_promote_0" -> "ms-r1_promote_0" [ style = bold] -+"ms-r1_confirmed-pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_post_notify_promoted_0" -> "drbd-r1_post_notify_promoted_0 hex-2" [ style = bold] -+"ms-r1_post_notify_promoted_0" -> "drbd-r1_post_notify_promoted_0 hex-3" [ style = bold] -+"ms-r1_post_notify_promoted_0" -> "ms-r1_confirmed-post_notify_promoted_0" [ style = bold] -+"ms-r1_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_pre_notify_promote_0" -> "drbd-r1_pre_notify_promote_0 hex-2" [ style = bold] -+"ms-r1_pre_notify_promote_0" -> "drbd-r1_pre_notify_promote_0 hex-3" [ style = bold] -+"ms-r1_pre_notify_promote_0" -> "ms-r1_confirmed-pre_notify_promote_0" [ style = bold] -+"ms-r1_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_promote_0" -> "drbd-r1_promote_0 hex-3" [ style = bold] -+"ms-r1_promote_0" [ style=bold color="green" fontcolor="orange"] -+"ms-r1_promoted_0" -> "ms-r1_post_notify_promoted_0" [ style = bold] -+"ms-r1_promoted_0" [ style=bold color="green" fontcolor="orange"] -+} -diff --git a/pengine/test10/bug-5143-ms-shuffle.exp b/pengine/test10/bug-5143-ms-shuffle.exp -new file mode 100644 -index 0000000..cd1bf59 ---- /dev/null -+++ b/pengine/test10/bug-5143-ms-shuffle.exp -@@ -0,0 +1,180 @@ [180 added lines of transition-graph XML; the element text was lost in extraction and only bare + markers survived] -diff --git a/pengine/test10/bug-5143-ms-shuffle.scores b/pengine/test10/bug-5143-ms-shuffle.scores -new file mode 100644 -index 0000000..e6bfca3 ---- /dev/null -+++ b/pengine/test10/bug-5143-ms-shuffle.scores -@@ -0,0 +1,273 @@ -+Allocation scores: -+clone_color: baseclone allocation score on hex-1: 0 -+clone_color: baseclone allocation score on hex-2: 2000 -+clone_color: baseclone allocation score on hex-3: 0
-+clone_color: basegrp:0 allocation score on hex-1: 0
-+clone_color: basegrp:0 allocation score on hex-2: 0
-+clone_color: basegrp:0 allocation score on hex-3: 0
-+clone_color: basegrp:1 allocation score on hex-1: 0
-+clone_color: basegrp:1 allocation score on hex-2: 0
-+clone_color: basegrp:1 allocation score on hex-3: 0
-+clone_color: basegrp:2 allocation score on hex-1: 0
-+clone_color: basegrp:2 allocation score on hex-2: 0
-+clone_color: basegrp:2 allocation score on hex-3: 0
-+clone_color: clvmd:0 allocation score on hex-1: 1
-+clone_color: clvmd:0 allocation score on hex-2: 0
-+clone_color: clvmd:0 allocation score on hex-3: 0
-+clone_color: clvmd:1 allocation score on hex-1: 0
-+clone_color: clvmd:1 allocation score on hex-2: 1
-+clone_color: clvmd:1 allocation score on hex-3: 0
-+clone_color: clvmd:2 allocation score on hex-1: 0
-+clone_color: clvmd:2 allocation score on hex-2: 0
-+clone_color: clvmd:2 allocation score on hex-3: 1
-+clone_color: dlm:0 allocation score on hex-1: 1
-+clone_color: dlm:0 allocation score on hex-2: 0
-+clone_color: dlm:0 allocation score on hex-3: 0
-+clone_color: dlm:1 allocation score on hex-1: 0
-+clone_color: dlm:1 allocation score on hex-2: 1
-+clone_color: dlm:1 allocation score on hex-3: 0
-+clone_color: dlm:2 allocation score on hex-1: 0
-+clone_color: dlm:2 allocation score on hex-2: 0
-+clone_color: dlm:2 allocation score on hex-3: 1
-+clone_color: drbd-r0:0 allocation score on hex-1: 10001
-+clone_color: drbd-r0:0 allocation score on hex-2: 0
-+clone_color: drbd-r0:0 allocation score on hex-3: -INFINITY
-+clone_color: drbd-r0:1 allocation score on hex-1: 0
-+clone_color: drbd-r0:1 allocation score on hex-2: 10001
-+clone_color: drbd-r0:1 allocation score on hex-3: -INFINITY
-+clone_color: drbd-r1:0 allocation score on hex-1: -INFINITY
-+clone_color: drbd-r1:0 allocation score on hex-2: 1
-+clone_color: drbd-r1:0 allocation score on hex-3: 0
-+clone_color: drbd-r1:1 allocation score on hex-1: -INFINITY
-+clone_color: drbd-r1:1 allocation score on hex-2: 0
-+clone_color: drbd-r1:1 allocation score on hex-3: 10001
-+clone_color: fs-ocfs-1:0 allocation score on hex-1: 1
-+clone_color: fs-ocfs-1:0 allocation score on hex-2: 0
-+clone_color: fs-ocfs-1:0 allocation score on hex-3: 0
-+clone_color: fs-ocfs-1:1 allocation score on hex-1: 0
-+clone_color: fs-ocfs-1:1 allocation score on hex-2: 1
-+clone_color: fs-ocfs-1:1 allocation score on hex-3: 0
-+clone_color: fs-ocfs-1:2 allocation score on hex-1: 0
-+clone_color: fs-ocfs-1:2 allocation score on hex-2: 0
-+clone_color: fs-ocfs-1:2 allocation score on hex-3: 1
-+clone_color: fs-ocfs-2:0 allocation score on hex-1: 1
-+clone_color: fs-ocfs-2:0 allocation score on hex-2: 0
-+clone_color: fs-ocfs-2:0 allocation score on hex-3: 0
-+clone_color: fs-ocfs-2:1 allocation score on hex-1: 0
-+clone_color: fs-ocfs-2:1 allocation score on hex-2: 1
-+clone_color: fs-ocfs-2:1 allocation score on hex-3: 0
-+clone_color: fs-ocfs-2:2 allocation score on hex-1: 0
-+clone_color: fs-ocfs-2:2 allocation score on hex-2: 0
-+clone_color: fs-ocfs-2:2 allocation score on hex-3: 1
-+clone_color: fs2 allocation score on hex-1: 1000
-+clone_color: fs2 allocation score on hex-2: 0
-+clone_color: fs2 allocation score on hex-3: 0
-+clone_color: ms-r0 allocation score on hex-1: 0
-+clone_color: ms-r0 allocation score on hex-2: 0
-+clone_color: ms-r0 allocation score on hex-3: -INFINITY
-+clone_color: ms-r1 allocation score on hex-1: -INFINITY
-+clone_color: ms-r1 allocation score on hex-2: 0
-+clone_color: ms-r1 allocation score on hex-3: 0
-+clone_color: o2cb:0 allocation score on hex-1: 1
-+clone_color: o2cb:0 allocation score on hex-2: 0
-+clone_color: o2cb:0 allocation score on hex-3: 0
-+clone_color: o2cb:1 allocation score on hex-1: 0
-+clone_color: o2cb:1 allocation score on hex-2: 1
-+clone_color: o2cb:1 allocation score on hex-3: 0
-+clone_color: o2cb:2 allocation score on hex-1: 0
-+clone_color: o2cb:2 allocation score on hex-2: 0
-+clone_color: o2cb:2 allocation score on hex-3: 1
-+clone_color: vg1:0 allocation score on hex-1: 1
-+clone_color: vg1:0 allocation score on hex-2: 0
-+clone_color: vg1:0 allocation score on hex-3: 0
-+clone_color: vg1:1 allocation score on hex-1: 0
-+clone_color: vg1:1 allocation score on hex-2: 1
-+clone_color: vg1:1 allocation score on hex-3: 0
-+clone_color: vg1:2 allocation score on hex-1: 0
-+clone_color: vg1:2 allocation score on hex-2: 0
-+clone_color: vg1:2 allocation score on hex-3: 1
-+drbd-r0:0 promotion score on hex-1: 10000
-+drbd-r0:1 promotion score on hex-2: 10000
-+drbd-r0:2 promotion score on none: 0
-+drbd-r1:0 promotion score on hex-2: -1
-+drbd-r1:1 promotion score on hex-3: 10000
-+group_color: basegrp:0 allocation score on hex-1: 0
-+group_color: basegrp:0 allocation score on hex-2: 0
-+group_color: basegrp:0 allocation score on hex-3: 0
-+group_color: basegrp:1 allocation score on hex-1: -INFINITY
-+group_color: basegrp:1 allocation score on hex-2: 0
-+group_color: basegrp:1 allocation score on hex-3: 0
-+group_color: basegrp:2 allocation score on hex-1: -INFINITY
-+group_color: basegrp:2 allocation score on hex-2: -INFINITY
-+group_color: basegrp:2 allocation score on hex-3: 0
-+group_color: clvmd:0 allocation score on hex-1: 1
-+group_color: clvmd:0 allocation score on hex-2: 0
-+group_color: clvmd:0 allocation score on hex-3: 0
-+group_color: clvmd:1 allocation score on hex-1: -INFINITY
-+group_color: clvmd:1 allocation score on hex-2: 1
-+group_color: clvmd:1 allocation score on hex-3: 0
-+group_color: clvmd:2 allocation score on hex-1: -INFINITY
-+group_color: clvmd:2 allocation score on hex-2: -INFINITY
-+group_color: clvmd:2 allocation score on hex-3: 1
-+group_color: dlm:0 allocation score on hex-1: 1
-+group_color: dlm:0 allocation score on hex-2: 0
-+group_color: dlm:0 allocation score on hex-3: 0
-+group_color: dlm:1 allocation score on hex-1: -INFINITY
-+group_color: dlm:1 allocation score on hex-2: 1
-+group_color: dlm:1 allocation score on hex-3: 0
-+group_color: dlm:2 allocation score on hex-1: -INFINITY
-+group_color: dlm:2 allocation score on hex-2: -INFINITY
-+group_color: dlm:2 allocation score on hex-3: 1
-+group_color: dummy1 allocation score on hex-1: 0
-+group_color: dummy1 allocation score on hex-2: 0
-+group_color: dummy1 allocation score on hex-3: 1000
-+group_color: dummy2 allocation score on hex-1: 0
-+group_color: dummy2 allocation score on hex-2: 0
-+group_color: dummy2 allocation score on hex-3: 0
-+group_color: fs-md0 allocation score on hex-1: 0
-+group_color: fs-md0 allocation score on hex-2: 0
-+group_color: fs-md0 allocation score on hex-3: 1000
-+group_color: fs-ocfs-1:0 allocation score on hex-1: 1
-+group_color: fs-ocfs-1:0 allocation score on hex-2: 0
-+group_color: fs-ocfs-1:0 allocation score on hex-3: 0
-+group_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY
-+group_color: fs-ocfs-1:1 allocation score on hex-2: 1
-+group_color: fs-ocfs-1:1 allocation score on hex-3: 0
-+group_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY
-+group_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY
-+group_color: fs-ocfs-1:2 allocation score on hex-3: 1
-+group_color: fs-r0 allocation score on hex-1: 0
-+group_color: fs-r0 allocation score on hex-2: 0
-+group_color: fs-r0 allocation score on hex-3: 0
-+group_color: md0 allocation score on hex-1: 0
-+group_color: md0 allocation score on hex-2: 0
-+group_color: md0 allocation score on hex-3: 1000
-+group_color: md0-group allocation score on hex-1: 0
-+group_color: md0-group allocation score on hex-2: 0
-+group_color: md0-group allocation score on hex-3: 0
-+group_color: o2cb:0 allocation score on hex-1: 1
-+group_color: o2cb:0 allocation score on hex-2: 0
-+group_color: o2cb:0 allocation score on hex-3: 0
-+group_color: o2cb:1 allocation score on hex-1: -INFINITY
-+group_color: o2cb:1 allocation score on hex-2: 1
-+group_color: o2cb:1 allocation score on hex-3: 0
-+group_color: o2cb:2 allocation score on hex-1: -INFINITY
-+group_color: o2cb:2 allocation score on hex-2: -INFINITY
-+group_color: o2cb:2 allocation score on hex-3: 1
-+group_color: r0-group allocation score on hex-1: 0
-+group_color: r0-group allocation score on hex-2: 0
-+group_color: r0-group allocation score on hex-3: 0
-+group_color: vg-md0 allocation score on hex-1: 0
-+group_color: vg-md0 allocation score on hex-2: 0
-+group_color: vg-md0 allocation score on hex-3: 1000
-+group_color: vg1:0 allocation score on hex-1: 1
-+group_color: vg1:0 allocation score on hex-2: 0
-+group_color: vg1:0 allocation score on hex-3: 0
-+group_color: vg1:1 allocation score on hex-1: -INFINITY
-+group_color: vg1:1 allocation score on hex-2: 1
-+group_color: vg1:1 allocation score on hex-3: 0
-+group_color: vg1:2 allocation score on hex-1: -INFINITY
-+group_color: vg1:2 allocation score on hex-2: -INFINITY
-+group_color: vg1:2 allocation score on hex-3: 1
-+native_color: clvmd:0 allocation score on hex-1: 4
-+native_color: clvmd:0 allocation score on hex-2: -INFINITY
-+native_color: clvmd:0 allocation score on hex-3: -INFINITY
-+native_color: clvmd:1 allocation score on hex-1: -INFINITY
-+native_color: clvmd:1 allocation score on hex-2: 4
-+native_color: clvmd:1 allocation score on hex-3: -INFINITY
-+native_color: clvmd:2 allocation score on hex-1: -INFINITY
-+native_color: clvmd:2 allocation score on hex-2: -INFINITY
-+native_color: clvmd:2 allocation score on hex-3: 4
-+native_color: dlm:0 allocation score on hex-1: 5
-+native_color: dlm:0 allocation score on hex-2: 0
-+native_color: dlm:0 allocation score on hex-3: 0
-+native_color: dlm:1 allocation score on hex-1: -INFINITY
-+native_color: dlm:1 allocation score on hex-2: 5
-+native_color: dlm:1 allocation score on hex-3: 0
-+native_color: dlm:2 allocation score on hex-1: -INFINITY
-+native_color: dlm:2 allocation score on hex-2: -INFINITY
-+native_color: dlm:2 allocation score on hex-3: 5
-+native_color: drbd-r0:0 allocation score on hex-1: 10001
-+native_color: drbd-r0:0 allocation score on hex-2: 0
-+native_color: drbd-r0:0 allocation score on hex-3: -INFINITY
-+native_color: drbd-r0:1 allocation score on hex-1: -INFINITY
-+native_color: drbd-r0:1 allocation score on hex-2: 10001
-+native_color: drbd-r0:1 allocation score on hex-3: -INFINITY
-+native_color: drbd-r1:0 allocation score on hex-1: -INFINITY
-+native_color: drbd-r1:0 allocation score on hex-2: 1
-+native_color: drbd-r1:0 allocation score on hex-3: -INFINITY
-+native_color: drbd-r1:1 allocation score on hex-1: -INFINITY
-+native_color: drbd-r1:1 allocation score on hex-2: 0
-+native_color: drbd-r1:1 allocation score on hex-3: 10001
-+native_color: dummy1 allocation score on hex-1: -INFINITY
-+native_color: dummy1 allocation score on hex-2: -INFINITY
-+native_color: dummy1 allocation score on hex-3: 1000
-+native_color: dummy2 allocation score on hex-1: -INFINITY
-+native_color: dummy2 allocation score on hex-2: -INFINITY
-+native_color: dummy2 allocation score on hex-3: -INFINITY
-+native_color: dummy3 allocation score on hex-1: 1000
-+native_color: dummy3 allocation score on hex-2: -INFINITY
-+native_color: dummy3 allocation score on hex-3: 0
-+native_color: dummy4 allocation score on hex-1: -INFINITY
-+native_color: dummy4 allocation score on hex-2: 1000
-+native_color: dummy4 allocation score on hex-3: -INFINITY
-+native_color: dummy5 allocation score on hex-1: 1000
-+native_color: dummy5 allocation score on hex-2: 0
-+native_color: dummy5 allocation score on hex-3: 0
-+native_color: dummy6 allocation score on hex-1: 0
-+native_color: dummy6 allocation score on hex-2: 1000
-+native_color: dummy6 allocation score on hex-3: 0
-+native_color: fencing allocation score on hex-1: 1000
-+native_color: fencing allocation score on hex-2: 0
-+native_color: fencing allocation score on hex-3: 0
-+native_color: fs-md0 allocation score on hex-1: -INFINITY
-+native_color: fs-md0 allocation score on hex-2: -INFINITY
-+native_color: fs-md0 allocation score on hex-3: 2000
-+native_color: fs-ocfs-1:0 allocation score on hex-1: 1
-+native_color: fs-ocfs-1:0 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-1:0 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-1:1 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-1:1 allocation score on hex-2: 1
-+native_color: fs-ocfs-1:1 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-1:2 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-1:2 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-1:2 allocation score on hex-3: 1
-+native_color: fs-ocfs-2:0 allocation score on hex-1: 1
-+native_color: fs-ocfs-2:0 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-2:0 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-2:1 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-2:1 allocation score on hex-2: 1
-+native_color: fs-ocfs-2:1 allocation score on hex-3: -INFINITY
-+native_color: fs-ocfs-2:2 allocation score on hex-1: -INFINITY
-+native_color: fs-ocfs-2:2 allocation score on hex-2: -INFINITY
-+native_color: fs-ocfs-2:2 allocation score on hex-3: 1
-+native_color: fs-r0 allocation score on hex-1: -INFINITY
-+native_color: fs-r0 allocation score on hex-2: -INFINITY
-+native_color: fs-r0 allocation score on hex-3: -INFINITY
-+native_color: fs-xfs-1 allocation score on hex-1: -1000
-+native_color: fs-xfs-1 allocation score on hex-2: 2000
-+native_color: fs-xfs-1 allocation score on hex-3: 0
-+native_color: md0 allocation score on hex-1: 0
-+native_color: md0 allocation score on hex-2: 0
-+native_color: md0 allocation score on hex-3: 4000
-+native_color: o2cb:0 allocation score on hex-1: 3
-+native_color: o2cb:0 allocation score on hex-2: -INFINITY
-+native_color: o2cb:0 allocation score on hex-3: -INFINITY
-+native_color: o2cb:1 allocation score on hex-1: -INFINITY
-+native_color: o2cb:1 allocation score on hex-2: 3
-+native_color: o2cb:1 allocation score on hex-3: -INFINITY
-+native_color: o2cb:2 allocation score on hex-1: -INFINITY
-+native_color: o2cb:2 allocation score on hex-2: -INFINITY
-+native_color: o2cb:2 allocation score on hex-3: 3
-+native_color: vg-md0 allocation score on hex-1: -INFINITY
-+native_color: vg-md0 allocation score on hex-2: -INFINITY
-+native_color: vg-md0 allocation score on hex-3: 3000
-+native_color: vg1:0 allocation score on hex-1: 2
-+native_color: vg1:0 allocation score on hex-2: -INFINITY
-+native_color: vg1:0 allocation score on hex-3: -INFINITY
-+native_color: vg1:1 allocation score on hex-1: -INFINITY
-+native_color: vg1:1 allocation score on hex-2: 2
-+native_color: vg1:1 allocation score on hex-3: -INFINITY
-+native_color: vg1:2 allocation score on hex-1: -INFINITY
-+native_color: vg1:2 allocation score on hex-2: -INFINITY
-+native_color: vg1:2 allocation score on hex-3: 2
-diff --git a/pengine/test10/bug-5143-ms-shuffle.summary b/pengine/test10/bug-5143-ms-shuffle.summary
-new file mode 100644
-index 0000000..7a3bcf4
---- /dev/null
-+++ b/pengine/test10/bug-5143-ms-shuffle.summary
-@@ -0,0 +1,78 @@
-+
-+Current cluster status:
-+Online: [ hex-1 hex-2 hex-3 ]
-+
-+ fencing (stonith:external/sbd): Started hex-1
-+ Clone Set: baseclone [basegrp]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ fs-xfs-1 (ocf::heartbeat:Filesystem): Started hex-2
-+ Clone Set: fs2 [fs-ocfs-2]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ Master/Slave Set: ms-r0 [drbd-r0]
-+ Masters: [ hex-1 ]
-+ Slaves: [ hex-2 ]
-+ Stopped: [ hex-3 ]
-+ Master/Slave Set: ms-r1 [drbd-r1]
-+ Slaves: [ hex-2 hex-3 ]
-+ Stopped: [ hex-1 ]
-+ Resource Group: md0-group
-+ md0 (ocf::heartbeat:Raid1): Started hex-3
-+ vg-md0 (ocf::heartbeat:LVM): Started hex-3
-+ fs-md0 (ocf::heartbeat:Filesystem): Started hex-3
-+ dummy1 (ocf::heartbeat:Delay): Started hex-3
-+ dummy3 (ocf::heartbeat:Delay): Started hex-1
-+ dummy4 (ocf::heartbeat:Delay): Started hex-2
-+ dummy5 (ocf::heartbeat:Delay): Started hex-1
-+ dummy6 (ocf::heartbeat:Delay): Started hex-2
-+ Resource Group: r0-group
-+ fs-r0 (ocf::heartbeat:Filesystem): Stopped
-+ dummy2 (ocf::heartbeat:Delay): Stopped
-+
-+Transition Summary:
-+ * Promote drbd-r1:1 (Slave -> Master hex-3)
-+
-+Executing cluster transition:
-+ * Pseudo action: ms-r1_pre_notify_promote_0
-+ * Resource action: drbd-r1 notify on hex-2
-+ * Resource action: drbd-r1 notify on hex-3
-+ * Pseudo action: ms-r1_confirmed-pre_notify_promote_0
-+ * Pseudo action: ms-r1_promote_0
-+ * Resource action: drbd-r1 promote on hex-3
-+ * Pseudo action: ms-r1_promoted_0
-+ * Pseudo action: ms-r1_post_notify_promoted_0
-+ * Resource action: drbd-r1 notify on hex-2
-+ * Resource action: drbd-r1 notify on hex-3
-+ * Pseudo action: ms-r1_confirmed-post_notify_promoted_0
-+ * Resource action: drbd-r1 monitor=29000 on hex-2
-+ * Resource action: drbd-r1 monitor=31000 on hex-3
-+
-+Revised cluster status:
-+Online: [ hex-1 hex-2 hex-3 ]
-+
-+ fencing (stonith:external/sbd): Started hex-1
-+ Clone Set: baseclone [basegrp]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ fs-xfs-1 (ocf::heartbeat:Filesystem): Started hex-2
-+ Clone Set: fs2 [fs-ocfs-2]
-+ Started: [ hex-1 hex-2 hex-3 ]
-+ Master/Slave Set: ms-r0 [drbd-r0]
-+ Masters: [ hex-1 ]
-+ Slaves: [ hex-2 ]
-+ Stopped: [ hex-3 ]
-+ Master/Slave Set: ms-r1 [drbd-r1]
-+ Masters: [ hex-3 ]
-+ Slaves: [ hex-2 ]
-+ Stopped: [ hex-1 ]
-+ Resource Group: md0-group
-+ md0 (ocf::heartbeat:Raid1): Started hex-3
-+ vg-md0 (ocf::heartbeat:LVM): Started hex-3
-+ fs-md0 (ocf::heartbeat:Filesystem): Started hex-3
-+ dummy1 (ocf::heartbeat:Delay): Started hex-3
-+ dummy3 (ocf::heartbeat:Delay): Started hex-1
-+ dummy4 (ocf::heartbeat:Delay): Started hex-2
-+ dummy5 (ocf::heartbeat:Delay): Started hex-1
-+ dummy6 (ocf::heartbeat:Delay): Started hex-2
-+ Resource Group: r0-group
-+ fs-r0 (ocf::heartbeat:Filesystem): Stopped
-+ dummy2 (ocf::heartbeat:Delay): Stopped
-+
-diff --git a/pengine/test10/bug-5143-ms-shuffle.xml b/pengine/test10/bug-5143-ms-shuffle.xml
-new file mode 100644
-index 0000000..486d706
---- /dev/null
-+++ b/pengine/test10/bug-5143-ms-shuffle.xml
-@@ -0,0 +1,477 @@
[477 added lines: XML CIB test input; the angle-bracket markup was stripped from this copy and cannot be reconstructed]
-diff --git a/pengine/test10/bug-lf-1852.summary b/pengine/test10/bug-lf-1852.summary
-index 98ffea5..337ad6a 100644
---- a/pengine/test10/bug-lf-1852.summary
-+++ b/pengine/test10/bug-lf-1852.summary
-@@ -4,7 +4,7 @@ Online: [ mysql-01 mysql-02 ]
-
- Master/Slave Set: ms-drbd0 [drbd0]
- Masters: [ mysql-02 ]
-- Stopped: [ drbd0:1 ]
-+ Stopped: [ mysql-01 ]
- Resource Group: fs_mysql_ip
- fs0 (ocf::heartbeat:Filesystem): Started mysql-02
- mysqlid (lsb:mysql): Started mysql-02
-diff --git a/pengine/test10/bug-lf-2106.scores b/pengine/test10/bug-lf-2106.scores
-index c5ca47b..4dc52b5 100644
---- a/pengine/test10/bug-lf-2106.scores
-+++ b/pengine/test10/bug-lf-2106.scores
-@@ -1,26 +1,26 @@
- Allocation scores:
- clone_color: drbd-bugtrack:0 allocation score on cl-virt-1: 1
--clone_color: drbd-bugtrack:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-bugtrack:0 allocation score on cl-virt-2: 0
- clone_color: drbd-bugtrack:1 allocation score on cl-virt-1: 0
- clone_color: drbd-bugtrack:1 allocation score on cl-virt-2: 76
- clone_color: drbd-infotos:0 allocation score on cl-virt-1: 1
--clone_color: drbd-infotos:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-infotos:0 allocation score on cl-virt-2: 0
- clone_color: drbd-infotos:1 allocation score on cl-virt-1: 0
- clone_color: drbd-infotos:1 allocation score on cl-virt-2: 76
- clone_color: drbd-itwiki:0 allocation score on cl-virt-1: 1
--clone_color: drbd-itwiki:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-itwiki:0 allocation score on cl-virt-2: 0
- clone_color: drbd-itwiki:1 allocation score on cl-virt-1: 0
- clone_color: drbd-itwiki:1 allocation score on cl-virt-2: 76
- clone_color: drbd-medomus-cvs:0 allocation score on cl-virt-1: 1
--clone_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 0
- clone_color: drbd-medomus-cvs:1 allocation score on cl-virt-1: 0
- clone_color: drbd-medomus-cvs:1 allocation score on cl-virt-2: 76
- clone_color: drbd-servsyslog:0 allocation score on cl-virt-1: 1
--clone_color: drbd-servsyslog:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-servsyslog:0 allocation score on cl-virt-2: 0
- clone_color: drbd-servsyslog:1 allocation score on cl-virt-1: 0
- clone_color: drbd-servsyslog:1 allocation score on cl-virt-2: 76
- clone_color: drbd-smsprod2:0 allocation score on cl-virt-1: 1
--clone_color: drbd-smsprod2:0 allocation score on cl-virt-2: 75
-+clone_color: drbd-smsprod2:0 allocation score on cl-virt-2: 0
- clone_color: drbd-smsprod2:1 allocation score on cl-virt-1: 0
- clone_color: drbd-smsprod2:1 allocation score on cl-virt-2: 76
- clone_color: ms-bugtrack allocation score on cl-virt-1: 50
-@@ -66,27 +66,27 @@ native_color: apcstonith allocation score on cl-virt-2: 0
- native_color: bugtrack allocation score on cl-virt-1: -INFINITY
- native_color: bugtrack allocation score on cl-virt-2: 176
- native_color: drbd-bugtrack:0 allocation score on cl-virt-1: 1
--native_color: drbd-bugtrack:0 allocation score on cl-virt-2: 75
-+native_color: drbd-bugtrack:0 allocation score on cl-virt-2: 0
- native_color: drbd-bugtrack:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-bugtrack:1 allocation score on cl-virt-2: 76
- native_color: drbd-infotos:0 allocation score on cl-virt-1: 1
--native_color: drbd-infotos:0 allocation score on cl-virt-2: 75
-+native_color: drbd-infotos:0 allocation score on cl-virt-2: 0
- native_color: drbd-infotos:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-infotos:1 allocation score on cl-virt-2: 76
- native_color: drbd-itwiki:0 allocation score on cl-virt-1: 1
--native_color: drbd-itwiki:0 allocation score on cl-virt-2: 75
-+native_color: drbd-itwiki:0 allocation score on cl-virt-2: 0
- native_color: drbd-itwiki:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-itwiki:1 allocation score on cl-virt-2: 76
- native_color: drbd-medomus-cvs:0 allocation score on cl-virt-1: 1
--native_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 75
-+native_color: drbd-medomus-cvs:0 allocation score on cl-virt-2: 0
- native_color: drbd-medomus-cvs:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-medomus-cvs:1 allocation score on cl-virt-2: 76
- native_color: drbd-servsyslog:0 allocation score on cl-virt-1: 1
--native_color: drbd-servsyslog:0 allocation score on cl-virt-2: 75
-+native_color: drbd-servsyslog:0 allocation score on cl-virt-2: 0
- native_color: drbd-servsyslog:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-servsyslog:1 allocation score on cl-virt-2: 76
- native_color: drbd-smsprod2:0 allocation score on cl-virt-1: 1
--native_color: drbd-smsprod2:0 allocation score on cl-virt-2: 75
-+native_color: drbd-smsprod2:0 allocation score on cl-virt-2: 0
- native_color: drbd-smsprod2:1 allocation score on cl-virt-1: -INFINITY
- native_color: drbd-smsprod2:1 allocation score on cl-virt-2: 76
- native_color: infotos allocation score on cl-virt-1: -INFINITY
-diff --git a/pengine/test10/bug-lf-2106.summary b/pengine/test10/bug-lf-2106.summary
-index 7be05ae..9a71125 100644
---- a/pengine/test10/bug-lf-2106.summary
-+++ b/pengine/test10/bug-lf-2106.summary
-@@ -39,8 +39,8 @@ Transition Summary:
- * Restart pingd:1 (Started cl-virt-2)
-
- Executing cluster transition:
-- * Cluster action: clear_failcount on cl-virt-1
-- * Cluster action: clear_failcount on cl-virt-2
-+ * Cluster action: clear_failcount for pingd on cl-virt-1
-+ * Cluster action: clear_failcount for pingd on cl-virt-2
- * Pseudo action: pingdclone_stop_0
- * Resource action: pingd:0 stop on cl-virt-1
- * Resource action: pingd:0 stop on cl-virt-2
-diff --git a/pengine/test10/bug-lf-2153.summary b/pengine/test10/bug-lf-2153.summary
-index 475e7df..9995475 100644
---- a/pengine/test10/bug-lf-2153.summary
-+++ b/pengine/test10/bug-lf-2153.summary
-@@ -43,10 +43,10 @@ Online: [ alice ]
-
- Master/Slave Set: ms_drbd_iscsivg01 [res_drbd_iscsivg01]
- Masters: [ alice ]
-- Stopped: [ res_drbd_iscsivg01:1 ]
-+ Stopped: [ bob ]
- Clone Set: cl_tgtd [res_tgtd]
- Started: [ alice ]
-- Stopped: [ res_tgtd:1 ]
-+ Stopped: [ bob ]
- Resource Group: rg_iscsivg01
- res_portblock_iscsivg01_block (ocf::heartbeat:portblock): Started alice
- res_lvm_iscsivg01 (ocf::heartbeat:LVM): Started alice
-diff --git a/pengine/test10/bug-lf-2160.summary b/pengine/test10/bug-lf-2160.summary
-index 77ef8f9..e09540a 100644
---- a/pengine/test10/bug-lf-2160.summary
-+++ b/pengine/test10/bug-lf-2160.summary
-@@ -5,6 +5,7 @@ Online: [ cardhu dualamd1 dualamd3 ]
- domU-test01 (ocf::heartbeat:Xen): Started dualamd1
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1-cnx1]
- Started: [ dualamd1 dualamd3 ]
-+ Stopped: [ cardhu ]
-
- Transition Summary:
-
-@@ -19,4 +20,5 @@ Online: [ cardhu dualamd1 dualamd3 ]
- domU-test01 (ocf::heartbeat:Xen): Started dualamd1
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1-cnx1]
- Started: [ dualamd1 dualamd3 ]
-+ Stopped: [ cardhu ]
-
-diff --git a/pengine/test10/bug-lf-2171.summary b/pengine/test10/bug-lf-2171.summary
-index a240116..5af3cd0 100644
---- a/pengine/test10/bug-lf-2171.summary
-+++ b/pengine/test10/bug-lf-2171.summary
-@@ -29,7 +29,7 @@ Revised cluster status:
- Online: [ xenserver1 xenserver2 ]
-
- Clone Set: cl_res_Dummy1 [res_Dummy1]
-- Stopped: [ res_Dummy1:0 res_Dummy1:1 ]
-+ Stopped: [ xenserver1 xenserver2 ]
- Resource Group: gr_Dummy
- res_Dummy2 (ocf::heartbeat:Dummy): Stopped
- res_Dummy3 (ocf::heartbeat:Dummy): Stopped
-diff --git a/pengine/test10/bug-lf-2213.summary b/pengine/test10/bug-lf-2213.summary
-index f5d06cb..a60012d 100644
---- a/pengine/test10/bug-lf-2213.summary
-+++ b/pengine/test10/bug-lf-2213.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ fs1 fs2 web1 web2 ]
-
- Clone Set: cl-test [gr-test]
-- Stopped: [ gr-test:0 gr-test:1 gr-test:2 gr-test:3 ]
-+ Stopped: [ fs1 fs2 web1 web2 ]
-
- Transition Summary:
- * Start test:0 (web1)
-@@ -24,5 +24,5 @@ Online: [ fs1 fs2 web1 web2 ]
-
- Clone Set: cl-test [gr-test]
- Started: [ web1 web2 ]
-- Stopped: [ gr-test:2 gr-test:3 ]
-+ Stopped: [ fs1 fs2 ]
-
-diff --git a/pengine/test10/bug-lf-2358.summary b/pengine/test10/bug-lf-2358.summary
-index 3b383c7..434c5ed 100644
---- a/pengine/test10/bug-lf-2358.summary
-+++ b/pengine/test10/bug-lf-2358.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ alice.demo bob.demo ]
-
- Master/Slave Set: ms_drbd_nfsexport [res_drbd_nfsexport]
-- Stopped: [ res_drbd_nfsexport:0 res_drbd_nfsexport:1 ]
-+ Stopped: [ alice.demo bob.demo ]
- Resource Group: rg_nfs
- res_fs_nfsexport (ocf::heartbeat:Filesystem): Stopped
- res_ip_nfs (ocf::heartbeat:IPaddr2): Stopped
-@@ -14,7 +14,7 @@ Online: [ alice.demo bob.demo ]
- res_mysql1 (ocf::heartbeat:mysql): Started bob.demo
- Master/Slave Set: ms_drbd_mysql1 [res_drbd_mysql1]
- Masters: [ bob.demo ]
-- Stopped: [ res_drbd_mysql1:1 ]
-+ Stopped: [ alice.demo ]
- Master/Slave Set: ms_drbd_mysql2 [res_drbd_mysql2]
- Masters: [ alice.demo ]
- Slaves: [ bob.demo ]
-@@ -42,7 +42,7 @@ Revised cluster status:
- Online: [ alice.demo bob.demo ]
-
- Master/Slave Set: ms_drbd_nfsexport [res_drbd_nfsexport]
-- Stopped: [ res_drbd_nfsexport:0 res_drbd_nfsexport:1 ]
-+ Stopped: [ alice.demo bob.demo ]
- Resource Group: rg_nfs
- res_fs_nfsexport (ocf::heartbeat:Filesystem): Stopped
- res_ip_nfs (ocf::heartbeat:IPaddr2): Stopped
-diff --git a/pengine/test10/bug-lf-2361.summary b/pengine/test10/bug-lf-2361.summary
-index fd48ba9..b81456c 100644
---- a/pengine/test10/bug-lf-2361.summary
-+++ b/pengine/test10/bug-lf-2361.summary
-@@ -4,9 +4,9 @@ Online: [ alice.demo bob.demo ]
-
- dummy1 (ocf::heartbeat:Dummy): Stopped
- Master/Slave Set: ms_stateful [stateful]
-- Stopped: [ stateful:0 stateful:1 ]
-+ Stopped: [ alice.demo bob.demo ]
- Clone Set: cl_dummy2 [dummy2]
-- Stopped: [ dummy2:0 dummy2:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start stateful:0 (alice.demo)
-@@ -38,5 +38,5 @@ Online: [ alice.demo bob.demo ]
- Master/Slave Set: ms_stateful [stateful]
- Slaves: [ alice.demo bob.demo ]
- Clone Set: cl_dummy2 [dummy2]
-- Stopped: [ dummy2:0 dummy2:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
-diff --git a/pengine/test10/bug-lf-2422.summary b/pengine/test10/bug-lf-2422.summary
-index a4d3abd..760fc2b 100644
---- a/pengine/test10/bug-lf-2422.summary
-+++ b/pengine/test10/bug-lf-2422.summary
-@@ -56,25 +56,25 @@ Online: [ qa-suse-1 qa-suse-2 qa-suse-3 qa-suse-4 ]
- sbd_stonith (stonith:external/sbd): Started qa-suse-2
- Clone Set: c-o2stage [o2stage]
- Resource Group: o2stage:0
-- dlm:0 (ocf::pacemaker:controld): Started qa-suse-1
-- clvm:0 (ocf::lvm2:clvmd): Started qa-suse-1
-- o2cb:0 (ocf::ocfs2:o2cb): Stopped
-- cmirror:0 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-1
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-1
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Resource Group: o2stage:1
-- dlm:1 (ocf::pacemaker:controld): Started qa-suse-4
-- clvm:1 (ocf::lvm2:clvmd): Started qa-suse-4
-- o2cb:1 (ocf::ocfs2:o2cb): Stopped
-- cmirror:1 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-4
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-4
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Resource Group: o2stage:2
-- dlm:2 (ocf::pacemaker:controld): Started qa-suse-3
-- clvm:2 (ocf::lvm2:clvmd): Started qa-suse-3
-- o2cb:2 (ocf::ocfs2:o2cb): Stopped
-- cmirror:2 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-3
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-3
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Resource Group: o2stage:3
-- dlm:3 (ocf::pacemaker:controld): Started qa-suse-2
-- clvm:3 (ocf::lvm2:clvmd): Started qa-suse-2
-- o2cb:3 (ocf::ocfs2:o2cb): Stopped
-- cmirror:3 (ocf::lvm2:cmirrord): Stopped
-+ dlm (ocf::pacemaker:controld): Started qa-suse-2
-+ clvm (ocf::lvm2:clvmd): Started qa-suse-2
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ cmirror (ocf::lvm2:cmirrord): Stopped
- Clone Set: c-ocfs [ocfs]
-- Stopped: [ ocfs:0 ocfs:1 ocfs:2 ocfs:3 ]
-+ Stopped: [ qa-suse-1 qa-suse-2 qa-suse-3 qa-suse-4 ]
-
-diff --git a/pengine/test10/bug-lf-2453.summary b/pengine/test10/bug-lf-2453.summary
-index 8b40492..70eb79e 100644
---- a/pengine/test10/bug-lf-2453.summary
-+++ b/pengine/test10/bug-lf-2453.summary
-@@ -32,7 +32,7 @@ Online: [ domu1 domu2 ]
-
- PrimitiveResource1 (ocf::heartbeat:IPaddr2): Stopped
- Clone Set: CloneResource1 [apache]
-- Stopped: [ apache:0 apache:1 ]
-+ Stopped: [ domu1 domu2 ]
- Clone Set: CloneResource2 [DummyResource]
-- Stopped: [ DummyResource:0 DummyResource:1 ]
-+ Stopped: [ domu1 domu2 ]
-
-diff --git a/pengine/test10/bug-lf-2508.summary b/pengine/test10/bug-lf-2508.summary
-index 7032109..4435ced 100644
---- a/pengine/test10/bug-lf-2508.summary
-+++ b/pengine/test10/bug-lf-2508.summary
-@@ -11,19 +11,19 @@ Online: [ srv01 srv03 srv04 ]
- Dummy03 (ocf::heartbeat:Dummy): Started srv03
- Clone Set: clnStonith1 [grpStonith1]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ grpStonith1:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnStonith2 [grpStonith2]
- Started: [ srv01 srv03 srv04 ]
-- Stopped: [ grpStonith2:3 ]
-+ Stopped: [ srv02 ]
- Clone Set: clnStonith3 [grpStonith3]
- Resource Group: grpStonith3:1
-- prmStonith3-1:1 (stonith:external/stonith-helper): Started srv01
-- prmStonith3-3:1 (stonith:external/ssh): Stopped
-+ prmStonith3-1 (stonith:external/stonith-helper): Started srv01
-+ prmStonith3-3 (stonith:external/ssh): Stopped
- Started: [ srv02 srv04 ]
-- Stopped: [ grpStonith3:3 ]
-+ Stopped: [ srv03 ]
- Clone Set: clnStonith4 [grpStonith4]
- Started: [ srv01 srv02 srv03 ]
-- Stopped: [ grpStonith4:3 ]
-+ Stopped: [ srv04 ]
-
- Transition Summary:
- * Start Dummy01 (srv01)
-@@ -89,14 +89,14 @@ OFFLINE: [ srv02 ]
- Dummy03 (ocf::heartbeat:Dummy): Started srv03
- Clone Set: clnStonith1 [grpStonith1]
- Started: [ srv03 srv04 ]
-- Stopped: [ grpStonith1:2 grpStonith1:3 ]
-+ Stopped: [ srv01 srv02 ]
- Clone Set: clnStonith2 [grpStonith2]
- Started: [ srv01 srv03 srv04 ]
-- Stopped: [ grpStonith2:3 ]
-+ Stopped: [ srv02 ]
- Clone Set: clnStonith3 [grpStonith3]
- Started: [ srv01 srv04 ]
-- Stopped: [ grpStonith3:2 grpStonith3:3 ]
-+ Stopped: [ srv02 srv03 ]
- Clone Set: clnStonith4 [grpStonith4]
- Started: [ srv01 srv03 ]
-- Stopped: [ grpStonith4:2 grpStonith4:3 ]
-+ Stopped: [ srv02 srv04 ]
-
-diff --git a/pengine/test10/bug-lf-2551.summary b/pengine/test10/bug-lf-2551.summary
-index 0d27903..d299ee9 100644
---- a/pengine/test10/bug-lf-2551.summary
-+++ b/pengine/test10/bug-lf-2551.summary
-@@ -148,7 +148,7 @@ OFFLINE: [ hex-9 ]
- vm-00 (ocf::heartbeat:Xen): Started hex-0
- Clone Set: base-clone [base-group]
- Started: [ hex-0 hex-7 hex-8 ]
-- Stopped: [ base-group:3 ]
-+ Stopped: [ hex-9 ]
- vm-01 (ocf::heartbeat:Xen): Started hex-7
- vm-02 (ocf::heartbeat:Xen): Started hex-8
- vm-03 (ocf::heartbeat:Xen): Stopped
-diff --git a/pengine/test10/bug-lf-2574.summary b/pengine/test10/bug-lf-2574.summary
-index c8e945f..3024a73 100644
---- a/pengine/test10/bug-lf-2574.summary
-+++ b/pengine/test10/bug-lf-2574.summary
-@@ -6,7 +6,7 @@ Online: [ srv01 srv02 srv03 ]
- main_rsc2 (ocf::pacemaker:Dummy): Started srv02
- Clone Set: clnDummy1 [prmDummy1]
- Started: [ srv02 srv03 ]
-- Stopped: [ prmDummy1:2 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnPingd [prmPingd]
- Started: [ srv01 srv02 srv03 ]
-
-@@ -30,8 +30,8 @@ Online: [ srv01 srv02 srv03 ]
- main_rsc2 (ocf::pacemaker:Dummy): Started srv02
- Clone Set: clnDummy1 [prmDummy1]
- Started: [ srv02 srv03 ]
-- Stopped: [ prmDummy1:2 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnPingd [prmPingd]
- Started: [ srv02 srv03 ]
-- Stopped: [ prmPingd:2 ]
-+ Stopped: [ srv01 ]
-
-diff --git a/pengine/test10/bug-lf-2581.summary b/pengine/test10/bug-lf-2581.summary
-index 6979839..98b3763 100644
---- a/pengine/test10/bug-lf-2581.summary
-+++ b/pengine/test10/bug-lf-2581.summary
-@@ -4,7 +4,7 @@ Online: [ elvis queen ]
-
- Clone Set: AZ-clone [AZ-group]
- Started: [ elvis ]
-- Stopped: [ AZ-group:1 ]
-+ Stopped: [ queen ]
- Resource Group: BC-group-1
- B-1 (ocf::rgk:typeB): Started elvis
- C-1 (ocf::rgk:typeC): Started elvis
-@@ -13,7 +13,7 @@ Online: [ elvis queen ]
- C-2 (ocf::rgk:typeC): Started elvis
- Clone Set: stonith-l2network-set [stonith-l2network]
- Started: [ elvis ]
-- Stopped: [ stonith-l2network:1 ]
-+ Stopped: [ queen ]
-
- Transition Summary:
- * Start A:1 (queen)
-diff --git a/pengine/test10/bug-lf-2606.summary b/pengine/test10/bug-lf-2606.summary
-index 4d1dee3..535b3d9 100644
---- a/pengine/test10/bug-lf-2606.summary
-+++ b/pengine/test10/bug-lf-2606.summary
-@@ -39,5 +39,5 @@ OFFLINE: [ node2 ]
- rsc2 (ocf::pacemaker:Dummy): Started node1
- Master/Slave Set: ms3 [rsc3]
- Slaves: [ node1 ]
-- Stopped: [ rsc3:1 ]
-+ Stopped: [ node2 ]
-
-diff --git a/pengine/test10/bug-lf-2619.summary b/pengine/test10/bug-lf-2619.summary
-index e6defdc..ad94dd4 100644
---- a/pengine/test10/bug-lf-2619.summary
-+++ b/pengine/test10/bug-lf-2619.summary
-@@ -24,7 +24,7 @@ Online: [ act1 act2 act3 sby1 sby2 ]
- prmIpPostgreSQLDB3 (ocf::pacemaker:Dummy): Started act3
- prmApPostgreSQLDB3 (ocf::pacemaker:Dummy): Started act3
- Clone Set: clnPingd [prmPingd]
-- prmPingd:0 (ocf::pacemaker:ping): Started act1 FAILED
-+ prmPingd (ocf::pacemaker:ping): Started act1 FAILED
- Started: [ act2 act3 sby1 sby2 ]
-
- Transition Summary:
-@@ -95,5 +95,5 @@ Online: [ act1 act2 act3 sby1 sby2 ]
- prmApPostgreSQLDB3 (ocf::pacemaker:Dummy): Started act3
- Clone Set: clnPingd [prmPingd]
- Started: [ act2 act3 sby1 sby2 ]
-- Stopped: [ prmPingd:4 ]
-+ Stopped: [ act1 ]
-
-diff --git a/pengine/test10/bug-suse-707150.summary b/pengine/test10/bug-suse-707150.summary
-index 697c6a6..6d1fcdf 100644
---- a/pengine/test10/bug-suse-707150.summary
-+++ b/pengine/test10/bug-suse-707150.summary
-@@ -6,13 +6,13 @@ OFFLINE: [ hex-7 hex-8 ]
- vm-00 (ocf::heartbeat:Xen): Stopped
- Clone Set: base-clone [base-group]
- Resource Group: base-group:0
-- dlm:0 (ocf::pacemaker:controld): Started hex-0
-- o2cb:0 (ocf::ocfs2:o2cb): Stopped
-- clvm:0 (ocf::lvm2:clvmd): Stopped
-- cmirrord:0 (ocf::lvm2:cmirrord): Stopped
-- vg1:0 (ocf::heartbeat:LVM): Stopped
-- ocfs2-1:0 (ocf::heartbeat:Filesystem): Stopped
-- Stopped: [ base-group:1 base-group:2 base-group:3 ]
-+ dlm (ocf::pacemaker:controld): Started hex-0
-+ o2cb (ocf::ocfs2:o2cb): Stopped
-+ clvm (ocf::lvm2:clvmd): Stopped
-+ cmirrord (ocf::lvm2:cmirrord): Stopped
-+ vg1 (ocf::heartbeat:LVM): Stopped
-+ ocfs2-1 (ocf::heartbeat:Filesystem): Stopped
-+ Stopped: [ hex-7 hex-8 hex-9 ]
- vm-01 (ocf::heartbeat:Xen): Stopped
- fencing-sbd (stonith:external/sbd): Started hex-9
- dummy1 (ocf::heartbeat:Dummy): Started hex-0
-@@ -52,20 +52,20 @@ OFFLINE: [ hex-7 hex-8 ]
- vm-00 (ocf::heartbeat:Xen): Stopped
- Clone Set: base-clone [base-group]
- Resource Group: base-group:0
-- dlm:0 (ocf::pacemaker:controld): Started hex-0
-- o2cb:0 (ocf::ocfs2:o2cb): Started hex-0
-- clvm:0 (ocf::lvm2:clvmd): Started hex-0
-- cmirrord:0 (ocf::lvm2:cmirrord): Started hex-0
-- vg1:0 (ocf::heartbeat:LVM): Stopped
-- ocfs2-1:0 (ocf::heartbeat:Filesystem): Stopped
-+ dlm (ocf::pacemaker:controld): Started hex-0
-+ o2cb (ocf::ocfs2:o2cb): Started hex-0
-+ clvm (ocf::lvm2:clvmd): Started hex-0
-+ cmirrord (ocf::lvm2:cmirrord): Started hex-0
-+ vg1 (ocf::heartbeat:LVM): Stopped
-+ ocfs2-1 (ocf::heartbeat:Filesystem): Stopped
- Resource Group: base-group:1
-- dlm:1 (ocf::pacemaker:controld): Started hex-9
-- o2cb:1 (ocf::ocfs2:o2cb): Started hex-9
-- clvm:1 (ocf::lvm2:clvmd): Started hex-9
-- cmirrord:1 (ocf::lvm2:cmirrord): Started hex-9
-- vg1:1 (ocf::heartbeat:LVM): Stopped
-- ocfs2-1:1 (ocf::heartbeat:Filesystem): Stopped
-- Stopped: [ base-group:2 base-group:3 ]
-+ dlm (ocf::pacemaker:controld): Started hex-9
-+ o2cb (ocf::ocfs2:o2cb): Started hex-9
-+ clvm (ocf::lvm2:clvmd): Started hex-9
-+ cmirrord (ocf::lvm2:cmirrord): Started hex-9
-+ vg1 (ocf::heartbeat:LVM): Stopped
-+ ocfs2-1 (ocf::heartbeat:Filesystem): Stopped
-+ Stopped: [ hex-7 hex-8 ]
- vm-01 (ocf::heartbeat:Xen): Stopped
- fencing-sbd (stonith:external/sbd): Started hex-9
- dummy1 (ocf::heartbeat:Dummy): Started hex-0
-diff --git a/pengine/test10/clone-anon-dup.dot b/pengine/test10/clone-anon-dup.dot
-index 342ad3a..85c7fcc 100644
---- a/pengine/test10/clone-anon-dup.dot
-+++ b/pengine/test10/clone-anon-dup.dot
-@@ -1,12 +1,6 @@
- digraph "g" {
- "all_stopped" [ style=bold color="green" fontcolor="orange" ]
--"apache2:0_delete_0 wc01" [ style=bold color="green" fontcolor="black"]
--"apache2:0_delete_0 wc02" [ style=bold color="green" fontcolor="black"]
--"apache2:0_delete_0 wc03" [ style=bold color="green" fontcolor="black"]
- "apache2:0_stop_0 wc02" -> "all_stopped" [ style = bold]
--"apache2:0_stop_0 wc02" -> "apache2:0_delete_0 wc01" [ style = bold]
--"apache2:0_stop_0 wc02" -> "apache2:0_delete_0 wc02" [ style = bold]
--"apache2:0_stop_0 wc02" -> "apache2:0_delete_0 wc03" [ style = bold]
- "apache2:0_stop_0 wc02" -> "group_webservice:2_stopped_0" [ style = bold]
- "apache2:0_stop_0 wc02" [ style=bold color="green" fontcolor="black"]
- "clone_webservice_stop_0" -> "clone_webservice_stopped_0" [ style = bold]
-diff --git a/pengine/test10/clone-anon-dup.exp b/pengine/test10/clone-anon-dup.exp
-index e06b55a..69ed601 100644
---- a/pengine/test10/clone-anon-dup.exp
-+++ b/pengine/test10/clone-anon-dup.exp
-@@ -1,20 +1,20 @@
[hunk body: XML transition-graph markup stripped from this copy, not reconstructable]
-@@ -23,7 +23,7 @@
[hunk body: XML transition-graph markup stripped from this copy, not reconstructable]
-@@ -32,7 +32,7 @@
[hunk body: XML transition-graph markup stripped from this copy, not reconstructable]
-@@ -41,164 +41,125 @@
[hunk body: XML transition-graph markup stripped from this copy, not reconstructable]
-@@ -206,7 +167,7 @@
[hunk body: XML transition-graph markup stripped from this copy, not reconstructable]
-diff --git a/pengine/test10/clone-anon-dup.summary b/pengine/test10/clone-anon-dup.summary
-index ed127c3..bffbf15 100644
---- a/pengine/test10/clone-anon-dup.summary
-+++ b/pengine/test10/clone-anon-dup.summary
-@@ -2,12 +2,13 @@
- Current cluster status:
- Online: [ wc01 wc02 wc03 ]
-
-- stonith-1 (stonith:dummy): Stopped
-+ stonith-1 (stonith:dummy): Stopped
- Clone Set: clone_webservice [group_webservice]
- Resource Group: group_webservice:2
-- fs_www:2 (ocf::heartbeat:Filesystem): ORPHANED Stopped
-- apache2:2 (ocf::heartbeat:apache): ORPHANED Started wc02
-+ fs_www (ocf::heartbeat:Filesystem): ORPHANED Stopped
-+ apache2 (ocf::heartbeat:apache): ORPHANED Started wc02
- Started: [ wc01 wc02 ]
-+ Stopped: [ wc03 ]
-
- Transition Summary:
- * Start stonith-1 (wc01)
-@@ -22,9 +23,6 @@ Executing cluster transition:
- * Resource action: stonith-1 start on wc01
- * Pseudo action: group_webservice:2_stop_0
- * Resource action: apache2:0 stop on wc02
-- * Resource action: apache2:0 delete on wc03
-- * Resource action: apache2:0 delete on wc01
-- * Resource action: apache2:0 delete on wc02
- * Pseudo action: all_stopped
- * Pseudo action: group_webservice:2_stopped_0
- * Pseudo action: clone_webservice_stopped_0
-@@ -35,4 +33,5 @@ Online: [ wc01 wc02 wc03 ]
- stonith-1 (stonith:dummy): Started wc01
- Clone Set: clone_webservice [group_webservice]
- Started: [ wc01 wc02 ]
-+ Stopped: [ wc03 ]
-
-diff --git a/pengine/test10/clone-anon-failcount.summary b/pengine/test10/clone-anon-failcount.summary
-index 1f8c853..7e899b3 100644
---- a/pengine/test10/clone-anon-failcount.summary
-+++ b/pengine/test10/clone-anon-failcount.summary
-@@ -23,9 +23,10 @@ Online: [ srv01 srv02 srv03 srv04 ]
- prmStonithN4 (stonith:external/ssh): Started srv03
- Clone Set: clnUMgroup01 [clnUmResource]
- Resource Group: clnUmResource:0
-- clnUMdummy01:0 (ocf::pacemaker:Dummy): Started srv04 FAILED
-- clnUMdummy02:0 (ocf::pacemaker:Dummy): Started srv04
-+ clnUMdummy01 (ocf::pacemaker:Dummy): Started srv04 FAILED
-+ clnUMdummy02 (ocf::pacemaker:Dummy): Started srv04
- Started: [ srv01 ]
-+ Stopped: [ srv02 srv03 ]
- Clone Set: clnPingd [clnPrmPingd]
- Started: [ srv01 srv02 srv03 srv04 ]
- Clone Set: clnDiskd1 [clnPrmDiskd1]
-@@ -105,7 +106,7 @@ Online: [ srv01 srv02 srv03 srv04 ]
- prmStonithN4 (stonith:external/ssh): Started srv03
- Clone Set: clnUMgroup01 [clnUmResource]
- Started: [ srv04 ]
-- Stopped: [ clnUmResource:1 ]
-+ Stopped: [ srv01 srv02 srv03 ]
- Clone Set: clnPingd [clnPrmPingd]
- Started: [ srv01 srv02 srv03 srv04 ]
- Clone Set: clnDiskd1 [clnPrmDiskd1]
-diff --git a/pengine/test10/clone-anon-probe-1.summary b/pengine/test10/clone-anon-probe-1.summary
-index 7567efa..093f59b 100644
---- a/pengine/test10/clone-anon-probe-1.summary
-+++ b/pengine/test10/clone-anon-probe-1.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ mysql-01 mysql-02 ]
-
- Clone Set: ms-drbd0 [drbd0]
-- Stopped: [ drbd0:0 drbd0:1 ]
-+ Stopped: [ mysql-01 mysql-02 ]
-
- Transition Summary:
- * Start drbd0:0 (mysql-01)
-diff --git a/pengine/test10/clone-anon-probe-2.summary b/pengine/test10/clone-anon-probe-2.summary
-index ce278b1..7064e86 100644
---- a/pengine/test10/clone-anon-probe-2.summary
-+++ b/pengine/test10/clone-anon-probe-2.summary
-@@ -4,7 +4,7 @@ Online: [ mysql-01 mysql-02 ]
-
- Clone Set: ms-drbd0 [drbd0]
- Started: [ mysql-02 ]
-- Stopped: [ drbd0:1 ]
-+ Stopped: [ mysql-01 ]
-
- Transition Summary:
- * Start drbd0:1 (mysql-01)
-diff --git a/pengine/test10/clone-colocate-instance-1.summary b/pengine/test10/clone-colocate-instance-1.summary
-index 4cc23cc..d6eaa4f 100644
---- a/pengine/test10/clone-colocate-instance-1.summary
-+++ b/pengine/test10/clone-colocate-instance-1.summary
-@@ -5,7 +5,7 @@ Online: [ alice.demo bob.demo ]
- dummy1 (ocf::heartbeat:Dummy): Stopped
- dummy2 (ocf::heartbeat:Dummy): Stopped
- Clone Set: cl_dummy [dummy]
-- Stopped: [ dummy:0 dummy:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start dummy1 (alice.demo)
-diff --git a/pengine/test10/clone-colocate-instance-2.summary b/pengine/test10/clone-colocate-instance-2.summary
-index 95a64fd..10b380a 100644
---- a/pengine/test10/clone-colocate-instance-2.summary
-+++ b/pengine/test10/clone-colocate-instance-2.summary
-@@ -5,7 +5,7 @@ Online: [ alice.demo bob.demo ]
- dummy1 (ocf::heartbeat:Dummy): Stopped
- dummy2 (ocf::heartbeat:Dummy): Stopped
- Clone Set: cl_dummy [dummy]
-- Stopped: [ dummy:0 dummy:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start dummy1 (bob.demo)
-diff --git a/pengine/test10/clone-max-zero.dot b/pengine/test10/clone-max-zero.dot
-index f4d11bd..33cec0a 100644
---- a/pengine/test10/clone-max-zero.dot
-+++ b/pengine/test10/clone-max-zero.dot
-@@ -11,19 +11,11 @@ digraph "g" {
- "dlm-clone_stop_0" -> "dlm:1_stop_0 c001n12" [ style = bold]
- "dlm-clone_stop_0" [ style=bold color="green" fontcolor="orange" ]
- "dlm-clone_stopped_0" [ style=bold color="green" fontcolor="orange" ]
--"dlm:0_delete_0 c001n11" [ style=bold color="green" fontcolor="black"]
--"dlm:0_delete_0 c001n12" [ style=bold color="green" fontcolor="black"]
- "dlm:0_stop_0 c001n11" -> "all_stopped" [ style = bold]
- "dlm:0_stop_0 c001n11" -> "dlm-clone_stopped_0" [ style = bold]
--"dlm:0_stop_0 c001n11" -> "dlm:0_delete_0 c001n11" [ style = bold]
--"dlm:0_stop_0 c001n11" -> "dlm:0_delete_0 c001n12" [ style = bold]
- "dlm:0_stop_0 c001n11" [ style=bold color="green" fontcolor="black"]
--"dlm:1_delete_0 c001n11" [ style=bold color="green" fontcolor="black"]
--"dlm:1_delete_0 c001n12" [ style=bold color="green" fontcolor="black"]
- "dlm:1_stop_0 c001n12" -> "all_stopped" [ style = bold]
- "dlm:1_stop_0 c001n12" -> "dlm-clone_stopped_0" [ style = bold]
--"dlm:1_stop_0 c001n12" -> "dlm:1_delete_0 c001n11" [ style = bold]
--"dlm:1_stop_0 c001n12" -> "dlm:1_delete_0 c001n12" [ style = bold]
- "dlm:1_stop_0 c001n12" [ style=bold color="green" fontcolor="black"]
- "o2cb-clone_stop_0" -> "o2cb-clone_stopped_0" [ style = bold]
- "o2cb-clone_stop_0" -> "o2cb:0_stop_0 c001n11" [ style = bold]
-diff --git a/pengine/test10/clone-max-zero.exp b/pengine/test10/clone-max-zero.exp
-index 274c117..d071c4b 100644
---- a/pengine/test10/clone-max-zero.exp
-+++ b/pengine/test10/clone-max-zero.exp
-@@ -1,233 +1,181 @@
[hunk body: XML transition-graph markup stripped from this copy, not reconstructable]
-@@ -235,22 +183,22 @@
[hunk body: XML transition-graph markup stripped from this copy, not reconstructable]
-diff --git a/pengine/test10/clone-max-zero.summary b/pengine/test10/clone-max-zero.summary
-index 2a0b498..54829c8 100644
---- a/pengine/test10/clone-max-zero.summary
-+++ b/pengine/test10/clone-max-zero.summary
-@@ -4,8 +4,8 @@ Online: [ c001n11 c001n12 ]
-
- fencing (stonith:external/ssh): Started c001n11
- Clone Set: dlm-clone [dlm]
-- dlm:0 (ocf::pacemaker:controld): ORPHANED Started c001n12
-- dlm:1 (ocf::pacemaker:controld): ORPHANED Started c001n11
-+ dlm (ocf::pacemaker:controld): ORPHANED Started c001n12
-+ dlm (ocf::pacemaker:controld): ORPHANED Started c001n11
- Clone Set: o2cb-clone [o2cb]
- Started: [ c001n11 c001n12 ]
- Clone Set: clone-drbd0 [drbd0]
-@@ -32,11 +32,7 @@ Executing cluster transition:
- * Pseudo action: o2cb-clone_stopped_0
- * Pseudo action: dlm-clone_stop_0
- * Resource action: dlm:1 stop on c001n12
-- * Resource action: dlm:1 delete on c001n11
-- * Resource action: dlm:1 delete on c001n12
- * Resource action: dlm:0 stop on c001n11
-- * Resource action: dlm:0 delete on c001n11
-- * Resource action: dlm:0 delete on c001n12
- * Pseudo action: dlm-clone_stopped_0
- * Pseudo action: all_stopped
-
-@@ -45,10 +41,11 @@ Online: [ c001n11 c001n12 ]
-
- fencing (stonith:external/ssh): Started c001n11
- Clone Set: dlm-clone [dlm]
-+ Stopped: [ c001n11 c001n12 ]
- Clone Set: o2cb-clone [o2cb]
-- Stopped: [ o2cb:0 o2cb:1 ]
-+ Stopped: [ c001n11 c001n12 ]
- Clone Set: clone-drbd0 [drbd0]
- Started: [ c001n11 c001n12 ]
- Clone Set: c-ocfs2-1 [ocfs2-1]
-- Stopped: [ ocfs2-1:0 ocfs2-1:1 ]
-+ Stopped: [ c001n11 c001n12 ]
-
-diff --git a/pengine/test10/clone-no-shuffle.summary b/pengine/test10/clone-no-shuffle.summary
-index 618da52..a9f3f7a 100644
---- a/pengine/test10/clone-no-shuffle.summary
-+++ b/pengine/test10/clone-no-shuffle.summary
-@@ -5,7 +5,7 @@ Online: [ dktest1sles10 dktest2sles10 ]
- stonith-1 (stonith:dummy): Stopped
- Master/Slave Set: ms-drbd1 [drbd1]
- Masters: [ dktest2sles10 ]
-- Stopped: [ drbd1:1 ]
-+ Stopped: [ dktest1sles10 ]
- testip (ocf::heartbeat:IPaddr2): Started dktest2sles10
-
- Transition Summary:
-@@ -56,6 +56,6 @@ Online: [ dktest1sles10 dktest2sles10 ]
- stonith-1 (stonith:dummy): Started dktest1sles10
- Master/Slave Set: ms-drbd1 [drbd1]
- Slaves: [ dktest1sles10 ]
-- Stopped: [ drbd1:1 ]
-+ Stopped: [ dktest2sles10 ]
- testip (ocf::heartbeat:IPaddr2): Stopped
-
-diff --git a/pengine/test10/clone-order-instance.summary b/pengine/test10/clone-order-instance.summary
-index 8df1c50..4946342 100644
---- a/pengine/test10/clone-order-instance.summary
-+++ b/pengine/test10/clone-order-instance.summary
-@@ -4,7 +4,7 @@ Online: [ alice.demo bob.demo ]
-
- dummy1 (ocf::heartbeat:Dummy): Stopped
- Clone Set: cl_dummy [dummy]
-- Stopped: [ dummy:0 dummy:1 ]
-+ Stopped: [ alice.demo bob.demo ]
-
- Transition Summary:
- * Start dummy1 (alice.demo)
-diff --git a/pengine/test10/clone-order-primitive.summary b/pengine/test10/clone-order-primitive.summary
-index aecc65b..a628ade 100644
---- a/pengine/test10/clone-order-primitive.summary
-+++ b/pengine/test10/clone-order-primitive.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk pcw2688.see.ed.ac.uk pcw2709.see.ed.ac.uk ]
-
- Clone Set: cups_clone [cups_lsb]
-- Stopped: [ cups_lsb:0 cups_lsb:1 ]
-+ Stopped: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk pcw2688.see.ed.ac.uk pcw2709.see.ed.ac.uk ]
- smb_lsb (lsb:smb): Stopped
-
- Transition Summary:
-@@ -23,5 +23,6 @@ Online: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk pcw2688.see.ed.ac.uk pcw2709
-
- Clone Set: cups_clone [cups_lsb]
- Started: [ pcw2058.see.ed.ac.uk pcw2059.see.ed.ac.uk ]
-+ Stopped: [ pcw2688.see.ed.ac.uk pcw2709.see.ed.ac.uk ]
- smb_lsb (lsb:smb): Started pcw2688.see.ed.ac.uk
-
-diff --git a/pengine/test10/cloned-group.dot b/pengine/test10/cloned-group.dot
-index 0d71821..cf6ec27 100644
---- a/pengine/test10/cloned-group.dot
-+++ b/pengine/test10/cloned-group.dot
-@@ -1,8 +1,6 @@
- digraph "g" {
- "all_stopped" [ style=bold color="green" fontcolor="orange" ]
--"apache2:0_delete_0 webcluster01" [ style=bold color="green" fontcolor="black"]
- "apache2:0_stop_0 webcluster01" -> "all_stopped" [ style = bold]
--"apache2:0_stop_0 webcluster01" -> "apache2:0_delete_0 webcluster01" [ style = bold]
- "apache2:0_stop_0 webcluster01" -> "grrr:2_stopped_0" [ style = bold]
- "apache2:0_stop_0 webcluster01" [ style=bold color="green" fontcolor="black"]
- "apache2:1_monitor_10000 webcluster01" [ style=bold color="green" fontcolor="black" ]
-@@ -43,11 +41,9 @@ digraph "g" {
- "grrr:2_stop_0" [ style=bold color="green" fontcolor="orange" ]
fontcolor="orange" ] - "grrr:2_stopped_0" -> "apache2_clone_stopped_0" [ style = bold] - "grrr:2_stopped_0" [ style=bold color="green" fontcolor="orange" ] --"mysql-proxy:0_delete_0 webcluster01" [ style=bold color="green" fontcolor="black"] - "mysql-proxy:0_stop_0 webcluster01" -> "all_stopped" [ style = bold] - "mysql-proxy:0_stop_0 webcluster01" -> "apache2:0_stop_0 webcluster01" [ style = bold] - "mysql-proxy:0_stop_0 webcluster01" -> "grrr:2_stopped_0" [ style = bold] --"mysql-proxy:0_stop_0 webcluster01" -> "mysql-proxy:0_delete_0 webcluster01" [ style = bold] - "mysql-proxy:0_stop_0 webcluster01" [ style=bold color="green" fontcolor="black"] - "mysql-proxy:1_monitor_10000 webcluster01" [ style=bold color="green" fontcolor="black" ] - "mysql-proxy:1_start_0 webcluster01" -> "grrr:0_running_0" [ style = bold] -diff --git a/pengine/test10/cloned-group.exp b/pengine/test10/cloned-group.exp -index 0a833b8..1e2ee27 100644 ---- a/pengine/test10/cloned-group.exp -+++ b/pengine/test10/cloned-group.exp -@@ -1,96 +1,96 @@ - - - -- -+ - - - - - -- -+ - - -- -+ - - -- -+ - - - - - -- -+ - - - - - -- -+ - - - - - -- -+ - - - - - -- -+ - - -- -+ - - -- -+ - - - - - -- -+ - - - - - -- -+ - - -- -+ - - - - - -- -+ - - - - - - -- -+ - - -- -+ - - - - - -- -+ - - - - - - -- -+ - - -- -+ - - - -@@ -103,39 +103,39 @@ - - - -- -+ - - - - - -- -+ - - - - - - -- -+ - - -- -+ - - -- -+ - - - - - -- -+ - - - - - - -- -+ - - - -@@ -148,149 +148,123 @@ - - - -- -+ - - -
- - -- -+ - - - - - -- -+ - - -- -+ - - -- -+ - - - - - -- -+ - - - - - -- -+ - - - - - -- -+ - - - - - - -- -+ - - -- -+ - - - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -+ - - - - - - -- -+ - - - -- -+ - -- -+ - - - - - -- -+ - - -- -+ - - -- -+ - - - -- -+ - -- -+ - - - - - -- -+ - -- -+ - - - - - -- -+ - - -- -+ - - - -- -+ - -- -+ - - - - - -- -+ - - - -- -+ - - - -@@ -298,16 +272,16 @@ - - - -- -+ - - -- -+ - - -- -+ - - -- -+ - - - -diff --git a/pengine/test10/cloned-group.summary b/pengine/test10/cloned-group.summary -index 1ec8e79..7d64be4 100644 ---- a/pengine/test10/cloned-group.summary -+++ b/pengine/test10/cloned-group.summary -@@ -5,10 +5,10 @@ OFFLINE: [ webcluster02 ] - - Clone Set: apache2_clone [grrr] - Resource Group: grrr:2 -- apache2:2 (ocf::heartbeat:apache): ORPHANED Started webcluster01 -- mysql-proxy:2 (lsb:mysql-proxy): ORPHANED Started webcluster01 -+ apache2 (ocf::heartbeat:apache): ORPHANED Started webcluster01 -+ mysql-proxy (lsb:mysql-proxy): ORPHANED Started webcluster01 - Started: [ webcluster01 ] -- Stopped: [ grrr:1 ] -+ Stopped: [ webcluster02 ] - - Transition Summary: - * Restart apache2:0 (Started webcluster01) -@@ -22,10 +22,8 @@ Executing cluster transition: - * Resource action: mysql-proxy:1 stop on webcluster01 - * Pseudo action: grrr:2_stop_0 - * Resource action: mysql-proxy:0 stop on webcluster01 -- * Resource action: mysql-proxy:0 delete on webcluster01 - * Resource action: apache2:1 stop on webcluster01 - * Resource action: apache2:0 stop on webcluster01 -- * Resource action: apache2:0 delete on webcluster01 - * Pseudo action: all_stopped - * Pseudo action: grrr:0_stopped_0 - * Pseudo action: grrr:2_stopped_0 -@@ -45,5 +43,5 @@ OFFLINE: [ webcluster02 ] - - Clone Set: apache2_clone [grrr] - Started: [ webcluster01 ] -- Stopped: [ grrr:1 ] -+ Stopped: [ webcluster02 ] - -diff --git a/pengine/test10/coloc-clone-stays-active.summary b/pengine/test10/coloc-clone-stays-active.summary -index a00c775..b5edc92 100644 ---- a/pengine/test10/coloc-clone-stays-active.summary -+++ b/pengine/test10/coloc-clone-stays-active.summary -@@ -29,7 +29,7 @@ Online: [ s01-0 s01-1 ] - Masters: [ s01-1 ] - Slaves: [ s01-0 ] - Clone Set: cl-o2cb [o2cb] -- Stopped: [ o2cb:0 o2cb:1 ] -+ Stopped: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-s01-service [drbd-s01-service] - Masters: [ s01-0 s01-1 ] - Clone Set: cl-s01-service-fs [s01-service-fs] -@@ -37,7 +37,7 @@ Online: [ s01-0 s01-1 ] - Clone Set: cl-ietd [ietd] - Started: [ s01-0 s01-1 ] - Clone Set: cl-dhcpd [dhcpd] -- Stopped: [ dhcpd:0 dhcpd:1 ] -+ Stopped: [ s01-0 s01-1 ] - Resource Group: http-server - vip-233 (ocf::heartbeat:IPaddr2): Started s01-0 - nginx (lsb:nginx): Stopped -@@ -73,7 +73,7 @@ Online: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-vds-tftpboot [drbd-vds-tftpboot] - Masters: [ s01-0 s01-1 ] - Clone Set: cl-vds-tftpboot-fs [vds-tftpboot-fs] -- Stopped: [ vds-tftpboot-fs:0 vds-tftpboot-fs:1 ] -+ Stopped: [ s01-0 s01-1 ] - Clone Set: cl-gfs2 [gfs2] - Started: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-vds-http [drbd-vds-http] -@@ -137,7 +137,7 @@ Online: [ s01-0 s01-1 ] - Masters: [ s01-1 ] - Slaves: [ s01-0 ] - Clone Set: cl-o2cb [o2cb] -- Stopped: [ o2cb:0 o2cb:1 ] -+ Stopped: [ s01-0 s01-1 ] - Master/Slave Set: ms-drbd-s01-service [drbd-s01-service] - Masters: [ s01-0 s01-1 ] - Clone Set: cl-s01-service-fs [s01-service-fs] -@@ -145,7 +145,7 @@ Online: [ s01-0 s01-1 ] - Clone Set: cl-ietd [ietd] - Started: [ s01-0 s01-1 ] - Clone Set: cl-dhcpd [dhcpd] -- 
-+ Stopped: [ s01-0 s01-1 ]
- Resource Group: http-server
- vip-233 (ocf::heartbeat:IPaddr2): Started s01-0
- nginx (lsb:nginx): Stopped
-@@ -73,7 +73,7 @@ Online: [ s01-0 s01-1 ]
- Master/Slave Set: ms-drbd-vds-tftpboot [drbd-vds-tftpboot]
- Masters: [ s01-0 s01-1 ]
- Clone Set: cl-vds-tftpboot-fs [vds-tftpboot-fs]
-- Stopped: [ vds-tftpboot-fs:0 vds-tftpboot-fs:1 ]
-+ Stopped: [ s01-0 s01-1 ]
- Clone Set: cl-gfs2 [gfs2]
- Started: [ s01-0 s01-1 ]
- Master/Slave Set: ms-drbd-vds-http [drbd-vds-http]
-@@ -137,7 +137,7 @@ Online: [ s01-0 s01-1 ]
- Masters: [ s01-1 ]
- Slaves: [ s01-0 ]
- Clone Set: cl-o2cb [o2cb]
-- Stopped: [ o2cb:0 o2cb:1 ]
-+ Stopped: [ s01-0 s01-1 ]
- Master/Slave Set: ms-drbd-s01-service [drbd-s01-service]
- Masters: [ s01-0 s01-1 ]
- Clone Set: cl-s01-service-fs [s01-service-fs]
-@@ -145,7 +145,7 @@ Online: [ s01-0 s01-1 ]
- Clone Set: cl-ietd [ietd]
- Started: [ s01-0 s01-1 ]
- Clone Set: cl-dhcpd [dhcpd]
-- Stopped: [ dhcpd:0 dhcpd:1 ]
-+ Stopped: [ s01-0 s01-1 ]
- Resource Group: http-server
- vip-233 (ocf::heartbeat:IPaddr2): Started s01-0
- nginx (lsb:nginx): Stopped
-@@ -181,7 +181,7 @@ Online: [ s01-0 s01-1 ]
- Master/Slave Set: ms-drbd-vds-tftpboot [drbd-vds-tftpboot]
- Masters: [ s01-0 s01-1 ]
- Clone Set: cl-vds-tftpboot-fs [vds-tftpboot-fs]
-- Stopped: [ vds-tftpboot-fs:0 vds-tftpboot-fs:1 ]
-+ Stopped: [ s01-0 s01-1 ]
- Clone Set: cl-gfs2 [gfs2]
- Started: [ s01-0 s01-1 ]
- Master/Slave Set: ms-drbd-vds-http [drbd-vds-http]
-diff --git a/pengine/test10/colocate-primitive-with-clone.summary b/pengine/test10/colocate-primitive-with-clone.summary
-index 41f95ea..5e4c511 100644
---- a/pengine/test10/colocate-primitive-with-clone.summary
-+++ b/pengine/test10/colocate-primitive-with-clone.summary
-@@ -38,19 +38,19 @@ Online: [ srv01 srv02 srv03 srv04 ]
- prmStonithN4 (stonith:external/ssh): Started srv03
- Clone Set: clnUMgroup01 [clnUmResource]
- Started: [ srv04 ]
-- Stopped: [ clnUmResource:1 ]
-+ Stopped: [ srv01 srv02 srv03 ]
- Clone Set: clnPingd [clnPrmPingd]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnPrmPingd:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnDiskd1 [clnPrmDiskd1]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnPrmDiskd1:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnG3dummy1 [clnG3dummy01]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnG3dummy01:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnG3dummy2 [clnG3dummy02]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnG3dummy02:3 ]
-+ Stopped: [ srv01 ]
-
- Transition Summary:
- * Start UmVIPcheck (srv04)
-@@ -109,17 +109,17 @@ Online: [ srv01 srv02 srv03 srv04 ]
- prmStonithN4 (stonith:external/ssh): Started srv03
- Clone Set: clnUMgroup01 [clnUmResource]
- Started: [ srv04 ]
-- Stopped: [ clnUmResource:1 ]
-+ Stopped: [ srv01 srv02 srv03 ]
- Clone Set: clnPingd [clnPrmPingd]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnPrmPingd:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnDiskd1 [clnPrmDiskd1]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnPrmDiskd1:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnG3dummy1 [clnG3dummy01]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnG3dummy01:3 ]
-+ Stopped: [ srv01 ]
- Clone Set: clnG3dummy2 [clnG3dummy02]
- Started: [ srv02 srv03 srv04 ]
-- Stopped: [ clnG3dummy02:3 ]
-+ Stopped: [ srv01 ]
-
-diff --git a/pengine/test10/colocation_constraint_stops_master.summary b/pengine/test10/colocation_constraint_stops_master.summary
-index c052861..1a80c4f 100644
---- a/pengine/test10/colocation_constraint_stops_master.summary
-+++ b/pengine/test10/colocation_constraint_stops_master.summary
-@@ -4,6 +4,7 @@ Online: [ fc16-builder fc16-builder2 ]
-
- Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A]
- Masters: [ fc16-builder ]
-+ Stopped: [ fc16-builder2 ]
-
- Transition Summary:
- * Demote NATIVE_RSC_A:0 (Master -> Stopped fc16-builder)
-@@ -32,5 +33,5 @@ Revised cluster status:
- Online: [ fc16-builder fc16-builder2 ]
-
- Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A]
-- Stopped: [ NATIVE_RSC_A:0 ]
-+ Stopped: [ fc16-builder fc16-builder2 ]
-
-diff --git a/pengine/test10/colocation_constraint_stops_slave.summary b/pengine/test10/colocation_constraint_stops_slave.summary
-index 5528308..f928563 100644
---- a/pengine/test10/colocation_constraint_stops_slave.summary
-+++ b/pengine/test10/colocation_constraint_stops_slave.summary
-@@ -5,6 +5,7 @@ OFFLINE: [ fc16-builder2 ]
-
- Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A]
- Slaves: [ fc16-builder ]
-+ Stopped: [ fc16-builder2 ]
- NATIVE_RSC_B (ocf::pacemaker:Dummy): Started fc16-builder
(ocf::pacemaker:Dummy): Started fc16-builder - - Transition Summary: -@@ -28,6 +29,6 @@ Online: [ fc16-builder ] - OFFLINE: [ fc16-builder2 ] - - Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A] -- Stopped: [ NATIVE_RSC_A:0 ] -+ Stopped: [ fc16-builder fc16-builder2 ] - NATIVE_RSC_B (ocf::pacemaker:Dummy): Stopped - -diff --git a/pengine/test10/date-1.exp b/pengine/test10/date-1.exp -index f3deedf..2a64721 100644 ---- a/pengine/test10/date-1.exp -+++ b/pengine/test10/date-1.exp -@@ -25,7 +25,7 @@ [hunk body lost in extraction: XML-only patch lines stripped] -diff --git a/pengine/test10/failcount-block.dot b/pengine/test10/failcount-block.dot -new file mode 100644 -index 0000000..c45ebad ---- /dev/null -+++ b/pengine/test10/failcount-block.dot -@@ -0,0 +1,12 @@ -+digraph "g" { -+"rsc_pcmk-1_clear_failcount pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-1_monitor_5000 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-3_clear_failcount pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-3_monitor_5000 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-3_start_0 pcmk-1" -> "rsc_pcmk-3_monitor_5000 pcmk-1" [ style = bold] -+"rsc_pcmk-3_start_0 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-4_monitor_5000 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-4_start_0 pcmk-1" -> "rsc_pcmk-4_monitor_5000 pcmk-1" [ style = bold] -+"rsc_pcmk-4_start_0 pcmk-1" [ style=bold color="green" fontcolor="black"] -+"rsc_pcmk-5_clear_failcount pcmk-1" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/failcount-block.exp b/pengine/test10/failcount-block.exp -new file mode 100644 -index 0000000..9ac63a2 ---- /dev/null -+++ b/pengine/test10/failcount-block.exp -@@ -0,0 +1,83 @@ [new file content lost in extraction: XML lines stripped] -diff --git a/pengine/test10/failcount-block.scores b/pengine/test10/failcount-block.scores -new file mode 100644 -index 0000000..09896aa ---- /dev/null -+++ b/pengine/test10/failcount-block.scores -@@ -0,0 +1,11 @@ -+Allocation scores: -+native_color: rsc_pcmk-1 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-1 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-2 allocation score on pcmk-1: INFINITY -+native_color: rsc_pcmk-2 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-3 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-3 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-4 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-4 allocation score on pcmk-4: 0 -+native_color: rsc_pcmk-5 allocation score on pcmk-1: 0 -+native_color: rsc_pcmk-5 allocation score on pcmk-4: 0 -diff --git a/pengine/test10/failcount-block.summary b/pengine/test10/failcount-block.summary -new file mode 100644 -index 0000000..93a2022 ---- /dev/null -+++ b/pengine/test10/failcount-block.summary -@@ -0,0 +1,35 @@ -+ -+Current cluster status: -+Online: [ pcmk-1 ] -+OFFLINE: [ pcmk-4 ] -+ -+ rsc_pcmk-1 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-2 (ocf::heartbeat:IPaddr2): Started pcmk-1 (unmanaged) FAILED -+ rsc_pcmk-3 (ocf::heartbeat:IPaddr2): Stopped -+ rsc_pcmk-4 (ocf::heartbeat:IPaddr2): Stopped -+ rsc_pcmk-5 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ -+Transition Summary: -+ * Start rsc_pcmk-3 (pcmk-1) -+ * Start rsc_pcmk-4 (pcmk-1) -+ -+Executing cluster transition: -+ * Resource action: rsc_pcmk-1
monitor=5000 on pcmk-1 -+ * Cluster action: clear_failcount for rsc_pcmk-1 on pcmk-1 -+ * Resource action: rsc_pcmk-3 start on pcmk-1 -+ * Cluster action: clear_failcount for rsc_pcmk-3 on pcmk-1 -+ * Resource action: rsc_pcmk-4 start on pcmk-1 -+ * Cluster action: clear_failcount for rsc_pcmk-5 on pcmk-1 -+ * Resource action: rsc_pcmk-3 monitor=5000 on pcmk-1 -+ * Resource action: rsc_pcmk-4 monitor=5000 on pcmk-1 -+ -+Revised cluster status: -+Online: [ pcmk-1 ] -+OFFLINE: [ pcmk-4 ] -+ -+ rsc_pcmk-1 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-2 (ocf::heartbeat:IPaddr2): Started pcmk-1 (unmanaged) FAILED -+ rsc_pcmk-3 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-4 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ rsc_pcmk-5 (ocf::heartbeat:IPaddr2): Started pcmk-1 -+ -diff --git a/pengine/test10/failcount-block.xml b/pengine/test10/failcount-block.xml -new file mode 100644 -index 0000000..12d4937 ---- /dev/null -+++ b/pengine/test10/failcount-block.xml -@@ -0,0 +1,147 @@ [new file content lost in extraction: XML lines stripped] -diff --git a/pengine/test10/failcount.dot b/pengine/test10/failcount.dot -index aa7124e..41966ad 100644 ---- a/pengine/test10/failcount.dot -+++ b/pengine/test10/failcount.dot -@@ -1,22 +1,6 @@ - digraph "g" { --"re-dummy1_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy1_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy2_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy2_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy3_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy3_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy4_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy4_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-dummy_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-dummy_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv:0_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv:0_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-monitoring-lv_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-named-103ns1-ip_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-named-103ns1-ip_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] --"re-named-103ns2-ip_delete_0 dresproddns01" [ style=bold color="green" fontcolor="black"] --"re-named-103ns2-ip_delete_0 dresproddns02" [ style=bold color="green" fontcolor="black"] -+"re-named-lsb:0_monitor_10000 dresproddns02" [ style=bold color="green" fontcolor="black"] -+"re-named-lsb:1_monitor_10000 dresproddns01" [ style=bold color="green" fontcolor="black"] - "re-openfire-lsb:0_clear_failcount dresproddns01" [ style=bold color="green" fontcolor="black"] - "re-openfire-lsb:1_clear_failcount dresproddns02" [ style=bold color="green"
fontcolor="black"] - } -diff --git a/pengine/test10/failcount.exp b/pengine/test10/failcount.exp -index 396bdb1..9b7234b 100644 ---- a/pengine/test10/failcount.exp -+++ b/pengine/test10/failcount.exp -@@ -19,162 +19,18 @@ [hunk body lost in extraction: XML-only patch lines stripped] -diff --git a/pengine/test10/failcount.summary b/pengine/test10/failcount.summary -index 9ee764f..2a8f6a4 100644 ---- a/pengine/test10/failcount.summary -+++ b/pengine/test10/failcount.summary -@@ -28,26 +28,10 @@ Online: [ dresproddns01 dresproddns02 ] - - Transition Summary: - - Executing cluster transition: -- * Cluster action: clear_failcount on dresproddns01 -- * Cluster action: clear_failcount on dresproddns02 -- * Resource action: re-named-103ns2-ip delete on dresproddns02 -- * Resource action: re-named-103ns2-ip delete on dresproddns01 -- * Resource action: re-monitoring-lv:0 delete on dresproddns02 -- * Resource action: re-monitoring-lv:0 delete on dresproddns01 -- * Resource action: re-dummy delete on dresproddns02 -- * Resource action: re-dummy delete on dresproddns01 -- * Resource action: re-dummy3 delete on dresproddns02 -- * Resource action: re-dummy3 delete on dresproddns01 -- * Resource action: re-dummy4 delete on dresproddns02 -- * Resource action: re-dummy4 delete on dresproddns01 -- * Resource action: re-dummy1 delete on dresproddns02 -- * Resource action: re-dummy1 delete on dresproddns01 -- * Resource action: re-monitoring-lv delete on dresproddns02 -- * Resource action: re-monitoring-lv delete on dresproddns01 -- * Resource action: re-dummy2 delete on dresproddns02 -- * Resource action: re-dummy2 delete on dresproddns01 -- * Resource action: re-named-103ns1-ip delete on dresproddns02 -- * Resource action: re-named-103ns1-ip delete on dresproddns01 -+ * Cluster action: clear_failcount for re-openfire-lsb on dresproddns01 -+ * Cluster action: clear_failcount for re-openfire-lsb on dresproddns02 -+ * Resource action: re-named-lsb:1 monitor=10000 on dresproddns01 -+ * Resource action: re-named-lsb:0 monitor=10000 on dresproddns02 - - Revised cluster status: - Online: [ dresproddns01 dresproddns02 ] -diff --git a/pengine/test10/failcount.xml b/pengine/test10/failcount.xml -index 16c07f2..5ac8a2c 100644 ---- a/pengine/test10/failcount.xml -+++ b/pengine/test10/failcount.xml [seven hunk bodies lost in extraction: XML-only patch lines stripped] -diff --git a/pengine/test10/group14.summary b/pengine/test10/group14.summary -index 0f6b089..8bea277 100644 ---- a/pengine/test10/group14.summary -+++ b/pengine/test10/group14.summary -@@ -17,7 +17,7 @@ OFFLINE: [ c001n02 c001n03 c001n04 c001n05 ] - rsc_c001n06 (ocf::heartbeat:IPaddr): Stopped - rsc_c001n07 (ocf::heartbeat:IPaddr): Stopped - Clone Set: DoFencing
[child_DoFencing] -- Stopped: [ child_DoFencing:0 child_DoFencing:1 child_DoFencing:2 child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -@@ -84,7 +84,7 @@ OFFLINE: [ c001n02 c001n03 c001n04 c001n05 ] - rsc_c001n07 (ocf::heartbeat:IPaddr): Stopped - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n06 c001n07 ] -- Stopped: [ child_DoFencing:2 child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n02 c001n03 c001n04 c001n05 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -diff --git a/pengine/test10/history-1.summary b/pengine/test10/history-1.summary -index 8add326..6ae03e2 100644 ---- a/pengine/test10/history-1.summary -+++ b/pengine/test10/history-1.summary -@@ -5,7 +5,7 @@ OFFLINE: [ pcmk-4 ] - - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - Resource Group: group-1 - r192.168.101.181 (ocf::heartbeat:IPaddr): Stopped - r192.168.101.182 (ocf::heartbeat:IPaddr): Stopped -@@ -18,10 +18,10 @@ OFFLINE: [ pcmk-4 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-1 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Slaves: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - - Transition Summary: - -@@ -33,7 +33,7 @@ OFFLINE: [ pcmk-4 ] - - Clone Set: Fencing [FencingChild] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ FencingChild:3 ] -+ Stopped: [ pcmk-4 ] - Resource Group: group-1 - r192.168.101.181 (ocf::heartbeat:IPaddr): Stopped - r192.168.101.182 (ocf::heartbeat:IPaddr): Stopped -@@ -46,8 +46,8 @@ OFFLINE: [ pcmk-4 ] - migrator (ocf::pacemaker:Dummy): Started pcmk-1 - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ ping-1:3 ] -+ Stopped: [ pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] - Slaves: [ pcmk-1 pcmk-2 pcmk-3 ] -- Stopped: [ stateful-1:3 ] -+ Stopped: [ pcmk-4 ] - -diff --git a/pengine/test10/honor_stonith_rsc_order1.summary b/pengine/test10/honor_stonith_rsc_order1.summary -index c9b0f57..a51aaa1 100644 ---- a/pengine/test10/honor_stonith_rsc_order1.summary -+++ b/pengine/test10/honor_stonith_rsc_order1.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Resource Group: S_GROUP - S_B (stonith:fence_xvm): Stopped - A (ocf::pacemaker:Dummy): Stopped -diff --git a/pengine/test10/honor_stonith_rsc_order2.summary b/pengine/test10/honor_stonith_rsc_order2.summary -index 3ff043d..983ff77 100644 ---- a/pengine/test10/honor_stonith_rsc_order2.summary -+++ b/pengine/test10/honor_stonith_rsc_order2.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Resource Group: S_GROUP - S_B (stonith:fence_xvm): Stopped - S_C (stonith:fence_xvm): Stopped -diff --git a/pengine/test10/honor_stonith_rsc_order3.summary b/pengine/test10/honor_stonith_rsc_order3.summary -index 14a37d6..12a9c6b 100644 ---- a/pengine/test10/honor_stonith_rsc_order3.summary -+++ 
b/pengine/test10/honor_stonith_rsc_order3.summary -@@ -3,9 +3,9 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: S_CLONE2 [S_GROUP] -- Stopped: [ S_GROUP:0 ] -+ Stopped: [ fc16-builder ] - A (ocf::pacemaker:Dummy): Stopped - - Transition Summary: -diff --git a/pengine/test10/ignore_stonith_rsc_order3.summary b/pengine/test10/ignore_stonith_rsc_order3.summary -index b671f29..1f1a71e 100644 ---- a/pengine/test10/ignore_stonith_rsc_order3.summary -+++ b/pengine/test10/ignore_stonith_rsc_order3.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Resource Group: MIXED_GROUP - A (ocf::pacemaker:Dummy): Stopped - S_B (stonith:fence_xvm): Stopped -diff --git a/pengine/test10/ignore_stonith_rsc_order4.summary b/pengine/test10/ignore_stonith_rsc_order4.summary -index fb0fa54..06aa292 100644 ---- a/pengine/test10/ignore_stonith_rsc_order4.summary -+++ b/pengine/test10/ignore_stonith_rsc_order4.summary -@@ -3,9 +3,9 @@ Current cluster status: - Online: [ fc16-builder ] - - Clone Set: S_CLONE [S_A] -- Stopped: [ S_A:0 ] -+ Stopped: [ fc16-builder ] - Clone Set: S_CLONE2 [MIXED_GROUP] -- Stopped: [ MIXED_GROUP:0 ] -+ Stopped: [ fc16-builder ] - - Transition Summary: - * Start S_A:0 (fc16-builder) -diff --git a/pengine/test10/inc10.summary b/pengine/test10/inc10.summary -index cb03155..77552e7 100644 ---- a/pengine/test10/inc10.summary -+++ b/pengine/test10/inc10.summary -@@ -38,8 +38,8 @@ Online: [ xen-1 xen-3 xen-4 ] - - Clone Set: DoFencing [child_DoFencing] - Started: [ xen-1 xen-3 xen-4 ] -- Stopped: [ child_DoFencing:3 ] -+ Stopped: [ xen-2 ] - Clone Set: ocfs2-clone [ocfs2] - Started: [ xen-1 xen-3 xen-4 ] -- Stopped: [ ocfs2:3 ] -+ Stopped: [ xen-2 ] - -diff --git a/pengine/test10/inc12.summary b/pengine/test10/inc12.summary -index 7171c08..af2315d 100644 ---- a/pengine/test10/inc12.summary -+++ b/pengine/test10/inc12.summary -@@ -16,7 +16,7 @@ Online: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - rsc_c001n07 (ocf::heartbeat:IPaddr): Started c001n07 - Clone Set: DoFencing [child_DoFencing] - Started: [ c001n02 c001n04 c001n05 c001n06 c001n07 ] -- Stopped: [ child_DoFencing:5 ] -+ Stopped: [ c001n03 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -@@ -114,7 +114,7 @@ Online: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - rsc_c001n06 (ocf::heartbeat:IPaddr): Stopped - rsc_c001n07 (ocf::heartbeat:IPaddr): Stopped - Clone Set: DoFencing [child_DoFencing] -- Stopped: [ child_DoFencing:0 child_DoFencing:1 child_DoFencing:2 child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ] -+ Stopped: [ c001n02 c001n03 c001n04 c001n05 c001n06 c001n07 ] - Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique) - ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped - ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped -diff --git a/pengine/test10/inc6.summary b/pengine/test10/inc6.summary -index ea5f085..cf84c1f 100644 ---- a/pengine/test10/inc6.summary -+++ b/pengine/test10/inc6.summary -@@ -4,14 +4,12 @@ Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 ] - Clone Set: rsc2 [child_rsc2] (unique) - child_rsc2:0 (heartbeat:apache): Started node1 - child_rsc2:1 (heartbeat:apache): Started node1 - child_rsc2:2 (heartbeat:apache): Stopped - 
Clone Set: rsc3 [child_rsc3] - Started: [ node1 node2 ] -- Stopped: [ child_rsc3:2 ] - Clone Set: rsc4 [child_rsc4] (unique) - child_rsc4:0 (heartbeat:apache): Started node1 - child_rsc4:1 (heartbeat:apache): Started node1 -@@ -22,14 +20,12 @@ Online: [ node1 node2 ] - child_rsc5:2 (heartbeat:apache): Stopped - Clone Set: rsc6 [child_rsc6] - Started: [ node1 node2 ] -- Stopped: [ child_rsc6:2 ] - Clone Set: rsc7 [child_rsc7] (unique) - child_rsc7:0 (heartbeat:apache): Started node2 - child_rsc7:1 (heartbeat:apache): Started node2 - child_rsc7:2 (heartbeat:apache): Stopped - Clone Set: rsc8 [child_rsc8] - Started: [ node1 node2 ] -- Stopped: [ child_rsc8:2 ] - - Transition Summary: - * Move child_rsc2:1 (Started node1 -> node2) -@@ -79,14 +75,12 @@ Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 ] - Clone Set: rsc2 [child_rsc2] (unique) - child_rsc2:0 (heartbeat:apache): Started node1 - child_rsc2:1 (heartbeat:apache): Started [ node1 node2 ] - child_rsc2:2 (heartbeat:apache): Stopped - Clone Set: rsc3 [child_rsc3] - Started: [ node1 node2 ] -- Stopped: [ child_rsc3:2 ] - Clone Set: rsc4 [child_rsc4] (unique) - child_rsc4:0 (heartbeat:apache): Started node1 - child_rsc4:1 (heartbeat:apache): Started [ node1 node2 ] -@@ -97,12 +91,10 @@ Online: [ node1 node2 ] - child_rsc5:2 (heartbeat:apache): Stopped - Clone Set: rsc6 [child_rsc6] - Started: [ node1 node2 ] -- Stopped: [ child_rsc6:2 ] - Clone Set: rsc7 [child_rsc7] (unique) - child_rsc7:0 (heartbeat:apache): Started node2 - child_rsc7:1 (heartbeat:apache): Started node1 - child_rsc7:2 (heartbeat:apache): Stopped - Clone Set: rsc8 [child_rsc8] - Started: [ node1 node2 ] -- Stopped: [ child_rsc8:2 ] - -diff --git a/pengine/test10/inc9.dot b/pengine/test10/inc9.dot -index 8e29738..f4d6649 100644 ---- a/pengine/test10/inc9.dot -+++ b/pengine/test10/inc9.dot -@@ -1,20 +1,12 @@ - digraph "g" { - "all_stopped" [ style=bold color="green" fontcolor="orange" ] --"child_rsc1:1_delete_0 node1" [ style=bold color="green" fontcolor="black"] --"child_rsc1:1_delete_0 node2" [ style=bold color="green" fontcolor="black"] - "child_rsc1:1_stop_0 node1" -> "all_stopped" [ style = bold] --"child_rsc1:1_stop_0 node1" -> "child_rsc1:1_delete_0 node1" [ style = bold] --"child_rsc1:1_stop_0 node1" -> "child_rsc1:1_delete_0 node2" [ style = bold] - "child_rsc1:1_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] - "child_rsc1:1_stop_0 node1" [ style=bold color="green" fontcolor="black"] - "child_rsc1:1_stop_0 node2" -> "all_stopped" [ style = bold] --"child_rsc1:1_stop_0 node2" -> "child_rsc1:1_delete_0 node1" [ style = bold] --"child_rsc1:1_stop_0 node2" -> "child_rsc1:1_delete_0 node2" [ style = bold] - "child_rsc1:1_stop_0 node2" -> "rsc1_stopped_0" [ style = bold] - "child_rsc1:1_stop_0 node2" [ style=bold color="green" fontcolor="black"] --"child_rsc1:2_delete_0 node1" [ style=bold color="green" fontcolor="black"] - "child_rsc1:2_stop_0 node1" -> "all_stopped" [ style = bold] --"child_rsc1:2_stop_0 node1" -> "child_rsc1:2_delete_0 node1" [ style = bold] - "child_rsc1:2_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] - "child_rsc1:2_stop_0 node1" [ style=bold color="green" fontcolor="black"] - "rsc1_stop_0" -> "child_rsc1:1_stop_0 node1" [ style = bold] -diff --git a/pengine/test10/inc9.exp b/pengine/test10/inc9.exp -index 8610824..a6d26c9 100644 ---- a/pengine/test10/inc9.exp -+++ b/pengine/test10/inc9.exp -@@ -1,138 +1,73 @@ [hunk body lost in extraction: XML-only patch lines stripped]
-@@ -140,13 +75,13 @@ [hunk body lost in extraction: XML-only patch lines stripped] -diff --git a/pengine/test10/inc9.summary b/pengine/test10/inc9.summary -index f7ae82a..5a7f123 100644 ---- a/pengine/test10/inc9.summary -+++ b/pengine/test10/inc9.summary -@@ -3,11 +3,10 @@ Current cluster status: - Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] -- child_rsc1:5 (heartbeat:apache): ORPHANED Started node1 -- child_rsc1:6 (heartbeat:apache): ORPHANED Started node1 -- child_rsc1:7 (heartbeat:apache): ORPHANED Started node2 -+ child_rsc1 (heartbeat:apache): ORPHANED Started node1 -+ child_rsc1 (heartbeat:apache): ORPHANED Started node1 -+ child_rsc1 (heartbeat:apache): ORPHANED Started node2 - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 child_rsc1:3 child_rsc1:4 ] - - Transition Summary: - * Stop child_rsc1:5 (node1) -@@ -17,13 +16,8 @@ Transition Summary: - Executing cluster transition: - * Pseudo action: rsc1_stop_0 - * Resource action: child_rsc1:1 stop on node1 -- * Resource action: child_rsc1:1 delete on node2 -- * Resource action: child_rsc1:1 delete on node1 - * Resource action: child_rsc1:2 stop on node1 -- * Resource action: child_rsc1:2 delete on node1 - * Resource action: child_rsc1:1 stop on node2 -- * Resource action: child_rsc1:1 delete on node2 -- * Resource action: child_rsc1:1 delete on node1 - * Pseudo action: rsc1_stopped_0 - * Pseudo action: all_stopped - -@@ -32,5 +26,4 @@ Online: [ node1 node2 ] - - Clone Set: rsc1 [child_rsc1] - Started: [ node1 node2 ] -- Stopped: [ child_rsc1:2 child_rsc1:3 child_rsc1:4 ] - -diff --git a/pengine/test10/interleave-pseudo-stop.summary b/pengine/test10/interleave-pseudo-stop.summary -index 9ab0a63..7ac882d 100644 ---- a/pengine/test10/interleave-pseudo-stop.summary -+++ b/pengine/test10/interleave-pseudo-stop.summary -@@ -65,14 +65,14 @@ OFFLINE: [ node1 ] - - Clone Set: stonithcloneset [stonithclone] - Started: [ node2 ] -- Stopped: [ stonithclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmscloneset [evmsclone] - Started: [ node2 ] -- Stopped: [ evmsclone:1 ] -+ Stopped: [ node1 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node2 ] -- Stopped: [ imagestoreclone:1 ] -+ Stopped: [ node1 ] - Clone Set: configstorecloneset [configstoreclone] - Started: [ node2 ] -- Stopped: [ configstoreclone:1 ] -+ Stopped: [ node1 ] - -diff --git a/pengine/test10/interleave-restart.summary b/pengine/test10/interleave-restart.summary -index ca5a593..32ff592 100644 ---- a/pengine/test10/interleave-restart.summary -+++ b/pengine/test10/interleave-restart.summary -@@ -5,7 +5,7 @@ Online: [ node1 node2 ] - Clone Set: stonithcloneset [stonithclone] - Started: [ node1 node2 ] - Clone Set: evmscloneset [evmsclone] -- evmsclone:1 (ocf::heartbeat:EvmsSCC): Started node1 FAILED -+ evmsclone (ocf::heartbeat:EvmsSCC): Started node1 FAILED - Started: [ node2 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node1 node2 ] -diff --git a/pengine/test10/interleave-stop.summary b/pengine/test10/interleave-stop.summary -index 042e62d..14aa585 100644 ---- a/pengine/test10/interleave-stop.summary -+++ b/pengine/test10/interleave-stop.summary -@@ -60,14 +60,14 @@ Online: [ node2 ] - - Clone Set: stonithcloneset [stonithclone]
- Started: [ node2 ] -- Stopped: [ stonithclone:1 ] -+ Stopped: [ node1 ] - Clone Set: evmscloneset [evmsclone] - Started: [ node2 ] -- Stopped: [ evmsclone:1 ] -+ Stopped: [ node1 ] - Clone Set: imagestorecloneset [imagestoreclone] - Started: [ node2 ] -- Stopped: [ imagestoreclone:1 ] -+ Stopped: [ node1 ] - Clone Set: configstorecloneset [configstoreclone] - Started: [ node2 ] -- Stopped: [ configstoreclone:1 ] -+ Stopped: [ node1 ] - -diff --git a/pengine/test10/load-stopped-loop.summary b/pengine/test10/load-stopped-loop.summary -index c14e05d..e514e82 100644 ---- a/pengine/test10/load-stopped-loop.summary -+++ b/pengine/test10/load-stopped-loop.summary -@@ -14,79 +14,69 @@ Online: [ mgmt01 v03-a v03-b ] - stonith-v03-d (stonith:fence_ipmilan): Stopped - Clone Set: cl-clvmd [clvmd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ] - Clone Set: cl-dlm [dlm] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ] - Clone Set: cl-iscsid [iscsid] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ] - Clone Set: cl-libvirtd [libvirtd] - Started: [ v03-a v03-b ] -- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-multipathd [multipathd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ] - Clone Set: cl-node-params [node-params] - Started: [ v03-a v03-b ] -- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan1-if [vlan1-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan101-if [vlan101-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan102-if [vlan102-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan103-if [vlan103-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan104-if [vlan104-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan3-if [vlan3-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan4-if [vlan4-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan5-if [vlan5-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan900-if [vlan900-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan909-if [vlan909-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-libvirt-images-fs [libvirt-images-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ 
libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ] - Clone Set: cl-libvirt-install-fs [libvirt-install-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ] - Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ] - Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ] - Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ] - Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ] - Clone Set: cl-libvirt-images-pool [libvirt-images-pool] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ] -+ Stopped: [ mgmt01 ] - git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a -@@ -109,7 +99,7 @@ Online: [ mgmt01 v03-a v03-b ] - zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-vlan200-if [vlan200-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ] -+ Stopped: [ mgmt01 ] - lenny-x32-devel-vm (ocf::vds-ok:VirtualDomain): Started v03-a - dist.express-consult.org-vm (ocf::vds-ok:VirtualDomain): Stopped - eu1.ca-pages.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -121,7 +111,7 @@ Online: [ mgmt01 v03-a v03-b ] - c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - Clone Set: cl-mcast-test-net [mcast-test-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ] -+ Stopped: [ mgmt01 ] - dist.fly-uni.org-vm (ocf::vds-ok:VirtualDomain): Stopped - ktstudio.net-vm (ocf::vds-ok:VirtualDomain): Started v03-a - cloudsrv.credo-dialogue.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b -@@ -134,7 +124,7 @@ Online: [ mgmt01 v03-a v03-b ] - lustre04-right.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-anbriz-net [mcast-anbriz-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-anbriz-net:2 mcast-anbriz-net:3 mcast-anbriz-net:4 
mcast-anbriz-net:5 mcast-anbriz-net:6 mcast-anbriz-net:7 ] -+ Stopped: [ mgmt01 ] - gw.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - license.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - terminal.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -143,13 +133,13 @@ Online: [ mgmt01 v03-a v03-b ] - test-01.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-libvirt-qpid [libvirt-qpid] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ] -+ Stopped: [ mgmt01 ] - gw.gleb.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - gw.gotin.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - terminal0.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-gleb-net [mcast-gleb-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-gleb-net:2 mcast-gleb-net:3 mcast-gleb-net:4 mcast-gleb-net:5 mcast-gleb-net:6 mcast-gleb-net:7 ] -+ Stopped: [ mgmt01 ] - - Transition Summary: - * Reload vds-ok-pool-0-iscsi:0 (Started mgmt01) -@@ -215,79 +205,69 @@ Online: [ mgmt01 v03-a v03-b ] - stonith-v03-d (stonith:fence_ipmilan): Stopped - Clone Set: cl-clvmd [clvmd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ] - Clone Set: cl-dlm [dlm] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ] - Clone Set: cl-iscsid [iscsid] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ] - Clone Set: cl-libvirtd [libvirtd] - Started: [ v03-a v03-b ] -- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-multipathd [multipathd] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ] - Clone Set: cl-node-params [node-params] - Started: [ v03-a v03-b ] -- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan1-if [vlan1-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan101-if [vlan101-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan102-if [vlan102-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan103-if [vlan103-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan104-if [vlan104-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan3-if [vlan3-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan4-if [vlan4-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan5-if [vlan5-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan900-if 
[vlan900-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vlan909-if [vlan909-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-libvirt-images-fs [libvirt-images-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ] - Clone Set: cl-libvirt-install-fs [libvirt-install-fs] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ] - Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ] - Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ] - Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ] - Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg] - Started: [ mgmt01 v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ] - Clone Set: cl-libvirt-images-pool [libvirt-images-pool] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ] -+ Stopped: [ mgmt01 ] - Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool] - Started: [ v03-a v03-b ] -- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ] -+ Stopped: [ mgmt01 ] - git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a -@@ -310,7 +290,7 @@ Online: [ mgmt01 v03-a v03-b ] - zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped - Clone Set: cl-vlan200-if [vlan200-if] - Started: [ v03-a v03-b ] -- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ] -+ Stopped: [ mgmt01 ] - lenny-x32-devel-vm (ocf::vds-ok:VirtualDomain): Started v03-a - dist.express-consult.org-vm (ocf::vds-ok:VirtualDomain): Stopped - eu1.ca-pages.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -322,7 +302,7 @@ Online: [ mgmt01 v03-a v03-b ] - c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - Clone Set: cl-mcast-test-net [mcast-test-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ] -+ Stopped: [ mgmt01 ] - dist.fly-uni.org-vm 
(ocf::vds-ok:VirtualDomain): Stopped - ktstudio.net-vm (ocf::vds-ok:VirtualDomain): Started v03-a - cloudsrv.credo-dialogue.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b -@@ -335,7 +315,7 @@ Online: [ mgmt01 v03-a v03-b ] - lustre04-right.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-mcast-anbriz-net [mcast-anbriz-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-anbriz-net:2 mcast-anbriz-net:3 mcast-anbriz-net:4 mcast-anbriz-net:5 mcast-anbriz-net:6 mcast-anbriz-net:7 ] -+ Stopped: [ mgmt01 ] - gw.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - license.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - terminal.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped -@@ -344,11 +324,11 @@ Online: [ mgmt01 v03-a v03-b ] - test-01.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a - Clone Set: cl-libvirt-qpid [libvirt-qpid] - Started: [ v03-a v03-b ] -- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ] -+ Stopped: [ mgmt01 ] - gw.gleb.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - gw.gotin.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped - terminal0.anbriz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b - Clone Set: cl-mcast-gleb-net [mcast-gleb-net] - Started: [ v03-a v03-b ] -- Stopped: [ mcast-gleb-net:2 mcast-gleb-net:3 mcast-gleb-net:4 mcast-gleb-net:5 mcast-gleb-net:6 mcast-gleb-net:7 ] -+ Stopped: [ mgmt01 ] - -diff --git a/pengine/test10/master-demote-2.summary b/pengine/test10/master-demote-2.summary -index b5fb0b9..53259a7 100644 ---- a/pengine/test10/master-demote-2.summary -+++ b/pengine/test10/master-demote-2.summary -@@ -16,7 +16,7 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - Clone Set: Connectivity [ping-1] - Started: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] -- stateful-1:0 (ocf::pacemaker:Stateful): Slave pcmk-1 FAILED -+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-1 FAILED - Slaves: [ pcmk-2 pcmk-3 pcmk-4 ] - - Transition Summary: -diff --git a/pengine/test10/master-demote-block.dot b/pengine/test10/master-demote-block.dot -new file mode 100644 -index 0000000..9b3c48f ---- /dev/null -+++ b/pengine/test10/master-demote-block.dot -@@ -0,0 +1,3 @@ -+digraph "g" { -+"dummy:1_monitor_20000 dl380g5d" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/master-demote-block.exp b/pengine/test10/master-demote-block.exp -new file mode 100644 -index 0000000..a8a51d7 ---- /dev/null -+++ b/pengine/test10/master-demote-block.exp -@@ -0,0 +1,12 @@ [new file content lost in extraction: XML lines stripped] -diff --git a/pengine/test10/master-demote-block.scores b/pengine/test10/master-demote-block.scores -new file mode 100644 -index 0000000..e99c828 ---- /dev/null -+++ b/pengine/test10/master-demote-block.scores -@@ -0,0 +1,13 @@ -+Allocation scores: -+clone_color: dummy:0 allocation score on dl380g5c: -INFINITY -+clone_color: dummy:0 allocation score on dl380g5d: 0 -+clone_color: dummy:1 allocation score on dl380g5c: -INFINITY -+clone_color: dummy:1 allocation score on dl380g5d: INFINITY -+clone_color: stateful allocation score on dl380g5c: -INFINITY -+clone_color: stateful allocation score on dl380g5d: 0 -+dummy:0 promotion score on dl380g5c: INFINITY -+dummy:1 promotion score on dl380g5d: 5 -+native_color: dummy:0 allocation score on dl380g5c: -INFINITY -+native_color: dummy:0 allocation score on dl380g5d: -INFINITY -+native_color: dummy:1 allocation score on dl380g5c: -INFINITY -+native_color: dummy:1
allocation score on dl380g5d: INFINITY -diff --git a/pengine/test10/master-demote-block.summary b/pengine/test10/master-demote-block.summary -new file mode 100644 -index 0000000..97fb20b ---- /dev/null -+++ b/pengine/test10/master-demote-block.summary -@@ -0,0 +1,22 @@ -+ -+Current cluster status: -+Node dl380g5c (21c624bd-c426-43dc-9665-bbfb92054bcd): standby -+Online: [ dl380g5d ] -+ -+ Master/Slave Set: stateful [dummy] -+ dummy (ocf::pacemaker:Stateful): Master dl380g5c (unmanaged) FAILED -+ Slaves: [ dl380g5d ] -+ -+Transition Summary: -+ -+Executing cluster transition: -+ * Resource action: dummy:1 monitor=20000 on dl380g5d -+ -+Revised cluster status: -+Node dl380g5c (21c624bd-c426-43dc-9665-bbfb92054bcd): standby -+Online: [ dl380g5d ] -+ -+ Master/Slave Set: stateful [dummy] -+ dummy (ocf::pacemaker:Stateful): Master dl380g5c (unmanaged) FAILED -+ Slaves: [ dl380g5d ] -+ -diff --git a/pengine/test10/master-demote-block.xml b/pengine/test10/master-demote-block.xml -new file mode 100644 -index 0000000..7aedca0 ---- /dev/null -+++ b/pengine/test10/master-demote-block.xml -@@ -0,0 +1,80 @@ [new file content lost in extraction: XML lines stripped] -diff --git a/pengine/test10/master-demote.scores b/pengine/test10/master-demote.scores -index 49c28dc..3650eca 100644 ---- a/pengine/test10/master-demote.scores -+++ b/pengine/test10/master-demote.scores -@@ -14,8 +14,8 @@ clone_color: fence_node:1 allocation score on cxb1: 1 - clone_color: named_drbd allocation score on cxa1: 200 - clone_color: named_drbd allocation score on cxb1: 210 - clone_color: named_drbd_node:0 allocation score on cxa1: 76 --clone_color: named_drbd_node:0 allocation score on cxb1: 75 --clone_color: named_drbd_node:1 allocation score on cxa1: 75 -+clone_color: named_drbd_node:0 allocation score on cxb1: 0 -+clone_color: named_drbd_node:1 allocation score on cxa1: 0 - clone_color: named_drbd_node:1 allocation score on cxb1: 76 - clone_color: pingd_clone allocation score on cxa1: 0 - clone_color: pingd_clone allocation score on cxb1: 0 -@@ -52,7 +52,7 @@ native_color: named_daemon allocation score on cxa1: -INFINITY - native_color: named_daemon allocation score on cxb1: -INFINITY - native_color: named_drbd_node:0 allocation score on cxa1: 76 - native_color: named_drbd_node:0 allocation score on cxb1: -INFINITY --native_color: named_drbd_node:1 allocation score on cxa1: 75 -+native_color: named_drbd_node:1 allocation score on cxa1: 0 - native_color: named_drbd_node:1 allocation score on cxb1: 76 - native_color: named_filesys allocation score on cxa1: -INFINITY - native_color: named_filesys allocation score on cxb1: -INFINITY -diff --git a/pengine/test10/master-depend.summary b/pengine/test10/master-depend.summary -index d64ccbf..f802b4e 100644 ---- a/pengine/test10/master-depend.summary -+++ b/pengine/test10/master-depend.summary -@@ -4,11 +4,11 @@ Online: [ vbox4 ] - OFFLINE: [ vbox3 ] - - Master/Slave Set: drbd [drbd0] -- Stopped: [ drbd0:0 drbd0:1 ] -+ Stopped: [ vbox3 vbox4 ] - Clone Set: cman_clone [cman] -- Stopped: [ cman:0 cman:1 ] -+ Stopped: [ vbox3 vbox4 ] - Clone Set: clvmd_clone [clvmd] -- Stopped: [ clvmd:0 clvmd:1 ] -+ Stopped: [ vbox3 vbox4 ] - vmnci36 (ocf::heartbeat:vm): Stopped - vmnci37 (ocf::heartbeat:vm): Stopped - vmnci38 (ocf::heartbeat:vm): Stopped -@@ -46,12 +46,12 @@ OFFLINE: [ vbox3 ] - - Master/Slave Set: drbd
[drbd0] - Slaves: [ vbox4 ] -- Stopped: [ drbd0:1 ] -+ Stopped: [ vbox3 ] - Clone Set: cman_clone [cman] - Started: [ vbox4 ] -- Stopped: [ cman:1 ] -+ Stopped: [ vbox3 ] - Clone Set: clvmd_clone [clvmd] -- Stopped: [ clvmd:0 clvmd:1 ] -+ Stopped: [ vbox3 vbox4 ] - vmnci36 (ocf::heartbeat:vm): Stopped - vmnci37 (ocf::heartbeat:vm): Stopped - vmnci38 (ocf::heartbeat:vm): Stopped -diff --git a/pengine/test10/master-ordering.summary b/pengine/test10/master-ordering.summary -index 330bdc8..9196392 100644 ---- a/pengine/test10/master-ordering.summary -+++ b/pengine/test10/master-ordering.summary -@@ -3,23 +3,23 @@ Current cluster status: - Online: [ webcluster01 ] - OFFLINE: [ webcluster02 ] - -- mysql-server (ocf::heartbeat:mysql): Stopped -- extip_1 (ocf::heartbeat:IPaddr2): Stopped -- extip_2 (ocf::heartbeat:IPaddr2): Stopped -+ mysql-server (ocf::heartbeat:mysql): Stopped -+ extip_1 (ocf::heartbeat:IPaddr2): Stopped -+ extip_2 (ocf::heartbeat:IPaddr2): Stopped - Resource Group: group_main -- intip_0_main (ocf::heartbeat:IPaddr2): Stopped -- intip_1_master (ocf::heartbeat:IPaddr2): Stopped -- intip_2_slave (ocf::heartbeat:IPaddr2): Stopped -+ intip_0_main (ocf::heartbeat:IPaddr2): Stopped -+ intip_1_master (ocf::heartbeat:IPaddr2): Stopped -+ intip_2_slave (ocf::heartbeat:IPaddr2): Stopped - Master/Slave Set: ms_drbd_www [drbd_www] -- Stopped: [ drbd_www:0 drbd_www:1 ] -+ Stopped: [ webcluster01 webcluster02 ] - Clone Set: clone_ocfs2_www [ocfs2_www] (unique) -- ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -- ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped - Clone Set: clone_webservice [group_webservice] -- Stopped: [ group_webservice:0 group_webservice:1 ] -+ Stopped: [ webcluster01 webcluster02 ] - Master/Slave Set: ms_drbd_mysql [drbd_mysql] -- Stopped: [ drbd_mysql:0 drbd_mysql:1 ] -- fs_mysql (ocf::heartbeat:Filesystem): Stopped -+ Stopped: [ webcluster01 webcluster02 ] -+ fs_mysql (ocf::heartbeat:Filesystem): Stopped - - Transition Summary: - * Start extip_1 (webcluster01) -@@ -73,23 +73,23 @@ Revised cluster status: - Online: [ webcluster01 ] - OFFLINE: [ webcluster02 ] - -- mysql-server (ocf::heartbeat:mysql): Stopped -+ mysql-server (ocf::heartbeat:mysql): Stopped - extip_1 (ocf::heartbeat:IPaddr2): Started webcluster01 - extip_2 (ocf::heartbeat:IPaddr2): Started webcluster01 - Resource Group: group_main -- intip_0_main (ocf::heartbeat:IPaddr2): Stopped -+ intip_0_main (ocf::heartbeat:IPaddr2): Stopped - intip_1_master (ocf::heartbeat:IPaddr2): Started webcluster01 - intip_2_slave (ocf::heartbeat:IPaddr2): Started webcluster01 - Master/Slave Set: ms_drbd_www [drbd_www] - Slaves: [ webcluster01 ] -- Stopped: [ drbd_www:1 ] -+ Stopped: [ webcluster02 ] - Clone Set: clone_ocfs2_www [ocfs2_www] (unique) -- ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -- ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:0 (ocf::heartbeat:Filesystem): Stopped -+ ocfs2_www:1 (ocf::heartbeat:Filesystem): Stopped - Clone Set: clone_webservice [group_webservice] -- Stopped: [ group_webservice:0 group_webservice:1 ] -+ Stopped: [ webcluster01 webcluster02 ] - Master/Slave Set: ms_drbd_mysql [drbd_mysql] - Slaves: [ webcluster01 ] -- Stopped: [ drbd_mysql:1 ] -- fs_mysql (ocf::heartbeat:Filesystem): Stopped -+ Stopped: [ webcluster02 ] -+ fs_mysql (ocf::heartbeat:Filesystem): Stopped - -diff --git a/pengine/test10/master-probed-score.summary b/pengine/test10/master-probed-score.summary 
-index faf4824..a634efd 100644 ---- a/pengine/test10/master-probed-score.summary -+++ b/pengine/test10/master-probed-score.summary -@@ -3,7 +3,7 @@ Current cluster status: - Online: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - - Master/Slave Set: AdminClone [AdminDrbd] -- Stopped: [ AdminDrbd:0 AdminDrbd:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - CronAmbientTemperature (ocf::heartbeat:symlink): Stopped - StonithHypatia (stonith:fence_nut): Stopped - StonithOrestes (stonith:fence_nut): Stopped -@@ -14,7 +14,7 @@ Online: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.ed - Dhcpd (lsb:dhcpd): Stopped - DhcpIP (ocf::heartbeat:IPaddr2): Stopped - Clone Set: CupsClone [CupsGroup] -- Stopped: [ CupsGroup:0 CupsGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: IPClone [IPGroup] (unique) - Resource Group: IPGroup:0 - ClusterIP:0 (ocf::heartbeat:IPaddr2): Stopped -@@ -25,13 +25,13 @@ Online: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.ed - ClusterIPLocal:1 (ocf::heartbeat:IPaddr2): Stopped - ClusterIPSandbox:1 (ocf::heartbeat:IPaddr2): Stopped - Clone Set: LibvirtdClone [LibvirtdGroup] -- Stopped: [ LibvirtdGroup:0 LibvirtdGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: TftpClone [TftpGroup] -- Stopped: [ TftpGroup:0 TftpGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: ExportsClone [ExportsGroup] -- Stopped: [ ExportsGroup:0 ExportsGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - Clone Set: FilesystemClone [FilesystemGroup] -- Stopped: [ FilesystemGroup:0 FilesystemGroup:1 ] -+ Stopped: [ hypatia-corosync.nevis.columbia.edu orestes-corosync.nevis.columbia.edu ] - KVM-guest (ocf::heartbeat:VirtualDomain): Stopped - Proxy (ocf::heartbeat:VirtualDomain): Stopped - -diff --git a/pengine/test10/master-pseudo.summary b/pengine/test10/master-pseudo.summary -index 4ac7605..d480fb8 100644 ---- a/pengine/test10/master-pseudo.summary -+++ b/pengine/test10/master-pseudo.summary -@@ -6,6 +6,7 @@ Online: [ sambuca.linbit ] - ip_float_right (ocf::heartbeat:IPaddr2): Stopped - Master/Slave Set: ms_drbd_float [drbd_float] - Slaves: [ sambuca.linbit ] -+ Stopped: [ raki.linbit ] - Resource Group: nfsexport - ip_nfs (ocf::heartbeat:IPaddr2): Stopped - fs_float (ocf::heartbeat:Filesystem): Stopped -@@ -54,6 +55,7 @@ Online: [ sambuca.linbit ] - ip_float_right (ocf::heartbeat:IPaddr2): Started sambuca.linbit - Master/Slave Set: ms_drbd_float [drbd_float] - Masters: [ sambuca.linbit ] -+ Stopped: [ raki.linbit ] - Resource Group: nfsexport - ip_nfs (ocf::heartbeat:IPaddr2): Started sambuca.linbit - fs_float (ocf::heartbeat:Filesystem): Stopped -diff --git a/pengine/test10/master-reattach.summary b/pengine/test10/master-reattach.summary -index 8afe6a9..2e7f492 100644 ---- a/pengine/test10/master-reattach.summary -+++ b/pengine/test10/master-reattach.summary -@@ -3,8 +3,8 @@ Current cluster status: - Online: [ dktest1 dktest2 ] - - Master/Slave Set: ms-drbd1 [drbd1] (unmanaged) -- drbd1:0 (ocf::heartbeat:drbd): Master dktest1 (unmanaged) -- drbd1:1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged) -+ drbd1 (ocf::heartbeat:drbd): Master dktest1 (unmanaged) -+ drbd1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged) - Resource Group: apache - apache-vip 
(ocf::heartbeat:IPaddr2): Started dktest1 (unmanaged)
- mount (ocf::heartbeat:Filesystem): Started dktest1 (unmanaged)
-@@ -23,8 +23,8 @@ Revised cluster status:
- Online: [ dktest1 dktest2 ]
-
- Master/Slave Set: ms-drbd1 [drbd1] (unmanaged)
-- drbd1:0 (ocf::heartbeat:drbd): Master dktest1 (unmanaged)
-- drbd1:1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged)
-+ drbd1 (ocf::heartbeat:drbd): Master dktest1 (unmanaged)
-+ drbd1 (ocf::heartbeat:drbd): Slave dktest2 (unmanaged)
- Resource Group: apache
- apache-vip (ocf::heartbeat:IPaddr2): Started dktest1 (unmanaged)
- mount (ocf::heartbeat:Filesystem): Started dktest1 (unmanaged)
-diff --git a/pengine/test10/master-unmanaged-monitor.summary b/pengine/test10/master-unmanaged-monitor.summary
-index 0a5712d..369dcde 100644
---- a/pengine/test10/master-unmanaged-monitor.summary
-+++ b/pengine/test10/master-unmanaged-monitor.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-
- Clone Set: Fencing [FencingChild] (unmanaged)
-- Stopped: [ FencingChild:0 FencingChild:1 FencingChild:2 FencingChild:3 ]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
- Resource Group: group-1
- r192.168.122.112 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged)
- r192.168.122.113 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged)
-@@ -15,15 +15,15 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
- lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-3 (unmanaged)
- migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged)
- Clone Set: Connectivity [ping-1] (unmanaged)
-- ping-1:0 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-- ping-1:1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged)
-- ping-1:2 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged)
-- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
- Master/Slave Set: master-1 [stateful-1] (unmanaged)
-- stateful-1:0 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged)
-- stateful-1:1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged)
-- stateful-1:2 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged)
-- Stopped: [ stateful-1:3 ]
-+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged)
-+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged)
-+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged)
-+ Stopped: [ pcmk-1 ]
-
- Transition Summary:
-
-@@ -44,7 +44,7 @@ Revised cluster status:
- Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-
- Clone Set: Fencing [FencingChild] (unmanaged)
-- Stopped: [ FencingChild:0 FencingChild:1 FencingChild:2 FencingChild:3 ]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
- Resource Group: group-1
- r192.168.122.112 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged)
- r192.168.122.113 (ocf::heartbeat:IPaddr): Started pcmk-3 (unmanaged)
-@@ -56,13 +56,13 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
- lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-3 (unmanaged)
- migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged)
- Clone Set: Connectivity [ping-1] (unmanaged)
-- ping-1:0 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-- ping-1:1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged)
-- ping-1:2 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged)
-- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-3 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-4 (unmanaged)
-+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged)
- Master/Slave Set: master-1 [stateful-1] (unmanaged)
-- stateful-1:0 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged)
-- stateful-1:1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged)
-- stateful-1:2 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged)
-- Stopped: [ stateful-1:3 ]
-+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-2 (unmanaged)
-+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-3 (unmanaged)
-+ stateful-1 (ocf::pacemaker:Stateful): Slave pcmk-4 (unmanaged)
-+ Stopped: [ pcmk-1 ]
-
-diff --git a/pengine/test10/master_monitor_restart.summary b/pengine/test10/master_monitor_restart.summary
-index 2eed0a5..05b6460 100644
---- a/pengine/test10/master_monitor_restart.summary
-+++ b/pengine/test10/master_monitor_restart.summary
-@@ -5,7 +5,7 @@ Online: [ node1 ]
-
- Master/Slave Set: MS_RSC [MS_RSC_NATIVE]
- Masters: [ node1 ]
-- Stopped: [ MS_RSC_NATIVE:1 ]
-+ Stopped: [ node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 ]
-
- Master/Slave Set: MS_RSC [MS_RSC_NATIVE]
- Masters: [ node1 ]
-- Stopped: [ MS_RSC_NATIVE:1 ]
-+ Stopped: [ node2 ]
-
-diff --git a/pengine/test10/migrate-5.summary b/pengine/test10/migrate-5.summary
-index 726f1f2..f9248ac 100644
---- a/pengine/test10/migrate-5.summary
-+++ b/pengine/test10/migrate-5.summary
-@@ -30,5 +30,5 @@ Online: [ dom0-01 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-01
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-01 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-02 ]
-
-diff --git a/pengine/test10/migrate-fencing.summary b/pengine/test10/migrate-fencing.summary
-index fbe5b25..920e754 100644
---- a/pengine/test10/migrate-fencing.summary
-+++ b/pengine/test10/migrate-fencing.summary
-@@ -86,7 +86,7 @@ OFFLINE: [ pcmk-4 ]
-
- Clone Set: Fencing [FencingChild]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ FencingChild:3 ]
-+ Stopped: [ pcmk-4 ]
- Resource Group: group-1
- r192.168.101.181 (ocf::heartbeat:IPaddr): Started pcmk-1
- r192.168.101.182 (ocf::heartbeat:IPaddr): Started pcmk-1
-@@ -99,9 +99,9 @@ OFFLINE: [ pcmk-4 ]
- migrator (ocf::pacemaker:Dummy): Started pcmk-3
- Clone Set: Connectivity [ping-1]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ ping-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1]
- Masters: [ pcmk-1 ]
- Slaves: [ pcmk-2 pcmk-3 ]
-- Stopped: [ stateful-1:3 ]
-+ Stopped: [ pcmk-4 ]
-
-diff --git a/pengine/test10/migrate-partial-3.summary b/pengine/test10/migrate-partial-3.summary
-index 8cdb66b..cf27c9f 100644
---- a/pengine/test10/migrate-partial-3.summary
-+++ b/pengine/test10/migrate-partial-3.summary
-@@ -6,7 +6,7 @@ OFFLINE: [ hex-15 ]
- test-vm (ocf::heartbeat:Xen): Started hex-14 FAILED
- Clone Set: c-clusterfs [dlm]
- Started: [ hex-13 hex-14 ]
-- Stopped: [ dlm:2 ]
-+ Stopped: [ hex-15 ]
-
- Transition Summary:
- * Recover test-vm (Started hex-14 -> hex-13)
-@@ -26,5 +26,5 @@ OFFLINE: [ hex-15 ]
- test-vm (ocf::heartbeat:Xen): Started hex-13
- Clone Set: c-clusterfs [dlm]
- Started: [ hex-13 hex-14 ]
-- Stopped: [ dlm:2 ]
-+ Stopped: [ hex-15 ]
-
-diff --git a/pengine/test10/migrate-partial-4.summary b/pengine/test10/migrate-partial-4.summary
-index 8d4fa3b..1cf119a 100644
---- a/pengine/test10/migrate-partial-4.summary
-+++ b/pengine/test10/migrate-partial-4.summary
-@@ -23,27 +23,27 @@ Online: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- ip-booth (ocf::heartbeat:IPaddr2): Started lustre02-left
- boothd (ocf::pacemaker:booth-site): Started lustre02-left
- Master/Slave Set: ms-drbd-mgs [drbd-mgs]
-- Stopped: [ drbd-mgs:0 drbd-mgs:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-mdt0000 [drbd-testfs-mdt0000]
-- Stopped: [ drbd-testfs-mdt0000:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-mdt0000-left [drbd-testfs-mdt0000-left]
-- Stopped: [ drbd-testfs-mdt0000-left:0 drbd-testfs-mdt0000-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0000 [drbd-testfs-ost0000]
-- Stopped: [ drbd-testfs-ost0000:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0000-left [drbd-testfs-ost0000-left]
-- Stopped: [ drbd-testfs-ost0000-left:0 drbd-testfs-ost0000-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0001 [drbd-testfs-ost0001]
-- Stopped: [ drbd-testfs-ost0001:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0001-left [drbd-testfs-ost0001-left]
-- Stopped: [ drbd-testfs-ost0001-left:0 drbd-testfs-ost0001-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0002 [drbd-testfs-ost0002]
-- Stopped: [ drbd-testfs-ost0002:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0002-left [drbd-testfs-ost0002-left]
-- Stopped: [ drbd-testfs-ost0002-left:0 drbd-testfs-ost0002-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0003 [drbd-testfs-ost0003]
-- Stopped: [ drbd-testfs-ost0003:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0003-left [drbd-testfs-ost0003-left]
-- Stopped: [ drbd-testfs-ost0003-left:0 drbd-testfs-ost0003-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
-
- Transition Summary:
- * Start drbd-stacked (lustre02-left)
-@@ -104,24 +104,25 @@ Online: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- boothd (ocf::pacemaker:booth-site): Started lustre02-left
- Master/Slave Set: ms-drbd-mgs [drbd-mgs]
- Slaves: [ lustre01-left lustre02-left ]
-+ Stopped: [ lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-mdt0000 [drbd-testfs-mdt0000]
-- Stopped: [ drbd-testfs-mdt0000:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-mdt0000-left [drbd-testfs-mdt0000-left]
-- Stopped: [ drbd-testfs-mdt0000-left:0 drbd-testfs-mdt0000-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0000 [drbd-testfs-ost0000]
-- Stopped: [ drbd-testfs-ost0000:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0000-left [drbd-testfs-ost0000-left]
-- Stopped: [ drbd-testfs-ost0000-left:0 drbd-testfs-ost0000-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0001 [drbd-testfs-ost0001]
-- Stopped: [ drbd-testfs-ost0001:0 ]
-+ Stopped: [ lustre01-left lustre02-left
lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0001-left [drbd-testfs-ost0001-left]
-- Stopped: [ drbd-testfs-ost0001-left:0 drbd-testfs-ost0001-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0002 [drbd-testfs-ost0002]
-- Stopped: [ drbd-testfs-ost0002:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0002-left [drbd-testfs-ost0002-left]
-- Stopped: [ drbd-testfs-ost0002-left:0 drbd-testfs-ost0002-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0003 [drbd-testfs-ost0003]
-- Stopped: [ drbd-testfs-ost0003:0 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
- Master/Slave Set: ms-drbd-testfs-ost0003-left [drbd-testfs-ost0003-left]
-- Stopped: [ drbd-testfs-ost0003-left:0 drbd-testfs-ost0003-left:1 ]
-+ Stopped: [ lustre01-left lustre02-left lustre03-left lustre04-left ]
-
-diff --git a/pengine/test10/migrate-shutdown.summary b/pengine/test10/migrate-shutdown.summary
-index 62eb906..e634a5c 100644
---- a/pengine/test10/migrate-shutdown.summary
-+++ b/pengine/test10/migrate-shutdown.summary
-@@ -15,11 +15,11 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
- migrator (ocf::pacemaker:Dummy): Started pcmk-1
- Clone Set: Connectivity [ping-1]
- Started: [ pcmk-1 pcmk-2 pcmk-4 ]
-- Stopped: [ ping-1:3 ]
-+ Stopped: [ pcmk-3 ]
- Master/Slave Set: master-1 [stateful-1]
- Masters: [ pcmk-2 ]
- Slaves: [ pcmk-1 pcmk-4 ]
-- Stopped: [ stateful-1:3 ]
-+ Stopped: [ pcmk-3 ]
-
- Transition Summary:
- * Stop Fencing (pcmk-1)
-@@ -85,7 +85,7 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
- lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Stopped
- migrator (ocf::pacemaker:Dummy): Stopped
- Clone Set: Connectivity [ping-1]
-- Stopped: [ ping-1:0 ping-1:1 ping-1:2 ping-1:3 ]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1]
-- Stopped: [ stateful-1:0 stateful-1:1 stateful-1:2 stateful-1:3 ]
-+ Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ]
-
-diff --git a/pengine/test10/migrate-start-complex.summary b/pengine/test10/migrate-start-complex.summary
-index 7d3c329..9de9466 100644
---- a/pengine/test10/migrate-start-complex.summary
-+++ b/pengine/test10/migrate-start-complex.summary
-@@ -6,9 +6,9 @@ Online: [ dom0-01 dom0-02 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-02
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-02 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-01 ]
- Clone Set: clone-bottom [bottom]
-- Stopped: [ bottom:0 bottom:1 ]
-+ Stopped: [ dom0-01 dom0-02 ]
-
- Transition Summary:
- * Move top (Started dom0-02 -> dom0-01)
-diff --git a/pengine/test10/migrate-start.summary b/pengine/test10/migrate-start.summary
-index 057b020..8bbe3e5 100644
---- a/pengine/test10/migrate-start.summary
-+++ b/pengine/test10/migrate-start.summary
-@@ -5,7 +5,7 @@ Online: [ dom0-01 dom0-02 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-02
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-02 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-01 ]
-
- Transition Summary:
- * Migrate domU-test01 (Started dom0-02 -> dom0-01)
-diff --git a/pengine/test10/migrate-stop-complex.summary b/pengine/test10/migrate-stop-complex.summary
-index 7cbc802..51cec76 100644
---- a/pengine/test10/migrate-stop-complex.summary
-+++ b/pengine/test10/migrate-stop-complex.summary
-@@ -41,8 +41,8 @@ Online: [ dom0-01 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-01
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-01 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-02 ]
- Clone Set: clone-bottom [bottom]
- Started: [ dom0-01 ]
-- Stopped: [ bottom:1 ]
-+ Stopped: [ dom0-02 ]
-
-diff --git a/pengine/test10/migrate-stop-start-complex.summary b/pengine/test10/migrate-stop-start-complex.summary
-index f0cf070..bb3babe 100644
---- a/pengine/test10/migrate-stop-start-complex.summary
-+++ b/pengine/test10/migrate-stop-start-complex.summary
-@@ -9,7 +9,7 @@ Online: [ dom0-01 ]
- Started: [ dom0-01 dom0-02 ]
- Clone Set: clone-bottom [bottom]
- Started: [ dom0-02 ]
-- Stopped: [ bottom:1 ]
-+ Stopped: [ dom0-01 ]
-
- Transition Summary:
- * Restart top (Started dom0-01)
-@@ -45,8 +45,8 @@ Online: [ dom0-01 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-01
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-01 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-02 ]
- Clone Set: clone-bottom [bottom]
- Started: [ dom0-01 ]
-- Stopped: [ bottom:1 ]
-+ Stopped: [ dom0-02 ]
-
-diff --git a/pengine/test10/migrate-stop.summary b/pengine/test10/migrate-stop.summary
-index 726f1f2..f9248ac 100644
---- a/pengine/test10/migrate-stop.summary
-+++ b/pengine/test10/migrate-stop.summary
-@@ -30,5 +30,5 @@ Online: [ dom0-01 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-01
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-01 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-02 ]
-
-diff --git a/pengine/test10/migrate-stop_start.summary b/pengine/test10/migrate-stop_start.summary
-index cf843fd..bc4bb6a 100644
---- a/pengine/test10/migrate-stop_start.summary
-+++ b/pengine/test10/migrate-stop_start.summary
-@@ -6,7 +6,7 @@ Online: [ dom0-01 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-02
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-02 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-01 ]
-
- Transition Summary:
- * Move domU-test01 (Started dom0-02 -> dom0-01)
-@@ -34,5 +34,5 @@ Online: [ dom0-01 ]
- domU-test01 (ocf::heartbeat:Xen): Started dom0-01
- Clone Set: clone-dom0-iscsi1 [dom0-iscsi1]
- Started: [ dom0-01 ]
-- Stopped: [ dom0-iscsi1:1 ]
-+ Stopped: [ dom0-02 ]
-
-diff --git a/pengine/test10/monitor-onfail-restart.xml b/pengine/test10/monitor-onfail-restart.xml
-index c0e9b6e..beb68bc 100644
---- a/pengine/test10/monitor-onfail-restart.xml
-+++ b/pengine/test10/monitor-onfail-restart.xml
-@@ -30,7 +30,7 @@
-@@ -45,4 +45,4 @@
-\ No newline at end of file
-diff --git a/pengine/test10/monitor-recovery.dot b/pengine/test10/monitor-recovery.dot
-new file mode 100644
-index 0000000..13eab93
---- /dev/null
-+++ b/pengine/test10/monitor-recovery.dot
-@@ -0,0 +1,10 @@
-+digraph "g" {
-+"all_stopped" [ style=bold color="green" fontcolor="orange"]
-+"cl_tomcat_stop_0" -> "cl_tomcat_stopped_0" [ style = bold]
-+"cl_tomcat_stop_0" -> "d_tomcat_stop_0 CSE-1" [ style = bold]
-+"cl_tomcat_stop_0" [ style=bold color="green" fontcolor="orange"]
-+"cl_tomcat_stopped_0" [ style=bold color="green" fontcolor="orange"]
-+"d_tomcat_stop_0 CSE-1" -> "all_stopped" [ style = bold]
-+"d_tomcat_stop_0 CSE-1" -> "cl_tomcat_stopped_0" [ style = bold]
-+"d_tomcat_stop_0 CSE-1" [ style=bold color="green" fontcolor="black"]
-+}
-diff --git a/pengine/test10/monitor-recovery.exp b/pengine/test10/monitor-recovery.exp
-new file mode 100644
-index 0000000..546b9c6
---- /dev/null
-+++ b/pengine/test10/monitor-recovery.exp
-@@ -0,0 +1,51 @@
-diff --git a/pengine/test10/monitor-recovery.scores b/pengine/test10/monitor-recovery.scores
-new file mode 100644
-index 0000000..93a0ed4
---- /dev/null
-+++ b/pengine/test10/monitor-recovery.scores
-@@ -0,0 +1,21 @@
-+Allocation scores:
-+clone_color: cl_tomcat allocation score on CSE-1: -INFINITY
-+clone_color: cl_tomcat allocation score on CSE-2: 0
-+clone_color: d_tomcat:0 allocation score on CSE-1: -INFINITY
-+clone_color: d_tomcat:0 allocation score on CSE-2: 0
-+clone_color: d_tomcat:1 allocation score on CSE-1: -INFINITY
-+clone_color: d_tomcat:1 allocation score on CSE-2: 0
-+group_color: ip_11 allocation score on CSE-1: 0
-+group_color: ip_11 allocation score on CSE-2: 0
-+group_color: ip_19 allocation score on CSE-1: 0
-+group_color: ip_19 allocation score on CSE-2: 0
-+group_color: svc-cse allocation score on CSE-1: 0
-+group_color: svc-cse allocation score on CSE-2: 0
-+native_color: d_tomcat:0 allocation score on CSE-1: -INFINITY
-+native_color: d_tomcat:0 allocation score on CSE-2: -INFINITY
-+native_color: d_tomcat:1 allocation score on CSE-1: -INFINITY
-+native_color: d_tomcat:1 allocation score on CSE-2: -INFINITY
-+native_color: ip_11 allocation score on CSE-1: -INFINITY
-+native_color: ip_11 allocation score on CSE-2: -INFINITY
-+native_color: ip_19 allocation score on CSE-1: -INFINITY
-+native_color: ip_19 allocation score on CSE-2: -INFINITY
-diff --git a/pengine/test10/monitor-recovery.summary b/pengine/test10/monitor-recovery.summary
-new file mode 100644
-index 0000000..cae6d07
---- /dev/null
-+++ b/pengine/test10/monitor-recovery.summary
-@@ -0,0 +1,31 @@
-+
-+Current cluster status:
-+Online: [ CSE-1 ]
-+OFFLINE: [ CSE-2 ]
-+
-+ Resource Group: svc-cse
-+ ip_19 (ocf::heartbeat:IPaddr2): Stopped
-+ ip_11 (ocf::heartbeat:IPaddr2): Stopped
-+ Clone Set: cl_tomcat [d_tomcat]
-+ Started: [ CSE-1 ]
-+ Stopped: [ CSE-2 ]
-+
-+Transition Summary:
-+ * Stop d_tomcat:0 (CSE-1)
-+
-+Executing cluster transition:
-+ * Pseudo action: cl_tomcat_stop_0
-+ * Resource action: d_tomcat stop on CSE-1
-+ * Pseudo action: cl_tomcat_stopped_0
-+ * Pseudo action: all_stopped
-+
-+Revised cluster status:
-+Online: [ CSE-1 ]
-+OFFLINE: [ CSE-2 ]
-+
-+ Resource Group: svc-cse
-+ ip_19 (ocf::heartbeat:IPaddr2): Stopped
-+ ip_11 (ocf::heartbeat:IPaddr2): Stopped
-+ Clone Set: cl_tomcat [d_tomcat]
-+ Stopped: [ CSE-1 CSE-2 ]
-+
-diff --git a/pengine/test10/monitor-recovery.xml b/pengine/test10/monitor-recovery.xml
-new file mode 100644
-index 0000000..bc10ed1
---- /dev/null
-+++ b/pengine/test10/monitor-recovery.xml
-@@ -0,0 +1,107 @@
-diff --git a/pengine/test10/novell-239079.summary b/pengine/test10/novell-239079.summary
-index 3745d37..1298acb 100644
---- a/pengine/test10/novell-239079.summary
-+++ b/pengine/test10/novell-239079.summary
-@@ -4,7 +4,7 @@ Online: [ xen-1 xen-2 ]
-
- fs_1 (ocf::heartbeat:Filesystem): Stopped
- Master/Slave Set: ms-drbd0 [drbd0]
-- Stopped: [ drbd0:0 drbd0:1 ]
-+ Stopped: [ xen-1 xen-2 ]
-
- Transition Summary:
- * Start drbd0:0 (xen-1)
-diff --git a/pengine/test10/novell-239082.summary
b/pengine/test10/novell-239082.summary
-index 80d2206..b2c28ca 100644
---- a/pengine/test10/novell-239082.summary
-+++ b/pengine/test10/novell-239082.summary
-@@ -54,5 +54,5 @@ Online: [ xen-1 xen-2 ]
- fs_1 (ocf::heartbeat:Filesystem): Started xen-2
- Master/Slave Set: ms-drbd0 [drbd0]
- Masters: [ xen-2 ]
-- Stopped: [ drbd0:1 ]
-+ Stopped: [ xen-1 ]
-
-diff --git a/pengine/test10/novell-252693-2.summary b/pengine/test10/novell-252693-2.summary
-index 5b314a2..73b95d7 100644
---- a/pengine/test10/novell-252693-2.summary
-+++ b/pengine/test10/novell-252693-2.summary
-@@ -4,19 +4,19 @@ Online: [ node1 node2 ]
-
- Clone Set: stonithcloneset [stonithclone]
- Started: [ node2 ]
-- Stopped: [ stonithclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: evmsdcloneset [evmsdclone]
- Started: [ node2 ]
-- Stopped: [ evmsdclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: evmscloneset [evmsclone]
- Started: [ node2 ]
-- Stopped: [ evmsclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: imagestorecloneset [imagestoreclone]
- Started: [ node2 ]
-- Stopped: [ imagestoreclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: configstorecloneset [configstoreclone]
- Started: [ node2 ]
-- Stopped: [ configstoreclone:1 ]
-+ Stopped: [ node1 ]
- sles10 (ocf::heartbeat:Xen): Started node2
-
- Transition Summary:
-diff --git a/pengine/test10/novell-252693-3.summary b/pengine/test10/novell-252693-3.summary
-index d8ddcd7..9d42229 100644
---- a/pengine/test10/novell-252693-3.summary
-+++ b/pengine/test10/novell-252693-3.summary
-@@ -4,19 +4,19 @@ Online: [ node1 node2 ]
-
- Clone Set: stonithcloneset [stonithclone]
- Started: [ node2 ]
-- Stopped: [ stonithclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: evmsdcloneset [evmsdclone]
- Started: [ node2 ]
-- Stopped: [ evmsdclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: evmscloneset [evmsclone]
- Started: [ node2 ]
-- Stopped: [ evmsclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: imagestorecloneset [imagestoreclone]
-- imagestoreclone:0 (ocf::heartbeat:Filesystem): Started node2 FAILED
-- Stopped: [ imagestoreclone:1 ]
-+ imagestoreclone (ocf::heartbeat:Filesystem): Started node2 FAILED
-+ Stopped: [ node1 ]
- Clone Set: configstorecloneset [configstoreclone]
- Started: [ node2 ]
-- Stopped: [ configstoreclone:1 ]
-+ Stopped: [ node1 ]
- sles10 (ocf::heartbeat:Xen): Started node2
-
- Transition Summary:
-diff --git a/pengine/test10/novell-252693.summary b/pengine/test10/novell-252693.summary
-index 47087bb..8207d41 100644
---- a/pengine/test10/novell-252693.summary
-+++ b/pengine/test10/novell-252693.summary
-@@ -72,18 +72,18 @@ Online: [ node1 node2 ]
-
- Clone Set: stonithcloneset [stonithclone]
- Started: [ node2 ]
-- Stopped: [ stonithclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: evmsdcloneset [evmsdclone]
- Started: [ node2 ]
-- Stopped: [ evmsdclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: evmscloneset [evmsclone]
- Started: [ node2 ]
-- Stopped: [ evmsclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: imagestorecloneset [imagestoreclone]
- Started: [ node2 ]
-- Stopped: [ imagestoreclone:1 ]
-+ Stopped: [ node1 ]
- Clone Set: configstorecloneset [configstoreclone]
- Started: [ node2 ]
-- Stopped: [ configstoreclone:1 ]
-+ Stopped: [ node1 ]
- sles10 (ocf::heartbeat:Xen): Started node2
-
-diff --git a/pengine/test10/obsolete-lrm-resource.summary b/pengine/test10/obsolete-lrm-resource.summary
-index f45bdd5..237c3b8 100644
---- a/pengine/test10/obsolete-lrm-resource.summary
-+++ b/pengine/test10/obsolete-lrm-resource.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ yingying.site ]
-
- Clone Set: rsc1 [rsc1_child]
-- Stopped: [ rsc1_child:0 ]
-+ Stopped: [ yingying.site ]
-
- Transition Summary:
- * Start rsc1_child:0 (yingying.site)
-diff --git a/pengine/test10/one-or-more-5.exp b/pengine/test10/one-or-more-5.exp
-index ed11c12..67d1231 100644
---- a/pengine/test10/one-or-more-5.exp
-+++ b/pengine/test10/one-or-more-5.exp
-@@ -1,7 +1,7 @@
-@@ -11,13 +11,13 @@
-@@ -26,20 +26,20 @@
-@@ -48,7 +48,7 @@
-@@ -57,7 +57,7 @@
-@@ -66,23 +66,23 @@
-@@ -91,7 +91,7 @@
-@@ -101,13 +101,13 @@
-@@ -116,40 +116,40 @@
-@@ -161,7 +161,7 @@
-@@ -173,7 +173,7 @@
-diff --git a/pengine/test10/order-clone.summary b/pengine/test10/order-clone.summary
-index 4af7b7e..cb61fb0 100644
---- a/pengine/test10/order-clone.summary
-+++ b/pengine/test10/order-clone.summary
-@@ -4,17 +4,17 @@ Online: [ hex-0 hex-7 hex-8 hex-9 ]
-
- fencing-sbd (stonith:external/sbd): Stopped
- Clone Set: o2cb-clone [o2cb]
-- Stopped: [ o2cb:0 o2cb:1 o2cb:2 o2cb:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: vg1-clone [vg1]
-- Stopped: [ vg1:0 vg1:1 vg1:2 vg1:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: fs2-clone [ocfs2-2]
-- Stopped: [ ocfs2-2:0 ocfs2-2:1 ocfs2-2:2 ocfs2-2:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: fs1-clone [ocfs2-1]
-- Stopped: [ ocfs2-1:0 ocfs2-1:1 ocfs2-1:2 ocfs2-1:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: dlm-clone [dlm]
-- Stopped: [ dlm:0 dlm:1 dlm:2 dlm:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: clvm-clone [clvm]
-- Stopped: [ clvm:0 clvm:1 clvm:2 clvm:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
-
- Transition Summary:
- * Start fencing-sbd (hex-0)
-@@ -27,15 +27,15 @@ Online: [ hex-0 hex-7 hex-8 hex-9 ]
-
- fencing-sbd (stonith:external/sbd): Started hex-0
- Clone Set: o2cb-clone [o2cb]
-- Stopped: [ o2cb:0 o2cb:1 o2cb:2 o2cb:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: vg1-clone [vg1]
-- Stopped: [ vg1:0 vg1:1 vg1:2 vg1:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: fs2-clone [ocfs2-2]
-- Stopped: [ ocfs2-2:0 ocfs2-2:1 ocfs2-2:2 ocfs2-2:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: fs1-clone [ocfs2-1]
-- Stopped: [ ocfs2-1:0 ocfs2-1:1 ocfs2-1:2 ocfs2-1:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: dlm-clone [dlm]
-- Stopped: [ dlm:0 dlm:1 dlm:2 dlm:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
- Clone Set: clvm-clone [clvm]
-- Stopped: [ clvm:0 clvm:1 clvm:2 clvm:3 ]
-+ Stopped: [ hex-0 hex-7 hex-8 hex-9 ]
-
-diff --git a/pengine/test10/order7.exp b/pengine/test10/order7.exp
-index f8594a9..cc7cf86 100644
---- a/pengine/test10/order7.exp
-+++ b/pengine/test10/order7.exp
-@@ -45,7 +45,7 @@
-diff --git a/pengine/test10/order_constraint_stops_master.summary b/pengine/test10/order_constraint_stops_master.summary
-index cbbe157..8170d30 100644
---- a/pengine/test10/order_constraint_stops_master.summary
-+++ b/pengine/test10/order_constraint_stops_master.summary
-@@ -4,6 +4,7 @@ Online: [ fc16-builder fc16-builder2 ]
-
- Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A]
- Masters: [ fc16-builder ]
-+ Stopped: [ fc16-builder2 ]
- NATIVE_RSC_B (ocf::pacemaker:Dummy): Started fc16-builder2
-
- Transition Summary:
-@@ -35,6 +36,6 @@ Revised cluster status:
- Online: [ fc16-builder fc16-builder2 ]
-
- Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A]
-- Stopped: [ NATIVE_RSC_A:0 ]
-+ Stopped: [ fc16-builder fc16-builder2 ]
- NATIVE_RSC_B (ocf::pacemaker:Dummy): Stopped
-
-diff --git a/pengine/test10/order_constraint_stops_slave.summary b/pengine/test10/order_constraint_stops_slave.summary
-index 14478b0..5a67aee 100644
---- a/pengine/test10/order_constraint_stops_slave.summary
-+++ b/pengine/test10/order_constraint_stops_slave.summary
-@@ -5,6 +5,7 @@ OFFLINE: [ fc16-builder2 ]
-
- Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A]
- Slaves: [ fc16-builder ]
-+ Stopped: [ fc16-builder2 ]
- NATIVE_RSC_B
(ocf::pacemaker:Dummy): Started fc16-builder
-
- Transition Summary:
-@@ -28,6 +29,6 @@ Online: [ fc16-builder ]
- OFFLINE: [ fc16-builder2 ]
-
- Master/Slave Set: MASTER_RSC_A [NATIVE_RSC_A]
-- Stopped: [ NATIVE_RSC_A:0 ]
-+ Stopped: [ fc16-builder fc16-builder2 ]
- NATIVE_RSC_B (ocf::pacemaker:Dummy): Stopped
-
-diff --git a/pengine/test10/orphan-2.summary b/pengine/test10/orphan-2.summary
-index e20bb18..dbbdcd0 100644
---- a/pengine/test10/orphan-2.summary
-+++ b/pengine/test10/orphan-2.summary
-@@ -27,8 +27,8 @@ Executing cluster transition:
- * Resource action: rsc_c001n01 monitor on c001n08
- * Resource action: rsc_c001n01 monitor on c001n03
- * Resource action: rsc_c001n01 monitor on c001n02
-- * Cluster action: clear_failcount on c001n08
-- * Cluster action: clear_failcount on c001n02
-+ * Cluster action: clear_failcount for rsc_c001n08 on c001n08
-+ * Cluster action: clear_failcount for rsc_c001n08 on c001n02
- * Pseudo action: probe_complete
- * Resource action: rsc_c001n08 stop on c001n08
- * Resource action: rsc_c001n08 delete on c001n08
-diff --git a/pengine/test10/params-6.summary b/pengine/test10/params-6.summary
-index 05367dd..78f98c0 100644
---- a/pengine/test10/params-6.summary
-+++ b/pengine/test10/params-6.summary
-@@ -14,83 +14,73 @@ Online: [ mgmt01 v03-a v03-b ]
- stonith-v03-d (stonith:fence_ipmilan): Stopped
- Clone Set: cl-clvmd [clvmd]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ]
- Clone Set: cl-dlm [dlm]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ]
- Clone Set: cl-iscsid [iscsid]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ]
- Clone Set: cl-libvirtd [libvirtd]
- Started: [ v03-a v03-b ]
-- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-multipathd [multipathd]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ]
- Clone Set: cl-node-params [node-params]
- Started: [ v03-a v03-b ]
-- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan1-if [vlan1-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan101-if [vlan101-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan102-if [vlan102-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan103-if [vlan103-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan104-if [vlan104-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan3-if [vlan3-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan4-if [vlan4-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan5-if [vlan5-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan900-if [vlan900-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan909-if [vlan909-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-libvirt-images-fs [libvirt-images-fs]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ]
- Clone Set: cl-libvirt-install-fs [libvirt-install-fs]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ]
- Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ]
- Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ]
- Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ]
- Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ]
- Clone Set: cl-libvirt-images-pool [libvirt-images-pool]
- Started: [ v03-a v03-b ]
-- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool]
- Started: [ v03-a v03-b ]
-- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool]
- Started: [ v03-a v03-b ]
-- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ]
-+ Stopped: [ mgmt01 ]
- git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped
- Clone Set: cl-libvirt-qpid [libvirt-qpid]
- Started: [ v03-a v03-b ]
-- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ]
-+ Stopped: [ mgmt01 ]
- vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b
- vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b
- vd01-c.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a
-@@ -112,7 +102,7 @@ Online: [ mgmt01 v03-a v03-b ]
- zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped
- Clone Set: cl-vlan200-if [vlan200-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ]
-+ Stopped: [ mgmt01 ]
- anbriz-gw-vm (ocf::vds-ok:VirtualDomain): Stopped
- anbriz-work-vm (ocf::vds-ok:VirtualDomain): Stopped
- lenny-x32-devel-vm (ocf::vds-ok:VirtualDomain): Started v03-a
-@@ -187,7 +177,7 @@ Online: [ mgmt01 v03-a v03-b ]
- c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a
- Clone Set: cl-mcast-test-net [mcast-test-net]
- Started: [ v03-a v03-b ]
-- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ]
-+ Stopped: [ mgmt01 ]
- dist.fly-uni.org-vm (ocf::vds-ok:VirtualDomain): Stopped
-
- Transition Summary:
-@@ -217,83 +207,73 @@ Online: [ mgmt01 v03-a v03-b ]
- stonith-v03-d (stonith:fence_ipmilan): Stopped
- Clone Set: cl-clvmd [clvmd]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ clvmd:3 clvmd:4 clvmd:5 clvmd:6 clvmd:7 clvmd:8 ]
- Clone Set: cl-dlm [dlm]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ dlm:3 dlm:4 dlm:5 dlm:6 dlm:7 dlm:8 ]
- Clone Set: cl-iscsid [iscsid]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ iscsid:3 iscsid:4 iscsid:5 iscsid:6 iscsid:7 iscsid:8 ]
- Clone Set: cl-libvirtd [libvirtd]
- Started: [ v03-a v03-b ]
-- Stopped: [ libvirtd:2 libvirtd:3 libvirtd:4 libvirtd:5 libvirtd:6 libvirtd:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-multipathd [multipathd]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ multipathd:3 multipathd:4 multipathd:5 multipathd:6 multipathd:7 multipathd:8 ]
- Clone Set: cl-node-params [node-params]
- Started: [ v03-a v03-b ]
-- Stopped: [ node-params:2 node-params:3 node-params:4 node-params:5 node-params:6 node-params:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan1-if [vlan1-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan1-if:2 vlan1-if:3 vlan1-if:4 vlan1-if:5 vlan1-if:6 vlan1-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan101-if [vlan101-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan101-if:2 vlan101-if:3 vlan101-if:4 vlan101-if:5 vlan101-if:6 vlan101-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan102-if [vlan102-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan102-if:2 vlan102-if:3 vlan102-if:4 vlan102-if:5 vlan102-if:6 vlan102-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan103-if [vlan103-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan103-if:2 vlan103-if:3 vlan103-if:4 vlan103-if:5 vlan103-if:6 vlan103-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan104-if [vlan104-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan104-if:2 vlan104-if:3 vlan104-if:4 vlan104-if:5 vlan104-if:6 vlan104-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan3-if [vlan3-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan3-if:2 vlan3-if:3 vlan3-if:4 vlan3-if:5 vlan3-if:6 vlan3-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan4-if [vlan4-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan4-if:2 vlan4-if:3 vlan4-if:4 vlan4-if:5 vlan4-if:6 vlan4-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan5-if [vlan5-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan5-if:2 vlan5-if:3 vlan5-if:4 vlan5-if:5 vlan5-if:6 vlan5-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan900-if [vlan900-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan900-if:2 vlan900-if:3 vlan900-if:4 vlan900-if:5 vlan900-if:6 vlan900-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vlan909-if [vlan909-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan909-if:2 vlan909-if:3 vlan909-if:4 vlan909-if:5 vlan909-if:6 vlan909-if:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-libvirt-images-fs [libvirt-images-fs]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ libvirt-images-fs:3 libvirt-images-fs:4 libvirt-images-fs:5 libvirt-images-fs:6 libvirt-images-fs:7 libvirt-images-fs:8 ]
- Clone Set: cl-libvirt-install-fs [libvirt-install-fs]
- Started: [ mgmt01 v03-a
v03-b ]
-- Stopped: [ libvirt-install-fs:3 libvirt-install-fs:4 libvirt-install-fs:5 libvirt-install-fs:6 libvirt-install-fs:7 libvirt-install-fs:8 ]
- Clone Set: cl-vds-ok-pool-0-iscsi [vds-ok-pool-0-iscsi]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-0-iscsi:3 vds-ok-pool-0-iscsi:4 vds-ok-pool-0-iscsi:5 vds-ok-pool-0-iscsi:6 vds-ok-pool-0-iscsi:7 vds-ok-pool-0-iscsi:8 ]
- Clone Set: cl-vds-ok-pool-0-vg [vds-ok-pool-0-vg]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-0-vg:3 vds-ok-pool-0-vg:4 vds-ok-pool-0-vg:5 vds-ok-pool-0-vg:6 vds-ok-pool-0-vg:7 vds-ok-pool-0-vg:8 ]
- Clone Set: cl-vds-ok-pool-1-iscsi [vds-ok-pool-1-iscsi]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-1-iscsi:3 vds-ok-pool-1-iscsi:4 vds-ok-pool-1-iscsi:5 vds-ok-pool-1-iscsi:6 vds-ok-pool-1-iscsi:7 vds-ok-pool-1-iscsi:8 ]
- Clone Set: cl-vds-ok-pool-1-vg [vds-ok-pool-1-vg]
- Started: [ mgmt01 v03-a v03-b ]
-- Stopped: [ vds-ok-pool-1-vg:3 vds-ok-pool-1-vg:4 vds-ok-pool-1-vg:5 vds-ok-pool-1-vg:6 vds-ok-pool-1-vg:7 vds-ok-pool-1-vg:8 ]
- Clone Set: cl-libvirt-images-pool [libvirt-images-pool]
- Started: [ v03-a v03-b ]
-- Stopped: [ libvirt-images-pool:2 libvirt-images-pool:3 libvirt-images-pool:4 libvirt-images-pool:5 libvirt-images-pool:6 libvirt-images-pool:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vds-ok-pool-0-pool [vds-ok-pool-0-pool]
- Started: [ v03-a v03-b ]
-- Stopped: [ vds-ok-pool-0-pool:2 vds-ok-pool-0-pool:3 vds-ok-pool-0-pool:4 vds-ok-pool-0-pool:5 vds-ok-pool-0-pool:6 vds-ok-pool-0-pool:7 ]
-+ Stopped: [ mgmt01 ]
- Clone Set: cl-vds-ok-pool-1-pool [vds-ok-pool-1-pool]
- Started: [ v03-a v03-b ]
-- Stopped: [ vds-ok-pool-1-pool:2 vds-ok-pool-1-pool:3 vds-ok-pool-1-pool:4 vds-ok-pool-1-pool:5 vds-ok-pool-1-pool:6 vds-ok-pool-1-pool:7 ]
-+ Stopped: [ mgmt01 ]
- git.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Stopped
- Clone Set: cl-libvirt-qpid [libvirt-qpid]
- Started: [ v03-a v03-b ]
-- Stopped: [ libvirt-qpid:2 libvirt-qpid:3 libvirt-qpid:4 libvirt-qpid:5 libvirt-qpid:6 libvirt-qpid:7 ]
-+ Stopped: [ mgmt01 ]
- vd01-a.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b
- vd01-b.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-b
- vd01-c.cdev.ttc.prague.cz.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a
-@@ -315,7 +295,7 @@ Online: [ mgmt01 v03-a v03-b ]
- zakaz.transferrus.ru-vm (ocf::vds-ok:VirtualDomain): Stopped
- Clone Set: cl-vlan200-if [vlan200-if]
- Started: [ v03-a v03-b ]
-- Stopped: [ vlan200-if:2 vlan200-if:3 vlan200-if:4 vlan200-if:5 vlan200-if:6 vlan200-if:7 ]
-+ Stopped: [ mgmt01 ]
- anbriz-gw-vm (ocf::vds-ok:VirtualDomain): Stopped
- anbriz-work-vm (ocf::vds-ok:VirtualDomain): Stopped
- lenny-x32-devel-vm (ocf::vds-ok:VirtualDomain): Started v03-a
-@@ -390,6 +370,6 @@ Online: [ mgmt01 v03-a v03-b ]
- c5-x64-devel.vds-ok.com-vm (ocf::vds-ok:VirtualDomain): Started v03-a
- Clone Set: cl-mcast-test-net [mcast-test-net]
- Started: [ v03-a v03-b ]
-- Stopped: [ mcast-test-net:2 mcast-test-net:3 mcast-test-net:4 mcast-test-net:5 mcast-test-net:6 mcast-test-net:7 ]
-+ Stopped: [ mgmt01 ]
- dist.fly-uni.org-vm (ocf::vds-ok:VirtualDomain): Stopped
-
-diff --git a/pengine/test10/per-node-attrs.dot b/pengine/test10/per-node-attrs.dot
-new file mode 100644
-index 0000000..6dca5e0
---- /dev/null
-+++ b/pengine/test10/per-node-attrs.dot
-@@ -0,0 +1,17 @@
-+digraph "g" {
-+"dummy_monitor_0 pcmk-1" -> "probe_complete pcmk-1" [ style = bold]
-+"dummy_monitor_0 pcmk-1" [ style=bold color="green" fontcolor="black"]
-+"dummy_monitor_0 pcmk-2" -> "probe_complete pcmk-2" [ style = bold]
-+"dummy_monitor_0 pcmk-2" [ style=bold color="green" fontcolor="black"]
-+"dummy_monitor_0 pcmk-3" -> "probe_complete pcmk-3" [ style = bold]
-+"dummy_monitor_0 pcmk-3" [ style=bold color="green" fontcolor="black"]
-+"dummy_start_0 pcmk-1" [ style=bold color="green" fontcolor="black"]
-+"probe_complete pcmk-1" -> "probe_complete" [ style = bold]
-+"probe_complete pcmk-1" [ style=bold color="green" fontcolor="black"]
-+"probe_complete pcmk-2" -> "probe_complete" [ style = bold]
-+"probe_complete pcmk-2" [ style=bold color="green" fontcolor="black"]
-+"probe_complete pcmk-3" -> "probe_complete" [ style = bold]
-+"probe_complete pcmk-3" [ style=bold color="green" fontcolor="black"]
-+"probe_complete" -> "dummy_start_0 pcmk-1" [ style = bold]
-+"probe_complete" [ style=bold color="green" fontcolor="orange"]
-+}
-diff --git a/pengine/test10/per-node-attrs.exp b/pengine/test10/per-node-attrs.exp
-new file mode 100644
-index 0000000..1e38557
---- /dev/null
-+++ b/pengine/test10/per-node-attrs.exp
-@@ -0,0 +1,97 @@
-diff --git a/pengine/test10/per-node-attrs.scores b/pengine/test10/per-node-attrs.scores
-new file mode 100644
-index 0000000..14a57ff
---- /dev/null
-+++ b/pengine/test10/per-node-attrs.scores
-@@ -0,0 +1,4 @@
-+Allocation scores:
-+native_color: dummy allocation score on pcmk-1: 0
-+native_color: dummy allocation score on pcmk-2: 0
-+native_color: dummy allocation score on pcmk-3: 0
-diff --git a/pengine/test10/per-node-attrs.summary b/pengine/test10/per-node-attrs.summary
-new file mode 100644
-index 0000000..420f2d2
---- /dev/null
-+++ b/pengine/test10/per-node-attrs.summary
-@@ -0,0 +1,21 @@
-+
-+Current cluster status:
-+Online: [ pcmk-1 pcmk-2 pcmk-3 ]
-+
-+ dummy (ocf::heartbeat:IPaddr2): Stopped
-+
-+Transition Summary:
-+ * Start dummy (pcmk-1)
-+
-+Executing cluster transition:
-+ * Resource action: dummy monitor on pcmk-3
-+ * Resource action: dummy monitor on pcmk-2
-+ * Resource action: dummy monitor on pcmk-1
-+ * Pseudo action: probe_complete
-+ * Resource action: dummy start on pcmk-1
-+
-+Revised cluster status:
-+Online: [ pcmk-1 pcmk-2 pcmk-3 ]
-+
-+ dummy (ocf::heartbeat:IPaddr2): Started pcmk-1
-+
-diff --git a/pengine/test10/per-node-attrs.xml b/pengine/test10/per-node-attrs.xml
-new file mode 100644
-index 0000000..928debe
---- /dev/null
-+++ b/pengine/test10/per-node-attrs.xml
-@@ -0,0 +1,54 @@
-diff --git a/pengine/test10/probe-0.summary b/pengine/test10/probe-0.summary
-index 3df06f6..d1340c5 100644
---- a/pengine/test10/probe-0.summary
-+++ b/pengine/test10/probe-0.summary
-@@ -7,7 +7,7 @@ Online: [ x32c47 x32c48 ]
- Clone Set: imagestorecloneset [imagestoreclone]
- Started: [ x32c47 x32c48 ]
- Clone Set: configstorecloneset [configstoreclone]
-- Stopped: [ configstoreclone:0 configstoreclone:1 ]
-+ Stopped: [ x32c47 x32c48 ]
-
- Transition Summary:
- * Start configstoreclone:0 (x32c47)
-diff --git a/pengine/test10/probe-2.summary b/pengine/test10/probe-2.summary
-index 39fb48c..e8a2269 100644
---- a/pengine/test10/probe-2.summary
-+++ b/pengine/test10/probe-2.summary
-@@ -136,23 +136,23 @@ Online: [ wc01 ]
- intip_nfs (ocf::heartbeat:IPaddr2): Started wc01
- Master/Slave Set: ms_drbd_mysql [drbd_mysql]
- Masters: [ wc01 ]
-- Stopped: [ drbd_mysql:1 ]
-+ Stopped: [ wc02 ]
- Resource Group: group_mysql
- fs_mysql (ocf::heartbeat:Filesystem): Started wc01
- intip_sql (ocf::heartbeat:IPaddr2): Started wc01
- mysql-server (ocf::heartbeat:mysql): Started wc01
- Master/Slave Set: ms_drbd_www [drbd_www]
- Masters: [ wc01 ]
-- Stopped: [ drbd_www:1 ]
-+ Stopped: [ wc02 ]
- Clone Set: clone_nfs-common [group_nfs-common]
- Started: [ wc01 ]
-- Stopped: [ group_nfs-common:1 ]
-+ Stopped: [ wc02 ]
- Clone Set: clone_mysql-proxy [group_mysql-proxy]
- Started: [ wc01 ]
-- Stopped: [ group_mysql-proxy:1 ]
-+ Stopped: [ wc02 ]
- Clone Set: clone_webservice [group_webservice]
- Started: [ wc01 ]
-- Stopped: [ group_webservice:1 ]
-+ Stopped: [ wc02 ]
- Resource Group: group_ftpd
- extip_ftp (ocf::heartbeat:IPaddr2): Started wc01
- pure-ftpd (ocf::heartbeat:Pure-FTPd): Started wc01
-diff --git a/pengine/test10/probe-3.scores b/pengine/test10/probe-3.scores
-index 794ed4a..277670c 100644
---- a/pengine/test10/probe-3.scores
-+++ b/pengine/test10/probe-3.scores
-@@ -44,15 +44,15 @@ clone_color: ping-1:3 allocation score on pcmk-2: 0
- clone_color: ping-1:3 allocation score on pcmk-3: 0
- clone_color: ping-1:3 allocation score on pcmk-4: 0
- clone_color: stateful-1:0 allocation score on pcmk-1: 11
--clone_color: stateful-1:0 allocation score on pcmk-2: 5
--clone_color: stateful-1:0 allocation score on pcmk-3: 5
-+clone_color: stateful-1:0 allocation score on pcmk-2: 0
-+clone_color: stateful-1:0 allocation score on pcmk-3: 0
- clone_color: stateful-1:0 allocation score on pcmk-4: 0
--clone_color: stateful-1:1 allocation score on pcmk-1: 10
-+clone_color: stateful-1:1 allocation score on pcmk-1: 0
- clone_color: stateful-1:1 allocation score on pcmk-2: 6
--clone_color: stateful-1:1 allocation score on pcmk-3: 5
-+clone_color: stateful-1:1 allocation score on pcmk-3: 0
- clone_color: stateful-1:1 allocation score on pcmk-4: 0
--clone_color: stateful-1:2 allocation score on pcmk-1: 10
--clone_color: stateful-1:2 allocation score on pcmk-2: 5
-+clone_color: stateful-1:2 allocation score on pcmk-1: 0
-+clone_color: stateful-1:2 allocation score on pcmk-2: 0
- clone_color: stateful-1:2 allocation score on pcmk-3: 6
- clone_color: stateful-1:2 allocation score on pcmk-4: 0
- clone_color: stateful-1:3 allocation score on pcmk-1: 0
-@@ -147,11 +147,11 @@ native_color: stateful-1:0 allocation score on pcmk-1: 11
- native_color: stateful-1:0 allocation score on pcmk-2: -INFINITY
- native_color: stateful-1:0 allocation score on pcmk-3: -INFINITY
- native_color: stateful-1:0 allocation score on pcmk-4: -INFINITY
--native_color: stateful-1:1 allocation score on pcmk-1: 10
-+native_color: stateful-1:1 allocation score on pcmk-1: 0
- native_color: stateful-1:1 allocation score on pcmk-2: 6
--native_color: stateful-1:1 allocation score on pcmk-3: 5
-+native_color: stateful-1:1 allocation score on pcmk-3: 0
- native_color: stateful-1:1 allocation score on pcmk-4: -INFINITY
--native_color: stateful-1:2 allocation score on pcmk-1: 10
-+native_color: stateful-1:2 allocation score on pcmk-1: 0
- native_color: stateful-1:2 allocation score on pcmk-2: -INFINITY
- native_color: stateful-1:2 allocation score on pcmk-3: 6
-diff --git
a/pengine/test10/probe-3.summary b/pengine/test10/probe-3.summary
-index 8ab28ef..c11a5ba 100644
---- a/pengine/test10/probe-3.summary
-+++ b/pengine/test10/probe-3.summary
-@@ -15,14 +15,14 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ]
- migrator (ocf::pacemaker:Dummy): Started pcmk-3
- Clone Set: Connectivity [ping-1]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ ping-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1]
- Masters: [ pcmk-1 ]
- Slaves: [ pcmk-2 pcmk-3 ]
-- Stopped: [ stateful-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Clone Set: Fencing [FencingChild]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ FencingChild:3 ]
-+ Stopped: [ pcmk-4 ]
-
- Transition Summary:
-
-@@ -44,12 +44,12 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ]
- migrator (ocf::pacemaker:Dummy): Started pcmk-3
- Clone Set: Connectivity [ping-1]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ ping-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1]
- Masters: [ pcmk-1 ]
- Slaves: [ pcmk-2 pcmk-3 ]
-- Stopped: [ stateful-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Clone Set: Fencing [FencingChild]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ FencingChild:3 ]
-+ Stopped: [ pcmk-4 ]
-
-diff --git a/pengine/test10/probe-4.scores b/pengine/test10/probe-4.scores
-index 080ab8b..b1615c0 100644
---- a/pengine/test10/probe-4.scores
-+++ b/pengine/test10/probe-4.scores
-@@ -46,15 +46,15 @@ clone_color: ping-1:3 allocation score on pcmk-2: 0
- clone_color: ping-1:3 allocation score on pcmk-3: 0
- clone_color: ping-1:3 allocation score on pcmk-4: 0
- clone_color: stateful-1:0 allocation score on pcmk-1: 11
--clone_color: stateful-1:0 allocation score on pcmk-2: 5
--clone_color: stateful-1:0 allocation score on pcmk-3: 5
-+clone_color: stateful-1:0 allocation score on pcmk-2: 0
-+clone_color: stateful-1:0 allocation score on pcmk-3: 0
- clone_color: stateful-1:0 allocation score on pcmk-4: 0
--clone_color: stateful-1:1 allocation score on pcmk-1: 10
-+clone_color: stateful-1:1 allocation score on pcmk-1: 0
- clone_color: stateful-1:1 allocation score on pcmk-2: 6
--clone_color: stateful-1:1 allocation score on pcmk-3: 5
-+clone_color: stateful-1:1 allocation score on pcmk-3: 0
- clone_color: stateful-1:1 allocation score on pcmk-4: 0
--clone_color: stateful-1:2 allocation score on pcmk-1: 10
--clone_color: stateful-1:2 allocation score on pcmk-2: 5
-+clone_color: stateful-1:2 allocation score on pcmk-1: 0
-+clone_color: stateful-1:2 allocation score on pcmk-2: 0
- clone_color: stateful-1:2 allocation score on pcmk-3: 6
- clone_color: stateful-1:2 allocation score on pcmk-4: 0
- clone_color: stateful-1:3 allocation score on pcmk-1: 0
-@@ -149,11 +149,11 @@ native_color: stateful-1:0 allocation score on pcmk-1: 11
- native_color: stateful-1:0 allocation score on pcmk-2: -INFINITY
- native_color: stateful-1:0 allocation score on pcmk-3: -INFINITY
- native_color: stateful-1:0 allocation score on pcmk-4: -INFINITY
--native_color: stateful-1:1 allocation score on pcmk-1: 10
-+native_color: stateful-1:1 allocation score on pcmk-1: 0
- native_color: stateful-1:1 allocation score on pcmk-2: 6
--native_color: stateful-1:1 allocation score on pcmk-3: 5
-+native_color: stateful-1:1 allocation score on pcmk-3: 0
- native_color: stateful-1:1 allocation score on pcmk-4: -INFINITY
--native_color: stateful-1:2 allocation score on pcmk-1: 10
-+native_color: stateful-1:2 allocation score on pcmk-1: 0
- native_color: stateful-1:2 allocation score on pcmk-2: -INFINITY
- native_color: stateful-1:2 allocation score on pcmk-3: 6
- native_color: stateful-1:2 allocation score on pcmk-4: -INFINITY
-diff --git a/pengine/test10/probe-4.summary b/pengine/test10/probe-4.summary
-index d666e5d..e2eb8af 100644
---- a/pengine/test10/probe-4.summary
-+++ b/pengine/test10/probe-4.summary
-@@ -15,14 +15,14 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ]
- migrator (ocf::pacemaker:Dummy): Stopped
- Clone Set: Connectivity [ping-1]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ ping-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1]
- Masters: [ pcmk-1 ]
- Slaves: [ pcmk-2 pcmk-3 ]
-- Stopped: [ stateful-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Clone Set: Fencing [FencingChild]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ FencingChild:3 ]
-+ Stopped: [ pcmk-4 ]
-
- Transition Summary:
- * Start migrator (pcmk-3)
-@@ -47,12 +47,12 @@ Online: [ pcmk-1 pcmk-2 pcmk-3 ]
- migrator (ocf::pacemaker:Dummy): Stopped
- Clone Set: Connectivity [ping-1]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ ping-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Master/Slave Set: master-1 [stateful-1]
- Masters: [ pcmk-1 ]
- Slaves: [ pcmk-2 pcmk-3 ]
-- Stopped: [ stateful-1:3 ]
-+ Stopped: [ pcmk-4 ]
- Clone Set: Fencing [FencingChild]
- Started: [ pcmk-1 pcmk-2 pcmk-3 ]
-- Stopped: [ FencingChild:3 ]
-+ Stopped: [ pcmk-4 ]
-
-diff --git a/pengine/test10/rec-node-13.summary b/pengine/test10/rec-node-13.summary
-index e425beb..2833d0b 100644
---- a/pengine/test10/rec-node-13.summary
-+++ b/pengine/test10/rec-node-13.summary
-@@ -6,7 +6,7 @@ OFFLINE: [ c001n03 c001n05 ]
-
- Clone Set: DoFencing [child_DoFencing]
- Started: [ c001n02 c001n06 c001n07 ]
-- Stopped: [ child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ]
-+ Stopped: [ c001n03 c001n04 c001n05 ]
- DcIPaddr (ocf::heartbeat:IPaddr): Stopped
- Resource Group: group-1
- ocf_192.168.100.181 (ocf::heartbeat:IPaddr): Started c001n02
-@@ -50,7 +50,7 @@ OFFLINE: [ c001n03 c001n04 c001n05 ]
-
- Clone Set: DoFencing [child_DoFencing]
- Started: [ c001n02 c001n06 c001n07 ]
-- Stopped: [ child_DoFencing:3 child_DoFencing:4 child_DoFencing:5 ]
-+ Stopped: [ c001n03 c001n04 c001n05 ]
- DcIPaddr (ocf::heartbeat:IPaddr): Stopped
- Resource Group: group-1
- ocf_192.168.100.181 (ocf::heartbeat:IPaddr): Started c001n02
-diff --git a/pengine/test10/rsc-sets-clone-1.summary b/pengine/test10/rsc-sets-clone-1.summary
-index d840454..4e2ced1 100644
---- a/pengine/test10/rsc-sets-clone-1.summary
-+++ b/pengine/test10/rsc-sets-clone-1.summary
-@@ -10,9 +10,9 @@ Online: [ sys2 sys3 ]
- stonithsys3 (stonith:external/ipmi): Started sys2
- Clone Set: baseclone [basegrp]
- Started: [ sys2 ]
-- Stopped: [ basegrp:1 ]
-+ Stopped: [ sys3 ]
- Clone Set: fs1 [nfs1]
-- Stopped: [ nfs1:0 nfs1:1 ]
-+ Stopped: [ sys2 sys3 ]
-
- Transition Summary:
- * Restart stonithsys3 (Started sys2)
-@@ -80,5 +80,5 @@ Online: [ sys2 sys3 ]
- Clone Set: baseclone [basegrp]
- Started: [ sys2 sys3 ]
- Clone Set: fs1 [nfs1]
-- Stopped: [ nfs1:0 nfs1:1 ]
-+ Stopped: [ sys2 sys3 ]
-
-diff --git a/pengine/test10/rsc-sets-clone.summary b/pengine/test10/rsc-sets-clone.summary
-index 697f94a..7ee23a2 100644
---- a/pengine/test10/rsc-sets-clone.summary
-+++ b/pengine/test10/rsc-sets-clone.summary
-@@ -33,5 +33,5 @@ Online: [ node2 ]
- rsc3 (ocf::pacemaker:Dummy): Started node2
- Clone Set: clone-rsc [rsc]
- Started: [ node2 ]
-- Stopped: [ rsc:1 ]
-+ Stopped: [ node1 ]
-
-diff --git a/pengine/test10/rsc-sets-master.summary b/pengine/test10/rsc-sets-master.summary
-index fafb423..5415cda 100644
---- a/pengine/test10/rsc-sets-master.summary
-+++ b/pengine/test10/rsc-sets-master.summary
-@@ -41,7 +41,7 @@ Online: [ node2 ]
-
- Master/Slave Set: ms-rsc [rsc]
- Masters: [ node2 ]
-- Stopped: [ rsc:1 ]
-+ Stopped: [ node1 ]
- rsc1 (ocf::pacemaker:Dummy): Started node2
- rsc2 (ocf::pacemaker:Dummy): Started node2
- rsc3 (ocf::pacemaker:Dummy): Started node2
-diff --git a/pengine/test10/stonith-0.summary b/pengine/test10/stonith-0.summary
-index e4253c7..a91a06c 100644
---- a/pengine/test10/stonith-0.summary
-+++ b/pengine/test10/stonith-0.summary
-@@ -19,7 +19,7 @@ Online: [ c001n02 c001n04 c001n06 c001n07 c001n08 ]
- rsc_c001n08 (ocf::heartbeat:IPaddr): Started c001n08
- Clone Set: DoFencing [child_DoFencing]
- Started: [ c001n02 c001n04 c001n06 c001n07 c001n08 ]
-- Stopped: [ child_DoFencing:5 child_DoFencing:6 ]
-+ Stopped: [ c001n03 c001n05 ]
- Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique)
- ocf_msdummy:0 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Master c001n02
- ocf_msdummy:1 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Started c001n02
-@@ -90,7 +90,7 @@ OFFLINE: [ c001n03 c001n05 ]
- rsc_c001n08 (ocf::heartbeat:IPaddr): Started c001n08
- Clone Set: DoFencing [child_DoFencing]
- Started: [ c001n02 c001n04 c001n06 c001n07 c001n08 ]
-- Stopped: [ child_DoFencing:5 child_DoFencing:6 ]
-+ Stopped: [ c001n03 c001n05 ]
- Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique)
- ocf_msdummy:0 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Master c001n02
- ocf_msdummy:1 (ocf::heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Started c001n02
-diff --git a/pengine/test10/stonith-1.summary b/pengine/test10/stonith-1.summary
-index b2c46d9..d3e81db 100644
---- a/pengine/test10/stonith-1.summary
-+++ b/pengine/test10/stonith-1.summary
-@@ -15,7 +15,7 @@ Online: [ sles-1 sles-2 sles-4 ]
- rsc_sles-4 (ocf::heartbeat:IPaddr): Started sles-4
- Clone Set: DoFencing [child_DoFencing]
- Started: [ sles-1 sles-2 sles-3 ]
-- Stopped: [ child_DoFencing:3 ]
-+ Stopped: [ sles-4 ]
- Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique)
- ocf_msdummy:0 (ocf::heartbeat:Stateful): Stopped
- ocf_msdummy:1 (ocf::heartbeat:Stateful): Stopped
-@@ -98,7 +98,7 @@ OFFLINE: [ sles-3 ]
- rsc_sles-4 (ocf::heartbeat:IPaddr): Started sles-4
- Clone Set: DoFencing [child_DoFencing]
- Started: [ sles-1 sles-2 sles-4 ]
-- Stopped: [ child_DoFencing:3 ]
-+ Stopped: [ sles-3 ]
- Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique)
- ocf_msdummy:0 (ocf::heartbeat:Stateful): Started sles-4
- ocf_msdummy:1 (ocf::heartbeat:Stateful): Started sles-1
-diff --git a/pengine/test10/stonith-2.summary b/pengine/test10/stonith-2.summary
-index 59e5fc4..f02dd74 100644
---- a/pengine/test10/stonith-2.summary
-+++ b/pengine/test10/stonith-2.summary
-@@ -17,7 +17,7 @@ Online: [ sles-1 sles-2 sles-3 sles-4 sles-6 ]
- rsc_sles-6 (ocf::heartbeat:IPaddr): Started sles-6
- Clone Set: DoFencing [child_DoFencing]
- Started: [ sles-1 sles-2 sles-3 sles-4 sles-6 ]
-- Stopped: [ child_DoFencing:5 ]
-+ Stopped: [ sles-5 ]
- Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique)
- ocf_msdummy:0 (ocf::heartbeat:Stateful): Started sles-3
- ocf_msdummy:1 (ocf::heartbeat:Stateful): Started sles-4
-@@ -60,7 +60,7 @@ OFFLINE: [ sles-5 ]
- rsc_sles-6 (ocf::heartbeat:IPaddr): Started sles-6
- Clone Set: DoFencing [child_DoFencing]
- Started: [ sles-1 sles-2 sles-3 sles-4 sles-6 ]
-- Stopped: [ child_DoFencing:5 ]
-+ Stopped: [ sles-5 ]
- Master/Slave Set: master_rsc_1 [ocf_msdummy] (unique)
- ocf_msdummy:0 (ocf::heartbeat:Stateful): Started sles-3
- ocf_msdummy:1
(ocf::heartbeat:Stateful): Started sles-4
-diff --git a/pengine/test10/stonith-3.summary b/pengine/test10/stonith-3.summary
-index 651974c..b5b6d8a 100644
---- a/pengine/test10/stonith-3.summary
-+++ b/pengine/test10/stonith-3.summary
-@@ -5,7 +5,7 @@ Online: [ rh5node2 ]
-
- prmIpPostgreSQLDB (ocf::heartbeat:IPaddr): Stopped
- Clone Set: clnStonith [grpStonith]
-- Stopped: [ grpStonith:0 grpStonith:1 ]
-+ Stopped: [ rh5node1 rh5node2 ]
-
- Transition Summary:
- * Start prmIpPostgreSQLDB (rh5node2)
-@@ -33,5 +33,5 @@ OFFLINE: [ rh5node1 ]
- prmIpPostgreSQLDB (ocf::heartbeat:IPaddr): Started rh5node2
- Clone Set: clnStonith [grpStonith]
- Started: [ rh5node2 ]
-- Stopped: [ grpStonith:1 ]
-+ Stopped: [ rh5node1 ]
-
-diff --git a/pengine/test10/target-1.summary b/pengine/test10/target-1.summary
-index 95ab900..3dd4852 100644
---- a/pengine/test10/target-1.summary
-+++ b/pengine/test10/target-1.summary
-@@ -7,6 +7,7 @@ Online: [ c001n01 c001n02 c001n03 c001n08 ]
- rsc_c001n02 (ocf::heartbeat:IPaddr): Started c001n02
- Master/Slave Set: promoteme [rsc_c001n03]
- Slaves: [ c001n03 ]
-+ Stopped: [ c001n01 c001n02 c001n08 ]
- rsc_c001n01 (ocf::heartbeat:IPaddr): Started c001n01
-
- Transition Summary:
-@@ -37,5 +38,6 @@ Online: [ c001n01 c001n02 c001n03 c001n08 ]
- rsc_c001n02 (ocf::heartbeat:IPaddr): Started c001n02
- Master/Slave Set: promoteme [rsc_c001n03]
- Slaves: [ c001n03 ]
-+ Stopped: [ c001n01 c001n02 c001n08 ]
- rsc_c001n01 (ocf::heartbeat:IPaddr): Started c001n01
-
-diff --git a/pengine/test10/template-clone-group.summary b/pengine/test10/template-clone-group.summary
-index f1386fb..930758c 100644
---- a/pengine/test10/template-clone-group.summary
-+++ b/pengine/test10/template-clone-group.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ node1 node2 ]
-
- Clone Set: clone1 [group1]
-- Stopped: [ group1:0 group1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node1)
-diff --git a/pengine/test10/template-clone-primitive.summary b/pengine/test10/template-clone-primitive.summary
-index 20fb1e0..ba41149 100644
---- a/pengine/test10/template-clone-primitive.summary
-+++ b/pengine/test10/template-clone-primitive.summary
-@@ -3,7 +3,7 @@ Current cluster status:
- Online: [ node1 node2 ]
-
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node1)
-diff --git a/pengine/test10/ticket-clone-1.summary b/pengine/test10/ticket-clone-1.summary
-index c6087ad..7bc4d43 100644
---- a/pengine/test10/ticket-clone-1.summary
-+++ b/pengine/test10/ticket-clone-1.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-10.summary b/pengine/test10/ticket-clone-10.summary
-index c6087ad..7bc4d43 100644
---- a/pengine/test10/ticket-clone-10.summary
-+++ b/pengine/test10/ticket-clone-10.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-11.summary b/pengine/test10/ticket-clone-11.summary
-index 6a38240..3188894 100644
---- a/pengine/test10/ticket-clone-11.summary
-+++ b/pengine/test10/ticket-clone-11.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-clone-13.summary b/pengine/test10/ticket-clone-13.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-13.summary
-+++ b/pengine/test10/ticket-clone-13.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-14.summary b/pengine/test10/ticket-clone-14.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-14.summary
-+++ b/pengine/test10/ticket-clone-14.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-15.summary b/pengine/test10/ticket-clone-15.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-15.summary
-+++ b/pengine/test10/ticket-clone-15.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-16.summary b/pengine/test10/ticket-clone-16.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-16.summary
-+++ b/pengine/test10/ticket-clone-16.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-17.summary b/pengine/test10/ticket-clone-17.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-17.summary
-+++ b/pengine/test10/ticket-clone-17.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-18.summary b/pengine/test10/ticket-clone-18.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-18.summary
-+++ b/pengine/test10/ticket-clone-18.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-19.summary b/pengine/test10/ticket-clone-19.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-19.summary
-+++ b/pengine/test10/ticket-clone-19.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-2.summary b/pengine/test10/ticket-clone-2.summary
-index 6a38240..3188894 100644
---- a/pengine/test10/ticket-clone-2.summary
-+++ b/pengine/test10/ticket-clone-2.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
- * Start rsc1:0 (node2)
-diff --git a/pengine/test10/ticket-clone-20.summary b/pengine/test10/ticket-clone-20.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-20.summary
-+++ b/pengine/test10/ticket-clone-20.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-21.summary b/pengine/test10/ticket-clone-21.summary
-index f254cb2..7973159 100644
---- a/pengine/test10/ticket-clone-21.summary
-+++ b/pengine/test10/ticket-clone-21.summary
-@@ -27,5 +27,5 @@ OFFLINE: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Stopped
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-22.summary b/pengine/test10/ticket-clone-22.summary
-index 5ba88ce..7628a9e 100644
---- a/pengine/test10/ticket-clone-22.summary
-+++ b/pengine/test10/ticket-clone-22.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -15,5 +15,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-23.summary b/pengine/test10/ticket-clone-23.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-23.summary
-+++ b/pengine/test10/ticket-clone-23.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-3.summary b/pengine/test10/ticket-clone-3.summary
-index de7268f..c807656 100644
---- a/pengine/test10/ticket-clone-3.summary
-+++ b/pengine/test10/ticket-clone-3.summary
-@@ -22,5 +22,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-4.summary b/pengine/test10/ticket-clone-4.summary
-index c6087ad..7bc4d43 100644
---- a/pengine/test10/ticket-clone-4.summary
-+++ b/pengine/test10/ticket-clone-4.summary
-@@ -4,7 +4,7 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
- Transition Summary:
-
-@@ -18,5 +18,5 @@ Online: [ node1 node2 ]
-
- rsc_stonith (stonith:null): Started node1
- Clone Set: clone1 [rsc1]
-- Stopped: [ rsc1:0 rsc1:1 ]
-+ Stopped: [ node1 node2 ]
-
-diff --git a/pengine/test10/ticket-clone-5.summary b/pengine/test10/ticket-clone-5.summary
-index 6a38240..3188894 100644
---- a/pengine/test10/ticket-clone-5.summary -+++ b/pengine/test10/ticket-clone-5.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Clone Set: clone1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc1:0 (node2) -diff --git a/pengine/test10/ticket-clone-6.summary b/pengine/test10/ticket-clone-6.summary -index de7268f..c807656 100644 ---- a/pengine/test10/ticket-clone-6.summary -+++ b/pengine/test10/ticket-clone-6.summary -@@ -22,5 +22,5 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Clone Set: clone1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-clone-7.summary b/pengine/test10/ticket-clone-7.summary -index c6087ad..7bc4d43 100644 ---- a/pengine/test10/ticket-clone-7.summary -+++ b/pengine/test10/ticket-clone-7.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Clone Set: clone1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - -@@ -18,5 +18,5 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Clone Set: clone1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-clone-8.summary b/pengine/test10/ticket-clone-8.summary -index 6a38240..3188894 100644 ---- a/pengine/test10/ticket-clone-8.summary -+++ b/pengine/test10/ticket-clone-8.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Clone Set: clone1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc1:0 (node2) -diff --git a/pengine/test10/ticket-clone-9.summary b/pengine/test10/ticket-clone-9.summary -index f254cb2..7973159 100644 ---- a/pengine/test10/ticket-clone-9.summary -+++ b/pengine/test10/ticket-clone-9.summary -@@ -27,5 +27,5 @@ OFFLINE: [ node1 node2 ] - - rsc_stonith (stonith:null): Stopped - Clone Set: clone1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-master-1.summary b/pengine/test10/ticket-master-1.summary -index a28786f..41ba380 100644 ---- a/pengine/test10/ticket-master-1.summary -+++ b/pengine/test10/ticket-master-1.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - -@@ -18,5 +18,5 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-master-10.summary b/pengine/test10/ticket-master-10.summary -index 9538cf4..2b6f2c6 100644 ---- a/pengine/test10/ticket-master-10.summary -+++ b/pengine/test10/ticket-master-10.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc1:0 (node2) -diff --git a/pengine/test10/ticket-master-13.summary b/pengine/test10/ticket-master-13.summary -index 1f201d3..5f5d0d1 100644 ---- a/pengine/test10/ticket-master-13.summary -+++ b/pengine/test10/ticket-master-13.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - 
Transition Summary: - -@@ -15,5 +15,5 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-master-14.summary b/pengine/test10/ticket-master-14.summary -index 86b5ec4..9f7b89d 100644 ---- a/pengine/test10/ticket-master-14.summary -+++ b/pengine/test10/ticket-master-14.summary -@@ -26,5 +26,5 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-master-15.summary b/pengine/test10/ticket-master-15.summary -index 86b5ec4..9f7b89d 100644 ---- a/pengine/test10/ticket-master-15.summary -+++ b/pengine/test10/ticket-master-15.summary -@@ -26,5 +26,5 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-master-2.summary b/pengine/test10/ticket-master-2.summary -index 3ba0728..96a797e 100644 ---- a/pengine/test10/ticket-master-2.summary -+++ b/pengine/test10/ticket-master-2.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc1:0 (node2) -diff --git a/pengine/test10/ticket-master-21.summary b/pengine/test10/ticket-master-21.summary -index 2229553..ca5bf84 100644 ---- a/pengine/test10/ticket-master-21.summary -+++ b/pengine/test10/ticket-master-21.summary -@@ -31,5 +31,5 @@ OFFLINE: [ node1 ] - rsc_stonith (stonith:null): Started node2 - Master/Slave Set: ms1 [rsc1] - Slaves: [ node2 ] -- Stopped: [ rsc1:1 ] -+ Stopped: [ node1 ] - -diff --git a/pengine/test10/ticket-master-3.summary b/pengine/test10/ticket-master-3.summary -index 86b5ec4..9f7b89d 100644 ---- a/pengine/test10/ticket-master-3.summary -+++ b/pengine/test10/ticket-master-3.summary -@@ -26,5 +26,5 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-master-4.summary b/pengine/test10/ticket-master-4.summary -index 9538cf4..2b6f2c6 100644 ---- a/pengine/test10/ticket-master-4.summary -+++ b/pengine/test10/ticket-master-4.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc1:0 (node2) -diff --git a/pengine/test10/ticket-master-7.summary b/pengine/test10/ticket-master-7.summary -index 9538cf4..2b6f2c6 100644 ---- a/pengine/test10/ticket-master-7.summary -+++ b/pengine/test10/ticket-master-7.summary -@@ -4,7 +4,7 @@ Online: [ node1 node2 ] - - rsc_stonith (stonith:null): Started node1 - Master/Slave Set: ms1 [rsc1] -- Stopped: [ rsc1:0 rsc1:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc1:0 (node2) -diff --git a/pengine/test10/ticket-master-9.summary b/pengine/test10/ticket-master-9.summary -index 2229553..ca5bf84 100644 ---- a/pengine/test10/ticket-master-9.summary -+++ b/pengine/test10/ticket-master-9.summary -@@ -31,5 +31,5 @@ OFFLINE: [ node1 ] - rsc_stonith (stonith:null): Started node2 - Master/Slave Set: ms1 [rsc1] - Slaves: [ node2 ] -- Stopped: [ rsc1:1 ] -+ Stopped: [ node1 ] - -diff --git 
a/pengine/test10/ticket-rsc-sets-1.summary b/pengine/test10/ticket-rsc-sets-1.summary -index 5d1c19f..b893af6 100644 ---- a/pengine/test10/ticket-rsc-sets-1.summary -+++ b/pengine/test10/ticket-rsc-sets-1.summary -@@ -8,9 +8,9 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] -- Stopped: [ rsc5:0 rsc5:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc5:0 (node2) -@@ -42,7 +42,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-10.summary b/pengine/test10/ticket-rsc-sets-10.summary -index e4e3ed5..66f807a 100644 ---- a/pengine/test10/ticket-rsc-sets-10.summary -+++ b/pengine/test10/ticket-rsc-sets-10.summary -@@ -45,7 +45,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-11.summary b/pengine/test10/ticket-rsc-sets-11.summary -index d44934f..47d3923 100644 ---- a/pengine/test10/ticket-rsc-sets-11.summary -+++ b/pengine/test10/ticket-rsc-sets-11.summary -@@ -8,7 +8,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -@@ -25,7 +25,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-12.summary b/pengine/test10/ticket-rsc-sets-12.summary -index b5c4da0..6801c64 100644 ---- a/pengine/test10/ticket-rsc-sets-12.summary -+++ b/pengine/test10/ticket-rsc-sets-12.summary -@@ -8,7 +8,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Started node1 - rsc3 (ocf::pacemaker:Dummy): Started node1 - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -@@ -34,7 +34,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-13.summary b/pengine/test10/ticket-rsc-sets-13.summary -index e4e3ed5..66f807a 100644 ---- a/pengine/test10/ticket-rsc-sets-13.summary -+++ b/pengine/test10/ticket-rsc-sets-13.summary -@@ -45,7 +45,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-14.summary b/pengine/test10/ticket-rsc-sets-14.summary -index e4e3ed5..66f807a 100644 ---- a/pengine/test10/ticket-rsc-sets-14.summary -+++ 
b/pengine/test10/ticket-rsc-sets-14.summary -@@ -45,7 +45,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-2.summary b/pengine/test10/ticket-rsc-sets-2.summary -index 21357a1..e17dfdb 100644 ---- a/pengine/test10/ticket-rsc-sets-2.summary -+++ b/pengine/test10/ticket-rsc-sets-2.summary -@@ -8,7 +8,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-3.summary b/pengine/test10/ticket-rsc-sets-3.summary -index e4e3ed5..66f807a 100644 ---- a/pengine/test10/ticket-rsc-sets-3.summary -+++ b/pengine/test10/ticket-rsc-sets-3.summary -@@ -45,7 +45,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-4.summary b/pengine/test10/ticket-rsc-sets-4.summary -index 5d1c19f..b893af6 100644 ---- a/pengine/test10/ticket-rsc-sets-4.summary -+++ b/pengine/test10/ticket-rsc-sets-4.summary -@@ -8,9 +8,9 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] -- Stopped: [ rsc5:0 rsc5:1 ] -+ Stopped: [ node1 node2 ] - - Transition Summary: - * Start rsc5:0 (node2) -@@ -42,7 +42,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-5.summary b/pengine/test10/ticket-rsc-sets-5.summary -index ceb25af..2982a43 100644 ---- a/pengine/test10/ticket-rsc-sets-5.summary -+++ b/pengine/test10/ticket-rsc-sets-5.summary -@@ -8,7 +8,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -@@ -36,7 +36,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Started node1 - rsc3 (ocf::pacemaker:Dummy): Started node1 - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-6.summary b/pengine/test10/ticket-rsc-sets-6.summary -index 74a6550..7bb1686 100644 ---- a/pengine/test10/ticket-rsc-sets-6.summary -+++ b/pengine/test10/ticket-rsc-sets-6.summary -@@ -8,7 +8,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Started node1 - rsc3 (ocf::pacemaker:Dummy): Started node1 - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-7.summary b/pengine/test10/ticket-rsc-sets-7.summary -index e4e3ed5..66f807a 100644 ---- 
a/pengine/test10/ticket-rsc-sets-7.summary -+++ b/pengine/test10/ticket-rsc-sets-7.summary -@@ -45,7 +45,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-8.summary b/pengine/test10/ticket-rsc-sets-8.summary -index d44934f..47d3923 100644 ---- a/pengine/test10/ticket-rsc-sets-8.summary -+++ b/pengine/test10/ticket-rsc-sets-8.summary -@@ -8,7 +8,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -@@ -25,7 +25,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/ticket-rsc-sets-9.summary b/pengine/test10/ticket-rsc-sets-9.summary -index e4e3ed5..66f807a 100644 ---- a/pengine/test10/ticket-rsc-sets-9.summary -+++ b/pengine/test10/ticket-rsc-sets-9.summary -@@ -45,7 +45,7 @@ Online: [ node1 node2 ] - rsc2 (ocf::pacemaker:Dummy): Stopped - rsc3 (ocf::pacemaker:Dummy): Stopped - Clone Set: clone4 [rsc4] -- Stopped: [ rsc4:0 rsc4:1 ] -+ Stopped: [ node1 node2 ] - Master/Slave Set: ms5 [rsc5] - Slaves: [ node1 node2 ] - -diff --git a/pengine/test10/unmanaged-master.summary b/pengine/test10/unmanaged-master.summary -index 3dded53..066f139 100644 ---- a/pengine/test10/unmanaged-master.summary -+++ b/pengine/test10/unmanaged-master.summary -@@ -4,9 +4,9 @@ Online: [ pcmk-1 pcmk-2 ] - OFFLINE: [ pcmk-3 pcmk-4 ] - - Clone Set: Fencing [FencingChild] (unmanaged) -- FencingChild:2 (stonith:fence_xvm): Started pcmk-2 (unmanaged) -- FencingChild:3 (stonith:fence_xvm): Started pcmk-1 (unmanaged) -- Stopped: [ FencingChild:0 FencingChild:1 ] -+ FencingChild (stonith:fence_xvm): Started pcmk-2 (unmanaged) -+ FencingChild (stonith:fence_xvm): Started pcmk-1 (unmanaged) -+ Stopped: [ pcmk-3 pcmk-4 ] - Resource Group: group-1 - r192.168.122.126 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged) - r192.168.122.127 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged) -@@ -18,13 +18,13 @@ OFFLINE: [ pcmk-3 pcmk-4 ] - lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-2 (unmanaged) - migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged) - Clone Set: Connectivity [ping-1] (unmanaged) -- ping-1:2 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged) -- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) -- Stopped: [ ping-1:0 ping-1:1 ] -+ ping-1 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) -+ Stopped: [ pcmk-3 pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] (unmanaged) -- stateful-1:2 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged) -- stateful-1:3 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged) -- Stopped: [ stateful-1:0 stateful-1:1 ] -+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged) -+ stateful-1 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged) -+ Stopped: [ pcmk-3 pcmk-4 ] - - Transition Summary: - -@@ -37,9 +37,9 @@ Online: [ pcmk-1 pcmk-2 ] - OFFLINE: [ pcmk-3 pcmk-4 ] - - Clone Set: Fencing [FencingChild] (unmanaged) -- 
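Nearly every summary hunk in this patch rewrites "Stopped:" lines from anonymous clone instance ids (rsc1:0, rsc1:1) to the names of the nodes the clone is stopped on. A hypothetical, self-contained sketch of that display rule — report the nodes hosting no active instance instead of the unplaced instance numbers (the data and helper names here are illustrative, not Pacemaker's code):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_NODES 4

    /* nodes in the cluster, and nodes where a clone instance is active */
    static const char *all_nodes[MAX_NODES] = { "node1", "node2", NULL };
    static const char *active_on[MAX_NODES] = { NULL };  /* clone fully stopped */

    static bool node_is_active(const char *node)
    {
        for (int i = 0; i < MAX_NODES && active_on[i]; i++) {
            if (strcmp(active_on[i], node) == 0) {
                return true;
            }
        }
        return false;
    }

    int main(void)
    {
        printf("     Stopped: [");
        for (int i = 0; i < MAX_NODES && all_nodes[i]; i++) {
            if (!node_is_active(all_nodes[i])) {
                printf(" %s", all_nodes[i]);
            }
        }
        printf(" ]\n");   /* -> "     Stopped: [ node1 node2 ]" */
        return 0;
    }
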
FencingChild:2 (stonith:fence_xvm): Started pcmk-2 (unmanaged) -- FencingChild:3 (stonith:fence_xvm): Started pcmk-1 (unmanaged) -- Stopped: [ FencingChild:0 FencingChild:1 ] -+ FencingChild (stonith:fence_xvm): Started pcmk-2 (unmanaged) -+ FencingChild (stonith:fence_xvm): Started pcmk-1 (unmanaged) -+ Stopped: [ pcmk-3 pcmk-4 ] - Resource Group: group-1 - r192.168.122.126 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged) - r192.168.122.127 (ocf::heartbeat:IPaddr): Started pcmk-2 (unmanaged) -@@ -51,11 +51,11 @@ OFFLINE: [ pcmk-3 pcmk-4 ] - lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-2 (unmanaged) - migrator (ocf::pacemaker:Dummy): Started pcmk-4 (unmanaged) - Clone Set: Connectivity [ping-1] (unmanaged) -- ping-1:2 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged) -- ping-1:3 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) -- Stopped: [ ping-1:0 ping-1:1 ] -+ ping-1 (ocf::pacemaker:ping): Started pcmk-2 (unmanaged) -+ ping-1 (ocf::pacemaker:ping): Started pcmk-1 (unmanaged) -+ Stopped: [ pcmk-3 pcmk-4 ] - Master/Slave Set: master-1 [stateful-1] (unmanaged) -- stateful-1:2 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged) -- stateful-1:3 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged) -- Stopped: [ stateful-1:0 stateful-1:1 ] -+ stateful-1 (ocf::pacemaker:Stateful): Master pcmk-2 (unmanaged) -+ stateful-1 (ocf::pacemaker:Stateful): Started pcmk-1 (unmanaged) -+ Stopped: [ pcmk-3 pcmk-4 ] - -diff --git a/pengine/test10/unmanaged-stop-1.dot b/pengine/test10/unmanaged-stop-1.dot -new file mode 100644 -index 0000000..e36de8b ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-1.dot -@@ -0,0 +1,8 @@ -+digraph "g" { -+"all_stopped" [ style=dashed color="red" fontcolor="orange"] -+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"] -+"rsc1_start_0 " [ style=dashed color="red" fontcolor="black"] -+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed] -+"rsc1_stop_0 yingying.site" -> "rsc1_start_0 " [ style = dashed] -+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"] -+} -diff --git a/pengine/test10/unmanaged-stop-1.exp b/pengine/test10/unmanaged-stop-1.exp -new file mode 100644 -index 0000000..7845919 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-1.exp -@@ -0,0 +1,11 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-1.scores b/pengine/test10/unmanaged-stop-1.scores -new file mode 100644 -index 0000000..4cb1c8f ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-1.scores -@@ -0,0 +1,3 @@ -+Allocation scores: -+native_color: rsc1 allocation score on yingying.site: -INFINITY -+native_color: rsc2 allocation score on yingying.site: -INFINITY -diff --git a/pengine/test10/unmanaged-stop-1.summary b/pengine/test10/unmanaged-stop-1.summary -new file mode 100644 -index 0000000..7a0f680 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-1.summary -@@ -0,0 +1,18 @@ -+ -+Current cluster status: -+Online: [ yingying.site ] -+ -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -+Transition Summary: -+ * Stop rsc1 (yingying.site - blocked) -+ -+Executing cluster transition: -+ -+Revised cluster status: -+Online: [ yingying.site ] -+ -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -diff --git a/pengine/test10/unmanaged-stop-1.xml b/pengine/test10/unmanaged-stop-1.xml -new file mode 100644 -index 0000000..93a114f ---- 
/dev/null -+++ b/pengine/test10/unmanaged-stop-1.xml -@@ -0,0 +1,56 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-2.dot b/pengine/test10/unmanaged-stop-2.dot -new file mode 100644 -index 0000000..e36de8b ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-2.dot -@@ -0,0 +1,8 @@ -+digraph "g" { -+"all_stopped" [ style=dashed color="red" fontcolor="orange"] -+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"] -+"rsc1_start_0 " [ style=dashed color="red" fontcolor="black"] -+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed] -+"rsc1_stop_0 yingying.site" -> "rsc1_start_0 " [ style = dashed] -+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"] -+} -diff --git a/pengine/test10/unmanaged-stop-2.exp b/pengine/test10/unmanaged-stop-2.exp -new file mode 100644 -index 0000000..7845919 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-2.exp -@@ -0,0 +1,11 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-2.scores b/pengine/test10/unmanaged-stop-2.scores -new file mode 100644 -index 0000000..4cb1c8f ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-2.scores -@@ -0,0 +1,3 @@ -+Allocation scores: -+native_color: rsc1 allocation score on yingying.site: -INFINITY -+native_color: rsc2 allocation score on yingying.site: -INFINITY -diff --git a/pengine/test10/unmanaged-stop-2.summary b/pengine/test10/unmanaged-stop-2.summary -new file mode 100644 -index 0000000..7a0f680 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-2.summary -@@ -0,0 +1,18 @@ -+ -+Current cluster status: -+Online: [ yingying.site ] -+ -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -+Transition Summary: -+ * Stop rsc1 (yingying.site - blocked) -+ -+Executing cluster transition: -+ -+Revised cluster status: -+Online: [ yingying.site ] -+ -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -diff --git a/pengine/test10/unmanaged-stop-2.xml b/pengine/test10/unmanaged-stop-2.xml -new file mode 100644 -index 0000000..9ed61cd ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-2.xml -@@ -0,0 +1,56 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-3.dot b/pengine/test10/unmanaged-stop-3.dot -new file mode 100644 -index 0000000..02d8d88 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.dot -@@ -0,0 +1,11 @@ -+digraph "g" { -+"all_stopped" [ style=dashed color="red" fontcolor="orange"] -+"group1_stop_0" -> "group1_stopped_0" [ style = dashed] -+"group1_stop_0" -> "rsc1_stop_0 yingying.site" [ style = dashed] -+"group1_stop_0" [ style=bold color="green" fontcolor="orange"] -+"group1_stopped_0" [ style=dashed color="red" fontcolor="orange"] -+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"] -+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed] -+"rsc1_stop_0 yingying.site" -> "group1_stopped_0" [ style = dashed] -+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"] -+} -diff --git a/pengine/test10/unmanaged-stop-3.exp b/pengine/test10/unmanaged-stop-3.exp -new file mode 100644 -index 0000000..2cb2435 ---- 
/dev/null -+++ b/pengine/test10/unmanaged-stop-3.exp -@@ -0,0 +1,19 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-3.scores b/pengine/test10/unmanaged-stop-3.scores -new file mode 100644 -index 0000000..8106031 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.scores -@@ -0,0 +1,6 @@ -+Allocation scores: -+group_color: group1 allocation score on yingying.site: 0 -+group_color: rsc1 allocation score on yingying.site: 0 -+group_color: rsc2 allocation score on yingying.site: -INFINITY -+native_color: rsc1 allocation score on yingying.site: -INFINITY -+native_color: rsc2 allocation score on yingying.site: -INFINITY -diff --git a/pengine/test10/unmanaged-stop-3.summary b/pengine/test10/unmanaged-stop-3.summary -new file mode 100644 -index 0000000..9edcfd5 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.summary -@@ -0,0 +1,21 @@ -+ -+Current cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -+Transition Summary: -+ * Stop rsc1 (yingying.site - blocked) -+ -+Executing cluster transition: -+ * Pseudo action: group1_stop_0 -+ -+Revised cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ -diff --git a/pengine/test10/unmanaged-stop-3.xml b/pengine/test10/unmanaged-stop-3.xml -new file mode 100644 -index 0000000..36ff29f ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-3.xml -@@ -0,0 +1,56 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-4.dot b/pengine/test10/unmanaged-stop-4.dot -new file mode 100644 -index 0000000..02d8d88 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.dot -@@ -0,0 +1,11 @@ -+digraph "g" { -+"all_stopped" [ style=dashed color="red" fontcolor="orange"] -+"group1_stop_0" -> "group1_stopped_0" [ style = dashed] -+"group1_stop_0" -> "rsc1_stop_0 yingying.site" [ style = dashed] -+"group1_stop_0" [ style=bold color="green" fontcolor="orange"] -+"group1_stopped_0" [ style=dashed color="red" fontcolor="orange"] -+"probe_complete yingying.site" [ style=bold color="green" fontcolor="black"] -+"rsc1_stop_0 yingying.site" -> "all_stopped" [ style = dashed] -+"rsc1_stop_0 yingying.site" -> "group1_stopped_0" [ style = dashed] -+"rsc1_stop_0 yingying.site" [ style=dashed color="red" fontcolor="black"] -+} -diff --git a/pengine/test10/unmanaged-stop-4.exp b/pengine/test10/unmanaged-stop-4.exp -new file mode 100644 -index 0000000..2cb2435 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.exp -@@ -0,0 +1,19 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unmanaged-stop-4.scores b/pengine/test10/unmanaged-stop-4.scores -new file mode 100644 -index 0000000..8811025 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.scores -@@ -0,0 +1,8 @@ -+Allocation scores: -+group_color: group1 allocation score on yingying.site: 0 -+group_color: rsc1 allocation score on yingying.site: 0 -+group_color: rsc2 allocation score on yingying.site: -INFINITY -+group_color: rsc3 allocation score on yingying.site: 0 -+native_color: rsc1 allocation score on yingying.site: -INFINITY -+native_color: rsc2 allocation score on yingying.site: 
-INFINITY -+native_color: rsc3 allocation score on yingying.site: -INFINITY -diff --git a/pengine/test10/unmanaged-stop-4.summary b/pengine/test10/unmanaged-stop-4.summary -new file mode 100644 -index 0000000..96996c3 ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.summary -@@ -0,0 +1,23 @@ -+ -+Current cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ rsc3 (ocf::heartbeat:Dummy): Stopped -+ -+Transition Summary: -+ * Stop rsc1 (yingying.site - blocked) -+ -+Executing cluster transition: -+ * Pseudo action: group1_stop_0 -+ -+Revised cluster status: -+Online: [ yingying.site ] -+ -+ Resource Group: group1 -+ rsc1 (ocf::pacemaker:Dummy): Started yingying.site -+ rsc2 (ocf::pacemaker:Dummy): Started yingying.site (unmanaged) FAILED -+ rsc3 (ocf::heartbeat:Dummy): Stopped -+ -diff --git a/pengine/test10/unmanaged-stop-4.xml b/pengine/test10/unmanaged-stop-4.xml -new file mode 100644 -index 0000000..5a793ca ---- /dev/null -+++ b/pengine/test10/unmanaged-stop-4.xml -@@ -0,0 +1,65 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/unrunnable-1.summary b/pengine/test10/unrunnable-1.summary -index ec05391..e9597f3 100644 ---- a/pengine/test10/unrunnable-1.summary -+++ b/pengine/test10/unrunnable-1.summary -@@ -27,7 +27,7 @@ Transition Summary: - * Start rsc_c001n02 (c001n03 - blocked) - * Start rsc_c001n03 (c001n03 - blocked) - * Start rsc_c001n01 (c001n03 - blocked) -- * Stop child_DoFencing:1 (c001n02) -+ * Stop child_DoFencing:1 (c001n02 - blocked) - - Executing cluster transition: - * Resource action: DcIPaddr monitor on c001n03 -diff --git a/pengine/test10/use-after-free-merge.summary b/pengine/test10/use-after-free-merge.summary -index cc17523..c4e9ac6 100644 ---- a/pengine/test10/use-after-free-merge.summary -+++ b/pengine/test10/use-after-free-merge.summary -@@ -7,7 +7,7 @@ Online: [ hex-13 hex-14 ] - d0 (ocf::heartbeat:Dummy): Stopped - d1 (ocf::heartbeat:Dummy): Stopped - Master/Slave Set: ms0 [s0] -- Stopped: [ s0:0 s0:1 ] -+ Stopped: [ hex-13 hex-14 ] - - Transition Summary: - * Start fencing-sbd (hex-14) -diff --git a/pengine/test10/utilization-order2.summary b/pengine/test10/utilization-order2.summary -index 6a6d845..7871579 100644 ---- a/pengine/test10/utilization-order2.summary -+++ b/pengine/test10/utilization-order2.summary -@@ -33,6 +33,6 @@ Online: [ node1 node2 ] - rsc3 (ocf::pacemaker:Dummy): Started node2 - Clone Set: clone-rsc2 [rsc2] - Started: [ node2 ] -- Stopped: [ rsc2:1 ] -+ Stopped: [ node1 ] - rsc1 (ocf::pacemaker:Dummy): Stopped - -diff --git a/pengine/test10/utilization-order4.summary b/pengine/test10/utilization-order4.summary -index 22a9610..20fe903 100644 ---- a/pengine/test10/utilization-order4.summary -+++ b/pengine/test10/utilization-order4.summary -@@ -53,8 +53,8 @@ Online: [ deglxen001 ] - stonith_sbd (stonith:external/sbd): Started deglxen001 - Clone Set: clone-nfs [grp-nfs] - Started: [ deglxen001 ] -- Stopped: [ grp-nfs:1 ] -+ Stopped: [ deglxen002 ] - Clone Set: clone-ping [prim-ping] - Started: [ deglxen001 ] -- Stopped: [ prim-ping:1 ] -+ Stopped: [ deglxen002 ] - -diff --git a/pengine/test10/whitebox-fail1.exp b/pengine/test10/whitebox-fail1.exp -index cc46c36..5741955 100644 ---- a/pengine/test10/whitebox-fail1.exp -+++ 
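The new unmanaged-stop-* cases and the unrunnable-1 change above all show a stop being reported as "(... - blocked)": when an action the stop is ordered after cannot run (here, rsc2 is unmanaged and FAILED, and group members stop in reverse order), the stop is marked blocked rather than scheduled. A minimal model of that ordering rule — this is a simplified illustration, not the policy engine's actual data structures:

    #include <stdbool.h>
    #include <stdio.h>

    struct action {
        const char *name;
        bool runnable;          /* can the executor actually run this? */
        struct action *after;   /* action that must complete first, if any */
    };

    /* An action is blocked if anything it is ordered after is not runnable. */
    static bool is_blocked(const struct action *a)
    {
        for (const struct action *dep = a->after; dep; dep = dep->after) {
            if (!dep->runnable) {
                return true;
            }
        }
        return false;
    }

    int main(void)
    {
        /* rsc2 is unmanaged and FAILED, so its stop is not runnable ... */
        struct action stop_rsc2 = { "rsc2_stop_0", false, NULL };
        /* ... and rsc1's stop is ordered after it (reverse group order). */
        struct action stop_rsc1 = { "rsc1_stop_0", true, &stop_rsc2 };

        printf("%s: %s\n", stop_rsc1.name,
               is_blocked(&stop_rsc1) ? "blocked" : "scheduled");
        return 0;
    }
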
b/pengine/test10/whitebox-fail1.exp -@@ -173,7 +173,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-fail1.summary b/pengine/test10/whitebox-fail1.summary -index 4df3c74..8bf6d52 100644 ---- a/pengine/test10/whitebox-fail1.summary -+++ b/pengine/test10/whitebox-fail1.summary -@@ -1,14 +1,13 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 FAILED - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc1 FAILED - C (ocf::pacemaker:Dummy): Started lxc2 -@@ -39,7 +38,7 @@ Executing cluster transition: - * Resource action: B monitor=10000 on lxc1 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-fail2.exp b/pengine/test10/whitebox-fail2.exp -index b8e4d9a..5741955 100644 ---- a/pengine/test10/whitebox-fail2.exp -+++ b/pengine/test10/whitebox-fail2.exp -@@ -8,13 +8,13 @@ - - - -- -+ - - - - - -- -+ - - - -@@ -68,7 +68,7 @@ - - - -- -+ - - - -@@ -165,7 +165,7 @@ - - - -- -+ - - - -@@ -173,7 +173,7 @@ - - - -- -+ - - - -@@ -196,7 +196,7 @@ - - - -- -+ - - - -@@ -215,7 +215,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-fail2.summary b/pengine/test10/whitebox-fail2.summary -index d185251..81407d3 100644 ---- a/pengine/test10/whitebox-fail2.summary -+++ b/pengine/test10/whitebox-fail2.summary -@@ -1,14 +1,13 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 FAILED - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc1 FAILED - C (ocf::pacemaker:Dummy): Started lxc2 -@@ -39,7 +38,7 @@ Executing cluster transition: - * Resource action: B monitor=10000 on lxc1 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-fail2.xml b/pengine/test10/whitebox-fail2.xml -index 496189d..2244c48 100644 ---- a/pengine/test10/whitebox-fail2.xml -+++ b/pengine/test10/whitebox-fail2.xml -@@ -146,13 +146,13 @@ - - - -- -+ - - - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-fail3.dot b/pengine/test10/whitebox-fail3.dot -new file mode 100644 -index 0000000..278d0d2 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.dot -@@ -0,0 +1,40 @@ -+digraph "g" { -+"18builder_monitor_0 dvossel-laptop2" -> "probe_complete dvossel-laptop2" [ style = bold] -+"18builder_monitor_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"18builder_monitor_30000 
dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"18builder_start_0 dvossel-laptop2" -> "18builder_monitor_30000 dvossel-laptop2" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "FAKE_start_0 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "W:1_monitor_10000 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "W:1_start_0 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "X:1_monitor_10000 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" -> "X:1_start_0 18builder" [ style = bold] -+"18builder_start_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"FAKE_start_0 18builder" [ style=bold color="green" fontcolor="black"] -+"FAKE_stop_0 dvossel-laptop2" -> "FAKE_start_0 18builder" [ style = bold] -+"FAKE_stop_0 dvossel-laptop2" -> "all_stopped" [ style = bold] -+"FAKE_stop_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"W-master_running_0" [ style=bold color="green" fontcolor="orange"] -+"W-master_start_0" -> "W-master_running_0" [ style = bold] -+"W-master_start_0" -> "W:1_start_0 18builder" [ style = bold] -+"W-master_start_0" [ style=bold color="green" fontcolor="orange"] -+"W:1_monitor_10000 18builder" [ style=bold color="green" fontcolor="black"] -+"W:1_start_0 18builder" -> "W-master_running_0" [ style = bold] -+"W:1_start_0 18builder" -> "W:1_monitor_10000 18builder" [ style = bold] -+"W:1_start_0 18builder" [ style=bold color="green" fontcolor="black"] -+"X-master_running_0" [ style=bold color="green" fontcolor="orange"] -+"X-master_start_0" -> "X-master_running_0" [ style = bold] -+"X-master_start_0" -> "X:1_start_0 18builder" [ style = bold] -+"X-master_start_0" [ style=bold color="green" fontcolor="orange"] -+"X:1_monitor_10000 18builder" [ style=bold color="green" fontcolor="black"] -+"X:1_start_0 18builder" -> "X-master_running_0" [ style = bold] -+"X:1_start_0 18builder" -> "X:1_monitor_10000 18builder" [ style = bold] -+"X:1_start_0 18builder" [ style=bold color="green" fontcolor="black"] -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"probe_complete dvossel-laptop2" -> "probe_complete" [ style = bold] -+"probe_complete dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+"probe_complete" -> "18builder_start_0 dvossel-laptop2" [ style = bold] -+"probe_complete" -> "FAKE_stop_0 dvossel-laptop2" [ style = bold] -+"probe_complete" [ style=bold color="green" fontcolor="orange"] -+"vm_start_0 dvossel-laptop2" -> "18builder_start_0 dvossel-laptop2" [ style = bold] -+"vm_start_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/whitebox-fail3.exp b/pengine/test10/whitebox-fail3.exp -new file mode 100644 -index 0000000..1b8d144 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.exp -@@ -0,0 +1,225 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git 
a/pengine/test10/whitebox-fail3.scores b/pengine/test10/whitebox-fail3.scores -new file mode 100644 -index 0000000..6e09dd5 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.scores -@@ -0,0 +1,64 @@ -+Allocation scores: -+W:0 promotion score on dvossel-laptop2: 10 -+W:1 promotion score on 18builder: -1 -+W:2 promotion score on none: 0 -+X:0 promotion score on dvossel-laptop2: 10 -+X:1 promotion score on 18builder: -1 -+X:2 promotion score on none: 0 -+clone_color: W-master allocation score on 18builder: 0 -+clone_color: W-master allocation score on 18node1: 0 -+clone_color: W-master allocation score on dvossel-laptop2: 0 -+clone_color: W:0 allocation score on 18builder: 0 -+clone_color: W:0 allocation score on 18node1: 0 -+clone_color: W:0 allocation score on dvossel-laptop2: 11 -+clone_color: W:1 allocation score on 18builder: 0 -+clone_color: W:1 allocation score on 18node1: 0 -+clone_color: W:1 allocation score on dvossel-laptop2: 0 -+clone_color: W:2 allocation score on 18builder: 0 -+clone_color: W:2 allocation score on 18node1: 0 -+clone_color: W:2 allocation score on dvossel-laptop2: 0 -+clone_color: X-master allocation score on 18builder: 0 -+clone_color: X-master allocation score on 18node1: 0 -+clone_color: X-master allocation score on dvossel-laptop2: 0 -+clone_color: X:0 allocation score on 18builder: 0 -+clone_color: X:0 allocation score on 18node1: 0 -+clone_color: X:0 allocation score on dvossel-laptop2: 11 -+clone_color: X:1 allocation score on 18builder: 0 -+clone_color: X:1 allocation score on 18node1: 0 -+clone_color: X:1 allocation score on dvossel-laptop2: 0 -+clone_color: X:2 allocation score on 18builder: 0 -+clone_color: X:2 allocation score on 18node1: 0 -+clone_color: X:2 allocation score on dvossel-laptop2: 0 -+native_color: 18builder allocation score on 18builder: -INFINITY -+native_color: 18builder allocation score on 18node1: -INFINITY -+native_color: 18builder allocation score on dvossel-laptop2: 0 -+native_color: 18node1 allocation score on 18builder: -INFINITY -+native_color: 18node1 allocation score on 18node1: -INFINITY -+native_color: 18node1 allocation score on dvossel-laptop2: -INFINITY -+native_color: FAKE allocation score on 18builder: 0 -+native_color: FAKE allocation score on 18node1: 0 -+native_color: FAKE allocation score on dvossel-laptop2: 0 -+native_color: W:0 allocation score on 18builder: 0 -+native_color: W:0 allocation score on 18node1: -INFINITY -+native_color: W:0 allocation score on dvossel-laptop2: 11 -+native_color: W:1 allocation score on 18builder: 0 -+native_color: W:1 allocation score on 18node1: -INFINITY -+native_color: W:1 allocation score on dvossel-laptop2: -INFINITY -+native_color: W:2 allocation score on 18builder: -INFINITY -+native_color: W:2 allocation score on 18node1: -INFINITY -+native_color: W:2 allocation score on dvossel-laptop2: -INFINITY -+native_color: X:0 allocation score on 18builder: 0 -+native_color: X:0 allocation score on 18node1: -INFINITY -+native_color: X:0 allocation score on dvossel-laptop2: 11 -+native_color: X:1 allocation score on 18builder: 0 -+native_color: X:1 allocation score on 18node1: -INFINITY -+native_color: X:1 allocation score on dvossel-laptop2: -INFINITY -+native_color: X:2 allocation score on 18builder: -INFINITY -+native_color: X:2 allocation score on 18node1: -INFINITY -+native_color: X:2 allocation score on dvossel-laptop2: -INFINITY -+native_color: vm allocation score on 18builder: -INFINITY -+native_color: vm allocation score on 18node1: -INFINITY -+native_color: vm allocation 
score on dvossel-laptop2: 0 -+native_color: vm2 allocation score on 18builder: -INFINITY -+native_color: vm2 allocation score on 18node1: -INFINITY -+native_color: vm2 allocation score on dvossel-laptop2: -INFINITY -diff --git a/pengine/test10/whitebox-fail3.summary b/pengine/test10/whitebox-fail3.summary -new file mode 100644 -index 0000000..1d25724 ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.summary -@@ -0,0 +1,56 @@ -+ -+Current cluster status: -+Online: [ dvossel-laptop2 ] -+OFFLINE: [ 18builder:vm 18node1:vm2 ] -+ -+ vm (ocf::heartbeat:VirtualDomain): Stopped -+ vm2 (ocf::heartbeat:VirtualDomain): Stopped -+ FAKE (ocf::pacemaker:Dummy): Started dvossel-laptop2 -+ Master/Slave Set: W-master [W] -+ Masters: [ dvossel-laptop2 ] -+ Stopped: [ 18builder 18node1 ] -+ Master/Slave Set: X-master [X] -+ Masters: [ dvossel-laptop2 ] -+ Stopped: [ 18builder 18node1 ] -+ -+Transition Summary: -+ * Start vm (dvossel-laptop2) -+ * Move FAKE (Started dvossel-laptop2 -> 18builder) -+ * Start W:1 (18builder) -+ * Start X:1 (18builder) -+ * Start 18builder (dvossel-laptop2) -+ -+Executing cluster transition: -+ * Resource action: vm start on dvossel-laptop2 -+ * Pseudo action: W-master_start_0 -+ * Pseudo action: X-master_start_0 -+ * Resource action: 18builder monitor on dvossel-laptop2 -+ * Pseudo action: probe_complete -+ * Resource action: FAKE stop on dvossel-laptop2 -+ * Resource action: 18builder start on dvossel-laptop2 -+ * Pseudo action: all_stopped -+ * Resource action: FAKE start on 18builder -+ * Resource action: W start on 18builder -+ * Pseudo action: W-master_running_0 -+ * Resource action: X start on 18builder -+ * Pseudo action: X-master_running_0 -+ * Resource action: 18builder monitor=30000 on dvossel-laptop2 -+ * Resource action: W monitor=10000 on 18builder -+ * Resource action: X monitor=10000 on 18builder -+ -+Revised cluster status: -+Online: [ 18builder:vm dvossel-laptop2 ] -+OFFLINE: [ 18node1:vm2 ] -+ -+ vm (ocf::heartbeat:VirtualDomain): Started dvossel-laptop2 -+ vm2 (ocf::heartbeat:VirtualDomain): Stopped -+ FAKE (ocf::pacemaker:Dummy): Started 18builder -+ Master/Slave Set: W-master [W] -+ Masters: [ dvossel-laptop2 ] -+ Slaves: [ 18builder ] -+ Stopped: [ 18node1 ] -+ Master/Slave Set: X-master [X] -+ Masters: [ dvossel-laptop2 ] -+ Slaves: [ 18builder ] -+ Stopped: [ 18node1 ] -+ -diff --git a/pengine/test10/whitebox-fail3.xml b/pengine/test10/whitebox-fail3.xml -new file mode 100644 -index 0000000..081708d ---- /dev/null -+++ b/pengine/test10/whitebox-fail3.xml -@@ -0,0 +1,104 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/whitebox-move.exp b/pengine/test10/whitebox-move.exp -index 1a3d89b..8dbdda0 100644 ---- a/pengine/test10/whitebox-move.exp -+++ b/pengine/test10/whitebox-move.exp -@@ -181,7 +181,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-move.summary b/pengine/test10/whitebox-move.summary -index eacefdf..6dc2f6f 100644 ---- a/pengine/test10/whitebox-move.summary -+++ b/pengine/test10/whitebox-move.summary -@@ -1,6 +1,6 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node1 - container2 
(ocf::heartbeat:VirtualDomain): Started 18node2 -@@ -34,7 +34,7 @@ Executing cluster transition: - * Resource action: lxc1 monitor=30000 on 18node2 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-start.exp b/pengine/test10/whitebox-start.exp -index fc54e18..4dcfdc4 100644 ---- a/pengine/test10/whitebox-start.exp -+++ b/pengine/test10/whitebox-start.exp -@@ -80,7 +80,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/whitebox-start.summary b/pengine/test10/whitebox-start.summary -index a3dd39c..e5d654b 100644 ---- a/pengine/test10/whitebox-start.summary -+++ b/pengine/test10/whitebox-start.summary -@@ -1,14 +1,14 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Stopped - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] -+ Stopped: [ lxc1 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc2 - C (ocf::pacemaker:Dummy): Started lxc2 -@@ -29,7 +29,7 @@ Executing cluster transition: - * Resource action: M monitor=10000 on lxc1 - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node1 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -diff --git a/pengine/test10/whitebox-stop.summary b/pengine/test10/whitebox-stop.summary -index c1e5f96..4116571 100644 ---- a/pengine/test10/whitebox-stop.summary -+++ b/pengine/test10/whitebox-stop.summary -@@ -1,6 +1,6 @@ - - Current cluster status: --Online: [ 18node1 18node2 18node3 lxc1 lxc2 ] -+Online: [ 18node1 18node2 18node3 lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node2 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 -@@ -30,15 +30,15 @@ Executing cluster transition: - * Pseudo action: all_stopped - - Revised cluster status: --Online: [ 18node1 18node2 18node3 lxc2 ] --OFFLINE: [ lxc1 ] -+Online: [ 18node1 18node2 18node3 lxc2:container2 ] -+OFFLINE: [ lxc1:container1 ] - - container1 (ocf::heartbeat:VirtualDomain): Stopped - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] -- Stopped: [ M:4 ] -+ Stopped: [ lxc1 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc2 - C (ocf::pacemaker:Dummy): Started lxc2 -diff --git a/tools/Makefile.am b/tools/Makefile.am -index 0e7b1a9..ad469d2 100644 ---- a/tools/Makefile.am -+++ b/tools/Makefile.am -@@ -5,17 +5,17 @@ - # modify it under the terms of the GNU General Public License - # as published by the Free Software Foundation; either version 2 - # of the License, or (at your option) any later version. --# -+# - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - # GNU General Public License for more details. --# -+# - # You should have received a copy of the GNU General Public License - # along with this program; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - # --MAINTAINERCLEANFILES = Makefile.in -+MAINTAINERCLEANFILES = Makefile.in - - INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl -@@ -23,15 +23,18 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - COMMONLIBS = \ - $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cib/libcib.la \ -- $(CURSESLIBS) $(CLUSTERLIBS) -+ $(CURSESLIBS) $(CLUSTERLIBS) - - headerdir = $(pkgincludedir)/crm --header_HEADERS = -+header_HEADERS = - - pcmkdir = $(datadir)/$(PACKAGE) - pcmk_DATA = report.common report.collector - - sbin_SCRIPTS = crm_report crm_standby crm_master crm_failcount -+if BUILD_CIBSECRETS -+sbin_SCRIPTS += cibsecret -+endif - EXTRA_DIST = $(sbin_SCRIPTS) - - halibdir = $(CRM_DAEMON_DIR) -@@ -40,9 +43,9 @@ halib_PROGRAMS = attrd - sbin_PROGRAMS = crm_simulate crmadmin cibadmin crm_node crm_attribute crm_resource crm_verify \ - crm_shadow attrd_updater crm_diff crm_mon iso8601 crm_ticket crm_error - --clidir = $(datadir)/$(PACKAGE)/tests/cli --cli_SCRIPTS = regression.sh --cli_DATA = regression.exp cli.supp -+testdir = $(datadir)/$(PACKAGE)/tests/cli -+test_SCRIPTS = regression.sh -+test_DATA = regression.exp - - if BUILD_HEARTBEAT_SUPPORT - sbin_PROGRAMS += crm_uuid -@@ -60,7 +63,7 @@ endif - - ## SOURCES - --noinst_HEADERS = -+noinst_HEADERS = - - crmadmin_SOURCES = crmadmin.c - crmadmin_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ -@@ -73,10 +76,10 @@ crm_uuid_SOURCES = crm_uuid.c - crm_uuid_LDADD = $(COMMONLIBS) $(top_builddir)/lib/cluster/libcrmcluster.la - - cibadmin_SOURCES = cibadmin.c --cibadmin_LDADD = $(COMMONLIBS) -+cibadmin_LDADD = $(COMMONLIBS) - - crm_shadow_SOURCES = cib_shadow.c --crm_shadow_LDADD = $(COMMONLIBS) -+crm_shadow_LDADD = $(COMMONLIBS) - - crm_node_SOURCES = crm_node.c - crm_node_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ -@@ -108,7 +111,7 @@ crm_verify_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ - $(COMMONLIBS) - - crm_attribute_SOURCES = crm_attribute.c --crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(COMMONLIBS) -+crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(COMMONLIBS) - - crm_resource_SOURCES = crm_resource.c - crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ -@@ -119,7 +122,7 @@ crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ - $(COMMONLIBS) - - iso8601_SOURCES = test.iso8601.c --iso8601_LDADD = $(COMMONLIBS) -+iso8601_LDADD = $(COMMONLIBS) - - attrd_SOURCES = attrd.c - attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(COMMONLIBS) -@@ -144,13 +147,14 @@ endif - - if BUILD_OPENIPMI_SERVICELOG - ipmiservicelogd_SOURCES = ipmiservicelogd.c --ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) -+ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) - ipmiservicelogd_LDFLAGS = $(top_builddir)/lib/common/libcrmcommon.la $(OPENIPMI_SERVICELOG_LIBS) $(SERVICELOG_LIBS) - endif - - %.8: % crm_attribute - echo Creating $@ - chmod a+x $(top_builddir)/tools/$< -+ $(top_builddir)/tools/$< --help - PATH=$(top_builddir)/tools:$$PATH $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" 
$(top_builddir)/tools/$< - - clean-generic: -diff --git a/tools/attrd.c b/tools/attrd.c -index 571708a..1e834ea 100644 ---- a/tools/attrd.c -+++ b/tools/attrd.c -@@ -179,7 +179,7 @@ attrd_shutdown(int nsig) - if (mainloop != NULL && g_main_is_running(mainloop)) { - g_main_quit(mainloop); - } else { -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - } - -@@ -296,7 +296,7 @@ attrd_ha_connection_destroy(gpointer user_data) - g_main_quit(mainloop); - return; - } -- crm_exit(EX_OK); -+ crm_exit(pcmk_ok); - } - - static void -@@ -377,7 +377,7 @@ attrd_ais_destroy(gpointer unused) - g_main_quit(mainloop); - return; - } -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - } - #endif - -@@ -394,7 +394,7 @@ attrd_cib_connection_destroy(gpointer user_data) - } else { - /* eventually this will trigger a reconnect, not a shutdown */ - crm_err("Connection to the CIB terminated..."); -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - return; -@@ -483,7 +483,7 @@ cib_connect(void *user_data) - - if (was_err) { - crm_err("Aborting startup"); -- crm_exit(100); -+ crm_exit(DAEMON_RESPAWN_STOP); - } - - cib_conn = local_conn; -@@ -565,11 +565,7 @@ main(int argc, char **argv) - crm_info("Cluster connection active"); - - if (was_err == FALSE) { -- ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, &ipc_callbacks); -- if (ipcs == NULL) { -- crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); -- crm_exit(100); -- } -+ attrd_ipc_server_init(&ipcs, &ipc_callbacks); - } - - crm_info("Accepting attribute updates"); -@@ -618,9 +614,8 @@ main(int argc, char **argv) - - g_hash_table_destroy(attr_hash); - free(attrd_uuid); -- empty_uuid_cache(); - -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - struct attrd_callback_s { -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index 10d9c8a..5c8944d 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -140,13 +140,14 @@ main(int argc, char **argv) - fprintf(stderr, "-Q,--query is not yet implemented, use -D to delete existing values\n\n"); - crm_help('?', EX_USAGE); - -- } else -- if (FALSE == -- attrd_update_delegate(NULL, command, NULL, attr_name, attr_value, attr_section, -- attr_set, attr_dampen, NULL)) { -- fprintf(stderr, "Could not update %s=%s\n", attr_name, attr_value); -- crm_exit(1); -+ } else { -+ int rc = attrd_update_delegate(NULL, command, NULL, attr_name, attr_value, attr_section, -+ attr_set, attr_dampen, NULL); -+ if (rc != pcmk_ok) { -+ fprintf(stderr, "Could not update %s=%s: %s (%d)\n", attr_name, attr_value, pcmk_strerror(rc), rc); -+ } -+ crm_exit(rc); - } - -- crm_exit(0); -+ crm_exit(pcmk_ok); - } -diff --git a/tools/cib_shadow.c b/tools/cib_shadow.c -index d33be20..ebb17d3 100644 ---- a/tools/cib_shadow.c -+++ b/tools/cib_shadow.c -@@ -1,17 +1,17 @@ - --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
-- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -148,7 +148,7 @@ static struct crm_option long_options[] = { - {"help", 0, 0, '?', "\t\tThis text"}, - {"version", 0, 0, '$', "\t\tVersion information" }, - {"verbose", 0, 0, 'V', "\t\tIncrease debug output"}, -- -+ - {"-spacer-", 1, 0, '-', "\nQueries:"}, - {"which", no_argument, NULL, 'w', "\t\tIndicate the active shadow copy"}, - {"display", no_argument, NULL, 'p', "\t\tDisplay the contents of the active shadow copy"}, -@@ -163,7 +163,7 @@ static struct crm_option long_options[] = { - {"delete", required_argument, NULL, 'D', "\tDelete the contents of the named shadow copy"}, - {"reset", required_argument, NULL, 'r', "\tRecreate the named shadow copy from the active cluster configuration"}, - {"switch", required_argument, NULL, 's', "\t(Advanced) Switch to the named shadow copy"}, -- -+ - {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, - {"force", no_argument, NULL, 'f', "\t\t(Advanced) Force the action to be performed"}, - {"batch", no_argument, NULL, 'b', "\t\t(Advanced) Don't spawn a new shell" }, -@@ -180,7 +180,7 @@ static struct crm_option long_options[] = { - {"-spacer-", 1, 0, '-', " crm_shadow --delete myShadow", pcmk_option_example}, - {"-spacer-", 1, 0, '-', "Upload the current shadow configuration (named myShadow) to the running cluster:", pcmk_option_paragraph}, - {"-spacer-", 1, 0, '-', " crm_shadow --commit myShadow", pcmk_option_example}, -- -+ - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -226,7 +226,16 @@ main(int argc, char **argv) - case 'F': - command = flag; - free(shadow); -- shadow = strdup(getenv("CIB_shadow")); -+ shadow = NULL; -+ { -+ const char *env = getenv("CIB_shadow"); -+ if(env) { -+ shadow = strdup(env); -+ } else { -+ fprintf(stderr, "No active shadow configuration defined\n"); -+ crm_exit(ENOENT); -+ } -+ } - break; - case 'e': - case 'c': -diff --git a/tools/cibadmin.c b/tools/cibadmin.c -index 886fd9c..0fef594 100644 ---- a/tools/cibadmin.c -+++ b/tools/cibadmin.c -@@ -71,6 +71,8 @@ int request_id = 0; - int operation_status = 0; - cib_t *the_cib = NULL; - gboolean force_flag = FALSE; -+gboolean quiet = FALSE; -+int bump_log_num = 0; - - /* *INDENT-OFF* */ - static struct crm_option long_options[] = { -@@ -227,7 +229,7 @@ main(int argc, char **argv) - - int option_index = 0; - -- crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, FALSE); -+ crm_system_name = strdup("cibadmin"); - crm_set_options(NULL, "command [options] [data]", long_options, - "Provides direct access to the cluster configuration." - "\n\nAllows the configuration, or sections of it, to be queried, modified, replaced and deleted." 
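The pair of cibadmin.c hunks above and below implement a deferred-logging pattern: option parsing no longer initialises logging on the spot; -V only increments bump_log_num and -Q only sets a quiet flag, and crm_log_init() then runs exactly once after the scan, followed by the saved number of crm_bump_log_level() calls. A minimal standalone sketch of the same idea, with log_init() as a hypothetical stand-in for the real logging setup:

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for the real logging initialisation. */
static void log_init(int quiet, int verbosity)
{
    fprintf(stderr, "logging: quiet=%d verbosity=%d\n", quiet, verbosity);
}

int main(int argc, char **argv)
{
    int quiet = 0;
    int bump_log_num = 0;

    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-V") == 0) {
            bump_log_num++;        /* remember the request, act later */
        } else if (strcmp(argv[i], "-Q") == 0) {
            quiet = 1;             /* queries default to quiet output */
        }
    }
    if (bump_log_num > 0) {
        quiet = 0;                 /* an explicit -V wins over -Q */
    }
    log_init(quiet, bump_log_num); /* single init, after parsing */
    return 0;
}

Initialising once after the full scan means -Q and -V interact the same way no matter where they appear on the command line.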
-@@ -266,6 +268,7 @@ main(int argc, char **argv) - break; - case 'Q': - cib_action = CIB_OP_QUERY; -+ quiet = TRUE; - break; - case 'P': - cib_action = CIB_OP_APPLY_DIFF; -@@ -316,7 +319,7 @@ - break; - case 'V': - command_options = command_options | cib_verbose; -- crm_bump_log_level(argc, argv); -+ bump_log_num++; - break; - case '?': - case '$': -@@ -384,6 +387,15 @@ - break; - } - } -+ -+ if (bump_log_num > 0) { -+ quiet = FALSE; -+ } -+ crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, quiet); -+ while (bump_log_num > 0) { -+ crm_bump_log_level(argc, argv); -+ bump_log_num--; -+ } - - if (optind < argc) { - printf("non-option ARGV-elements: "); -@@ -469,7 +481,7 @@ - if (exit_code != pcmk_ok) { - crm_err("Init failed, could not perform requested operations"); - fprintf(stderr, "Init failed, could not perform requested operations\n"); -- return -exit_code; -+ return crm_exit(-exit_code); - } - - exit_code = do_work(input, command_options, &output); -@@ -523,7 +535,7 @@ - the_cib->cmds->signoff(the_cib); - cib_delete(the_cib); - bail: -- return crm_exit(-exit_code); -+ return crm_exit(exit_code); - } - - int -diff --git a/tools/cibsecret.in b/tools/cibsecret.in -new file mode 100644 -index 0000000..157feee ---- /dev/null -+++ b/tools/cibsecret.in -@@ -0,0 +1,380 @@ -+#!/bin/sh -+ -+# Copyright (C) 2011 Dejan Muhamedagic -+# -+# This program is free software; you can redistribute it and/or -+# modify it under the terms of the GNU General Public -+# License as published by the Free Software Foundation; either -+# version 2.1 of the License, or (at your option) any later version. -+# -+# This software is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public -+# License along with this library; if not, write to the Free Software -+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+# -+ -+# WARNING: -+# -+# The CIB secrets interface and implementation is still being -+# discussed, it may change -+ -+# -+# cibsecret: manage the secrets directory /var/lib/heartbeat/lrm/secrets -+# -+# secrets are ascii files, holding just one value per file: -+# /var/lib/heartbeat/lrm/secrets/<rsc>/<param> -+# -+# NB: this program depends on utillib.sh -+# -+ -+. @OCF_ROOT_DIR@/lib/heartbeat/ocf-shellfuncs -+ -+LRM_CIBSECRETS=@LRM_CIBSECRETS_DIR@ -+LRM_LEGACY_CIBSECRETS=@LRM_LEGACY_CIBSECRETS_DIR@ -+ -+PROG=`basename $0` -+SSH_OPTS="-o StrictHostKeyChecking=no" -+ -+usage() { -+ echo "cibsecret - A tool for managing cib secrets"; -+ echo ""; -+ echo "usage: $PROG [-C] <command> <parameters>"; -+ echo ""; -+ echo "-C: don't read/write the CIB" -+ echo "" -+ echo "command: set | delete | stash | unstash | get | check | sync" -+ echo "" -+ echo " set <rsc> <param> <value>" -+ echo "" -+ echo " get <rsc> <param>" -+ echo "" -+ echo " check <rsc> <param>" -+ echo "" -+ echo " stash <rsc> <param> (if not -C)" -+ echo "" -+ echo " unstash <rsc> <param> (if not -C)" -+ echo "" -+ echo " delete <rsc> <param>" -+ echo "" -+ echo " sync" -+ echo "" -+ echo "stash/unstash: move the parameter from/to the CIB (if you already" -+ echo "have the parameter set in the CIB)." -+ echo "" -+ echo "set/delete: add/remove a parameter from the local file." -+ echo "" -+ echo "get: display the parameter from the local file."
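The scheme the script's layout implies: each secret lives as a one-line file under $LRM_CIBSECRETS/<rsc>/<param> with a companion <param>.sign file holding an MD5 hash of the value, while the CIB itself only carries the magic placeholder "lrm://" (the MAGIC variable defined further down). A rough C equivalent of the check step, which the shell code performs with `printf $value | md5sum` (this assumes glib and is purely illustrative, not pacemaker code):

#include <glib.h>
#include <stdio.h>

/* Compare a secret's value against the MD5 recorded in its .sign file,
 * approximately what "cibsecret check <rsc> <param>" does; stored_md5
 * would be read from .../secrets/<rsc>/<param>.sign. */
static gboolean secret_matches(const char *value, const char *stored_md5)
{
    gchar *sum = g_compute_checksum_for_string(G_CHECKSUM_MD5, value, -1);
    gboolean ok = (g_strcmp0(sum, stored_md5) == 0);

    g_free(sum);
    return ok;
}

int main(void)
{
    /* Produces the same digest as: printf 'SecreT_PASS' | md5sum */
    gchar *sum = g_compute_checksum_for_string(G_CHECKSUM_MD5, "SecreT_PASS", -1);

    printf("%s %s\n", sum, secret_matches("SecreT_PASS", sum) ? "ok" : "mismatch");
    g_free(sum);
    return 0;
}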
-+ echo "" -+ echo "check: verify MD5 hash of the parameter from the local file and the CIB." -+ echo "" -+ echo "sync: copy $LRM_CIBSECRETS to other nodes." -+ echo "" -+ echo "Examples:" -+ echo "" -+ echo " $PROG set ipmi_node1 passwd SecreT_PASS" -+ echo "" -+ echo " $PROG stash ipmi_node1 passwd" -+ echo "" -+ echo " $PROG get ipmi_node1 passwd" -+ echo "" -+ echo " $PROG check ipmi_node1 passwd" -+ echo "" -+ echo " $PROG sync" -+ -+ exit $1 -+} -+fatal() { -+ echo "ERROR: $*" -+ exit 1 -+} -+warn() { -+ echo "WARNING: $*" -+} -+info() { -+ echo "INFO: $*" -+} -+ -+check_env() { -+ which md5sum >/dev/null 2>&1 || -+ fatal "please install md5sum to run $PROG" -+ if which pssh >/dev/null 2>&1; then -+ rsh=pssh_fun -+ rcp=pscp_fun -+ elif which pdsh >/dev/null 2>&1; then -+ rsh=pdsh_fun -+ rcp=pdcp_fun -+ elif which ssh >/dev/null 2>&1; then -+ rsh=ssh_fun -+ rcp=scp_fun -+ else -+ fatal "please install pssh, pdsh, or ssh to run $PROG" -+ fi -+ ps -ef | grep '[c]rmd' >/dev/null || -+ fatal "pacemaker not running? $PROG needs pacemaker" -+} -+ -+get_other_nodes() { -+ crm_node -l | awk '{print $2}' | grep -v `uname -n` -+} -+ -+get_live_nodes() { -+ if [ `id -u` = 0 ] && which fping >/dev/null 2>&1; then -+ fping -a $@ 2>/dev/null -+ else -+ local h -+ for h; do ping -c 2 -q $h >/dev/null 2>&1 && echo $h; done -+ fi -+} -+ -+check_down_nodes() { -+ local n down_nodes -+ down_nodes=`(for n; do echo $n; done) | sort | uniq -u` -+ if [ -n "$down_nodes" ]; then -+ if [ `echo $down_nodes | wc -w` = 1 ]; then -+ warn "node $down_nodes is down" -+ warn "you'll need to update it using $PROG sync later" -+ else -+ warn "nodes `echo $down_nodes` are down" -+ warn "you'll need to update them using $PROG sync later" -+ fi -+ fi -+} -+ -+pssh_fun() { -+ pssh -qi -H "$nodes" -x "$SSH_OPTS" $* -+} -+pscp_fun() { -+ pscp -q -H "$nodes" -x "-pr" -x "$SSH_OPTS" $* -+} -+pdsh_fun() { -+ local pdsh_nodes=`echo $nodes | tr ' ' ','` -+ export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" -+ pdsh -w $pdsh_nodes $* -+} -+pdcp_fun() { -+ local pdsh_nodes=`echo $nodes | tr ' ' ','` -+ export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" -+ pdcp -pr -w $pdsh_nodes $* -+} -+ssh_fun() { -+ local h -+ for h in $nodes; do -+ ssh $SSH_OPTS $h $* || return -+ done -+} -+scp_fun() { -+ local h src="$1" dest=$2 -+ for h in $nodes; do -+ scp -pr -q $SSH_OPTS $src $h:$dest || return -+ done -+} -+# TODO: this procedure should be replaced with csync2 -+# provided that csync2 has already been configured -+sync_files() { -+ local crm_nodes=`get_other_nodes` -+ local nodes=`get_live_nodes $crm_nodes` -+ check_down_nodes $nodes $crm_nodes -+ [ "$nodes" = "" ] && { -+ info "no other nodes live" -+ return -+ } -+ info "syncing $LRM_CIBSECRETS to `echo $nodes` ..." -+ $rsh rm -rf $LRM_CIBSECRETS && -+ $rsh mkdir -p `dirname $LRM_CIBSECRETS` && -+ $rcp $LRM_CIBSECRETS `dirname $LRM_CIBSECRETS` -+} -+sync_one() { -+ local f=$1 f_all="$1 $1.sign" -+ local crm_nodes=`get_other_nodes` -+ local nodes=`get_live_nodes $crm_nodes` -+ check_down_nodes $nodes $crm_nodes -+ [ "$nodes" = "" ] && { -+ info "no other nodes live" -+ return -+ } -+ info "syncing $f to `echo $nodes` ..." 
-+ $rsh mkdir -p `dirname $f` && -+ if [ -f "$f" ]; then -+ $rcp "$f_all" `dirname $f` -+ else -+ $rsh rm -f $f_all -+ fi -+} -+ -+is_secret() { -+ # assume that the secret is in the CIB if we cannot talk to -+ # cib -+ [ "$NO_CRM" ] || -+ test "$1" = "$MAGIC" -+} -+check_cib_rsc() { -+ local rsc=$1 output -+ output=`$NO_CRM crm_resource -r $rsc -W >/dev/null 2>&1` || -+ fatal "resource $rsc doesn't exist: $output" -+} -+get_cib_param() { -+ local rsc=$1 param=$2 -+ check_cib_rsc $rsc -+ $NO_CRM crm_resource -r $rsc -g $param 2>/dev/null -+} -+set_cib_param() { -+ local rsc=$1 param=$2 value=$3 -+ check_cib_rsc $rsc -+ $NO_CRM crm_resource -r $rsc -p $param -v "$value" 2>/dev/null -+} -+remove_cib_param() { -+ local rsc=$1 param=$2 -+ check_cib_rsc $rsc -+ $NO_CRM crm_resource -r $rsc -d $param 2>/dev/null -+} -+ -+localfiles() { -+ local cmd=$1 -+ local rsc=$2 param=$3 value=$4 -+ local local_file=$LRM_CIBSECRETS/$rsc/$param -+ local local_legacy_file=$LRM_LEGACY_CIBSECRETS/$rsc/$param -+ case $cmd in -+ "get") -+ cat $local_file 2>/dev/null || -+ cat $local_legacy_file 2>/dev/null -+ true -+ ;; -+ "getsum") -+ cat $local_file.sign 2>/dev/null || -+ cat $local_legacy_file.sign 2>/dev/null -+ true -+ ;; -+ "set") -+ local md5sum -+ md5sum=`printf $value | md5sum` || -+ fatal "md5sum failed to produce hash for resource $rsc parameter $param" -+ md5sum=`echo $md5sum | awk '{print $1}'` -+ mkdir -p `dirname $local_file` && -+ echo $value > $local_file && -+ echo $md5sum > $local_file.sign && ( -+ sync_one $local_file -+ rm -f $local_legacy_file -+ rm -f $local_legacy_file.sign -+ sync_one $local_legacy_file) -+ ;; -+ "remove") -+ rm -f $local_legacy_file -+ rm -f $local_legacy_file.sign -+ sync_one $local_legacy_file -+ -+ rm -f $local_file -+ rm -f $local_file.sign -+ sync_one $local_file -+ ;; -+ *) -+ # not reached, this is local interface -+ ;; -+ esac -+} -+get_local_param() { -+ local rsc=$1 param=$2 -+ localfiles get $rsc $param -+} -+set_local_param() { -+ local rsc=$1 param=$2 value=$3 -+ localfiles set $rsc $param $value -+} -+remove_local_param() { -+ local rsc=$1 param=$2 -+ localfiles remove $rsc $param -+} -+ -+cibsecret_set() { -+ local value=$1 -+ -+ if [ -z "$NO_CRM" ]; then -+ [ "$current" -a "$current" != "$MAGIC" -a "$current" != "$value" ] && -+ fatal "CIB value <$current> different for $rsc parameter $param; please delete it first" -+ fi -+ set_local_param $rsc $param $value && -+ set_cib_param $rsc $param "$MAGIC" -+} -+ -+cibsecret_check() { -+ local md5sum local_md5sum -+ is_secret "$current" || -+ fatal "resource $rsc parameter $param not set as secret, nothing to check" -+ local_md5sum=`localfiles getsum $rsc $param` -+ [ "$local_md5sum" ] || -+ fatal "no MD5 hash for resource $rsc parameter $param" -+ md5sum=`printf "$current_local" | md5sum | awk '{print $1}'` -+ [ "$md5sum" = "$local_md5sum" ] || -+ fatal "MD5 hash mismatch for resource $rsc parameter $param" -+} -+ -+cibsecret_get() { -+ cibsecret_check -+ echo "$current_local" -+} -+ -+cibsecret_delete() { -+ remove_local_param $rsc $param && -+ remove_cib_param $rsc $param -+} -+ -+cibsecret_stash() { -+ [ "$NO_CRM" ] && -+ fatal "no access to Pacemaker, stash not supported" -+ [ "$current" = "" ] && -+ fatal "nothing to stash for resource $rsc parameter $param" -+ is_secret "$current" && -+ fatal "resource $rsc parameter $param already set as secret, nothing to stash" -+ cibsecret_set "$current" -+} -+ -+cibsecret_unstash() { -+ [ "$NO_CRM" ] && -+ fatal "no access to Pacemaker, unstash not supported" -+ [ 
"$current_local" = "" ] && -+ fatal "nothing to unstash for resource $rsc parameter $param" -+ is_secret "$current" || -+ warn "resource $rsc parameter $param not set as secret, but we have local value so proceeding anyway" -+ remove_local_param $rsc $param && -+ set_cib_param $rsc $param $current_local -+} -+ -+cibsecret_sync() { -+ sync_files -+} -+ -+check_env -+ -+MAGIC="lrm://" -+umask 0077 -+ -+if [ "$1" = "-C" ]; then -+ NO_CRM=':' -+ shift 1 -+fi -+ -+cmd=$1 -+rsc=$2 -+param=$3 -+value=$4 -+ -+case "$cmd" in -+ set) [ $# -ne 4 ] && usage 1;; -+ get) [ $# -ne 3 ] && usage 1;; -+ check) [ $# -ne 3 ] && usage 1;; -+ stash) [ $# -ne 3 ] && usage 1;; -+ unstash) [ $# -ne 3 ] && usage 1;; -+ delete) [ $# -ne 3 ] && usage 1;; -+ sync) [ $# -ne 1 ] && usage 1;; -+ --help) usage 0;; -+ *) usage 1; -+esac -+ -+# we'll need these two often -+current=`get_cib_param $rsc $param` -+current_local=`get_local_param $rsc $param` -+ -+cibsecret_$cmd $value -diff --git a/tools/cli.supp b/tools/cli.supp -deleted file mode 100644 -index 19470e6..0000000 ---- a/tools/cli.supp -+++ /dev/null -@@ -1,7 +0,0 @@ --# Valgrind suppressions file for CLI tools --{ -- Valgrind bug -- Memcheck:Addr8 -- fun:__strspn_sse42 -- fun:crm_get_msec --} -diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c -index 4b00f24..26df264 100644 ---- a/tools/crm_attribute.c -+++ b/tools/crm_attribute.c -@@ -122,6 +122,7 @@ main(int argc, char **argv) - int flag; - - int option_index = 0; -+ int is_remote_node = 0; - - crm_log_cli_init("crm_attribute"); - crm_set_options(NULL, "command -n attribute [options]", long_options, -@@ -232,16 +233,26 @@ main(int argc, char **argv) - - } else if (safe_str_neq(type, XML_CIB_TAG_TICKETS)) { - if (dest_uname == NULL) { -- dest_uname = get_local_node_name(); -+ dest_uname = get_node_name(0); - } -- if (pcmk_ok != query_node_uuid(the_cib, dest_uname, &dest_node)) { -+ if (pcmk_ok != query_node_uuid(the_cib, dest_uname, &dest_node, &is_remote_node)) { - fprintf(stderr, "Could not map name=%s to a UUID\n", dest_uname); - } - } - -+ if (is_remote_node && safe_str_neq(type, XML_CIB_TAG_STATUS)) { -+ /* Only the status section can exists for remote_nodes */ -+ type = XML_CIB_TAG_STATUS; -+ if (command == 'v') { -+ fprintf(stderr, "Remote-nodes do not maintain permanent attributes, '%s=%s' will be removed after %s reboots.\n", -+ attr_name, attr_value, dest_uname); -+ } -+ } -+ - if ((command == 'v' || command == 'D') -+ && is_remote_node == FALSE /* always send remote node attr directly to cib */ - && safe_str_eq(type, XML_CIB_TAG_STATUS) -- && attrd_update_delegate(NULL, command, dest_uname, attr_name, attr_value, type, set_name, -+ && pcmk_ok == attrd_update_delegate(NULL, command, dest_uname, attr_name, attr_value, type, set_name, - NULL, NULL)) { - crm_info("Update %s=%s sent via attrd", attr_name, command == 'D' ? "" : attr_value); - -diff --git a/tools/crm_error.c b/tools/crm_error.c -index 5fb0c7b..bd75a8f 100644 ---- a/tools/crm_error.c -+++ b/tools/crm_error.c -@@ -27,6 +27,11 @@ static struct crm_option long_options[] = { - {"version", 0, 0, '$', "\tVersion information" }, - {"verbose", 0, 0, 'V', "\tIncrease debug output"}, - -+ {"name", 0, 0, 'n', "\tShow the error's name rather than the description." 
-+ "\n\t\t\tUseful for looking for sources of the error in source code"}, -+ -+ {"list", 0, 0, 'l', "\tShow all known errors."}, -+ - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -39,9 +44,12 @@ main(int argc, char **argv) - int flag = 0; - int option_index = 0; - -+ bool do_list = FALSE; -+ bool with_name = FALSE; -+ - crm_log_cli_init("crm_error"); - crm_set_options(NULL, "[options] -- rc", long_options, -- "Tool for displaying the textual description of a reported error code"); -+ "Tool for displaying the textual name or description of a reported error code"); - - while (flag >= 0) { - flag = crm_get_option(argc, argv, &option_index); -@@ -55,15 +63,40 @@ main(int argc, char **argv) - case '?': - crm_help(flag, EX_OK); - break; -+ case 'n': -+ with_name = TRUE; -+ break; -+ case 'l': -+ do_list = TRUE; -+ break; - default: - crm_help(flag, EX_OK); - break; - } - } - -+ if(do_list) { -+ for (rc = 0; rc < 256; rc++) { -+ const char *name = pcmk_errorname(rc); -+ const char *desc = pcmk_strerror(rc); -+ if(name == NULL || strcmp("Unknown", name) == 0) { -+ /* Unknown */ -+ } else if(with_name) { -+ printf("%.3d: %-25s %s\n", rc, name, desc); -+ } else { -+ printf("%.3d: %s\n", rc, desc); -+ } -+ } -+ return 0; -+ } -+ - for (lpc = optind; lpc < argc; lpc++) { - rc = crm_atoi(argv[lpc], NULL); -- printf("%s\n", pcmk_strerror(rc)); -+ if(with_name) { -+ printf("%s - %s\n", pcmk_errorname(rc), pcmk_strerror(rc)); -+ } else { -+ printf("%s\n", pcmk_strerror(rc)); -+ } - } - return 0; - } -diff --git a/tools/crm_mon.c b/tools/crm_mon.c -index b646a83..d2b9a07 100644 ---- a/tools/crm_mon.c -+++ b/tools/crm_mon.c -@@ -1,17 +1,17 @@ - --/* -+/* - * Copyright (C) 2004 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -318,7 +318,7 @@ static struct crm_option long_options[] = { - {"snmp-traps", 1, 0, 'S', "Send SNMP traps to this station", !ENABLE_SNMP}, - {"snmp-community", 1, 0, 'C', "Specify community for SNMP traps(default is NULL)", !ENABLE_SNMP}, - {"mail-to", 1, 0, 'T', "Send Mail alerts to this user. 
See also --mail-from, --mail-host, --mail-prefix", !ENABLE_ESMTP}, -- -+ - {"-spacer-", 1, 0, '-', "\nDisplay Options:"}, - {"group-by-node", 0, 0, 'n', "\tGroup resources by node" }, - {"inactive", 0, 0, 'r', "\tDisplay inactive resources" }, -@@ -341,7 +341,7 @@ static struct crm_option long_options[] = { - {"external-agent", 1, 0, 'E', "A program to run when resource operations take place."}, - {"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."}, - -- -+ - {"xml-file", 1, 0, 'x', NULL, 1}, - - {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, -@@ -359,7 +359,7 @@ static struct crm_option long_options[] = { - {"-spacer-", 1, 0, '-', " crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com", pcmk_option_example|!ENABLE_ESMTP}, - {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send SNMP alerts:", pcmk_option_paragraph|!ENABLE_SNMP}, - {"-spacer-", 1, 0, '-', " crm_mon --daemonize --snmp-traps snmptrapd.example.com", pcmk_option_example|!ENABLE_SNMP}, -- -+ - {NULL, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -696,9 +696,6 @@ print_simple_status(pe_working_set_t * data_set) - return 0; - } - --extern int get_failcount(node_t * node, resource_t * rsc, int *last_failure, -- pe_working_set_t * data_set); -- - static void - print_date(time_t time) - { -@@ -714,23 +711,18 @@ print_date(time_t time) - print_as("'%s'", date_str); - } - -+#include - static void - print_rsc_summary(pe_working_set_t * data_set, node_t * node, resource_t * rsc, gboolean all) - { - gboolean printed = FALSE; -- time_t last_failure = 0; - -- char *fail_attr = crm_concat("fail-count", rsc->id, '-'); -- const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); -- -- int failcount = char2score(value); /* Get the true value, not the effective one from get_failcount() */ -- -- get_failcount(node, rsc, (int *)&last_failure, data_set); -- free(fail_attr); -+ time_t last_failure = 0; -+ int failcount = get_failcount_full(node, rsc, &last_failure, FALSE, data_set); - - if (all || failcount || last_failure > 0) { - printed = TRUE; -- print_as(" %s: migration-threshold=%d", rsc->id, rsc->migration_threshold); -+ print_as(" %s: migration-threshold=%d", rsc_printable_id(rsc), rsc->migration_threshold); - } - - if (failcount > 0) { -@@ -805,31 +797,35 @@ print_rsc_history(pe_working_set_t * data_set, node_t * node, xmlNode * rsc_entr - - if (print_timing) { - int int_value; -- const char *attr = "last-rc-change"; -+ const char *attr = XML_RSC_OP_LAST_CHANGE; - - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); -- print_as(" %s=", attr); -- print_date(int_value); -+ if (int_value > 0) { -+ print_as(" %s=", attr); -+ print_date(int_value); -+ } - } - -- attr = "last-run"; -+ attr = XML_RSC_OP_LAST_RUN; - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); -- print_as(" %s=", attr); -- print_date(int_value); -+ if (int_value > 0) { -+ print_as(" %s=", attr); -+ print_date(int_value); -+ } - } - -- attr = "exec-time"; -+ attr = XML_RSC_OP_T_EXEC; - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); - print_as(" %s=%dms", attr, int_value); - } - -- attr = "queue-time"; -+ attr = XML_RSC_OP_T_QUEUE; - value = crm_element_value(xml_op, attr); - if (value) { - int_value = crm_parse_int(value, NULL); -@@ -1106,6 +1102,13 @@ print_status(pe_working_set_t * 
data_set) - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; - const char *node_mode = NULL; -+ char *node_name = NULL; -+ -+ if(node->details->remote_rsc) { -+ node_name = g_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); -+ } else { -+ node_name = g_strdup_printf("%s", node->details->uname); -+ } - - if (node->details->unclean) { - if (node->details->online && node->details->unclean) { -@@ -1134,22 +1137,26 @@ print_status(pe_working_set_t * data_set) - } else if (node->details->online) { - node_mode = "online"; - if (group_by_node == FALSE) { -- online_nodes = add_list_element(online_nodes, node->details->uname); -+ online_nodes = add_list_element(online_nodes, node_name); - continue; - } - - } else { - node_mode = "OFFLINE"; - if (group_by_node == FALSE) { -- offline_nodes = add_list_element(offline_nodes, node->details->uname); -+ offline_nodes = add_list_element(offline_nodes, node_name); - continue; - } - } - -+ if(node->details->remote_rsc) { -+ online_nodes = add_list_element(online_nodes, node->details->remote_rsc->id); -+ } -+ - if (safe_str_eq(node->details->uname, node->details->id)) { -- print_as("Node %s: %s\n", node->details->uname, node_mode); -+ print_as("Node %s: %s\n", node_name, node_mode); - } else { -- print_as("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); -+ print_as("Node %s (%s): %s\n", node_name, node->details->id, node_mode); - } - - if (group_by_node) { -@@ -1161,6 +1168,7 @@ print_status(pe_working_set_t * data_set) - rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout); - } - } -+ free(node_name); - } - - if (online_nodes) { -@@ -1229,7 +1237,7 @@ print_status(pe_working_set_t * data_set) - int val = 0; - const char *id = ID(xml_op); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); -- const char *last = crm_element_value(xml_op, "last_run"); -+ const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); - const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); - const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); - const char *rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); -@@ -1242,10 +1250,10 @@ print_status(pe_working_set_t * data_set) - if (last) { - time_t run_at = crm_parse_int(last, "0"); - -- print_as(", last-run=%s, queued=%sms, exec=%sms\n", -+ print_as(", last-rc-change=%s, queued=%sms, exec=%sms\n", - ctime(&run_at), -- crm_element_value(xml_op, "exec_time"), -- crm_element_value(xml_op, "queue_time")); -+ crm_element_value(xml_op, XML_RSC_OP_T_EXEC), -+ crm_element_value(xml_op, XML_RSC_OP_T_QUEUE)); - } - - val = crm_parse_int(rc, "0"); -@@ -2127,7 +2135,7 @@ crm_diff_update(const char *event, xmlNode * msg) - { - int rc = -1; - long now = time(NULL); -- const char *op = NULL; -+ static bool stale = FALSE; - - print_dot(); - -@@ -2140,14 +2148,13 @@ crm_diff_update(const char *event, xmlNode * msg) - free_xml(cib_last); - - switch (rc) { -- case pcmk_err_diff_resync: -- case pcmk_err_diff_failed: -- crm_warn("[%s] %s Patch aborted: %s (%d)", event, op, pcmk_strerror(rc), rc); -+ case -pcmk_err_diff_resync: -+ case -pcmk_err_diff_failed: -+ crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); - case pcmk_ok: - break; - default: -- crm_warn("[%s] %s ABORTED: %s (%d)", event, op, pcmk_strerror(rc), rc); -- return; -+ crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); - } - } - -@@ -2160,20 +2167,25 @@ 
crm_diff_update(const char *event, xmlNode * msg) - xmlXPathObject *xpathObj = xpath_search(msg, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED - "//" XML_LRM_TAG_RSC_OP); -- if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { -- int lpc = 0, max = xpathObj->nodesetval->nodeNr; -+ int lpc = 0, max = numXpathResults(xpathObj); - -- for (lpc = 0; lpc < max; lpc++) { -- xmlNode *rsc_op = getXpathResult(xpathObj, lpc); -+ for (lpc = 0; lpc < max; lpc++) { -+ xmlNode *rsc_op = getXpathResult(xpathObj, lpc); - -- handle_rsc_op(rsc_op); -- } -+ handle_rsc_op(rsc_op); - } -- if (xpathObj) { -- xmlXPathFreeObject(xpathObj); -+ freeXpathObject(xpathObj); -+ } -+ -+ if (current_cib == NULL) { -+ if(!stale) { -+ print_as("--- Stale data ---"); - } -+ stale = TRUE; -+ return; - } - -+ stale = FALSE; - if ((now - last_refresh) > (reconnect_msec / 1000)) { - /* Force a refresh */ - mon_refresh_display(NULL); -diff --git a/tools/crm_node.c b/tools/crm_node.c -index eac2494..a25b3b4 100644 ---- a/tools/crm_node.c -+++ b/tools/crm_node.c -@@ -220,7 +220,7 @@ read_local_hb_uuid(void) - fseek(input, 0L, start); - if (start != ftell(input)) { - fprintf(stderr, "fseek not behaving: %ld vs. %ld\n", start, ftell(input)); -- crm_exit(2); -+ crm_exit(pcmk_err_generic); - } - - buffer = malloc(50); -@@ -229,7 +229,7 @@ read_local_hb_uuid(void) - - if (read_len != UUID_LEN) { - fprintf(stderr, "Expected and read bytes differ: %d vs. %ld\n", UUID_LEN, read_len); -- crm_exit(3); -+ crm_exit(pcmk_err_generic); - - } else if (buffer != NULL) { - cl_uuid_unparse(&uuid, buffer); -@@ -238,7 +238,7 @@ read_local_hb_uuid(void) - - } else { - fprintf(stderr, "No buffer to unparse\n"); -- crm_exit(4); -+ crm_exit(ENODATA); - } - - free(buffer); -@@ -294,7 +294,7 @@ ccm_age_callback(oc_ed_t event, void *cookie, size_t size, const void *data) - fprintf(stdout, "\n"); - } - fflush(stdout); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - static gboolean -@@ -353,15 +353,15 @@ try_heartbeat(int command, enum cluster_type_e stack) - - if (command == 'i') { - if (read_local_hb_uuid()) { -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - } else if (command == 'R') { - if (crmd_remove_node_cache(target_uname)) { - crm_err("Failed to connect to crmd to remove node id %s", target_uname); -- crm_exit(-pcmk_err_generic); -+ crm_exit(pcmk_err_generic); - } -- crm_exit(0); -+ crm_exit(pcmk_ok); - - } else if (ccm_age_connect(&ccm_fd)) { - int rc = 0; -@@ -463,11 +463,11 @@ try_cman(int command, enum cluster_type_e stack) - crm_help('?', EX_USAGE); - } - cman_finish(cman_handle); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - cman_bail: - cman_finish(cman_handle); -- return crm_exit(EX_USAGE); -+ return crm_exit(EINVAL); - } - #endif - -@@ -477,7 +477,7 @@ ais_membership_destroy(gpointer user_data) - { - crm_err("AIS connection terminated"); - ais_fd_sync = -1; -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - static gint -@@ -548,7 +548,7 @@ ais_membership_dispatch(int kind, const char *from, const char *data) - fprintf(stdout, "\n"); - } - -- crm_exit(0); -+ crm_exit(pcmk_ok); - - return TRUE; - } -@@ -586,7 +586,7 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) - fprintf(stdout, "\n"); - } - -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - return 0; -@@ -595,7 +595,7 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) - static void - node_mcp_destroy(gpointer user_data) - { -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - - static gboolean -@@ -626,7 +626,7 @@ try_corosync(int command, enum cluster_type_e 
stack) - case 'e': - /* Age makes no sense (yet) in an AIS cluster */ - fprintf(stdout, "1\n"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'q': - /* Go direct to the Quorum API */ -@@ -648,7 +648,7 @@ try_corosync(int command, enum cluster_type_e stack) - fprintf(stdout, "0\n"); - } - quorum_finalize(q_handle); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'i': - /* Go direct to the CPG API */ -@@ -666,7 +666,7 @@ try_corosync(int command, enum cluster_type_e stack) - - fprintf(stdout, "%u\n", nodeid); - cpg_finalize(c_handle); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'l': - case 'p': -@@ -704,13 +704,13 @@ try_openais(int command, enum cluster_type_e stack) - switch (command) { - case 'R': - send_ais_text(crm_class_rmpeer, target_uname, TRUE, NULL, crm_msg_ais); -- cib_remove_node(atoi(target_uname), NULL); -- crm_exit(0); -+ cib_remove_node(0, target_uname); -+ crm_exit(pcmk_ok); - - case 'e': - /* Age makes no sense (yet) in an AIS cluster */ - fprintf(stdout, "1\n"); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - case 'q': - send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); -@@ -724,7 +724,7 @@ try_openais(int command, enum cluster_type_e stack) - - case 'i': - printf("%u\n", cluster.nodeid); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - default: - fprintf(stderr, "Unknown option '%c'\n", command); -@@ -819,11 +819,11 @@ main(int argc, char **argv) - - if (command == 'n') { - fprintf(stdout, "%s\n", get_local_node_name()); -- crm_exit(0); -+ crm_exit(pcmk_ok); - - } else if (command == 'N') { - fprintf(stdout, "%s\n", get_node_name(nodeid)); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - if (dangerous_cmd && force_flag == FALSE) { -@@ -831,7 +831,7 @@ main(int argc, char **argv) - " To prevent accidental destruction of the cluster," - " the --force flag is required in order to proceed.\n"); - fflush(stderr); -- crm_exit(EX_USAGE); -+ crm_exit(EINVAL); - } - - try_stack = get_cluster_type(); -diff --git a/tools/crm_report.in b/tools/crm_report.in -index f1d0f14..3af1b3f 100755 ---- a/tools/crm_report.in -+++ b/tools/crm_report.in -@@ -1,17 +1,17 @@ - #!/bin/sh - - # Copyright (C) 2010 Andrew Beekhof -- # -+ # - # This program is free software; you can redistribute it and/or - # modify it under the terms of the GNU General Public - # License as published by the Free Software Foundation; either - # version 2.1 of the License, or (at your option) any later version. -- # -+ # - # This software is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # General Public License for more details. -- # -+ # - # You should have received a copy of the GNU General Public - # License along with this library; if not, write to the Free Software - # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -@@ -20,8 +20,8 @@ - - # Note the quotes around `$TEMP': they are essential! 
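A theme running through the attrd.c, attrd_updater.c, cibadmin.c and crm_node.c hunks above is the retirement of magic exit values: crm_exit(0), crm_exit(1), crm_exit(100) and the sysexits EX_* constants give way to pcmk_ok and errno-style codes such as EINVAL, ENOTCONN and ENODATA. The convention, sketched here with a hypothetical my_exit() standing in for crm_exit() (the real helper also flushes logs and tears down IPC):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define pcmk_ok 0                 /* success, as in pacemaker rc codes */

/* Hypothetical stand-in for crm_exit(). */
static void my_exit(int rc)
{
    if (rc < 0) {
        rc = -rc;                 /* pacemaker rc codes are negated errnos */
    }
    exit(rc > 255 ? EINVAL : rc); /* an exit status is only 8 bits wide */
}

int main(void)
{
    int rc = -ENOTCONN;           /* e.g. connection to the cluster lost */

    if (rc != pcmk_ok) {
        fprintf(stderr, "Error: %s (%d)\n", strerror(-rc), rc);
        my_exit(rc);              /* exits with 107 on Linux, not a bare 1 */
    }
    my_exit(pcmk_ok);
    return 0;
}

A symbolic status lets callers, and the crm_error tool shown earlier, translate a failure back into text instead of guessing what exit(1) meant.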
- TEMP=`getopt \ -- -o hv?xl:f:t:n:T:Lp:c:dSACHu:MVs \ -- --long help,cts:,cts-log:,dest:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,version,features \ -+ -o hv?xl:f:t:n:T:Lp:c:dSACHu:D:MVs \ -+ --long help,cts:,cts-log:,dest:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features \ - -n 'crm_report' -- "$@"` - eval set -- "$TEMP" - -@@ -44,7 +44,7 @@ usage() { - - cat< - #include - -+bool scope_master = FALSE; - gboolean do_force = FALSE; - gboolean BE_QUIET = FALSE; - const char *attr_set_type = XML_TAG_ATTR_SETS; -@@ -112,7 +113,7 @@ resource_ipc_callback(const char *buffer, ssize_t length, gpointer userdata) - if (crmd_replies_needed == 0) { - fprintf(stderr, " OK\n"); - crm_debug("Got all the replies we expected"); -- return crm_exit(0); -+ return crm_exit(pcmk_ok); - } - - free_xml(msg); -@@ -675,7 +676,7 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - free(key); - - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); -- if (safe_str_eq(router_node, host_uname)) { -+ if (safe_str_neq(router_node, host_uname)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - -@@ -730,7 +731,7 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - } - - static int --delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, -+delete_lrm_rsc(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_uname, - resource_t * rsc, pe_working_set_t * data_set) - { - int rc = pcmk_ok; -@@ -744,7 +745,7 @@ delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { - resource_t *child = (resource_t *) lpc->data; - -- delete_lrm_rsc(crmd_channel, host_uname, child, data_set); -+ delete_lrm_rsc(cib_conn, crmd_channel, host_uname, child, data_set); - } - return pcmk_ok; - -@@ -755,7 +756,7 @@ delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - node_t *node = (node_t *) lpc->data; - - if (node->details->online) { -- delete_lrm_rsc(crmd_channel, node->details->uname, rsc, data_set); -+ delete_lrm_rsc(cib_conn, crmd_channel, node->details->uname, rsc, data_set); - } - } - -@@ -767,14 +768,22 @@ delete_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - if (rc == pcmk_ok) { - char *attr_name = NULL; - const char *id = rsc->id; -+ node_t *node = pe_find_node(data_set->nodes, host_uname); - - if (rsc->clone_name) { - id = rsc->clone_name; - } - - attr_name = crm_concat("fail-count", id, '-'); -- attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, -+ if (node && node->details->remote_rsc) { -+ /* TODO talk directly to cib for remote nodes until we can re-write -+ * attrd to handle remote-nodes */ -+ rc = delete_attr_delegate(cib_conn, cib_sync_call, XML_CIB_TAG_STATUS, node->details->id, NULL, NULL, -+ NULL, attr_name, NULL, FALSE, NULL); -+ } else { -+ rc = attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, - NULL, NULL); -+ } - free(attr_name); - } - return rc; -@@ -788,150 +797,162 @@ fail_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, - return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); - } - --static int --refresh_lrm(crm_ipc_t * crmd_channel, const char *host_uname) -+static char * -+parse_cli_lifetime(const char *input) - { -- xmlNode *cmd = NULL; -- int rc = -ECOMM; -+ char *later_s = NULL; -+ crm_time_t *now = NULL; -+ crm_time_t *later = NULL; -+ crm_time_t *duration = NULL; 
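/* parse_cli_lifetime(), introduced in this hunk, turns an ISO 8601
 * duration such as "P1D" or "PT30M" into an absolute expiry timestamp
 * using only the crm_time calls visible here -- roughly:
 *
 *     crm_time_t *now      = crm_time_new(NULL);
 *     crm_time_t *duration = crm_time_parse_duration("P1D");
 *     crm_time_t *later    = crm_time_add(now, duration);
 *     char *end = crm_time_as_string(later,
 *                     crm_time_log_date | crm_time_log_timeofday);
 *
 * The resulting string is planted in a date_expression rule so that a
 * --ban or --move constraint expires on its own once the lifetime has
 * passed. */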
- -- cmd = create_request(CRM_OP_LRM_REFRESH, NULL, host_uname, -- CRM_SYSTEM_CRMD, crm_system_name, our_pid); -+ if (input == NULL) { -+ return NULL; -+ } - -- if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { -- rc = 0; -+ duration = crm_time_parse_duration(move_lifetime); -+ if (duration == NULL) { -+ CMD_ERR("Invalid duration specified: %s\n", move_lifetime); -+ CMD_ERR("Please refer to" -+ " http://en.wikipedia.org/wiki/ISO_8601#Duration" -+ " for examples of valid durations\n"); -+ return NULL; - } -- free_xml(cmd); -- return rc; -+ -+ now = crm_time_new(NULL); -+ later = crm_time_add(now, duration); -+ crm_time_log(LOG_INFO, "now ", now, -+ crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -+ crm_time_log(LOG_INFO, "later ", later, -+ crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -+ crm_time_log(LOG_INFO, "duration", duration, crm_time_log_date | crm_time_log_timeofday); -+ later_s = crm_time_as_string(later, crm_time_log_date | crm_time_log_timeofday); -+ printf("Migration will take effect until: %s\n", later_s); -+ -+ crm_time_free(duration); -+ crm_time_free(later); -+ crm_time_free(now); -+ return later_s; - } - - static int --move_resource(const char *rsc_id, -- const char *existing_node, const char *preferred_node, cib_t * cib_conn) -+ban_resource(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn) - { -- char *later_s = NULL; -+ char *later_s = parse_cli_lifetime(move_lifetime); - int rc = pcmk_ok; - char *id = NULL; -- xmlNode *rule = NULL; -- xmlNode *expr = NULL; -- xmlNode *constraints = NULL; - xmlNode *fragment = NULL; -+ xmlNode *location = NULL; - -- xmlNode *can_run = NULL; -- xmlNode *dont_run = NULL; -+ if(host == NULL) { -+ GListPtr n = allnodes; -+ for(; n && rc == pcmk_ok; n = n->next) { -+ node_t *target = n->data; -+ -+ rc = ban_resource(rsc_id, target->details->uname, NULL, cib_conn); -+ } -+ return rc; -+ } -+ -+ later_s = parse_cli_lifetime(move_lifetime); -+ if(move_lifetime && later_s == NULL) { -+ return -EINVAL; -+ } - - fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); -- constraints = fragment; - -- id = crm_concat("cli-prefer", rsc_id, '-'); -- can_run = create_xml_node(NULL, XML_CONS_TAG_RSC_LOCATION); -- crm_xml_add(can_run, XML_ATTR_ID, id); -+ id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, host); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); - free(id); - -- id = crm_concat("cli-standby", rsc_id, '-'); -- dont_run = create_xml_node(NULL, XML_CONS_TAG_RSC_LOCATION); -- crm_xml_add(dont_run, XML_ATTR_ID, id); -- free(id); -+ if (BE_QUIET == FALSE) { -+ CMD_ERR("WARNING: Creating rsc_location constraint '%s'" -+ " with a score of -INFINITY for resource %s" -+ " on %s.\n", ID(location), rsc_id, host); -+ CMD_ERR("\tThis will prevent %s from running" -+ " on %s until the constraint is removed using" -+ " the 'crm_resource --clear' command or manually" -+ " with cibadmin\n", rsc_id, host); -+ CMD_ERR("\tThis will be the case even if %s is" -+ " the last node in the cluster\n", host); -+ CMD_ERR("\tThis message can be disabled with --quiet\n"); -+ } - -- if (move_lifetime) { -- crm_time_t *now = NULL; -- crm_time_t *later = NULL; -- crm_time_t *duration = crm_time_parse_duration(move_lifetime); -- -- if (duration == NULL) { -- CMD_ERR("Invalid duration specified: %s\n", move_lifetime); -- CMD_ERR("Please refer to" -- " http://en.wikipedia.org/wiki/ISO_8601#Duration" -- " for examples of valid 
durations\n"); -- return -EINVAL; -- } -- now = crm_time_new(NULL); -- later = crm_time_add(now, duration); -- crm_time_log(LOG_INFO, "now ", now, -- crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -- crm_time_log(LOG_INFO, "later ", later, -- crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); -- crm_time_log(LOG_INFO, "duration", duration, crm_time_log_date | crm_time_log_timeofday); -- later_s = crm_time_as_string(later, crm_time_log_date | crm_time_log_timeofday); -- printf("Migration will take effect until: %s\n", later_s); -- -- crm_time_free(duration); -- crm_time_free(later); -- crm_time_free(now); -- } -- -- if (existing_node == NULL) { -- crm_log_xml_notice(can_run, "Deleting"); -- rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, dont_run, cib_options); -- if (rc == -ENXIO) { -- rc = pcmk_ok; -- -- } else if (rc != pcmk_ok) { -- goto bail; -- } -+ crm_xml_add(location, XML_COLOC_ATTR_SOURCE, rsc_id); -+ -+ if (later_s == NULL) { -+ /* Short form */ -+ crm_xml_add(location, XML_CIB_TAG_NODE, host); -+ crm_xml_add(location, XML_RULE_ATTR_SCORE, MINUS_INFINITY_S); - - } else { -- if (BE_QUIET == FALSE) { -- fprintf(stderr, -- "WARNING: Creating rsc_location constraint '%s'" -- " with a score of -INFINITY for resource %s" -- " on %s.\n", ID(dont_run), rsc_id, existing_node); -- CMD_ERR("\tThis will prevent %s from running" -- " on %s until the constraint is removed using" -- " the 'crm_resource -U' command or manually" -- " with cibadmin\n", rsc_id, existing_node); -- CMD_ERR("\tThis will be the case even if %s is" -- " the last node in the cluster\n", existing_node); -- CMD_ERR("\tThis message can be disabled with -Q\n"); -- } -- -- crm_xml_add(dont_run, "rsc", rsc_id); -- -- rule = create_xml_node(dont_run, XML_TAG_RULE); -- expr = create_xml_node(rule, XML_TAG_EXPRESSION); -- id = crm_concat("cli-standby-rule", rsc_id, '-'); -+ xmlNode *rule = create_xml_node(location, XML_TAG_RULE); -+ xmlNode *expr = create_xml_node(rule, XML_TAG_EXPRESSION); -+ -+ id = g_strdup_printf("cli-ban-%s-on-%s-rule", rsc_id, host); - crm_xml_add(rule, XML_ATTR_ID, id); - free(id); - - crm_xml_add(rule, XML_RULE_ATTR_SCORE, MINUS_INFINITY_S); - crm_xml_add(rule, XML_RULE_ATTR_BOOLEAN_OP, "and"); - -- id = crm_concat("cli-standby-expr", rsc_id, '-'); -+ id = g_strdup_printf("cli-ban-%s-on-%s-expr", rsc_id, host); - crm_xml_add(expr, XML_ATTR_ID, id); - free(id); - - crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); - crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); -- crm_xml_add(expr, XML_EXPR_ATTR_VALUE, existing_node); -+ crm_xml_add(expr, XML_EXPR_ATTR_VALUE, host); - crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); - -- if (later_s) { -- expr = create_xml_node(rule, "date_expression"); -- id = crm_concat("cli-standby-lifetime-end", rsc_id, '-'); -- crm_xml_add(expr, XML_ATTR_ID, id); -- free(id); -+ expr = create_xml_node(rule, "date_expression"); -+ id = g_strdup_printf("cli-ban-%s-on-%s-lifetime", rsc_id, host); -+ crm_xml_add(expr, XML_ATTR_ID, id); -+ free(id); - -- crm_xml_add(expr, "operation", "lt"); -- crm_xml_add(expr, "end", later_s); -- } -+ crm_xml_add(expr, "operation", "lt"); -+ crm_xml_add(expr, "end", later_s); -+ } -+ -+ crm_log_xml_notice(fragment, "Modify"); -+ rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); - -- add_node_copy(constraints, dont_run); -+ free_xml(fragment); -+ free(later_s); -+ return rc; -+} -+ -+static int -+prefer_resource(const char *rsc_id, const char 
*host, cib_t * cib_conn) -+{ -+ char *later_s = parse_cli_lifetime(move_lifetime); -+ int rc = pcmk_ok; -+ char *id = NULL; -+ xmlNode *location = NULL; -+ xmlNode *fragment = NULL; -+ -+ if(move_lifetime && later_s == NULL) { -+ return -EINVAL; - } - -- if (preferred_node == NULL) { -- crm_log_xml_notice(can_run, "Deleting"); -- rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, can_run, cib_options); -- if (rc == -ENXIO) { -- rc = pcmk_ok; -+ fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); - -- } else if (rc != pcmk_ok) { -- goto bail; -- } -+ id = g_strdup_printf("cli-prefer-%s", rsc_id); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ free(id); -+ -+ crm_xml_add(location, XML_COLOC_ATTR_SOURCE, rsc_id); -+ -+ if (later_s == NULL) { -+ /* Short form */ -+ crm_xml_add(location, XML_CIB_TAG_NODE, host); -+ crm_xml_add(location, XML_RULE_ATTR_SCORE, INFINITY_S); - - } else { -- crm_xml_add(can_run, "rsc", rsc_id); -+ xmlNode *rule = create_xml_node(location, XML_TAG_RULE); -+ xmlNode *expr = create_xml_node(rule, XML_TAG_EXPRESSION); - -- rule = create_xml_node(can_run, XML_TAG_RULE); -- expr = create_xml_node(rule, XML_TAG_EXPRESSION); - id = crm_concat("cli-prefer-rule", rsc_id, '-'); - crm_xml_add(rule, XML_ATTR_ID, id); - free(id); -@@ -945,32 +966,73 @@ move_resource(const char *rsc_id, - - crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); - crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); -- crm_xml_add(expr, XML_EXPR_ATTR_VALUE, preferred_node); -+ crm_xml_add(expr, XML_EXPR_ATTR_VALUE, host); - crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); - -- if (later_s) { -- expr = create_xml_node(rule, "date_expression"); -- id = crm_concat("cli-prefer-lifetime-end", rsc_id, '-'); -- crm_xml_add(expr, XML_ATTR_ID, id); -- free(id); -+ expr = create_xml_node(rule, "date_expression"); -+ id = crm_concat("cli-prefer-lifetime-end", rsc_id, '-'); -+ crm_xml_add(expr, XML_ATTR_ID, id); -+ free(id); -+ -+ crm_xml_add(expr, "operation", "lt"); -+ crm_xml_add(expr, "end", later_s); -+ } -+ -+ crm_log_xml_info(fragment, "Modify"); -+ rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); -+ -+ free_xml(fragment); -+ free(later_s); -+ return rc; -+} -+ -+static int -+clear_resource(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn) -+{ -+ char *id = NULL; -+ int rc = pcmk_ok; -+ xmlNode *fragment = NULL; -+ xmlNode *location = NULL; -+ -+ fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); - -- crm_xml_add(expr, "operation", "lt"); -- crm_xml_add(expr, "end", later_s); -+ if(host) { -+ id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, host); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ free(id); -+ -+ } else { -+ GListPtr n = allnodes; -+ for(; n; n = n->next) { -+ node_t *target = n->data; -+ -+ id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, target->details->uname); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ free(id); - } -+ } - -- add_node_copy(constraints, can_run); -+ id = g_strdup_printf("cli-prefer-%s", rsc_id); -+ location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); -+ crm_xml_add(location, XML_ATTR_ID, id); -+ if(host && do_force == FALSE) { -+ crm_xml_add(location, XML_CIB_TAG_NODE, host); - } -+ free(id); -+ -+ crm_log_xml_info(fragment, "Delete"); -+ rc = 
cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); -+ if (rc == -ENXIO) { -+ rc = pcmk_ok; - -- if (preferred_node != NULL || existing_node != NULL) { -- crm_log_xml_notice(fragment, "CLI Update"); -- rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); -+ } else if (rc != pcmk_ok) { -+ goto bail; - } - - bail: - free_xml(fragment); -- free_xml(dont_run); -- free_xml(can_run); -- free(later_s); - return rc; - } - -@@ -987,13 +1049,17 @@ list_resource_operations(const char *rsc_id, const char *host_uname, gboolean ac - xmlNode *xml_op = (xmlNode *) lpc->data; - - const char *op_rsc = crm_element_value(xml_op, "resource"); -- const char *last = crm_element_value(xml_op, "last_run"); -+ const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); - const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); - int status = crm_parse_int(status_s, "0"); - - rsc = pe_find_resource(data_set->resources, op_rsc); -- rsc->fns->print(rsc, "", opts, stdout); -+ if(rsc) { -+ rsc->fns->print(rsc, "", opts, stdout); -+ } else { -+ fprintf(stdout, "Unknown resource %s", op_rsc); -+ } - - fprintf(stdout, ": %s (node=%s, call=%s, rc=%s", - op_key ? op_key : ID(xml_op), -@@ -1003,8 +1069,8 @@ list_resource_operations(const char *rsc_id, const char *host_uname, gboolean ac - if (last) { - time_t run_at = crm_parse_int(last, "0"); - -- fprintf(stdout, ", last-run=%s, exec=%sms\n", -- ctime(&run_at), crm_element_value(xml_op, "exec_time")); -+ fprintf(stdout, ", last-rc-change=%s, exec=%sms\n", -+ ctime(&run_at), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); - } - fprintf(stdout, "): %s\n", services_lrm_status_str(status)); - } -@@ -1183,32 +1249,53 @@ static struct crm_option long_options[] = { - {"constraints",0, 0, 'a', "\tDisplay the (co)location constraints that apply to a resource"}, - - {"-spacer-", 1, 0, '-', "\nCommands:"}, -+ {"cleanup", 0, 0, 'C', "\t\tDelete the resource history and re-check the current state. Optional: --resource"}, - {"set-parameter", 1, 0, 'p', "Set the named parameter for a resource. See also -m, --meta"}, - {"get-parameter", 1, 0, 'g', "Display the named parameter for a resource. See also -m, --meta"}, - {"delete-parameter",1, 0, 'd', "Delete the named parameter for a resource. See also -m, --meta"}, - {"get-property", 1, 0, 'G', "Display the 'class', 'type' or 'provider' of a resource", 1}, - {"set-property", 1, 0, 'S', "(Advanced) Set the class, type or provider of a resource", 1}, -- {"move", 0, 0, 'M', -- "\t\tMove a resource from its current location, optionally specifying a destination (-N) and/or a period for which it should take effect (-u)" -- "\n\t\t\t\tIf -N is not specified, the cluster will force the resource to move by creating a rule for the current location and a score of -INFINITY" -- "\n\t\t\t\tNOTE: This will prevent the resource from running on this node until the constraint is removed with -U"}, -- {"un-move", 0, 0, 'U', "\t\tRemove all constraints created by a move command"}, -+ -+ {"-spacer-", 1, 0, '-', "\nResource location:"}, -+ { -+ "move", 0, 0, 'M', -+ "\t\tMove a resource from its current location to the named destination.\n " -+ "\t\t\t\tRequires: --host. 
Optional: --lifetime, --master\n\n" -+ "\t\t\t\tNOTE: This may prevent the resource from running on the previous location node until the implicit constraints expire or are removed with --unban\n" -+ }, -+ { -+ "ban", 0, 0, 'B', -+ "\t\tPrevent the named resource from running on the named --host. \n" -+ "\t\t\t\tRequires: --resource. Optional: --host, --lifetime, --master\n\n" -+ "\t\t\t\tIf --host is not specified, it defaults to:\n" -+ "\t\t\t\t * the current location for primitives and groups, or\n\n" -+ "\t\t\t\t * the current location of the master for m/s resources with master-max=1\n\n" -+ "\t\t\t\tAll other situations result in an error as there is no sane default.\n\n" -+ "\t\t\t\tNOTE: This will prevent the resource from running on this node until the constraint expires or is removed with --clear\n" -+ }, -+ { -+ "clear", 0, 0, 'U', "\t\tRemove all constraints created by the --ban and/or --move commands. \n" -+ "\t\t\t\tRequires: --resource. Optional: --host, --master\n\n" -+ "\t\t\t\tIf --host is not specified, all constraints created by --ban and --move will be removed for the named resource.\n" -+ }, -+ {"lifetime", 1, 0, 'u', "\tLifespan of constraints created by the --ban and --move commands"}, -+ { -+ "master", 0, 0, 0, -+ "\t\tLimit the scope of the --ban, --move and --clear commands to the Master role.\n" -+ "\t\t\t\tFor --ban and --move, the previous master can still remain active in the Slave role." -+ }, - - {"-spacer-", 1, 0, '-', "\nAdvanced Commands:"}, - {"delete", 0, 0, 'D', "\t\t(Advanced) Delete a resource from the CIB"}, - {"fail", 0, 0, 'F', "\t\t(Advanced) Tell the cluster this resource has failed"}, -- {"refresh", 0, 0, 'R', "\t\t(Advanced) Refresh the CIB from the LRM"}, -- {"cleanup", 0, 0, 'C', "\t\t(Advanced) Delete a resource from the LRM"}, -- {"reprobe", 0, 0, 'P', "\t\t(Advanced) Re-check for resources started outside of the CRM\n"}, -- {"force-stop", 0, 0, 0, "\t(Advanced) Bypass the cluster and stop a resource on the local node"}, -- {"force-start",0, 0, 0, "\t(Advanced) Bypass the cluster and start a resource on the local node"}, -- {"force-check",0, 0, 0, "\t(Advanced) Bypass the cluster and check the state of a resource on the local node\n"}, -+ {"force-stop", 0, 0, 0, "\t(Advanced) Bypass the cluster and stop a resource on the local node. Additional detail with -V"}, -+ {"force-start",0, 0, 0, "\t(Advanced) Bypass the cluster and start a resource on the local node. Additional detail with -V"}, -+ {"force-check",0, 0, 0, "\t(Advanced) Bypass the cluster and check the state of a resource on the local node. Additional detail with -V\n"}, - - {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, - {"node", 1, 0, 'N', "\tHost uname"}, - {"resource-type", 1, 0, 't', "Resource type (primitive, clone, group, ...)"}, - {"parameter-value", 1, 0, 'v', "Value to use with -p, -g or -d"}, -- {"lifetime", 1, 0, 'u', "\tLifespan of migration constraints\n"}, - {"meta", 0, 0, 'm', "\t\tModify a resource's configuration option rather than one which is passed to the resource agent script. For use with -p, -g, -d"}, - {"utilization", 0, 0, 'z', "\tModify a resource's utilization attribute. 
For use with -p, -g, -d"}, - {"set-name", 1, 0, 's', "\t(Advanced) ID of the instance_attributes object to change"}, -@@ -1224,6 +1311,10 @@ static struct crm_option long_options[] = { - {"host-uname", 1, 0, 'H', NULL, 1}, - {"migrate", 0, 0, 'M', NULL, 1}, - {"un-migrate", 0, 0, 'U', NULL, 1}, -+ {"un-move", 0, 0, 'U', NULL, 1}, -+ -+ {"refresh", 0, 0, 'R', NULL, 1}, -+ {"reprobe", 0, 0, 'P', NULL, 1}, - - {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, - {"-spacer-", 1, 0, '-', "List the configured resources:", pcmk_option_paragraph}, -@@ -1263,10 +1354,10 @@ main(int argc, char **argv) - const char *longname = NULL; - pe_working_set_t data_set; - xmlNode *cib_xml_copy = NULL; -- - cib_t *cib_conn = NULL; -- int rc = pcmk_ok; -+ bool do_trace = FALSE; - -+ int rc = pcmk_ok; - int option_index = 0; - int argerr = 0; - int flag; -@@ -1286,7 +1377,10 @@ main(int argc, char **argv) - - switch (flag) { - case 0: -- if (safe_str_eq("force-stop", longname) -+ if (safe_str_eq("master", longname)) { -+ scope_master = TRUE; -+ -+ } else if (safe_str_eq("force-stop", longname) - || safe_str_eq("force-start", longname) - || safe_str_eq("force-check", longname)) { - rsc_cmd = flag; -@@ -1396,6 +1490,7 @@ main(int argc, char **argv) - } - break; - case 'V': -+ do_trace = TRUE; - crm_bump_log_level(argc, argv); - break; - case '$': -@@ -1435,9 +1530,10 @@ main(int argc, char **argv) - case 't': - rsc_type = optarg; - break; -+ case 'C': - case 'R': - case 'P': -- rsc_cmd = flag; -+ rsc_cmd = 'C'; - break; - case 'L': - case 'c': -@@ -1446,10 +1542,10 @@ main(int argc, char **argv) - case 'w': - case 'D': - case 'F': -- case 'C': - case 'W': - case 'M': - case 'U': -+ case 'B': - case 'O': - case 'o': - case 'A': -@@ -1507,7 +1603,7 @@ main(int argc, char **argv) - } - - set_working_set_defaults(&data_set); -- if (rsc_cmd != 'P') { -+ if (rsc_cmd != 'P' || rsc_id) { - resource_t *rsc = NULL; - - cib_conn = cib_new(); -@@ -1536,7 +1632,7 @@ main(int argc, char **argv) - if (rsc_id) { - rsc = find_rsc_or_clone(rsc_id, &data_set); - } -- if (rsc == NULL) { -+ if (rsc == NULL && rsc_cmd != 'C') { - rc = -ENXIO; - } - } -@@ -1606,11 +1702,26 @@ main(int argc, char **argv) - rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); - rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); -- params = generate_resource_params(rsc, &data_set); - -+ if(safe_str_eq(rclass, "stonith")){ -+ CMD_ERR("Sorry, --%s doesn't support %s resources yet\n", rsc_long_cmd, rclass); -+ crm_exit(EOPNOTSUPP); -+ } -+ -+ params = generate_resource_params(rsc, &data_set); - op = resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params); - -- if (services_action_sync(op)) { -+ if(do_trace) { -+ setenv("OCF_TRACE_RA", "1", 1); -+ } -+ -+ if(op == NULL) { -+ /* Re-run but with stderr enabled so we can display a sane error message */ -+ crm_enable_stderr(TRUE); -+ resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params); -+ return crm_exit(EINVAL); -+ -+ } else if (services_action_sync(op)) { - int more, lpc, last; - char *local_copy = NULL; - -@@ -1700,14 +1811,6 @@ main(int argc, char **argv) - } - print_cts_constraints(&data_set); - -- } else if (rsc_cmd == 'C') { -- resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -- -- rc = delete_lrm_rsc(crmd_channel, host_uname, rsc, &data_set); -- if (rc == pcmk_ok) { -- start_mainloop(); -- } -- - } else if (rsc_cmd == 'F') { - rc = fail_lrm_rsc(crmd_channel, 
host_uname, rsc_id, &data_set); - if (rc == pcmk_ok) { -@@ -1721,7 +1824,7 @@ main(int argc, char **argv) - rc = list_resource_operations(rsc_id, host_uname, FALSE, &data_set); - - } else if (rc == -ENXIO) { -- CMD_ERR("Resource %s not found: %s\n", crm_str(rsc_id), pcmk_strerror(rc)); -+ CMD_ERR("Resource '%s' not found: %s\n", crm_str(rsc_id), pcmk_strerror(rc)); - - } else if (rsc_cmd == 'W') { - if (rsc_id == NULL) { -@@ -1748,57 +1851,141 @@ main(int argc, char **argv) - rc = dump_resource(rsc_id, &data_set, FALSE); - - } else if (rsc_cmd == 'U') { -+ node_t *dest = NULL; -+ - if (rsc_id == NULL) { -- CMD_ERR("Must supply a resource id with -r\n"); -+ CMD_ERR("No value specified for --resource\n"); - rc = -ENXIO; - goto bail; - } -- /* coverity[var_deref_model] False positive */ -- rc = move_resource(rsc_id, NULL, NULL, cib_conn); - -- } else if (rsc_cmd == 'M') { -- node_t *dest = NULL; -- node_t *current = NULL; -- const char *current_uname = NULL; -+ if (host_uname) { -+ dest = pe_find_node(data_set.nodes, host_uname); -+ if (dest == NULL) { -+ CMD_ERR("Unknown node: %s\n", host_uname); -+ rc = -ENXIO; -+ goto bail; -+ } -+ rc = clear_resource(rsc_id, dest->details->uname, NULL, cib_conn); -+ -+ } else { -+ rc = clear_resource(rsc_id, NULL, data_set.nodes, cib_conn); -+ } -+ -+ } else if (rsc_cmd == 'M' && host_uname) { - resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ node_t *dest = pe_find_node(data_set.nodes, host_uname); -+ -+ rc = -EINVAL; -+ -+ if (rsc == NULL) { -+ CMD_ERR("Resource '%s' not moved: not found\n", rsc_id); -+ rc = -ENXIO; -+ goto bail; -+ -+ } else if(rsc->variant == pe_clone) { -+ CMD_ERR("Resource '%s' not moved: moving a clone makes no sense\n", rsc_id); -+ goto bail; -+ -+ } else if (rsc->variant < pe_clone && g_list_length(rsc->running_on) > 1) { -+ CMD_ERR("Resource '%s' not moved: active on multiple nodes\n", rsc_id); -+ goto bail; -+ } - -- if (rsc != NULL && rsc->running_on != NULL) { -- current = rsc->running_on->data; -- if (current != NULL) { -- current_uname = current->details->uname; -+ if(dest == NULL) { -+ CMD_ERR("Error performing operation: node '%s' is unknown\n", host_uname); -+ rc = -ENXIO; -+ goto bail; -+ } -+ -+ if(g_list_length(rsc->running_on) == 1) { -+ node_t *current = rsc->running_on->data; -+ -+ if (safe_str_eq(current->details->uname, dest->details->uname)) { -+ CMD_ERR("Error performing operation: %s is already active on %s\n", rsc_id, dest->details->uname); -+ goto bail; - } -+ /* } else if (rsc->variant == pe_master) { Find the master and ban it */ - } - -- if (host_uname != NULL) { -- dest = pe_find_node(data_set.nodes, host_uname); -+ /* Clear any previous constraints for 'dest' */ -+ clear_resource(rsc_id, dest->details->uname, data_set.nodes, cib_conn); -+ -+ /* Record an explicit preference for 'dest' */ -+ rc = prefer_resource(rsc_id, dest->details->uname, cib_conn); -+ -+ if(do_force && g_list_length(rsc->running_on) == 1) { -+ node_t *current = rsc->running_on->data; -+ -+ /* Ban the original location */ -+ ban_resource(rsc_id, current->details->uname, NULL, cib_conn); - } - -- if (rsc == NULL) { -- CMD_ERR("Resource %s not moved: not found\n", rsc_id); -+ } else if (rsc_cmd == 'B' && host_uname) { -+ resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ node_t *dest = pe_find_node(data_set.nodes, host_uname); - -- } else if (rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) { -- CMD_ERR("Resource %s not moved: active on multiple nodes\n", rsc_id); -+ rc = -ENXIO; -+ if 
(rsc_id == NULL) { -+ CMD_ERR("No value specified for --resource\n"); -+ goto bail; -+ } else if(rsc == NULL) { -+ CMD_ERR("Resource '%s' not moved: unknown\n", rsc_id); - -- } else if (host_uname != NULL && dest == NULL) { -- CMD_ERR("Error performing operation: %s is not a known node\n", host_uname); -- rc = -ENXIO; -+ } else if (dest == NULL) { -+ CMD_ERR("Error performing operation: node '%s' is unknown\n", host_uname); -+ goto bail; -+ } -+ rc = ban_resource(rsc_id, dest->details->uname, NULL, cib_conn); -+ -+ } else if (rsc_cmd == 'B' || rsc_cmd == 'M') { -+ resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ -+ rc = -ENXIO; -+ if (rsc_id == NULL) { -+ CMD_ERR("No value specified for --resource\n"); -+ goto bail; -+ } - -- } else if (host_uname != NULL && safe_str_eq(current_uname, host_uname)) { -- CMD_ERR("Error performing operation: " -- "%s is already active on %s\n", rsc_id, host_uname); -+ rc = -EINVAL; -+ if(rsc == NULL) { -+ CMD_ERR("Resource '%s' not moved: unknown\n", rsc_id); -+ -+ } else if(g_list_length(rsc->running_on) == 1) { -+ node_t *current = rsc->running_on->data; -+ rc = ban_resource(rsc_id, current->details->uname, NULL, cib_conn); -+ -+ } else if(scope_master && rsc->variant == pe_master) { -+ int count = 0; -+ GListPtr iter = NULL; -+ node_t *current = NULL; -+ -+ for(iter = rsc->children; iter; iter = iter->next) { -+ resource_t *child = (resource_t *)iter->data; -+ if(child->role == RSC_ROLE_MASTER) { -+ count++; -+ current = child->running_on->data; -+ } -+ } - -- } else if (current_uname != NULL && (do_force || host_uname == NULL)) { -- /* coverity[var_deref_model] False positive */ -- rc = move_resource(rsc_id, current_uname, host_uname, cib_conn); -+ if(count == 1 && current) { -+ rc = ban_resource(rsc_id, current->details->uname, NULL, cib_conn); - -- } else if (host_uname != NULL) { -- /* coverity[var_deref_model] False positive */ -- rc = move_resource(rsc_id, NULL, host_uname, cib_conn); -+ } else { -+ CMD_ERR("Resource '%s' not moved: currently promoted in %d locations.\n", rsc_id, count); -+ CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" -+ " --ban --master --host \n", rsc_id); -+ } - - } else { -- CMD_ERR("Resource %s not moved: not-active and no preferred location specified.\n", -- rsc_id); -- rc = -EINVAL; -+ CMD_ERR("Resource '%s' not moved: active in %d locations.\n", rsc_id, g_list_length(rsc->running_on)); -+ CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --host \n", rsc_id); -+ -+ if(rsc->variant == pe_master && g_list_length(rsc->running_on) > 0) { -+ CMD_ERR("You can prevent '%s' from being promoted at its current location with: --ban --master\n", rsc_id); -+ CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" -+ " --ban --master --host \n", rsc_id); -+ } - } - - } else if (rsc_cmd == 'G') { -@@ -1870,22 +2057,31 @@ main(int argc, char **argv) - /* coverity[var_deref_model] False positive */ - rc = delete_resource_attr(rsc_id, prop_set, prop_id, prop_name, cib_conn, &data_set); - -- } else if (rsc_cmd == 'P') { -+ } else if (rsc_cmd == 'C' && rsc_id) { -+ resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); -+ -+ crm_debug("Re-checking the state of %s on %s", rsc_id, host_uname); -+ if(rsc) { -+ rc = delete_lrm_rsc(cib_conn, crmd_channel, host_uname, rsc, &data_set); -+ } else { -+ rc = -ENODEV; -+ } -+ -+ if (rc == pcmk_ok) { -+ start_mainloop(); -+ } -+ -+ } else if (rsc_cmd == 'C') { - xmlNode *cmd = 
create_request(CRM_OP_REPROBE, NULL, host_uname, - CRM_SYSTEM_CRMD, crm_system_name, our_pid); - -+ crm_debug("Re-checking the state of all resources on %s", host_uname); - if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { - start_mainloop(); - } - - free_xml(cmd); - -- } else if (rsc_cmd == 'R') { -- rc = refresh_lrm(crmd_channel, host_uname); -- if (rc == pcmk_ok) { -- start_mainloop(); -- } -- - } else if (rsc_cmd == 'D') { - xmlNode *msg_data = NULL; - -diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c -index 38ebe26..d0937f8 100644 ---- a/tools/crm_simulate.c -+++ b/tools/crm_simulate.c -@@ -1,16 +1,16 @@ --/* -+/* - * Copyright (C) 2009 Andrew Beekhof -- * -+ * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. -- * -+ * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. -- * -+ * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -@@ -46,8 +46,6 @@ gboolean bringing_nodes_online = FALSE; - #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" - /* #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" */ - --#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -- - #define quiet_log(fmt, args...) 
do { \ - if(quiet == FALSE) { \ - printf(fmt , ##args); \ -@@ -135,14 +133,14 @@ inject_node_state(cib_t * cib_conn, const char *node, const char *uuid) - if (cib_object && ID(cib_object) == NULL) { - crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath); - crm_log_xml_warn(cib_object, "Duplicates"); -- crm_exit(1); -+ crm_exit(ENOTUNIQ); - } - - if (rc == -ENXIO) { - char *found_uuid = NULL; - - if (uuid == NULL) { -- query_node_uuid(cib_conn, node, &found_uuid); -+ query_node_uuid(cib_conn, node, &found_uuid, NULL); - } else { - found_uuid = strdup(uuid); - } -@@ -294,6 +292,8 @@ create_op(xmlNode * cib_resource, const char *task, int interval, int outcome) - op->rc = outcome; - op->op_status = 0; - op->params = NULL; /* TODO: Fill me in */ -+ op->t_run = time(NULL); -+ op->t_rcchange = op->t_run; - - op->call_id = 0; - for (xop = __xml_first_child(cib_resource); xop != NULL; xop = __xml_next(xop)) { -@@ -362,7 +362,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - GListPtr gIter = NULL; - lrmd_event_data_t *op = NULL; - int target_outcome = 0; -- gboolean uname_is_uuid = TRUE; -+ gboolean uname_is_uuid = FALSE; - - const char *rtype = NULL; - const char *rclass = NULL; -@@ -376,6 +376,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); - - char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); -+ char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID); - const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); - - if (safe_str_eq(operation, "probe_complete")) { -@@ -385,7 +386,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - - if (action_rsc == NULL) { - crm_log_xml_err(action->xml, "Bad"); -- free(node); -+ free(node); free(uuid); - return FALSE; - } - -@@ -422,7 +423,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - uname_is_uuid = TRUE; - } - -- cib_node = inject_node_state(global_cib, node, uname_is_uuid ? node : NULL); -+ cib_node = inject_node_state(global_cib, node, uname_is_uuid ? 
node : uuid); - CRM_ASSERT(cib_node != NULL); - - cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); -@@ -464,7 +465,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) - CRM_ASSERT(rc == pcmk_ok); - - done: -- free(node); -+ free(node); free(uuid); - free_xml(cib_node); - action->confirmed = TRUE; - update_graph(graph, action); -@@ -476,10 +477,15 @@ exec_crmd_action(crm_graph_t * graph, crm_action_t * action) - { - const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); - const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); -+ xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); - - action->confirmed = TRUE; - -- quiet_log(" * Cluster action: %s on %s\n", task, node); -+ if(rsc) { -+ quiet_log(" * Cluster action: %s for %s on %s\n", task, ID(rsc), node); -+ } else { -+ quiet_log(" * Cluster action: %s on %s\n", task, node); -+ } - update_graph(graph, action); - return TRUE; - } -@@ -528,6 +534,13 @@ print_cluster_status(pe_working_set_t * data_set) - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; - const char *node_mode = NULL; -+ char *node_name = NULL; -+ -+ if(node->details->remote_rsc) { -+ node_name = g_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); -+ } else { -+ node_name = g_strdup_printf("%s", node->details->uname); -+ } - - if (node->details->unclean) { - if (node->details->online && node->details->unclean) { -@@ -555,20 +568,23 @@ print_cluster_status(pe_working_set_t * data_set) - - } else if (node->details->online) { - node_mode = "online"; -- online_nodes = add_list_element(online_nodes, node->details->uname); -+ online_nodes = add_list_element(online_nodes, node_name); -+ free(node_name); - continue; - - } else { - node_mode = "OFFLINE"; -- offline_nodes = add_list_element(offline_nodes, node->details->uname); -+ offline_nodes = add_list_element(offline_nodes, node_name); -+ free(node_name); - continue; - } - -- if (safe_str_eq(node->details->uname, node->details->id)) { -- printf("Node %s: %s\n", node->details->uname, node_mode); -+ if (safe_str_eq(node_name, node->details->id)) { -+ printf("Node %s: %s\n", node_name, node_mode); - } else { -- printf("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); -+ printf("Node %s (%s): %s\n", node_name, node->details->id, node_mode); - } -+ free(node_name); - } - - if (online_nodes) { -@@ -925,6 +941,7 @@ modify_configuration(pe_working_set_t * data_set, - rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, - cib_sync_call | cib_scope_local); - CRM_ASSERT(rc == pcmk_ok); -+ free_xml(cib_node); - } - - for (gIter = node_down; gIter != NULL; gIter = gIter->next) { -@@ -937,6 +954,7 @@ modify_configuration(pe_working_set_t * data_set, - rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, - cib_sync_call | cib_scope_local); - CRM_ASSERT(rc == pcmk_ok); -+ free_xml(cib_node); - } - - for (gIter = node_fail; gIter != NULL; gIter = gIter->next) { -@@ -950,6 +968,7 @@ modify_configuration(pe_working_set_t * data_set, - rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, - cib_sync_call | cib_scope_local); - CRM_ASSERT(rc == pcmk_ok); -+ free_xml(cib_node); - } - - for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) { -@@ -1077,7 +1096,7 @@ setup_input(const char *input, const char *output) - - if (cib_object == NULL) { - fprintf(stderr, "Live CIB query 
failed: empty result\n"); -- crm_exit(3); -+ crm_exit(ENOTCONN); - } - - } else if (safe_str_eq(input, "-")) { -@@ -1093,12 +1112,12 @@ setup_input(const char *input, const char *output) - - if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { - free_xml(cib_object); -- crm_exit(-ENOKEY); -+ crm_exit(ENOKEY); - } - - if (validate_xml(cib_object, NULL, FALSE) != TRUE) { - free_xml(cib_object); -- crm_exit(-pcmk_err_dtd_validation); -+ crm_exit(pcmk_err_dtd_validation); - } - - if (output == NULL) { -@@ -1158,13 +1177,13 @@ static struct crm_option long_options[] = { - {"ticket-activate", 1, 0, 'e', "Activate a ticket"}, - - {"-spacer-", 0, 0, '-', "\nOutput Options:"}, -- -+ - {"save-input", 1, 0, 'I', "\tSave the input configuration to the named file"}, - {"save-output", 1, 0, 'O', "Save the output configuration to the named file"}, - {"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"}, - {"save-dotfile", 1, 0, 'D', "Save the transition graph (DOT format) to the named file"}, - {"all-actions", 0, 0, 'a', "\tDisplay all possible actions in the DOT graph - even ones not part of the transition"}, -- -+ - {"-spacer-", 0, 0, '-', "\nData Source:"}, - {"live-check", 0, 0, 'L', "\tConnect to the CIB and use the current contents as input"}, - {"xml-file", 1, 0, 'x', "\tRetrieve XML from the named file"}, -@@ -1175,7 +1194,7 @@ static struct crm_option long_options[] = { - {"-spacer-", 0, 0, '-', " crm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 --op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml", pcmk_option_example}, - {"-spacer-", 0, 0, '-', "Now see what the reaction to the stop failure would be", pcmk_option_paragraph}, - {"-spacer-", 0, 0, '-', " crm_simulate -S --xml-file /tmp/memcached-test.xml", pcmk_option_example}, -- -+ - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ -@@ -1495,20 +1514,7 @@ main(int argc, char **argv) - input = NULL; /* Don't try and free it twice */ - - if (graph_file != NULL) { -- char *msg_buffer = dump_xml_formatted(data_set.graph); -- FILE *graph_strm = fopen(graph_file, "w"); -- -- if (graph_strm == NULL) { -- crm_perror(LOG_ERR, "Could not open %s for writing", graph_file); -- -- } else { -- if (fprintf(graph_strm, "%s\n", msg_buffer) < 0) { -- crm_perror(LOG_ERR, "Write to %s failed", graph_file); -- } -- fflush(graph_strm); -- fclose(graph_strm); -- } -- free(msg_buffer); -+ write_xml_file(data_set.graph, graph_file, FALSE); - } - - if (dot_file != NULL) { -diff --git a/tools/crmadmin.c b/tools/crmadmin.c -index ca0a318..c7ac30f 100644 ---- a/tools/crmadmin.c -+++ b/tools/crmadmin.c -@@ -391,7 +391,7 @@ crmadmin_ipc_connection_destroy(gpointer user_data) - if (mainloop) { - g_main_quit(mainloop); - } else { -- crm_exit(1); -+ crm_exit(ENOTCONN); - } - } - -@@ -503,7 +503,7 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) - if (BE_SILENT && dc != NULL) { - fprintf(stderr, "%s\n", dc); - } -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - } - -@@ -512,7 +512,7 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) - if (received_responses >= expected_responses) { - crm_trace("Received expected number (%d) of messages from Heartbeat." 
- " Exiting normally.", expected_responses); -- crm_exit(0); -+ crm_exit(pcmk_ok); - } - - message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); -diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c -index 07acad5..b24c7bc 100644 ---- a/tools/ipmiservicelogd.c -+++ b/tools/ipmiservicelogd.c -@@ -511,7 +511,7 @@ entity_change(enum ipmi_update_e op, ipmi_domain_t * domain, ipmi_entity_t * ent - rv = ipmi_entity_add_sensor_update_handler(entity, sensor_change, entity); - if (rv) { - crm_err("ipmi_entity_set_sensor_update_handler: 0x%x", rv); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - } - } -@@ -548,7 +548,7 @@ main(int argc, char *argv[]) - os_hnd = ipmi_posix_setup_os_handler(); - if (!os_hnd) { - crm_err("ipmi_smi_setup_con: Unable to allocate os handler"); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - - /* Initialize the OpenIPMI library. */ -@@ -559,7 +559,7 @@ main(int argc, char *argv[]) - if (rv) { - crm_err("Error parsing command arguments, argument %d: %s", curr_arg, strerror(rv)); - usage(argv[0]); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - #endif - -@@ -572,7 +572,7 @@ main(int argc, char *argv[]) - if (rv) { - crm_err("ipmi_ip_setup_con: %s", strerror(rv)); - crm_err("Error: Is IPMI configured correctly?"); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - #else - /* If all you need is an SMI connection, this is all the code you -@@ -587,14 +587,14 @@ main(int argc, char *argv[]) - if (rv) { - crm_err("ipmi_smi_setup_con: %s", strerror(rv)); - crm_err("Error: Is IPMI configured correctly?"); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - #endif - - rv = ipmi_open_domain("", &con, 1, setup_done, NULL, NULL, NULL, NULL, 0, NULL); - if (rv) { - crm_err("ipmi_init_domain: %s", strerror(rv)); -- crm_exit(1); -+ crm_exit(pcmk_err_generic); - } - - /* This is the main loop of the event-driven program. 
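A recurring theme in the crm_simulate, crmadmin and ipmiservicelogd hunks above is replacing bare crm_exit(1) calls with errno-style codes such as ENOTCONN and pcmk_err_generic. Scripts can then translate the status into text with crm_error, exactly as the reworked regression harness below does. A small sketch; the crmadmin invocation is only an example:

    crmadmin --dc_lookup
    rc=$?
    if [ $rc -ne 0 ]; then
        echo "crmadmin failed: `crm_error $rc` ($rc)" 1>&2
    fi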
-diff --git a/tools/regression.exp b/tools/regression.exp -index 1e9bd44..ef27353 100755 ---- a/tools/regression.exp -+++ b/tools/regression.exp -@@ -1,103 +1,244 @@ -+=#=#=#= Begin test: 2006-W01-7 =#=#=#= - Date: 2006-01-08 00:00:00Z --Date: 2006-W01-7 00:00:00Z -+=#=#=#= End test: 2006-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2006-W01-7 -+=#=#=#= Begin test: 2006-W01-7 - round-trip =#=#=#= -+Date: 2006-W01-7 00:00:00Z -+=#=#=#= End test: 2006-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2006-W01-7 - round-trip -+=#=#=#= Begin test: 2006-W01-1 =#=#=#= - Date: 2006-01-02 00:00:00Z --Date: 2006-W01-1 00:00:00Z -+=#=#=#= End test: 2006-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2006-W01-1 -+=#=#=#= Begin test: 2006-W01-1 - round-trip =#=#=#= -+Date: 2006-W01-1 00:00:00Z -+=#=#=#= End test: 2006-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2006-W01-1 - round-trip -+=#=#=#= Begin test: 2007-W01-7 =#=#=#= - Date: 2007-01-07 00:00:00Z --Date: 2007-W01-7 00:00:00Z -+=#=#=#= End test: 2007-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2007-W01-7 -+=#=#=#= Begin test: 2007-W01-7 - round-trip =#=#=#= -+Date: 2007-W01-7 00:00:00Z -+=#=#=#= End test: 2007-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2007-W01-7 - round-trip -+=#=#=#= Begin test: 2007-W01-1 =#=#=#= - Date: 2007-01-01 00:00:00Z --Date: 2007-W01-1 00:00:00Z -+=#=#=#= End test: 2007-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2007-W01-1 -+=#=#=#= Begin test: 2007-W01-1 - round-trip =#=#=#= -+Date: 2007-W01-1 00:00:00Z -+=#=#=#= End test: 2007-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2007-W01-1 - round-trip -+=#=#=#= Begin test: 2008-W01-7 =#=#=#= - Date: 2008-01-06 00:00:00Z --Date: 2008-W01-7 00:00:00Z -+=#=#=#= End test: 2008-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2008-W01-7 -+=#=#=#= Begin test: 2008-W01-7 - round-trip =#=#=#= -+Date: 2008-W01-7 00:00:00Z -+=#=#=#= End test: 2008-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2008-W01-7 - round-trip -+=#=#=#= Begin test: 2008-W01-1 =#=#=#= - Date: 2007-12-31 00:00:00Z --Date: 2008-W01-1 00:00:00Z -+=#=#=#= End test: 2008-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2008-W01-1 -+=#=#=#= Begin test: 2008-W01-1 - round-trip =#=#=#= -+Date: 2008-W01-1 00:00:00Z -+=#=#=#= End test: 2008-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2008-W01-1 - round-trip -+=#=#=#= Begin test: 2009-W01-7 =#=#=#= - Date: 2009-01-04 00:00:00Z --Date: 2009-W01-7 00:00:00Z -+=#=#=#= End test: 2009-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2009-W01-7 -+=#=#=#= Begin test: 2009-W01-7 - round-trip =#=#=#= -+Date: 2009-W01-7 00:00:00Z -+=#=#=#= End test: 2009-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2009-W01-7 - round-trip -+=#=#=#= Begin test: 2009-W01-1 =#=#=#= - Date: 2008-12-29 00:00:00Z --Date: 2009-W01-1 00:00:00Z -+=#=#=#= End test: 2009-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2009-W01-1 -+=#=#=#= Begin test: 2009-W01-1 - round-trip =#=#=#= -+Date: 2009-W01-1 00:00:00Z -+=#=#=#= End test: 2009-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2009-W01-1 - round-trip -+=#=#=#= Begin test: 2010-W01-7 =#=#=#= - Date: 2010-01-10 00:00:00Z --Date: 2010-W01-7 00:00:00Z -+=#=#=#= End test: 2010-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2010-W01-7 -+=#=#=#= Begin test: 2010-W01-7 - round-trip =#=#=#= -+Date: 2010-W01-7 00:00:00Z -+=#=#=#= End test: 2010-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2010-W01-7 - round-trip -+=#=#=#= Begin test: 2010-W01-1 =#=#=#= - Date: 
2010-01-04 00:00:00Z --Date: 2010-W01-1 00:00:00Z -+=#=#=#= End test: 2010-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2010-W01-1 -+=#=#=#= Begin test: 2010-W01-1 - round-trip =#=#=#= -+Date: 2010-W01-1 00:00:00Z -+=#=#=#= End test: 2010-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2010-W01-1 - round-trip -+=#=#=#= Begin test: 2011-W01-7 =#=#=#= - Date: 2011-01-09 00:00:00Z --Date: 2011-W01-7 00:00:00Z -+=#=#=#= End test: 2011-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2011-W01-7 -+=#=#=#= Begin test: 2011-W01-7 - round-trip =#=#=#= -+Date: 2011-W01-7 00:00:00Z -+=#=#=#= End test: 2011-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2011-W01-7 - round-trip -+=#=#=#= Begin test: 2011-W01-1 =#=#=#= - Date: 2011-01-03 00:00:00Z --Date: 2011-W01-1 00:00:00Z -+=#=#=#= End test: 2011-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2011-W01-1 -+=#=#=#= Begin test: 2011-W01-1 - round-trip =#=#=#= -+Date: 2011-W01-1 00:00:00Z -+=#=#=#= End test: 2011-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2011-W01-1 - round-trip -+=#=#=#= Begin test: 2012-W01-7 =#=#=#= - Date: 2012-01-08 00:00:00Z --Date: 2012-W01-7 00:00:00Z -+=#=#=#= End test: 2012-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2012-W01-7 -+=#=#=#= Begin test: 2012-W01-7 - round-trip =#=#=#= -+Date: 2012-W01-7 00:00:00Z -+=#=#=#= End test: 2012-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2012-W01-7 - round-trip -+=#=#=#= Begin test: 2012-W01-1 =#=#=#= - Date: 2012-01-02 00:00:00Z --Date: 2012-W01-1 00:00:00Z -+=#=#=#= End test: 2012-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2012-W01-1 -+=#=#=#= Begin test: 2012-W01-1 - round-trip =#=#=#= -+Date: 2012-W01-1 00:00:00Z -+=#=#=#= End test: 2012-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2012-W01-1 - round-trip -+=#=#=#= Begin test: 2013-W01-7 =#=#=#= - Date: 2013-01-06 00:00:00Z --Date: 2013-W01-7 00:00:00Z -+=#=#=#= End test: 2013-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2013-W01-7 -+=#=#=#= Begin test: 2013-W01-7 - round-trip =#=#=#= -+Date: 2013-W01-7 00:00:00Z -+=#=#=#= End test: 2013-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2013-W01-7 - round-trip -+=#=#=#= Begin test: 2013-W01-1 =#=#=#= - Date: 2012-12-31 00:00:00Z --Date: 2013-W01-1 00:00:00Z -+=#=#=#= End test: 2013-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2013-W01-1 -+=#=#=#= Begin test: 2013-W01-1 - round-trip =#=#=#= -+Date: 2013-W01-1 00:00:00Z -+=#=#=#= End test: 2013-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2013-W01-1 - round-trip -+=#=#=#= Begin test: 2014-W01-7 =#=#=#= - Date: 2014-01-05 00:00:00Z --Date: 2014-W01-7 00:00:00Z -+=#=#=#= End test: 2014-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2014-W01-7 -+=#=#=#= Begin test: 2014-W01-7 - round-trip =#=#=#= -+Date: 2014-W01-7 00:00:00Z -+=#=#=#= End test: 2014-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2014-W01-7 - round-trip -+=#=#=#= Begin test: 2014-W01-1 =#=#=#= - Date: 2013-12-30 00:00:00Z --Date: 2014-W01-1 00:00:00Z -+=#=#=#= End test: 2014-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2014-W01-1 -+=#=#=#= Begin test: 2014-W01-1 - round-trip =#=#=#= -+Date: 2014-W01-1 00:00:00Z -+=#=#=#= End test: 2014-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2014-W01-1 - round-trip -+=#=#=#= Begin test: 2015-W01-7 =#=#=#= - Date: 2015-01-04 00:00:00Z --Date: 2015-W01-7 00:00:00Z -+=#=#=#= End test: 2015-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2015-W01-7 -+=#=#=#= Begin test: 2015-W01-7 - round-trip =#=#=#= -+Date: 2015-W01-7 00:00:00Z -+=#=#=#= 
End test: 2015-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2015-W01-7 - round-trip -+=#=#=#= Begin test: 2015-W01-1 =#=#=#= - Date: 2014-12-29 00:00:00Z --Date: 2015-W01-1 00:00:00Z -+=#=#=#= End test: 2015-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2015-W01-1 -+=#=#=#= Begin test: 2015-W01-1 - round-trip =#=#=#= -+Date: 2015-W01-1 00:00:00Z -+=#=#=#= End test: 2015-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2015-W01-1 - round-trip -+=#=#=#= Begin test: 2016-W01-7 =#=#=#= - Date: 2016-01-10 00:00:00Z --Date: 2016-W01-7 00:00:00Z -+=#=#=#= End test: 2016-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2016-W01-7 -+=#=#=#= Begin test: 2016-W01-7 - round-trip =#=#=#= -+Date: 2016-W01-7 00:00:00Z -+=#=#=#= End test: 2016-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2016-W01-7 - round-trip -+=#=#=#= Begin test: 2016-W01-1 =#=#=#= - Date: 2016-01-04 00:00:00Z --Date: 2016-W01-1 00:00:00Z -+=#=#=#= End test: 2016-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2016-W01-1 -+=#=#=#= Begin test: 2016-W01-1 - round-trip =#=#=#= -+Date: 2016-W01-1 00:00:00Z -+=#=#=#= End test: 2016-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2016-W01-1 - round-trip -+=#=#=#= Begin test: 2017-W01-7 =#=#=#= - Date: 2017-01-08 00:00:00Z --Date: 2017-W01-7 00:00:00Z -+=#=#=#= End test: 2017-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2017-W01-7 -+=#=#=#= Begin test: 2017-W01-7 - round-trip =#=#=#= -+Date: 2017-W01-7 00:00:00Z -+=#=#=#= End test: 2017-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2017-W01-7 - round-trip -+=#=#=#= Begin test: 2017-W01-1 =#=#=#= - Date: 2017-01-02 00:00:00Z --Date: 2017-W01-1 00:00:00Z -+=#=#=#= End test: 2017-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2017-W01-1 -+=#=#=#= Begin test: 2017-W01-1 - round-trip =#=#=#= -+Date: 2017-W01-1 00:00:00Z -+=#=#=#= End test: 2017-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2017-W01-1 - round-trip -+=#=#=#= Begin test: 2018-W01-7 =#=#=#= - Date: 2018-01-07 00:00:00Z --Date: 2018-W01-7 00:00:00Z -+=#=#=#= End test: 2018-W01-7 - OK (0) =#=#=#= - * Passed: iso8601 - 2018-W01-7 -+=#=#=#= Begin test: 2018-W01-7 - round-trip =#=#=#= -+Date: 2018-W01-7 00:00:00Z -+=#=#=#= End test: 2018-W01-7 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2018-W01-7 - round-trip -+=#=#=#= Begin test: 2018-W01-1 =#=#=#= - Date: 2018-01-01 00:00:00Z --Date: 2018-W01-1 00:00:00Z -+=#=#=#= End test: 2018-W01-1 - OK (0) =#=#=#= - * Passed: iso8601 - 2018-W01-1 -+=#=#=#= Begin test: 2018-W01-1 - round-trip =#=#=#= -+Date: 2018-W01-1 00:00:00Z -+=#=#=#= End test: 2018-W01-1 - round-trip - OK (0) =#=#=#= -+* Passed: iso8601 - 2018-W01-1 - round-trip -+=#=#=#= Begin test: 2009-W53-07 =#=#=#= - Date: 2009-W53-7 00:00:00Z -+=#=#=#= End test: 2009-W53-07 - OK (0) =#=#=#= - * Passed: iso8601 - 2009-W53-07 -+=#=#=#= Begin test: 2009-01-31 + 1 Month =#=#=#= - Date: 2009-01-31 00:00:00Z - Duration: 0000-01-00 00:00:00Z - Duration ends at: 2009-02-28 00:00:00Z -+=#=#=#= End test: 2009-01-31 + 1 Month - OK (0) =#=#=#= - * Passed: iso8601 - 2009-01-31 + 1 Month -+=#=#=#= Begin test: 2009-01-31 + 2 Months =#=#=#= - Date: 2009-01-31 00:00:00Z - Duration: 0000-02-00 00:00:00Z - Duration ends at: 2009-03-31 00:00:00Z -+=#=#=#= End test: 2009-01-31 + 2 Months - OK (0) =#=#=#= - * Passed: iso8601 - 2009-01-31 + 2 Months -+=#=#=#= Begin test: 2009-01-31 + 3 Months =#=#=#= - Date: 2009-01-31 00:00:00Z - Duration: 0000-03-00 00:00:00Z - Duration ends at: 2009-04-30 00:00:00Z -+=#=#=#= End test: 2009-01-31 + 3 Months - 
OK (0) =#=#=#= - * Passed: iso8601 - 2009-01-31 + 3 Months -+=#=#=#= Begin test: 2009-03-31 - 1 Month =#=#=#= - Date: 2009-03-31 00:00:00Z - Duration: 0000--01-00 00:00:00Z - Duration ends at: 2009-02-28 00:00:00Z -+=#=#=#= End test: 2009-03-31 - 1 Month - OK (0) =#=#=#= - * Passed: iso8601 - 2009-03-31 - 1 Month - Setting up shadow instance - A new shadow instance was created. To begin using it paste the following into your shell: - CIB_shadow=tools-regression ; export CIB_shadow -- -+=#=#=#= Begin test: Validate CIB =#=#=#= -+ - - - -@@ -106,8 +247,20 @@ A new shadow instance was created. To begin using it paste the following into y - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Validate CIB - OK (0) =#=#=#= -+* Passed: cibadmin - Validate CIB -+=#=#=#= Begin test: Require --force for CIB erasure =#=#=#= - The supplied command is considered dangerous. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. -- -+ - - - -@@ -116,8 +269,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Require --force for CIB erasure - Invalid argument (22) =#=#=#= - * Passed: cibadmin - Require --force for CIB erasure -- -+=#=#=#= Begin test: Allow CIB erasure with --force =#=#=#= -+ - - - -@@ -126,8 +281,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Allow CIB erasure with --force - OK (0) =#=#=#= - * Passed: cibadmin - Allow CIB erasure with --force -- -+=#=#=#= Begin test: Query CIB =#=#=#= -+ - - - -@@ -136,8 +293,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Query CIB - OK (0) =#=#=#= - * Passed: cibadmin - Query CIB -- -+=#=#=#= Begin test: Set cluster option =#=#=#= -+ - - - -@@ -150,9 +309,11 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Set cluster option - OK (0) =#=#=#= - * Passed: crm_attribute - Set cluster option -+=#=#=#= Begin test: Query new cluster option =#=#=#= - -- -+ - - - -@@ -165,8 +326,10 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Query new cluster option - OK (0) =#=#=#= - * Passed: cibadmin - Query new cluster option -- -+=#=#=#= Begin test: Query cluster options =#=#=#= -+ - - - -@@ -179,11 +342,32 @@ The supplied command is considered dangerous. To prevent accidental destruction - - - -+=#=#=#= End test: Query cluster options - OK (0) =#=#=#= - * Passed: cibadmin - Query cluster options -- -+=#=#=#= Begin test: Set no-quorum policy =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Set no-quorum policy - OK (0) =#=#=#= -+* Passed: crm_attribute - Set no-quorum policy -+=#=#=#= Begin test: Delete nvpair =#=#=#= -+ - - -- -+ -+ -+ - - - -@@ -191,7 +375,9 @@ The supplied command is considered dangerous. 
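The regression output is now bracketed by '=#=#=#= Begin test: ... =#=#=#=' and '=#=#=#= End test: ... - <status text> (<rc>) =#=#=#=' markers, so a single case can be pulled out of the expected-output file mechanically, for example:

    # Extract one test's captured output from regression.exp (sketch)
    sed -n '/^=#=#=#= Begin test: Query CIB /,/^=#=#=#= End test: Query CIB /p' regression.exp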
To prevent accidental destruction - - - -+=#=#=#= End test: Delete nvpair - OK (0) =#=#=#= - * Passed: cibadmin - Delete nvpair -+=#=#=#= Begin test: Create operaton should fail =#=#=#= - Call failed: Name not unique on network - - -@@ -200,10 +386,12 @@ Call failed: Name not unique on network - - - -- -+ - - -- -+ -+ -+ - - - -@@ -211,11 +399,14 @@ Call failed: Name not unique on network - - - --* Passed: cibadmin - Create operaton should fail with: -76, The object already exists -- -+=#=#=#= End test: Create operaton should fail - Name not unique on network (76) =#=#=#= -+* Passed: cibadmin - Create operaton should fail -+=#=#=#= Begin test: Modify cluster options section =#=#=#= -+ - - - -+ - - - -@@ -225,12 +416,15 @@ Call failed: Name not unique on network - - - -+=#=#=#= End test: Modify cluster options section - OK (0) =#=#=#= - * Passed: cibadmin - Modify cluster options section -+=#=#=#= Begin test: Query updated cluster option =#=#=#= - -- -+ - - - -+ - - - -@@ -240,11 +434,14 @@ Call failed: Name not unique on network - - - -+=#=#=#= End test: Query updated cluster option - OK (0) =#=#=#= - * Passed: cibadmin - Query updated cluster option -- -+=#=#=#= Begin test: Set duplicate cluster option =#=#=#= -+ - - - -+ - - - -@@ -257,12 +454,18 @@ Call failed: Name not unique on network - - - -+=#=#=#= End test: Set duplicate cluster option - OK (0) =#=#=#= - * Passed: crm_attribute - Set duplicate cluster option --Please choose from one of the matches above and suppy the 'id' with --attr-id -- -+=#=#=#= Begin test: Setting multiply defined cluster option should fail =#=#=#= -+Error performing operation: Name not unique on network -+Multiple attributes match name=cluster-delay -+ Value: 60s (id=cib-bootstrap-options-cluster-delay) -+ Value: 40s (id=duplicate-cluster-delay) -+ - - - -+ - - - -@@ -275,11 +478,14 @@ Please choose from one of the matches above and suppy the 'id' with --attr-id - - - --* Passed: crm_attribute - Setting multiply defined cluster option should fail with -216, Could not set cluster option -- -+=#=#=#= End test: Setting multiply defined cluster option should fail - Name not unique on network (76) =#=#=#= -+* Passed: crm_attribute - Setting multiply defined cluster option should fail -+=#=#=#= Begin test: Set cluster option with -s =#=#=#= -+ - - - -+ - - - -@@ -292,13 +498,17 @@ Please choose from one of the matches above and suppy the 'id' with --attr-id - - - -+=#=#=#= End test: Set cluster option with -s - OK (0) =#=#=#= - * Passed: crm_attribute - Set cluster option with -s -+=#=#=#= Begin test: Delete cluster option with -i =#=#=#= - Deleted crm_config option: id=(null) name=cluster-delay - -- -+ - - -- -+ -+ -+ - - - -@@ -309,55 +519,61 @@ Deleted crm_config option: id=(null) name=cluster-delay - - - -+=#=#=#= End test: Delete cluster option with -i - OK (0) =#=#=#= - * Passed: crm_attribute - Delete cluster option with -i -- -+=#=#=#= Begin test: Create node1 and bring it online =#=#=#= -+ -+Current cluster status: -+ -+ -+Performing requested modifications -+ + Bringing node node1 online -+ -+Transition Summary: -+ -+Executing cluster transition: -+ -+Revised cluster status: -+Online: [ node1 ] -+ -+ -+ - - -- -- -- -+ -+ - -- -- -- -- -- -- -- -- -- --* Passed: cibadmin - Create node entry -- -- -- -- - - - - - -- -+ - - - - - -- -+ - - --* Passed: cibadmin - Create node status entry -- -+=#=#=#= End test: Create node1 and bring it online - OK (0) =#=#=#= -+* Passed: crm_simulate - Create node1 and bring it online -+=#=#=#= Begin test: Create node 
attribute =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -365,23 +581,27 @@ Deleted crm_config option: id=(null) name=cluster-delay - - - -- -+ - - -+=#=#=#= End test: Create node attribute - OK (0) =#=#=#= - * Passed: crm_attribute - Create node attribute -- -- -+=#=#=#= Begin test: Query new node attribute =#=#=#= -+ -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -389,22 +609,26 @@ Deleted crm_config option: id=(null) name=cluster-delay - - - -- -+ - - -+=#=#=#= End test: Query new node attribute - OK (0) =#=#=#= - * Passed: cibadmin - Query new node attribute --Digest: -+=#=#=#= Begin test: Digest calculation =#=#=#= -+Digest: - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -412,23 +636,27 @@ Digest: -+ - - -+=#=#=#= End test: Digest calculation - OK (0) =#=#=#= - * Passed: cibadmin - Digest calculation -+=#=#=#= Begin test: Replace operation should fail =#=#=#= - Call failed: Update was older than existing configuration -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -436,24 +664,28 @@ Call failed: Update was older than existing configuration - - - -- -+ - - --* Passed: cibadmin - Replace operation should fail with: -45, Update was older than existing configuration -+=#=#=#= End test: Replace operation should fail - Update was older than existing configuration (205) =#=#=#= -+* Passed: cibadmin - Replace operation should fail -+=#=#=#= Begin test: Default standby value =#=#=#= - Error performing operation: No such device or address - scope=status name=standby value=off -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -461,23 +693,27 @@ scope=status name=standby value=off - - - -- -+ - - -+=#=#=#= End test: Default standby value - OK (0) =#=#=#= - * Passed: crm_standby - Default standby value -- -+=#=#=#= Begin test: Set standby status =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -- -+ -+ -+ -+ - - - -@@ -485,24 +721,28 @@ scope=status name=standby value=off - - - -- -+ - - -+=#=#=#= End test: Set standby status - OK (0) =#=#=#= - * Passed: crm_standby - Set standby status -+=#=#=#= Begin test: Query standby value =#=#=#= - scope=nodes name=standby value=true -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -- -+ -+ -+ -+ - - - -@@ -510,29 +750,33 @@ scope=nodes name=standby value=true - - - -- -+ - - -+=#=#=#= End test: Query standby value - OK (0) =#=#=#= - * Passed: crm_standby - Query standby value --Deleted nodes attribute: id=nodes-clusterNode-UUID-standby name=standby -+=#=#=#= Begin test: Delete standby value =#=#=#= -+Deleted nodes attribute: id=nodes-node1-standby name=standby - - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -540,22 +784,26 @@ Could not establish attrd connection: Connection refused (111) - - - -- -+ - - -+=#=#=#= End test: Delete standby value - OK (0) =#=#=#= - * Passed: crm_standby - Delete standby value -- -+=#=#=#= Begin test: Create a resource =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -565,22 +813,26 @@ Could not establish attrd connection: Connection refused (111) - - - -- -+ - - -+=#=#=#= End test: Create a resource - OK (0) =#=#=#= - * Passed: cibadmin - Create a resource -- -+=#=#=#= Begin test: Create a resource meta attribute =#=#=#= -+ - - -- 
-+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -594,23 +846,27 @@ Could not establish attrd connection: Connection refused (111) - - - -- -+ - - -+=#=#=#= End test: Create a resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Create a resource meta attribute -+=#=#=#= Begin test: Query a resource meta attribute =#=#=#= - false -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -624,23 +880,27 @@ false - - - -- -+ - - -+=#=#=#= End test: Query a resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Query a resource meta attribute -+=#=#=#= Begin test: Remove a resource meta attribute =#=#=#= - Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -652,22 +912,26 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed - - - -- -+ - - -+=#=#=#= End test: Remove a resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Remove a resource meta attribute -- -+=#=#=#= Begin test: Create a resource attribute =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -682,23 +946,27 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed - - - -- -+ - - -+=#=#=#= End test: Create a resource attribute - OK (0) =#=#=#= - * Passed: crm_resource - Create a resource attribute -+=#=#=#= Begin test: List the configured resources =#=#=#= - dummy (ocf::pacemaker:Dummy): Stopped -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -713,27 +981,31 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed - - - -- -+ - - -+=#=#=#= End test: List the configured resources - OK (0) =#=#=#= - * Passed: crm_resource - List the configured resources -+=#=#=#= Begin test: Set a resource's fail-count =#=#=#= - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) - Could not establish attrd connection: Connection refused (111) -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -748,30 +1020,35 @@ Could not establish attrd connection: Connection refused (111) - - - -- -- -- -- -+ -+ -+ -+ - - - - - --* Passed: crm_resource - Set a resource's fail-count --Resource dummy not moved: not-active and no preferred location specified. -+=#=#=#= End test: Set a resource's fail-count - OK (0) =#=#=#= -+* Passed: crm_failcount - Set a resource's fail-count -+=#=#=#= Begin test: Require a destination when migrating a resource that is stopped =#=#=#= -+Resource 'dummy' not moved: active in 0 locations. 
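The failure captured here is the new behaviour for moving a stopped resource: with no current location there is no sane node to move away from, so crm_resource refuses with EINVAL rather than guessing. As a sketch:

    # 'dummy' is stopped, so a bare move has nothing to move away from
    crm_resource --resource dummy --move
    echo $?    # 22 (EINVAL), the target the harness asserts for this case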
-+You can prevent 'dummy' from running on a specific location with: --ban --host - Error performing operation: Invalid argument -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -786,30 +1063,34 @@ Error performing operation: Invalid argument - - - -- -- -- -- -+ -+ -+ -+ - - - - - -+=#=#=#= End test: Require a destination when migrating a resource that is stopped - Invalid argument (22) =#=#=#= - * Passed: crm_resource - Require a destination when migrating a resource that is stopped --Error performing operation: i.dont.exist is not a known node -+=#=#=#= Begin test: Don't support migration to non-existant locations =#=#=#= -+Error performing operation: node 'i.dont.exist' is unknown - Error performing operation: No such device or address -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -824,28 +1105,205 @@ Error performing operation: No such device or address - - - -- -- -- -- -+ -+ -+ -+ - - - - - -+=#=#=#= End test: Don't support migration to non-existant locations - No such device or address (6) =#=#=#= - * Passed: crm_resource - Don't support migration to non-existant locations -- -+=#=#=#= Begin test: Create a fencing resource =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Create a fencing resource - OK (0) =#=#=#= -+* Passed: cibadmin - Create a fencing resource -+=#=#=#= Begin test: Bring resources online =#=#=#= -+ -+Current cluster status: -+Online: [ node1 ] -+ -+ dummy (ocf::pacemaker:Dummy): Stopped -+ Fence (stonith:fence_true): Stopped -+ -+Transition Summary: -+ * Start dummy (node1) -+ * Start Fence (node1) -+ -+Executing cluster transition: -+ * Resource action: dummy monitor on node1 -+ * Resource action: Fence monitor on node1 -+ * Pseudo action: probe_complete -+ * Resource action: dummy start on node1 -+ * Resource action: Fence start on node1 -+ -+Revised cluster status: -+Online: [ node1 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node1 -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Bring resources online - OK (0) =#=#=#= -+* Passed: crm_simulate - Bring resources online -+=#=#=#= Begin test: Try to move a resource to its existing location =#=#=#= -+Error performing operation: dummy is already active on node1 -+Error performing operation: Invalid argument -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Try to move a resource to its existing location - Invalid argument (22) =#=#=#= -+* Passed: crm_resource - Try to move a resource to its existing location -+=#=#=#= Begin test: Move a resource from its existing location =#=#=#= -+WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. 
-+ This will prevent dummy from running on node1 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin -+ This will be the case even if node1 is the last node in the cluster -+ This message can be disabled with --quiet -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -856,38 +1314,49 @@ Error performing operation: No such device or address - - - -+ - - -- -- -- -- -- -+ - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - --* Passed: crm_resource - Migrate a resource -- -+=#=#=#= End test: Move a resource from its existing location - OK (0) =#=#=#= -+* Passed: crm_resource - Move a resource from its existing location -+=#=#=#= Begin test: Clear out constraints generated by --move =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -898,33 +1367,48 @@ Error performing operation: No such device or address - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - --* Passed: crm_resource - Un-migrate a resource -+=#=#=#= End test: Clear out constraints generated by --move - OK (0) =#=#=#= -+* Passed: crm_resource - Clear out constraints generated by --move -+=#=#=#= Begin test: Default ticket granted state =#=#=#= - false -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -935,32 +1419,47 @@ false - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -+=#=#=#= End test: Default ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Default ticket granted state -- -+=#=#=#= Begin test: Set ticket granted state =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -971,36 +1470,51 @@ false - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Set ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Set ticket granted state -+=#=#=#= Begin test: Query ticket granted state =#=#=#= - false -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1011,36 +1525,51 @@ false - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Query ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Query ticket granted state -+=#=#=#= Begin test: Delete ticket granted state =#=#=#= - Deleted ticketA state attribute: name=granted -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1051,35 +1580,50 @@ Deleted ticketA state attribute: name=granted - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Delete ticket granted state - OK (0) =#=#=#= - * Passed: crm_ticket - Delete ticket granted state -- -+=#=#=#= Begin test: Make a ticket standby =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1090,36 +1634,51 @@ Deleted ticketA state attribute: name=granted - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Make a ticket standby - OK (0) =#=#=#= - * Passed: crm_ticket - Make a ticket standby -+=#=#=#= Begin test: Query ticket standby state =#=#=#= - true -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1130,35 +1689,50 @@ true - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Query ticket standby state - OK (0) =#=#=#= - * Passed: crm_ticket - Query ticket standby state -- -+=#=#=#= Begin test: Activate a ticket =#=#=#= -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1169,36 +1743,51 @@ true - - - -+ - - - - -- -- -- -- -+ 
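The ticket tests above and below walk the full attribute lifecycle. The crm_ticket calls behind them follow this pattern; the query and revoke flags are taken from the harness further down, while the standby/activate spellings are assumptions:

    crm_ticket -t ticketA -G granted -d false   # query 'granted', defaulting to false
    crm_ticket -t ticketA -r --force            # revoke: granted=false
    crm_ticket -t ticketA -s                    # put the ticket in standby (assumed flag)
    crm_ticket -t ticketA -a                    # activate it again (assumed flag)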
-+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Activate a ticket - OK (0) =#=#=#= - * Passed: crm_ticket - Activate a ticket -+=#=#=#= Begin test: Delete ticket standby state =#=#=#= - Deleted ticketA state attribute: name=standby -- -+ - - -- -+ -+ -+ - - - - - -- -- -- -+ -+ -+ - - - -@@ -1209,20 +1798,640 @@ Deleted ticketA state attribute: name=standby - - - -+ - - - - -- -- -- -- -+ -+ -+ -+ - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - - - - -+=#=#=#= End test: Delete ticket standby state - OK (0) =#=#=#= - * Passed: crm_ticket - Delete ticket standby state -+=#=#=#= Begin test: Ban a resource on unknown node =#=#=#= -+Error performing operation: node 'host1' is unknown -+Error performing operation: No such device or address -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Ban a resource on unknown node - No such device or address (6) =#=#=#= -+* Passed: crm_resource - Ban a resource on unknown node -+=#=#=#= Begin test: Create two more nodes and bring them online =#=#=#= -+ -+Current cluster status: -+Online: [ node1 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node1 -+ -+Performing requested modifications -+ + Bringing node node2 online -+ + Bringing node node3 online -+ -+Transition Summary: -+ * Move Fence (Started node1 -> node2) -+ -+Executing cluster transition: -+ * Resource action: dummy monitor on node3 -+ * Resource action: dummy monitor on node2 -+ * Resource action: Fence monitor on node3 -+ * Resource action: Fence monitor on node2 -+ * Pseudo action: probe_complete -+ * Resource action: Fence stop on node1 -+ * Pseudo action: all_stopped -+ * Resource action: Fence start on node2 -+ -+Revised cluster status: -+Online: [ node1 node2 node3 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node2 -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Create two more nodes and bring them online - OK (0) =#=#=#= -+* Passed: crm_simulate - Create two more nodes and bring them online -+=#=#=#= Begin test: Ban dummy from node1 =#=#=#= -+WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. -+ This will prevent dummy from running on node1 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin -+ This will be the case even if node1 is the last node in the cluster -+ This message can be disabled with --quiet -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Ban dummy from node1 - OK (0) =#=#=#= -+* Passed: crm_resource - Ban dummy from node1 -+=#=#=#= Begin test: Ban dummy from node2 =#=#=#= -+WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node2' with a score of -INFINITY for resource dummy on node2. 
-+ This will prevent dummy from running on node2 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin -+ This will be the case even if node2 is the last node in the cluster -+ This message can be disabled with --quiet -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Ban dummy from node2 - OK (0) =#=#=#= -+* Passed: crm_resource - Ban dummy from node2 -+=#=#=#= Begin test: Relocate resources due to ban =#=#=#= -+ -+Current cluster status: -+Online: [ node1 node2 node3 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node1 -+ Fence (stonith:fence_true): Started node2 -+ -+Transition Summary: -+ * Move dummy (Started node1 -> node3) -+ -+Executing cluster transition: -+ * Resource action: dummy stop on node1 -+ * Pseudo action: all_stopped -+ * Resource action: dummy start on node3 -+ -+Revised cluster status: -+Online: [ node1 node2 node3 ] -+ -+ dummy (ocf::pacemaker:Dummy): Started node3 -+ Fence (stonith:fence_true): Started node2 -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Relocate resources due to ban - OK (0) =#=#=#= -+* Passed: crm_simulate - Relocate resources due to ban -+=#=#=#= Begin test: Move dummy to node1 =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Move dummy to node1 - OK (0) =#=#=#= -+* Passed: crm_resource - Move dummy to node1 -+=#=#=#= Begin test: Clear implicit constraints for dummy on node2 =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Clear implicit constraints for dummy on node2 - OK (0) =#=#=#= -+* Passed: crm_resource - Clear implicit constraints for dummy on node2 -diff --git a/tools/regression.sh b/tools/regression.sh -index 3b8d3d4..aa478dc 100755 ---- a/tools/regression.sh -+++ b/tools/regression.sh -@@ -6,28 +6,30 @@ num_errors=0 - num_passed=0 - GREP_OPTIONS= - --function assert() { -- rc=$1; shift -+function test_assert() { - target=$1; shift -- app=$1; shift -- msg=$1; shift - cib=$1; shift -+ app=`echo "$cmd" | sed 's/\ .*//'` -+ printf "* Running: $app - $desc\n" 1>&2 - -- if [ x$cib = x0 ]; then -- : nothing -- else -+ printf "=#=#=#= Begin test: $desc =#=#=#=\n" -+ eval $VALGRIND_CMD $cmd 2>&1 -+ rc=$? 
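    # (Sketch of the calling convention: the caller sets $desc and $cmd,
    #  then invokes test_assert with the expected exit status, e.g.
    #      desc="Require --force for CIB erasure"
    #      cmd="cibadmin -E"
    #      test_assert 22
    #  Unless the optional second argument is 0, the CIB is dumped with
    #  'cibadmin -Q' afterwards so regression.exp captures the result.)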
-+ -+ if [ x$cib != x0 ]; then - cibadmin -Q - fi - -+ printf "=#=#=#= End test: $desc - `crm_error $rc` ($rc) =#=#=#=\n" -+ - if [ $rc -ne $target ]; then - num_errors=`expr $num_errors + 1` -- printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$msg" -- printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$msg" 1>&2 -+ printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc" -+ printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc" 1>&2 - return - exit 1 - else -- printf "* Passed: %-14s - %s\n" $app "$msg" -- printf "* Passed: %-14s - %s\n" $app "$msg" 1>&2 -+ printf "* Passed: %-14s - %s\n" $app "$desc" - - num_passed=`expr $num_passed + 1` - fi -@@ -44,9 +46,9 @@ VALGRIND_CMD= - while test "$done" = "0"; do - case "$1" in - -V|--verbose) verbose=1; shift;; -- -v|--valgrind) -+ -v|--valgrind) - export G_SLICE=always-malloc -- VALGRIND_CMD="valgrind -q --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=$test_home/cli.supp" -+ VALGRIND_CMD="valgrind -q --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions" - shift;; - -x) set -x; shift;; - -s) do_save=1; shift;; -@@ -66,164 +68,263 @@ function test_tools() { - export CIB_shadow_dir=$test_home - $VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow 2>&1 - export CIB_shadow=$shadow -- $VALGRIND_CMD cibadmin -Q 2>&1 -- -- $VALGRIND_CMD cibadmin -E 2>&1 -- assert $? 22 cibadmin "Require --force for CIB erasure" -- -- $VALGRIND_CMD cibadmin -E --force -- assert $? 0 cibadmin "Allow CIB erasure with --force" -- -- $VALGRIND_CMD cibadmin -Q > /tmp/$$.existing.xml -- assert $? 0 cibadmin "Query CIB" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 60s -- assert $? 0 crm_attribute "Set cluster option" -- -- $VALGRIND_CMD cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay -- assert $? 0 cibadmin "Query new cluster option" -- -- $VALGRIND_CMD cibadmin -Q -o crm_config > /tmp/$$.opt.xml -- assert $? 0 cibadmin "Query cluster options" -- -- $VALGRIND_CMD cibadmin -D -o crm_config --xml-text '' -- assert $? 0 cibadmin "Delete nvpair" -- -- $VALGRIND_CMD cibadmin -C -o crm_config --xml-file /tmp/$$.opt.xml 2>&1 -- assert $? 76 cibadmin "Create operaton should fail with: -76, The object already exists" -- -- $VALGRIND_CMD cibadmin -M -o crm_config --xml-file /tmp/$$.opt.xml -- assert $? 0 cibadmin "Modify cluster options section" -- -- $VALGRIND_CMD cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay -- assert $? 0 cibadmin "Query updated cluster option" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 40s -s duplicate -- assert $? 0 crm_attribute "Set duplicate cluster option" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 30s -- assert $? 234 crm_attribute "Setting multiply defined cluster option should fail with -216, Could not set cluster option" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -v 30s -s duplicate -- assert $? 0 crm_attribute "Set cluster option with -s" -- -- $VALGRIND_CMD crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay -- assert $? 0 crm_attribute "Delete cluster option with -i" -- -- $VALGRIND_CMD cibadmin -C -o nodes --xml-text '' -- assert $? 0 cibadmin "Create node entry" -- -- $VALGRIND_CMD cibadmin -C -o status --xml-text '' -- assert $? 0 cibadmin "Create node status entry" -- -- $VALGRIND_CMD crm_attribute -n ram -v 1024M -U clusterNode-UNAME -t nodes -- assert $? 
0 crm_attribute "Create node attribute" -- -- $VALGRIND_CMD cibadmin -Q -o nodes | grep clusterNode-UUID-ram -- assert $? 0 cibadmin "Query new node attribute" -- -- $VALGRIND_CMD cibadmin -Q | cibadmin -5 -p 2>&1 > /dev/null -- assert $? 0 cibadmin "Digest calculation" -- -- # This update will fail because it has version numbers -- $VALGRIND_CMD cibadmin -R --xml-file /tmp/$$.existing.xml 2>&1 -- assert $? 237 cibadmin "Replace operation should fail with: -45, Update was older than existing configuration" - -- crm_standby -N clusterNode-UNAME -G -- assert $? 0 crm_standby "Default standby value" -+ desc="Validate CIB" -+ cmd="cibadmin -Q" -+ test_assert 0 -+ -+ desc="Require --force for CIB erasure" -+ cmd="cibadmin -E" -+ test_assert 22 -+ -+ desc="Allow CIB erasure with --force" -+ cmd="cibadmin -E --force" -+ test_assert 0 - -- crm_standby -N clusterNode-UNAME -v true -- assert $? 0 crm_standby "Set standby status" -+ desc="Query CIB" -+ cmd="cibadmin -Q > /tmp/$$.existing.xml" -+ test_assert 0 - -- crm_standby -N clusterNode-UNAME -G -- assert $? 0 crm_standby "Query standby value" -- -- crm_standby -N clusterNode-UNAME -D 2>&1 -- assert $? 0 crm_standby "Delete standby value" -- -- $VALGRIND_CMD cibadmin -C -o resources --xml-text '' -- assert $? 0 cibadmin "Create a resource" -+ desc="Set cluster option" -+ cmd="crm_attribute -n cluster-delay -v 60s" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy --meta -p is-managed -v false -- assert $? 0 crm_resource "Create a resource meta attribute" -+ desc="Query new cluster option" -+ cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy --meta -g is-managed -- assert $? 0 crm_resource "Query a resource meta attribute" -+ desc="Query cluster options" -+ cmd="cibadmin -Q -o crm_config > /tmp/$$.opt.xml" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy --meta -d is-managed -- assert $? 0 crm_resource "Remove a resource meta attribute" -+ desc="Set no-quorum policy" -+ cmd="crm_attribute -n no-quorum-policy -v ignore" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -p delay -v 10s -- assert $? 0 crm_resource "Create a resource attribute" -+ desc="Delete nvpair" -+ cmd="cibadmin -D -o crm_config --xml-text ''" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -L -- assert $? 0 crm_resource "List the configured resources" -+ desc="Create operaton should fail" -+ cmd="cibadmin -C -o crm_config --xml-file /tmp/$$.opt.xml" -+ test_assert 76 - -- crm_failcount -r dummy -v 10 -N clusterNode-UNAME 2>&1 -- assert $? 0 crm_resource "Set a resource's fail-count" -+ desc="Modify cluster options section" -+ cmd="cibadmin -M -o crm_config --xml-file /tmp/$$.opt.xml" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -M 2>&1 -- assert $? 234 crm_resource "Require a destination when migrating a resource that is stopped" -+ desc="Query updated cluster option" -+ cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -M -N i.dont.exist 2>&1 -- assert $? 250 crm_resource "Don't support migration to non-existant locations" -+ desc="Set duplicate cluster option" -+ cmd="crm_attribute -n cluster-delay -v 40s -s duplicate" -+ test_assert 0 - -- $VALGRIND_CMD crm_resource -r dummy -M -N clusterNode-UNAME -- assert $? 
0 crm_resource "Migrate a resource" -+ desc="Setting multiply defined cluster option should fail" -+ cmd="crm_attribute -n cluster-delay -v 30s" -+ test_assert 76 - -- $VALGRIND_CMD crm_resource -r dummy -U -- assert $? 0 crm_resource "Un-migrate a resource" -+ desc="Set cluster option with -s" -+ cmd="crm_attribute -n cluster-delay -v 30s -s duplicate" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -G granted -d false -- assert $? 0 crm_ticket "Default ticket granted state" -+ desc="Delete cluster option with -i" -+ cmd="crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -r --force -- assert $? 0 crm_ticket "Set ticket granted state" -+ desc="Create node1 and bring it online" -+ cmd="crm_simulate --live-check --in-place --node-up=node1" -+ test_assert 0 -+ -+ desc="Create node attribute" -+ cmd="crm_attribute -n ram -v 1024M -U node1 -t nodes" -+ test_assert 0 -+ -+ desc="Query new node attribute" -+ cmd="cibadmin -Q -o nodes | grep node1-ram" -+ test_assert 0 -+ -+ desc="Digest calculation" -+ cmd="cibadmin -Q | cibadmin -5 -p 2>&1 > /dev/null" -+ test_assert 0 -+ -+ # This update will fail because it has version numbers -+ desc="Replace operation should fail" -+ cmd="cibadmin -R --xml-file /tmp/$$.existing.xml" -+ test_assert 205 -+ -+ desc="Default standby value" -+ cmd="crm_standby -N node1 -G" -+ test_assert 0 -+ -+ desc="Set standby status" -+ cmd="crm_standby -N node1 -v true" -+ test_assert 0 -+ -+ desc="Query standby value" -+ cmd="crm_standby -N node1 -G" -+ test_assert 0 -+ -+ desc="Delete standby value" -+ cmd="crm_standby -N node1 -D" -+ test_assert 0 -+ -+ desc="Create a resource" -+ cmd="cibadmin -C -o resources --xml-text ''" -+ test_assert 0 -+ -+ desc="Create a resource meta attribute" -+ cmd="crm_resource -r dummy --meta -p is-managed -v false" -+ test_assert 0 -+ -+ desc="Query a resource meta attribute" -+ cmd="crm_resource -r dummy --meta -g is-managed" -+ test_assert 0 -+ -+ desc="Remove a resource meta attribute" -+ cmd="crm_resource -r dummy --meta -d is-managed" -+ test_assert 0 -+ -+ desc="Create a resource attribute" -+ cmd="crm_resource -r dummy -p delay -v 10s" -+ test_assert 0 -+ -+ desc="List the configured resources" -+ cmd="crm_resource -L" -+ test_assert 0 -+ -+ desc="Set a resource's fail-count" -+ cmd="crm_failcount -r dummy -v 10 -N node1" -+ test_assert 0 -+ -+ desc="Require a destination when migrating a resource that is stopped" -+ cmd="crm_resource -r dummy -M" -+ test_assert 22 -+ -+ desc="Don't support migration to non-existant locations" -+ cmd="crm_resource -r dummy -M -N i.dont.exist" -+ test_assert 6 -+ -+ desc="Create a fencing resource" -+ cmd="cibadmin -C -o resources --xml-text ''" -+ test_assert 0 -+ -+ desc="Bring resources online" -+ cmd="crm_simulate --live-check --in-place -S" -+ test_assert 0 -+ -+ desc="Try to move a resource to its existing location" -+ cmd="crm_resource -r dummy --move --host node1" -+ test_assert 22 -+ -+ desc="Move a resource from its existing location" -+ cmd="crm_resource -r dummy --move" -+ test_assert 0 -+ -+ desc="Clear out constraints generated by --move" -+ cmd="crm_resource -r dummy --clear" -+ test_assert 0 -+ -+ desc="Default ticket granted state" -+ cmd="crm_ticket -t ticketA -G granted -d false" -+ test_assert 0 -+ -+ desc="Set ticket granted state" -+ cmd="crm_ticket -t ticketA -r --force" -+ test_assert 0 -+ -+ desc="Query ticket granted state" -+ cmd="crm_ticket -t ticketA -G granted" -+ test_assert 0 -+ 
-+ desc="Delete ticket granted state" -+ cmd="crm_ticket -t ticketA -D granted --force" -+ test_assert 0 -+ -+ desc="Make a ticket standby" -+ cmd="crm_ticket -t ticketA -s" -+ test_assert 0 -+ -+ desc="Query ticket standby state" -+ cmd="crm_ticket -t ticketA -G standby" -+ test_assert 0 -+ -+ desc="Activate a ticket" -+ cmd="crm_ticket -t ticketA -a" -+ test_assert 0 -+ -+ desc="Delete ticket standby state" -+ cmd="crm_ticket -t ticketA -D standby" -+ test_assert 0 -+ -+ desc="Ban a resource on unknown node" -+ cmd="crm_resource -r dummy -B -N host1" -+ test_assert 6 -+ -+ desc="Create two more nodes and bring them online" -+ cmd="crm_simulate --live-check --in-place --node-up=node2 --node-up=node3" -+ test_assert 0 -+ -+ desc="Ban dummy from node1" -+ cmd="crm_resource -r dummy -B -N node1" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -G granted -- assert $? 0 crm_ticket "Query ticket granted state" -- -- $VALGRIND_CMD crm_ticket -t ticketA -D granted --force -- assert $? 0 crm_ticket "Delete ticket granted state" -+ desc="Ban dummy from node2" -+ cmd="crm_resource -r dummy -B -N node2" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -s -- assert $? 0 crm_ticket "Make a ticket standby" -+ desc="Relocate resources due to ban" -+ cmd="crm_simulate --live-check --in-place -S" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -G standby -- assert $? 0 crm_ticket "Query ticket standby state" -- -- $VALGRIND_CMD crm_ticket -t ticketA -a -- assert $? 0 crm_ticket "Activate a ticket" -+ desc="Move dummy to node1" -+ cmd="crm_resource -r dummy -M -N node1" -+ test_assert 0 - -- $VALGRIND_CMD crm_ticket -t ticketA -D standby -- assert $? 0 crm_ticket "Delete ticket standby state" -+ desc="Clear implicit constraints for dummy on node2" -+ cmd="crm_resource -r dummy -U -N node2" -+ test_assert 0 - } - - function test_date() { --# $VALGRIND_CMD cibadmin -Q - for y in 06 07 08 09 10 11 12 13 14 15 16 17 18; do -- $VALGRIND_CMD iso8601 -d "20$y-W01-7 00Z" -- $VALGRIND_CMD iso8601 -d "20$y-W01-7 00Z" -W -E "20$y-W01-7 00:00:00Z" -- assert $? 0 iso8601 "20$y-W01-7" 0 -- $VALGRIND_CMD iso8601 -d "20$y-W01-1 00Z" -- $VALGRIND_CMD iso8601 -d "20$y-W01-1 00Z" -W -E "20$y-W01-1 00:00:00Z" -- assert $? 0 iso8601 "20$y-W01-1" 0 -+ desc="20$y-W01-7" -+ cmd="iso8601 -d '20$y-W01-7 00Z'" -+ test_assert 0 0 -+ -+ desc="20$y-W01-7 - round-trip" -+ cmd="iso8601 -d '20$y-W01-7 00Z' -W -E '20$y-W01-7 00:00:00Z'" -+ test_assert 0 0 -+ -+ desc="20$y-W01-1" -+ cmd="iso8601 -d '20$y-W01-1 00Z'" -+ test_assert 0 0 -+ -+ desc="20$y-W01-1 - round-trip" -+ cmd="iso8601 -d '20$y-W01-1 00Z' -W -E '20$y-W01-1 00:00:00Z'" -+ test_assert 0 0 - done - -- $VALGRIND_CMD iso8601 -d "2009-W53-7 00:00:00Z" -W -E "2009-W53-7 00:00:00Z" -- assert $? 0 iso8601 "2009-W53-07" 0 -+ desc="2009-W53-07" -+ cmd="iso8601 -d '2009-W53-7 00:00:00Z' -W -E '2009-W53-7 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-01-31 00:00:00Z" -D "P1M" -E "2009-02-28 00:00:00Z" -- assert $? 0 iso8601 "2009-01-31 + 1 Month" 0 -+ desc="2009-01-31 + 1 Month" -+ cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P1M -E '2009-02-28 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-01-31 00:00:00Z" -D "P2M" -E "2009-03-31 00:00:00Z" -- assert $? 0 iso8601 "2009-01-31 + 2 Months" 0 -+ desc="2009-01-31 + 2 Months" -+ cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P2M -E '2009-03-31 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-01-31 00:00:00Z" -D "P3M" -E "2009-04-30 00:00:00Z" -- assert $? 
0 iso8601 "2009-01-31 + 3 Months" 0 -+ desc="2009-01-31 + 3 Months" -+ cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P3M -E '2009-04-30 00:00:00Z'" -+ test_assert 0 0 - -- $VALGRIND_CMD iso8601 -d "2009-03-31 00:00:00Z" -D "P-1M" -E "2009-02-28 00:00:00Z" -- assert $? 0 iso8601 "2009-03-31 - 1 Month" 0 -+ desc="2009-03-31 - 1 Month" -+ cmd="iso8601 -d '2009-03-31 00:00:00Z' -D P-1M -E '2009-02-28 00:00:00Z'" -+ test_assert 0 0 - } - - echo "Testing dates" -@@ -231,24 +332,30 @@ test_date > $test_home/regression.out - echo "Testing tools" - test_tools >> $test_home/regression.out - sed -i.sed 's/cib-last-written.*>/>/' $test_home/regression.out -+sed -i.sed 's/ last-run=\"[0-9]*\"//' $test_home/regression.out -+sed -i.sed 's/ last-rc-change=\"[0-9]*\"//' $test_home/regression.out - - if [ $do_save = 1 ]; then - cp $test_home/regression.out $test_home/regression.exp - fi - -+failed=0 -+ -+echo -e "\n\nResults" -+diff -wu $test_home/regression.exp $test_home/regression.out -+if [ $? != 0 ]; then -+ failed=1 -+fi -+ -+echo -e "\n\nSummary" - grep -e "^*" $test_home/regression.out - - if [ $num_errors != 0 ]; then - echo $num_errors tests failed -- diff -u $test_home/regression.exp $test_home/regression.out - exit 1 --fi -- --diff -u $test_home/regression.exp $test_home/regression.out --if [ $? != 0 ]; then -+elif [ $failed = 1 ]; then - echo $num_passed tests passed but diff failed - exit 2 -- - else - echo $num_passed tests passed - exit 0 -diff --git a/tools/report.collector b/tools/report.collector -index e4d1013..b13c3e7 100644 ---- a/tools/report.collector -+++ b/tools/report.collector -@@ -738,7 +738,12 @@ for l in $logfiles $EXTRA_LOGS; do - fi - done - --if [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then -+if [ -e $REPORT_HOME/.env ]; then -+ debug "Localhost: $REPORT_MASTER $REPORT_TARGET" -+ # Need to send something back or tar on the caller will complain -+ (cd $REPORT_HOME && tar cf - .env) -+ -+elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then - debug "Streaming report back to $REPORT_MASTER" - (cd $REPORT_HOME && tar cf - $REPORT_TARGET) - if [ "$REMOVE" = "1" ]; then -diff --git a/tools/report.common b/tools/report.common -deleted file mode 100644 -index 0e3b945..0000000 ---- a/tools/report.common -+++ /dev/null -@@ -1,742 +0,0 @@ -- # Copyright (C) 2007 Dejan Muhamedagic -- # Almost everything as part of hb_report -- # Copyright (C) 2010 Andrew Beekhof -- # Cleanups, refactoring, extensions -- # -- # -- # This program is free software; you can redistribute it and/or -- # modify it under the terms of the GNU General Public -- # License as published by the Free Software Foundation; either -- # version 2.1 of the License, or (at your option) any later version. -- # -- # This software is distributed in the hope that it will be useful, -- # but WITHOUT ANY WARRANTY; without even the implied warranty of -- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- # General Public License for more details. 
-- # -- # You should have received a copy of the GNU General Public -- # License along with this library; if not, write to the Free Software -- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- # -- --host=`uname -n` --shorthost=`echo $host | sed s:\\\\..*::` --if [ -z $verbose ]; then -- verbose=0 --fi -- --# Target Files --EVENTS_F=events.txt --ANALYSIS_F=analysis.txt --DESCRIPTION_F=description.txt --HALOG_F=cluster-log.txt --BT_F=backtraces.txt --SYSINFO_F=sysinfo.txt --SYSSTATS_F=sysstats.txt --DLM_DUMP_F=dlm_dump.txt --CRM_MON_F=crm_mon.txt --MEMBERSHIP_F=members.txt --HB_UUID_F=hb_uuid.txt --HOSTCACHE=hostcache --CRM_VERIFY_F=crm_verify.txt --PERMISSIONS_F=permissions.txt --CIB_F=cib.xml --CIB_TXT_F=cib.txt -- --EVENT_PATTERNS=" --state do_state_transition --membership pcmk_peer_update.*(lost|memb): --quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir) --pause Process.pause.detected --resources lrmd.*rsc:(start|stop) --stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) --start_stop sutdown.decision|Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete --" -- --PACKAGES="pacemaker pacemaker-libs libpacemaker3 --pacemaker-pygui pacemaker-pymgmt pymgmt-client --openais libopenais2 libopenais3 corosync libcorosync4 --resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord --heartbeat heartbeat-common heartbeat-resources libheartbeat2 --ocfs2-tools ocfs2-tools-o2cb ocfs2console --ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace --drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace --drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen --lvm2 lvm2-clvm cmirrord --libdlm libdlm2 libdlm3 --hawk ruby lighttpd --kernel-default kernel-pae kernel-xen --glibc --" -- --# --# keep the user posted --# -- --log() { -- printf "%-10s $*\n" "$shorthost:" 1>&2 --} -- --debug() { -- if [ $verbose -gt 0 ]; then -- log "Debug: $*" -- fi --} -- --info() { -- log "$*" --} -- --warning() { -- log "WARN: $*" --} -- --fatal() { -- log "ERROR: $*" -- exit 1 --} -- --detect_host() { -- -- depth="-maxdepth 5" -- local_state_dir=/var -- -- if [ -d $local_state_dir/run ]; then -- CRM_STATE_DIR=$local_state_dir/run/crm -- else -- info "Searching for where Pacemaker keeps runtime data... this may take a while" -- for d in `find / $depth -type d -name run`; do -- local_state_dir=`dirname $d` -- CRM_STATE_DIR=$d/crm -- break -- done -- info "Found: $CRM_STATE_DIR" -- fi -- debug "Machine runtime directory: $local_state_dir" -- debug "Pacemaker runtime data located in: $CRM_STATE_DIR" -- -- CRM_DAEMON_DIR= -- for p in /usr /usr/local /opt/local; do -- for d in libexec lib64 lib; do -- if [ -e $p/$d/pacemaker/pengine ]; then -- CRM_DAEMON_DIR=$p/$d/pacemaker -- break -- elif [ -e $p/$d/heartbeat/pengine ]; then -- CRM_DAEMON_DIR=$p/$d/heartbeat -- break -- fi -- done -- done -- -- if [ ! -d $CRM_DAEMON_DIR ]; then -- info "Searching for where Pacemaker daemons live... 
this may take a while" -- for f in `find / $depth -type f -name pengine`; do -- CRM_DAEMON_DIR=`dirname $f` -- break -- done -- info "Found: $CRM_DAEMON_DIR" -- fi -- -- if [ -z $CRM_DAEMON_DIR ]; then -- fatal "Non-standard Pacemaker installation: daemons not found" -- else -- debug "Pacemaker daemons located under: $CRM_DAEMON_DIR" -- fi -- -- CRM_CONFIG_DIR= -- for d in pacemaker/cib heartbeat/crm; do -- if [ -f $local_state_dir/lib/$d/cib.xml ]; then -- CRM_CONFIG_DIR=$local_state_dir/lib/$d -- break -- fi -- done -- -- if [ ! -d $CRM_CONFIG_DIR ]; then -- info "Detecting where Pacemaker keeps config information... this may take a while" -- for f in `find / $depth -type f -name cib.xml`; do -- CRM_CONFIG_DIR=`dirname $f` -- break -- done -- info "Found: $CRM_CONFIG_DIR" -- fi -- if [ -z $CRM_CONFIG_DIR ]; then -- warning "Non-standard Pacemaker installation: config not found" -- else -- debug "Pacemaker config files located in: $CRM_CONFIG_DIR" -- fi -- -- # Assume new layout -- # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) -- config_root=`dirname $CRM_CONFIG_DIR` -- -- # Older versions had none -- BLACKBOX_DIR=$config_root/blackbox -- debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" -- -- PE_STATE_DIR=$config_root/pengine -- if [ ! -d $PE_STATE_DIR ]; then -- info "Detecting where Pacemaker keeps Policy Engine inputs... this may take a while" -- for d in `find / $depth -type d -name pengine`; do -- PE_STATE_DIR=$d -- break -- done -- info "Found: $PE_STATE_DIR" -- fi -- if [ -z $PE_STATE_DIR ]; then -- fatal "Non-standard Pacemaker installation: Policy Engine directory not found" -- else -- debug "PE files located in: $PE_STATE_DIR" -- fi -- -- HA_STATE_DIR=$local_state_dir/lib/heartbeat -- debug "Assuming Heartbeat state files, if any, are located in: $HA_STATE_DIR" -- -- CRM_CORE_DIRS="" -- for d in $config_root/cores $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do -- if [ -d $d ]; then -- CRM_CORE_DIRS="$CRM_CORE_DIRS $d" -- fi -- done -- debug "Core files located under: $CRM_CORE_DIRS" --} -- --time2str() { -- perl -e "use POSIX; print strftime('%x %X',localtime($1));" --} -- --get_time() { -- perl -e "\$time=\"$*\";" -e ' -- eval "use Date::Parse"; -- if (index($time, ":") < 0) { -- -- } elsif (!$@) { -- print str2time($time); -- } else { -- eval "use Date::Manip"; -- if (!$@) { -- print UnixDate(ParseDateString($time), "%s"); -- } -- } -- ' --} -- --get_time_() { -- warning "Unknown time format used by: $*" --} -- --get_time_syslog() { -- awk '{print $1,$2,$3}' --} -- --get_time_legacy() { -- awk '{print $2}' | sed 's/_/ /' --} -- --get_time_format_for_string() { -- l="$*" -- t=$(get_time `echo $l | get_time_syslog`) -- if [ "x$t" != x ]; then -- echo syslog -- return -- fi -- -- t=$(get_time `echo $l | get_time_legacy`) -- if [ "x$t" != x ]; then -- echo legacy -- return -- fi --} -- --get_time_format() { -- t=0 l="" func="" -- trycnt=10 -- while [ $trycnt -gt 0 ] && read l; do -- func=$(get_time_format_for_string $l) -- if [ "x$func" != x ]; then -- break -- fi -- trycnt=$(($trycnt-1)) -- done -- #debug "Logfile uses the $func time format" -- echo $func --} -- --get_first_time() { -- l="" -- format=$1 -- while read l; do -- t=$(echo $l | get_time_$format) -- ts=$(get_time $t) -- if [ "x$ts" != x ]; then -- echo "$ts" -- return -- fi -- done --} -- --get_last_time() { -- l="" -- best=`date +%s` # Now -- format=$1 -- while read l; do -- t=$(echo $l | get_time_$format) -- ts=$(get_time $t) -- if [ "x$ts" != x ]; 
then -- best=$ts -- fi -- done -- echo $best --} -- --linetime() { -- l=`tail -n +$2 $1 | grep ":[0-5][0-9]:" | head -1` -- format=`get_time_format_for_string $l` -- t=`echo $l | get_time_$format` -- get_time "$t" --} -- --# Find pattern in a logfile somewhere --# Return $max ordered results by age (newest first) --findmsg() { -- max=$1 -- pattern=$2 -- logfiles="" -- syslogdirs="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster" -- -- for d in $syslogdirs; do -- if [ -d $d ]; then -- logfiles="$logfiles `grep -l -e "$pattern" $d/*`" -- fi -- done 2>/dev/null -- -- if [ "x$logfiles" != "x" ]; then -- list=`ls -t $logfiles | head -n $max | tr '\n' ' '` -- echo $list -- debug "Pattern \'$pattern\' found in: [ $list ]" -- else -- debug "Pattern \'$pattern\' not found anywhere" -- fi --} -- --node_events() { -- if [ -e $1 ]; then -- Epatt=`echo "$EVENT_PATTERNS" | -- while read title p; do [ -n "$p" ] && echo -n "|$p"; done | -- sed 's/.//' -- ` -- grep -E "$Epatt" $1 -- fi --} -- --pickfirst() { -- for x; do -- which $x >/dev/null 2>&1 && { -- echo $x -- return 0 -- } -- done -- return 1 --} -- --shrink() { -- olddir=$PWD -- dir=`dirname $1` -- base=`basename $1` -- -- target=$1.tar -- tar_options="cf" -- -- variant=`pickfirst bzip2 gzip false` -- case $variant in -- bz*) -- tar_options="jcf" -- target="$target.bz2" -- ;; -- gz*) -- tar_options="zcf" -- target="$target.gz" -- ;; -- *) -- warning "Could not find a compression program, the resulting tarball may be huge" -- ;; -- esac -- -- if [ -e $target ]; then -- fatal "Destination $target already exists, specify an alternate name with --dest" -- fi -- -- cd $dir >/dev/null 2>&1 -- tar $tar_options $target $base >/dev/null 2>&1 -- cd $olddir >/dev/null 2>&1 -- -- echo $target --} -- --findln_by_time() { -- local logf=$1 -- local tm=$2 -- local first=1 -- local last=`wc -l < $logf` -- while [ $first -le $last ]; do -- mid=$((($last+$first)/2)) -- trycnt=10 -- while [ $trycnt -gt 0 ]; do -- tmid=`linetime $logf $mid` -- [ "$tmid" ] && break -- warning "cannot extract time: $logf:$mid; will try the next one" -- trycnt=$(($trycnt-1)) -- # shift the whole first-last segment -- first=$(($first-1)) -- last=$(($last-1)) -- mid=$((($last+$first)/2)) -- done -- if [ -z "$tmid" ]; then -- warning "giving up on log..." -- return -- fi -- if [ $tmid -gt $tm ]; then -- last=$(($mid-1)) -- elif [ $tmid -lt $tm ]; then -- first=$(($mid+1)) -- else -- break -- fi -- done -- echo $mid --} -- --dumplog() { -- local logf=$1 -- local from_line=$2 -- local to_line=$3 -- [ "$from_line" ] || -- return -- tail -n +$from_line $logf | -- if [ "$to_line" ]; then -- head -$(($to_line-$from_line+1)) -- else -- cat -- fi --} -- --# --# find log/set of logs which are interesting for us --# --# --# find log slices --# -- --find_decompressor() { -- if echo $1 | grep -qs 'bz2$'; then -- echo "bzip2 -dc" -- elif echo $1 | grep -qs 'gz$'; then -- echo "gzip -dc" -- else -- echo "cat" -- fi --} --# --# check if the log contains a piece of our segment --# --is_our_log() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- local cat=`find_decompressor $logf` -- local format=`$cat $logf | get_time_format` -- local first_time=`$cat $logf | head -10 | get_first_time $format` -- local last_time=`$cat $logf | tail -10 | get_last_time $format` -- -- if [ x = "x$first_time" -o x = "x$last_time" ]; then -- warning "Skipping bad logfile '$1': Could not determine log dates" -- return 0 # skip (empty log?) 
-- fi -- if [ $from_time -gt $last_time ]; then -- # we shouldn't get here anyway if the logs are in order -- return 2 # we're past good logs; exit -- fi -- if [ $from_time -ge $first_time ]; then -- return 3 # this is the last good log -- fi -- # have to go further back -- if [ x = "x$to_time" -o $to_time -ge $first_time ]; then -- return 1 # include this log -- else -- return 0 # don't include this log -- fi --} --# --# go through archived logs (timewise backwards) and see if there --# are lines belonging to us --# (we rely on untouched log files, i.e. that modify time --# hasn't been changed) --# --arch_logs() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- # look for files such as: ha-log-20090308 or -- # ha-log-20090308.gz (.bz2) or ha-log.0, etc -- ls -t $logf $logf*[0-9z] 2>/dev/null | -- while read next_log; do -- is_our_log $next_log $from_time $to_time -- case $? in -- 0) ;; # noop, continue -- 1) echo $next_log # include log and continue -- debug "Found log $next_log" -- ;; -- 2) break;; # don't go through older logs! -- 3) echo $next_log # include log and continue -- debug "Found log $next_log" -- break -- ;; # don't go through older logs! -- esac -- done --} -- --# --# print part of the log --# --drop_tmp_file() { -- [ -z "$tmp" ] || rm -f "$tmp" --} -- --print_logseg() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- # uncompress to a temp file (if necessary) -- local cat=`find_decompressor $logf` -- if [ "$cat" != "cat" ]; then -- tmp=`mktemp` -- $cat $logf > $tmp -- trap drop_tmp_file 0 -- sourcef=$tmp -- else -- sourcef=$logf -- tmp="" -- fi -- -- if [ "$from_time" = 0 ]; then -- FROM_LINE=1 -- else -- FROM_LINE=`findln_by_time $sourcef $from_time` -- fi -- if [ -z "$FROM_LINE" ]; then -- warning "couldn't find line for time $from_time; corrupt log file?" -- return -- fi -- -- TO_LINE="" -- if [ "$to_time" != 0 ]; then -- TO_LINE=`findln_by_time $sourcef $to_time` -- if [ -z "$TO_LINE" ]; then -- warning "couldn't find line for time $to_time; corrupt log file?" 
-- return -- fi -- if [ $FROM_LINE -lt $TO_LINE ]; then -- dumplog $sourcef $FROM_LINE $TO_LINE -- log "Including segment [$FROM_LINE-$TO_LINE] from $logf" -- else -- debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" -- fi -- else -- dumplog $sourcef $FROM_LINE $TO_LINE -- log "Including all logs after line $FROM_LINE from $logf" -- fi -- drop_tmp_file -- trap "" 0 --} -- --# --# find log/set of logs which are interesting for us --# --dumplogset() { -- local logf=$1 -- local from_time=$2 -- local to_time=$3 -- -- local logf_set=`arch_logs $logf $from_time $to_time` -- if [ x = "x$logf_set" ]; then -- return -- fi -- -- local num_logs=`echo "$logf_set" | wc -l` -- local oldest=`echo $logf_set | awk '{print $NF}'` -- local newest=`echo $logf_set | awk '{print $1}'` -- local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` -- -- # the first logfile: from $from_time to $to_time (or end) -- # logfiles in the middle: all -- # the last logfile: from beginning to $to_time (or end) -- case $num_logs in -- 1) print_logseg $newest $from_time $to_time;; -- *) -- print_logseg $oldest $from_time 0 -- for f in $mid_logfiles; do -- `find_decompressor $f` $f -- debug "including complete $f logfile" -- done -- print_logseg $newest 0 $to_time -- ;; -- esac --} -- --# cut out a stanza --getstanza() { -- awk -v name="$1" ' -- !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start -- if ($1 == name) -- in_stanza = 1 -- } -- in_stanza { print } -- in_stanza && NF==1 && $1 == "}" { exit } -- ' --} --# supply stanza in $1 and variable name in $2 --# (stanza is optional) --getcfvar() { -- cf_type=$1; shift; -- cf_var=$1; shift; -- cf_file=$* -- -- [ -f "$cf_file" ] || return -- case $cf_type in -- cman) -- grep $cf_var $cf_file | sed s/.*$cf_var=\"// | sed s/\".*// -- ;; -- corosync|openais) -- sed 's/#.*//' < $cf_file | -- if [ $# -eq 2 ]; then -- getstanza "$cf_var" -- shift 1 -- else -- cat -- fi | -- awk -v varname="$cf_var" ' -- NF==2 && match($1,varname":$")==1 { print $2; exit; } -- ' -- ;; -- heartbeat) -- sed 's/#.*//' < $cf_file | -- grep -w "^$cf_var" | -- sed 's/^[^[:space:]]*[[:space:]]*//' -- -- ;; -- logd) -- sed 's/#.*//' < $cf_file | -- grep -w "^$cf_var" | -- sed 's/^[^[:space:]]*[[:space:]]*//' -- -- ;; -- esac --} -- --pickfirst() { -- for x; do -- which $x >/dev/null 2>&1 && { -- echo $x -- return 0 -- } -- done -- return 1 --} -- --# --# figure out the cluster type, depending on the process list --# and existence of configuration files --# --get_cluster_type() { -- if -- ps -ef | egrep -qs '[c]orosync' -- then -- tool=`pickfirst corosync-objctl corosync-cmapctl` -- case $tool in -- *objctl) quorum=`$tool -a | grep quorum.provider | sed s/.*=//`;; -- *cmapctl) quorum=`$tool | grep quorum.provider | sed s/.*=//`;; -- esac -- if [ x"$quorum" = x"quorum_cman" ]; then -- stack="cman" -- else -- stack="corosync" -- fi -- -- elif -- ps -ef | egrep -qs '[a]isexec' -- then -- stack="openais" -- elif -- ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat' -- then -- stack="heartbeat" -- -- # Now we're guessing... 
-- -- elif [ -f /etc/cluster/cluster.conf ]; then -- stack="cman" -- -- # TODO: Technically these could be anywhere :-/ -- elif [ -f /etc/corosync/corosync.conf ]; then -- stack="corosync" -- -- elif [ -f /etc/ais/openais.conf ]; then -- stack="openais" -- -- else -- stack="heartbeat" -- fi -- -- debug "Detected the '$stack' cluster stack" -- echo $stack --} -- --find_cluster_cf() { -- case $1 in -- cman) echo "/etc/cluster/cluster.conf";; -- corosync) -- best_size=0 -- best_file="" -- -- # TODO: Technically these could be anywhere :-/ -- for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do -- if [ -f $cf ]; then -- size=`wc -l $cf | awk '{print $1}'` -- if [ $size -gt $best_size ]; then -- best_size=$size -- best_file=$cf -- fi -- fi -- done -- echo "$best_file" -- ;; -- openais) -- # TODO: Technically it could be anywhere :-/ -- cf="/etc/ais/openais.conf" -- if [ -f $cf ]; then -- echo "$cf" -- fi -- ;; -- heartbeat) -- cf="/etc/ha.d/ha.cf" -- if [ -f $cf ]; then -- echo "$cf" -- fi -- ;; -- *) -- warning "Unknown cluster type: $1" -- ;; -- esac --} -- --# --# check for the major prereq for a) parameter parsing and b) --# parsing logs --# --t=`get_time "12:00"` --if [ "$t" = "" ]; then -- fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" --fi -- -diff --git a/tools/report.common.in b/tools/report.common.in -new file mode 100644 -index 0000000..febb092 ---- /dev/null -+++ b/tools/report.common.in -@@ -0,0 +1,753 @@ -+ # Copyright (C) 2007 Dejan Muhamedagic -+ # Almost everything as part of hb_report -+ # Copyright (C) 2010 Andrew Beekhof -+ # Cleanups, refactoring, extensions -+ # -+ # -+ # This program is free software; you can redistribute it and/or -+ # modify it under the terms of the GNU General Public -+ # License as published by the Free Software Foundation; either -+ # version 2.1 of the License, or (at your option) any later version. -+ # -+ # This software is distributed in the hope that it will be useful, -+ # but WITHOUT ANY WARRANTY; without even the implied warranty of -+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ # General Public License for more details. 
-+ # -+ # You should have received a copy of the GNU General Public -+ # License along with this library; if not, write to the Free Software -+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ # -+ -+host=`uname -n` -+shorthost=`echo $host | sed s:\\\\..*::` -+if [ -z $verbose ]; then -+ verbose=0 -+fi -+ -+# Target Files -+EVENTS_F=events.txt -+ANALYSIS_F=analysis.txt -+DESCRIPTION_F=description.txt -+HALOG_F=cluster-log.txt -+BT_F=backtraces.txt -+SYSINFO_F=sysinfo.txt -+SYSSTATS_F=sysstats.txt -+DLM_DUMP_F=dlm_dump.txt -+CRM_MON_F=crm_mon.txt -+MEMBERSHIP_F=members.txt -+HB_UUID_F=hb_uuid.txt -+HOSTCACHE=hostcache -+CRM_VERIFY_F=crm_verify.txt -+PERMISSIONS_F=permissions.txt -+CIB_F=cib.xml -+CIB_TXT_F=cib.txt -+ -+EVENT_PATTERNS=" -+state do_state_transition -+membership pcmk_peer_update.*(lost|memb): -+quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir) -+pause Process.pause.detected -+resources lrmd.*rsc:(start|stop) -+stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) -+start_stop sutdown.decision|Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete -+" -+ -+PACKAGES="pacemaker pacemaker-libs libpacemaker3 -+pacemaker-pygui pacemaker-pymgmt pymgmt-client -+openais libopenais2 libopenais3 corosync libcorosync4 -+resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord -+heartbeat heartbeat-common heartbeat-resources libheartbeat2 -+ocfs2-tools ocfs2-tools-o2cb ocfs2console -+ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace -+drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace -+drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen -+lvm2 lvm2-clvm cmirrord -+libdlm libdlm2 libdlm3 -+hawk ruby lighttpd -+kernel-default kernel-pae kernel-xen -+glibc -+" -+ -+# -+# keep the user posted -+# -+ -+log() { -+ printf "%-10s $*\n" "$shorthost:" 1>&2 -+} -+ -+debug() { -+ if [ $verbose -gt 0 ]; then -+ log "Debug: $*" -+ fi -+} -+ -+info() { -+ log "$*" -+} -+ -+warning() { -+ log "WARN: $*" -+} -+ -+fatal() { -+ log "ERROR: $*" -+ exit 1 -+} -+ -+detect_host() { -+ if [ -z "$maxdepth" ]; then -+ depth="-maxdepth 5" -+ else -+ depth="-maxdepth $maxdepth" -+ fi -+ -+ local_state_dir=@localstatedir@ -+ -+ if [ -d $local_state_dir/run ]; then -+ CRM_STATE_DIR=$local_state_dir/run/crm -+ else -+ info "Searching for where Pacemaker keeps runtime data... this may take a while" -+ for d in `find / $depth -type d -name run`; do -+ local_state_dir=`dirname $d` -+ CRM_STATE_DIR=$d/crm -+ break -+ done -+ info "Found: $CRM_STATE_DIR" -+ fi -+ debug "Machine runtime directory: $local_state_dir" -+ debug "Pacemaker runtime data located in: $CRM_STATE_DIR" -+ -+ CRM_DAEMON_DIR= -+ for p in /usr /usr/local /opt/local @exec_prefix@; do -+ for d in libexec lib64 lib; do -+ if [ -e $p/$d/pacemaker/pengine ]; then -+ CRM_DAEMON_DIR=$p/$d/pacemaker -+ break -+ elif [ -e $p/$d/heartbeat/pengine ]; then -+ CRM_DAEMON_DIR=$p/$d/heartbeat -+ break -+ fi -+ done -+ done -+ -+ if [ ! -d $CRM_DAEMON_DIR ]; then -+ info "Searching for where Pacemaker daemons live... 
this may take a while" -+ for f in `find / $depth -type f -name pengine`; do -+ CRM_DAEMON_DIR=`dirname $f` -+ break -+ done -+ info "Found: $CRM_DAEMON_DIR" -+ fi -+ -+ if [ -z $CRM_DAEMON_DIR ]; then -+ fatal "Non-standard Pacemaker installation: daemons not found" -+ else -+ debug "Pacemaker daemons located under: $CRM_DAEMON_DIR" -+ fi -+ -+ CRM_CONFIG_DIR= -+ for d in pacemaker/cib heartbeat/crm; do -+ if [ -f $local_state_dir/lib/$d/cib.xml ]; then -+ CRM_CONFIG_DIR=$local_state_dir/lib/$d -+ break -+ fi -+ done -+ -+ if [ ! -d $CRM_CONFIG_DIR ]; then -+ info "Detecting where Pacemaker keeps config information... this may take a while" -+ for f in `find / $depth -type f -name cib.xml`; do -+ CRM_CONFIG_DIR=`dirname $f` -+ break -+ done -+ info "Found: $CRM_CONFIG_DIR" -+ fi -+ if [ -z $CRM_CONFIG_DIR ]; then -+ warning "Non-standard Pacemaker installation: config not found" -+ else -+ debug "Pacemaker config files located in: $CRM_CONFIG_DIR" -+ fi -+ -+ # Assume new layout -+ # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) -+ config_root=`dirname $CRM_CONFIG_DIR` -+ -+ # Older versions had none -+ BLACKBOX_DIR=$config_root/blackbox -+ debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" -+ -+ PE_STATE_DIR=$config_root/pengine -+ if [ ! -d $PE_STATE_DIR ]; then -+ info "Detecting where Pacemaker keeps Policy Engine inputs... this may take a while" -+ for d in `find / $depth -type d -name pengine`; do -+ PE_STATE_DIR=$d -+ break -+ done -+ info "Found: $PE_STATE_DIR" -+ fi -+ if [ -z $PE_STATE_DIR ]; then -+ fatal "Non-standard Pacemaker installation: Policy Engine directory not found" -+ else -+ debug "PE files located in: $PE_STATE_DIR" -+ fi -+ -+ HA_STATE_DIR=$local_state_dir/lib/heartbeat -+ debug "Assuming Heartbeat state files, if any, are located in: $HA_STATE_DIR" -+ -+ CRM_CORE_DIRS="" -+ for d in $config_root/cores $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do -+ if [ -d $d ]; then -+ CRM_CORE_DIRS="$CRM_CORE_DIRS $d" -+ fi -+ done -+ debug "Core files located under: $CRM_CORE_DIRS" -+} -+ -+time2str() { -+ perl -e "use POSIX; print strftime('%x %X',localtime($1));" -+} -+ -+get_time() { -+ perl -e "\$time=\"$*\";" -e ' -+ eval "use Date::Parse"; -+ if (index($time, ":") < 0) { -+ -+ } elsif (!$@) { -+ print str2time($time); -+ } else { -+ eval "use Date::Manip"; -+ if (!$@) { -+ print UnixDate(ParseDateString($time), "%s"); -+ } -+ } -+ ' -+} -+ -+get_time_() { -+ warning "Unknown time format used by: $*" -+} -+ -+get_time_syslog() { -+ awk '{print $1,$2,$3}' -+} -+ -+get_time_legacy() { -+ awk '{print $2}' | sed 's/_/ /' -+} -+ -+get_time_format_for_string() { -+ l="$*" -+ t=$(get_time `echo $l | get_time_syslog`) -+ if [ "x$t" != x ]; then -+ echo syslog -+ return -+ fi -+ -+ t=$(get_time `echo $l | get_time_legacy`) -+ if [ "x$t" != x ]; then -+ echo legacy -+ return -+ fi -+} -+ -+get_time_format() { -+ t=0 l="" func="" -+ trycnt=10 -+ while [ $trycnt -gt 0 ] && read l; do -+ func=$(get_time_format_for_string $l) -+ if [ "x$func" != x ]; then -+ break -+ fi -+ trycnt=$(($trycnt-1)) -+ done -+ #debug "Logfile uses the $func time format" -+ echo $func -+} -+ -+get_first_time() { -+ l="" -+ format=$1 -+ while read l; do -+ t=$(echo $l | get_time_$format) -+ ts=$(get_time $t) -+ if [ "x$ts" != x ]; then -+ echo "$ts" -+ return -+ fi -+ done -+} -+ -+get_last_time() { -+ l="" -+ best=`date +%s` # Now -+ format=$1 -+ while read l; do -+ t=$(echo $l | get_time_$format) -+ ts=$(get_time $t) -+ if [ "x$ts" != x ]; 
then -+ best=$ts -+ fi -+ done -+ echo $best -+} -+ -+linetime() { -+ l=`tail -n +$2 $1 | grep ":[0-5][0-9]:" | head -1` -+ format=`get_time_format_for_string $l` -+ t=`echo $l | get_time_$format` -+ get_time "$t" -+} -+ -+# Find pattern in a logfile somewhere -+# Return $max ordered results by age (newest first) -+findmsg() { -+ max=$1 -+ pattern=$2 -+ logfiles="" -+ syslogdirs="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster" -+ -+ for d in $syslogdirs; do -+ if [ -d $d ]; then -+ logfiles="$logfiles `grep -l -e "$pattern" $d/*`" -+ fi -+ done 2>/dev/null -+ -+ if [ "x$logfiles" != "x" ]; then -+ list=`ls -t $logfiles | head -n $max | tr '\n' ' '` -+ echo $list -+ debug "Pattern \'$pattern\' found in: [ $list ]" -+ else -+ debug "Pattern \'$pattern\' not found anywhere" -+ fi -+} -+ -+node_events() { -+ if [ -e $1 ]; then -+ Epatt=`echo "$EVENT_PATTERNS" | -+ while read title p; do [ -n "$p" ] && echo -n "|$p"; done | -+ sed 's/.//' -+ ` -+ grep -E "$Epatt" $1 -+ fi -+} -+ -+pickfirst() { -+ for x; do -+ which $x >/dev/null 2>&1 && { -+ echo $x -+ return 0 -+ } -+ done -+ return 1 -+} -+ -+shrink() { -+ olddir=$PWD -+ dir=`dirname $1` -+ base=`basename $1` -+ -+ target=$1.tar -+ tar_options="cf" -+ -+ variant=`pickfirst bzip2 gzip false` -+ case $variant in -+ bz*) -+ tar_options="jcf" -+ target="$target.bz2" -+ ;; -+ gz*) -+ tar_options="zcf" -+ target="$target.gz" -+ ;; -+ *) -+ warning "Could not find a compression program, the resulting tarball may be huge" -+ ;; -+ esac -+ -+ if [ -e $target ]; then -+ fatal "Destination $target already exists, specify an alternate name with --dest" -+ fi -+ -+ cd $dir >/dev/null 2>&1 -+ tar $tar_options $target $base >/dev/null 2>&1 -+ cd $olddir >/dev/null 2>&1 -+ -+ echo $target -+} -+ -+findln_by_time() { -+ local logf=$1 -+ local tm=$2 -+ local first=1 -+ local last=`wc -l < $logf` -+ while [ $first -le $last ]; do -+ mid=$((($last+$first)/2)) -+ trycnt=10 -+ while [ $trycnt -gt 0 ]; do -+ tmid=`linetime $logf $mid` -+ [ "$tmid" ] && break -+ warning "cannot extract time: $logf:$mid; will try the next one" -+ trycnt=$(($trycnt-1)) -+ # shift the whole first-last segment -+ first=$(($first-1)) -+ last=$(($last-1)) -+ mid=$((($last+$first)/2)) -+ done -+ if [ -z "$tmid" ]; then -+ warning "giving up on log..." -+ return -+ fi -+ if [ $tmid -gt $tm ]; then -+ last=$(($mid-1)) -+ elif [ $tmid -lt $tm ]; then -+ first=$(($mid+1)) -+ else -+ break -+ fi -+ done -+ echo $mid -+} -+ -+dumplog() { -+ local logf=$1 -+ local from_line=$2 -+ local to_line=$3 -+ [ "$from_line" ] || -+ return -+ tail -n +$from_line $logf | -+ if [ "$to_line" ]; then -+ head -$(($to_line-$from_line+1)) -+ else -+ cat -+ fi -+} -+ -+# -+# find log/set of logs which are interesting for us -+# -+# -+# find log slices -+# -+ -+find_decompressor() { -+ if echo $1 | grep -qs 'bz2$'; then -+ echo "bzip2 -dc" -+ elif echo $1 | grep -qs 'gz$'; then -+ echo "gzip -dc" -+ else -+ echo "cat" -+ fi -+} -+# -+# check if the log contains a piece of our segment -+# -+is_our_log() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ local cat=`find_decompressor $logf` -+ local format=`$cat $logf | get_time_format` -+ local first_time=`$cat $logf | head -10 | get_first_time $format` -+ local last_time=`$cat $logf | tail -10 | get_last_time $format` -+ -+ if [ x = "x$first_time" -o x = "x$last_time" ]; then -+ warning "Skipping bad logfile '$1': Could not determine log dates" -+ return 0 # skip (empty log?) 
-+ fi -+ if [ $from_time -gt $last_time ]; then -+ # we shouldn't get here anyway if the logs are in order -+ return 2 # we're past good logs; exit -+ fi -+ if [ $from_time -ge $first_time ]; then -+ return 3 # this is the last good log -+ fi -+ # have to go further back -+ if [ x = "x$to_time" -o $to_time -ge $first_time ]; then -+ return 1 # include this log -+ else -+ return 0 # don't include this log -+ fi -+} -+# -+# go through archived logs (timewise backwards) and see if there -+# are lines belonging to us -+# (we rely on untouched log files, i.e. that modify time -+# hasn't been changed) -+# -+arch_logs() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ # look for files such as: ha-log-20090308 or -+ # ha-log-20090308.gz (.bz2) or ha-log.0, etc -+ ls -t $logf $logf*[0-9z] 2>/dev/null | -+ while read next_log; do -+ is_our_log $next_log $from_time $to_time -+ case $? in -+ 0) ;; # noop, continue -+ 1) echo $next_log # include log and continue -+ debug "Found log $next_log" -+ ;; -+ 2) break;; # don't go through older logs! -+ 3) echo $next_log # include log and continue -+ debug "Found log $next_log" -+ break -+ ;; # don't go through older logs! -+ esac -+ done -+} -+ -+# -+# print part of the log -+# -+drop_tmp_file() { -+ [ -z "$tmp" ] || rm -f "$tmp" -+} -+ -+print_logseg() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ # uncompress to a temp file (if necessary) -+ local cat=`find_decompressor $logf` -+ if [ "$cat" != "cat" ]; then -+ tmp=`mktemp` -+ $cat $logf > $tmp -+ trap drop_tmp_file 0 -+ sourcef=$tmp -+ else -+ sourcef=$logf -+ tmp="" -+ fi -+ -+ if [ "$from_time" = 0 ]; then -+ FROM_LINE=1 -+ else -+ FROM_LINE=`findln_by_time $sourcef $from_time` -+ fi -+ if [ -z "$FROM_LINE" ]; then -+ warning "couldn't find line for time $from_time; corrupt log file?" -+ return -+ fi -+ -+ TO_LINE="" -+ if [ "$to_time" != 0 ]; then -+ TO_LINE=`findln_by_time $sourcef $to_time` -+ if [ -z "$TO_LINE" ]; then -+ warning "couldn't find line for time $to_time; corrupt log file?" 
-+ return -+ fi -+ if [ $FROM_LINE -lt $TO_LINE ]; then -+ dumplog $sourcef $FROM_LINE $TO_LINE -+ log "Including segment [$FROM_LINE-$TO_LINE] from $logf" -+ else -+ debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" -+ fi -+ else -+ dumplog $sourcef $FROM_LINE $TO_LINE -+ log "Including all logs after line $FROM_LINE from $logf" -+ fi -+ drop_tmp_file -+ trap "" 0 -+} -+ -+# -+# find log/set of logs which are interesting for us -+# -+dumplogset() { -+ local logf=$1 -+ local from_time=$2 -+ local to_time=$3 -+ -+ local logf_set=`arch_logs $logf $from_time $to_time` -+ if [ x = "x$logf_set" ]; then -+ return -+ fi -+ -+ local num_logs=`echo "$logf_set" | wc -l` -+ local oldest=`echo $logf_set | awk '{print $NF}'` -+ local newest=`echo $logf_set | awk '{print $1}'` -+ local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` -+ -+ # the first logfile: from $from_time to $to_time (or end) -+ # logfiles in the middle: all -+ # the last logfile: from beginning to $to_time (or end) -+ case $num_logs in -+ 1) print_logseg $newest $from_time $to_time;; -+ *) -+ print_logseg $oldest $from_time 0 -+ for f in $mid_logfiles; do -+ `find_decompressor $f` $f -+ debug "including complete $f logfile" -+ done -+ print_logseg $newest 0 $to_time -+ ;; -+ esac -+} -+ -+# cut out a stanza -+getstanza() { -+ awk -v name="$1" ' -+ !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start -+ if ($1 == name) -+ in_stanza = 1 -+ } -+ in_stanza { print } -+ in_stanza && NF==1 && $1 == "}" { exit } -+ ' -+} -+# supply stanza in $1 and variable name in $2 -+# (stanza is optional) -+getcfvar() { -+ cf_type=$1; shift; -+ cf_var=$1; shift; -+ cf_file=$* -+ -+ [ -f "$cf_file" ] || return -+ case $cf_type in -+ cman) -+ grep $cf_var $cf_file | sed s/.*$cf_var=\"// | sed s/\".*// -+ ;; -+ corosync|openais) -+ sed 's/#.*//' < $cf_file | -+ if [ $# -eq 2 ]; then -+ getstanza "$cf_var" -+ shift 1 -+ else -+ cat -+ fi | -+ awk -v varname="$cf_var" ' -+ NF==2 && match($1,varname":$")==1 { print $2; exit; } -+ ' -+ ;; -+ heartbeat) -+ sed 's/#.*//' < $cf_file | -+ grep -w "^$cf_var" | -+ sed 's/^[^[:space:]]*[[:space:]]*//' -+ -+ ;; -+ logd) -+ sed 's/#.*//' < $cf_file | -+ grep -w "^$cf_var" | -+ sed 's/^[^[:space:]]*[[:space:]]*//' -+ -+ ;; -+ esac -+} -+ -+pickfirst() { -+ for x; do -+ which $x >/dev/null 2>&1 && { -+ echo $x -+ return 0 -+ } -+ done -+ return 1 -+} -+ -+# -+# figure out the cluster type, depending on the process list -+# and existence of configuration files -+# -+get_cluster_type() { -+ if -+ ps -ef | egrep -qs '[c]orosync' -+ then -+ tool=`pickfirst corosync-objctl corosync-cmapctl` -+ case $tool in -+ *objctl) quorum=`$tool -a | grep quorum.provider | sed s/.*=//`;; -+ *cmapctl) quorum=`$tool | grep quorum.provider | sed s/.*=//`;; -+ esac -+ if [ x"$quorum" = x"quorum_cman" ]; then -+ stack="cman" -+ else -+ stack="corosync" -+ fi -+ -+ elif -+ ps -ef | egrep -qs '[a]isexec' -+ then -+ stack="openais" -+ elif -+ ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat' -+ then -+ stack="heartbeat" -+ -+ # Now we're guessing... 
-+ -+ elif [ -f /etc/cluster/cluster.conf ]; then -+ stack="cman" -+ -+ # TODO: Technically these could be anywhere :-/ -+ elif [ -f /etc/corosync/corosync.conf ]; then -+ stack="corosync" -+ -+ elif [ -f /etc/ais/openais.conf ]; then -+ stack="openais" -+ -+ else -+ stack="heartbeat" -+ fi -+ -+ debug "Detected the '$stack' cluster stack" -+ echo $stack -+} -+ -+find_cluster_cf() { -+ case $1 in -+ cman) echo "/etc/cluster/cluster.conf";; -+ corosync) -+ best_size=0 -+ best_file="" -+ -+ # TODO: Technically these could be anywhere :-/ -+ for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do -+ if [ -f $cf ]; then -+ size=`wc -l $cf | awk '{print $1}'` -+ if [ $size -gt $best_size ]; then -+ best_size=$size -+ best_file=$cf -+ fi -+ fi -+ done -+ if [ -z "$best_file" ]; then -+ debug "Looking for corosync configuration file. This may take a while..." -+ for f in `find / $depth -type f -name corosync.conf`; do -+ best_file=$f -+ break -+ done -+ fi -+ debug "Located corosync config file: $best_file" -+ echo "$best_file" -+ ;; -+ openais) -+ # TODO: Technically it could be anywhere :-/ -+ cf="/etc/ais/openais.conf" -+ if [ -f $cf ]; then -+ echo "$cf" -+ fi -+ ;; -+ heartbeat) -+ cf="/etc/ha.d/ha.cf" -+ if [ -f $cf ]; then -+ echo "$cf" -+ fi -+ ;; -+ *) -+ warning "Unknown cluster type: $1" -+ ;; -+ esac -+} -+ -+# -+# check for the major prereq for a) parameter parsing and b) -+# parsing logs -+# -+t=`get_time "12:00"` -+if [ "$t" = "" ]; then -+ fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" -+fi -diff --git a/xml/resources-1.2.rng b/xml/resources-1.2.rng -index 0ef6066..81a8f82 100644 ---- a/xml/resources-1.2.rng -+++ b/xml/resources-1.2.rng -@@ -36,6 +36,7 @@ - upstart - service - systemd -+ nagios - - - -@@ -210,6 +211,7 @@ - restart - standby - fence -+ restart-container - - - diff --git a/pacemaker-hotfixes.patch b/pacemaker-hotfixes.patch deleted file mode 100644 index 0792c19..0000000 --- a/pacemaker-hotfixes.patch +++ /dev/null @@ -1,45 +0,0 @@ -diff --git a/mcp/corosync.c b/mcp/corosync.c -index 07d7490..a4f6b34 100644 ---- a/mcp/corosync.c -+++ b/mcp/corosync.c -@@ -50,7 +50,6 @@ static struct cpg_name cpg_group = { - gboolean use_cman = FALSE; - static cpg_handle_t cpg_handle; - static corosync_cfg_handle_t cfg_handle; --static corosync_cfg_state_notification_t cfg_buffer; - - /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ - -@@ -143,14 +142,6 @@ cluster_connect_cfg(uint32_t * nodeid) - - crm_debug("Our nodeid: %d", *nodeid); - -- retries = 0; -- cs_repeat(retries, 30, rc = corosync_cfg_state_track(cfg_handle, 0, &cfg_buffer)); -- -- if (rc != CS_OK) { -- crm_err("corosync cfg stack_track error %d", rc); -- goto bail; -- } -- - crm_debug("Adding fd=%d to mainloop", fd); - G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_cfg_dispatch, &cfg_handle, - cfg_connection_destroy); -diff --git a/tools/report.collector b/tools/report.collector -index 20203f6..4fdac3e 100644 ---- a/tools/report.collector -+++ b/tools/report.collector -@@ -700,10 +700,9 @@ for l in $logfiles $EXTRA_LOGS; do - node_events `basename $l` > $EVENTS_F - - # Link the first logfile to a standard name if it doesn't yet exist -- if [ -e $HALOG_F ]; then -- : nothing -- else -- ln -s `basename $l` $HALOG_F -+ f=`basename $l` -+ if [ -e $f -a ! 
-e $HALOG_F ]; then -+ ln -s $f $HALOG_F - fi - done - diff --git a/pacemaker-multilib-header.patch b/pacemaker-multilib-header.patch deleted file mode 100644 index 3784836..0000000 --- a/pacemaker-multilib-header.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff -r c6a01b02950b include/crm_config.h.in ---- a/include/crm_config.h.in Sat Mar 19 10:50:21 2011 +0100 -+++ b/include/crm_config.h.in Fri Mar 25 18:34:59 2011 +0100 -@@ -36,9 +36,6 @@ - /* Where to keep CIB configuration files */ - #undef CRM_CONFIG_DIR - --/* Location for Pacemaker daemons */ --#undef CRM_DAEMON_DIR -- - /* Group to run Pacemaker daemons as */ - #undef CRM_DAEMON_GROUP - -@@ -69,9 +66,6 @@ - /* Compatability alias for SUPPORT_COROSYNC */ - #undef AIS_COROSYNC - --/* Correct printf format for logging uint64_t */ --#undef U64T -- - /* Use g_hash_table compatibility functions */ - #undef USE_GHASH_COMPAT - diff --git a/pacemaker-rollup-3a7715d.patch b/pacemaker-rollup-3a7715d.patch new file mode 100644 index 0000000..6b1935c --- /dev/null +++ b/pacemaker-rollup-3a7715d.patch @@ -0,0 +1,4919 @@ +diff --git a/attrd/commands.c b/attrd/commands.c +index 18c0523..c6586c7 100644 +--- a/attrd/commands.c ++++ b/attrd/commands.c +@@ -832,7 +832,6 @@ attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *u + } + } + done: +- free(name); + if(a && a->changed && election_state(writer) == election_won) { + write_attribute(a); + } +@@ -1019,8 +1018,10 @@ write_attribute(attribute_t *a) + crm_info("Sent update %d with %d changes for %s, id=%s, set=%s", + a->update, cib_updates, a->id, (a->uuid? a->uuid : ""), a->set); + +- the_cib->cmds->register_callback( +- the_cib, a->update, 120, FALSE, strdup(a->id), "attrd_cib_callback", attrd_cib_callback); ++ the_cib->cmds->register_callback_full(the_cib, a->update, 120, FALSE, ++ strdup(a->id), ++ "attrd_cib_callback", ++ attrd_cib_callback, free); + } + free_xml(xml_top); + } +diff --git a/attrd/legacy.c b/attrd/legacy.c +index 4aae4c4..8a18c38 100644 +--- a/attrd/legacy.c ++++ b/attrd/legacy.c +@@ -635,6 +635,20 @@ struct attrd_callback_s { + char *value; + }; + ++/* ++ * \internal ++ * \brief Free an attrd callback structure ++ */ ++static void ++free_attrd_callback(void *user_data) ++{ ++ struct attrd_callback_s *data = user_data; ++ ++ free(data->attr); ++ free(data->value); ++ free(data); ++} ++ + static void + attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) + { +@@ -646,7 +660,7 @@ attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *u + + } else if (call_id < 0) { + crm_warn("Update %s=%s failed: %s", data->attr, data->value, pcmk_strerror(call_id)); +- goto cleanup; ++ return; + } + + switch (rc) { +@@ -674,10 +688,6 @@ attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *u + crm_err("Update %d for %s=%s failed: %s", + call_id, data->attr, data->value, pcmk_strerror(rc)); + } +- cleanup: +- free(data->value); +- free(data->attr); +- free(data); + } + + void +@@ -749,8 +759,10 @@ attrd_perform_update(attr_hash_entry_t * hash_entry) + if (hash_entry->value != NULL) { + data->value = strdup(hash_entry->value); + } +- cib_conn->cmds->register_callback(cib_conn, rc, 120, FALSE, data, "attrd_cib_callback", +- attrd_cib_callback); ++ cib_conn->cmds->register_callback_full(cib_conn, rc, 120, FALSE, data, ++ "attrd_cib_callback", ++ attrd_cib_callback, ++ free_attrd_callback); + return; + } + +diff --git a/bumplibs.sh b/bumplibs.sh +index 68f2f58..2044efa 100755 +--- a/bumplibs.sh 
++++ b/bumplibs.sh +@@ -3,6 +3,7 @@ + declare -A headers + headers[crmcommon]="include/crm/common include/crm/crm.h" + headers[crmcluster]="include/crm/cluster.h" ++headers[crmservice]="include/crm/services.h" + headers[transitioner]="include/crm/transition.h" + headers[cib]="include/crm/cib.h include/crm/cib/util.h" + headers[pe_rules]="include/crm/pengine/rules.h" +@@ -11,8 +12,17 @@ headers[pengine]="include/crm/pengine/common.h include/crm/pengine/complex.h i + headers[stonithd]="include/crm/stonith-ng.h" + headers[lrmd]="include/crm/lrmd.h" + +-LAST_RELEASE=`test -e /Volumes || git tag -l | grep Pacemaker | grep -v rc | sort -Vr | head -n 1` +-for lib in crmcommon crmcluster transitioner cib pe_rules pe_status stonithd pengine lrmd; do ++if [ ! -z $1 ]; then ++ LAST_RELEASE=$1 ++else ++ LAST_RELEASE=`test -e /Volumes || git tag -l | grep Pacemaker | grep -v rc | sort -Vr | head -n 1` ++fi ++libs=$(find . -name "*.am" -exec grep "lib.*_la_LDFLAGS.*version-info" \{\} \; | sed -e s/_la_LDFLAGS.*// -e s/^lib//) ++for lib in $libs; do ++ if [ -z "${headers[$lib]}" ]; then ++ echo "Unknown headers for lib$lib" ++ exit 0 ++ fi + git diff -w $LAST_RELEASE..HEAD ${headers[$lib]} + echo "" + +@@ -27,6 +37,7 @@ for lib in crmcommon crmcluster transitioner cib pe_rules pe_status stonithd pen + fi + + sources=`grep "lib${lib}_la_SOURCES" $am | sed s/.*=// | sed 's:$(top_builddir)/::' | sed 's:$(top_srcdir)/::' | sed 's:\\\::' | sed 's:$(libpe_rules_la_SOURCES):rules.c\ common.c:'` ++ + full_sources="" + for f in $sources; do + if +@@ -48,6 +59,11 @@ for lib in crmcommon crmcluster transitioner cib pe_rules pe_status stonithd pen + echo "" + echo "New arguments to functions or changes to the middle of structs are incompatible additions" + echo "" ++ echo "Where possible:" ++ echo "- move new fields to the end of structs" ++ echo "- use bitfields instead of booleans" ++ echo "- when adding arguments, create new functions that the old version can call" ++ echo "" + read -p "Are the changes to lib$lib: [a]dditions, [i]ncompatible additions, [r]emovals or [f]ixes? [None]: " CHANGE + + git show $LAST_RELEASE:$am | grep version-info +diff --git a/cib/callbacks.c b/cib/callbacks.c +index 1452ded..28844b8 100644 +--- a/cib/callbacks.c ++++ b/cib/callbacks.c +@@ -1570,7 +1570,7 @@ static gboolean + cib_force_exit(gpointer data) + { + crm_notice("Forcing exit!"); +- terminate_cib(__FUNCTION__, TRUE); ++ terminate_cib(__FUNCTION__, -1); + return FALSE; + } + +@@ -1656,7 +1656,7 @@ initiate_exit(void) + + active = crm_active_peers(); + if (active < 2) { +- terminate_cib(__FUNCTION__, FALSE); ++ terminate_cib(__FUNCTION__, 0); + return; + } + +@@ -1675,9 +1675,19 @@ initiate_exit(void) + extern int remote_fd; + extern int remote_tls_fd; + ++/* ++ * \internal ++ * \brief Close remote sockets, free the global CIB and quit ++ * ++ * \param[in] caller Name of calling function (for log message) ++ * \param[in] fast If 1, skip disconnect; if -1, also exit error ++ */ + void +-terminate_cib(const char *caller, gboolean fast) ++terminate_cib(const char *caller, int fast) + { ++ crm_info("%s: Exiting%s...", caller, ++ (fast < 0)? " fast" : mainloop ? 
" from mainloop" : ""); ++ + if (remote_fd > 0) { + close(remote_fd); + remote_fd = 0; +@@ -1687,27 +1697,29 @@ terminate_cib(const char *caller, gboolean fast) + remote_tls_fd = 0; + } + +- if (!fast) { +- crm_info("%s: Disconnecting from cluster infrastructure", caller); +- crm_cluster_disconnect(&crm_cluster); +- } +- + uninitializeCib(); + +- crm_info("%s: Exiting%s...", caller, fast ? " fast" : mainloop ? " from mainloop" : ""); ++ if (fast < 0) { ++ /* Quit fast on error */ ++ cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); ++ crm_exit(EINVAL); + +- if (fast == FALSE && mainloop != NULL && g_main_is_running(mainloop)) { ++ } else if ((mainloop != NULL) && g_main_is_running(mainloop)) { ++ /* Quit via returning from the main loop. If fast == 1, we skip the ++ * disconnect here, and it will be done when the main loop returns ++ * (this allows the peer status callback to avoid messing with the ++ * peer caches). ++ */ ++ if (fast == 0) { ++ crm_cluster_disconnect(&crm_cluster); ++ } + g_main_quit(mainloop); + + } else { +- qb_ipcs_destroy(ipcs_ro); +- qb_ipcs_destroy(ipcs_rw); +- qb_ipcs_destroy(ipcs_shm); +- +- if (fast) { +- crm_exit(EINVAL); +- } else { +- crm_exit(pcmk_ok); +- } ++ /* Quit via clean exit. Even the peer status callback can disconnect ++ * here, because we're not returning control to the caller. */ ++ crm_cluster_disconnect(&crm_cluster); ++ cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); ++ crm_exit(pcmk_ok); + } + } +diff --git a/cib/callbacks.h b/cib/callbacks.h +index bca9992..a49428e 100644 +--- a/cib/callbacks.h ++++ b/cib/callbacks.h +@@ -71,7 +71,7 @@ extern void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op + + void cib_shutdown(int nsig); + void initiate_exit(void); +-void terminate_cib(const char *caller, gboolean fast); ++void terminate_cib(const char *caller, int fast); + + extern gboolean cib_legacy_mode(void); + +diff --git a/cib/main.c b/cib/main.c +index e20a2b6..cbaf7b5 100644 +--- a/cib/main.c ++++ b/cib/main.c +@@ -71,8 +71,6 @@ gboolean cib_register_ha(ll_cluster_t * hb_cluster, const char *client_name); + void *hb_conn = NULL; + #endif + +-extern void terminate_cib(const char *caller, gboolean fast); +- + GMainLoop *mainloop = NULL; + const char *cib_root = NULL; + char *cib_our_uname = NULL; +@@ -414,7 +412,7 @@ cib_cs_destroy(gpointer user_data) + crm_info("Corosync disconnection complete"); + } else { + crm_err("Corosync connection lost! Exiting."); +- terminate_cib(__FUNCTION__, TRUE); ++ terminate_cib(__FUNCTION__, -1); + } + } + #endif +@@ -422,30 +420,29 @@ cib_cs_destroy(gpointer user_data) + static void + cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) + { +- if ((type == crm_status_processes) && legacy_mode +- && is_not_set(node->processes, crm_get_cluster_proc())) { +- uint32_t old = 0; +- +- if (data) { +- old = *(const uint32_t *)data; +- } ++ switch (type) { ++ case crm_status_processes: ++ if (legacy_mode && is_not_set(node->processes, crm_get_cluster_proc())) { ++ uint32_t old = data? 
*(const uint32_t *)data : 0; ++ ++ if ((node->processes ^ old) & crm_proc_cpg) { ++ crm_info("Attempting to disable legacy mode after %s left the cluster", ++ node->uname); ++ legacy_mode = FALSE; ++ } ++ } ++ break; + +- if ((node->processes ^ old) & crm_proc_cpg) { +- crm_info("Attempting to disable legacy mode after %s left the cluster", node->uname); +- legacy_mode = FALSE; +- } +- } ++ case crm_status_uname: ++ case crm_status_rstate: ++ case crm_status_nstate: ++ if (cib_shutdown_flag && (crm_active_peers() < 2) ++ && crm_hash_table_size(client_connections) == 0) { + +- if (cib_shutdown_flag && crm_active_peers() < 2 && crm_hash_table_size(client_connections) == 0) { +- crm_info("No more peers"); +- /* @TODO +- * terminate_cib() calls crm_cluster_disconnect() which calls +- * crm_peer_destroy() which destroys the peer caches, which a peer +- * status callback shouldn't do. For now, there is a workaround in +- * crm_update_peer_proc(), but CIB should be refactored to avoid +- * destroying the peer caches here. +- */ +- terminate_cib(__FUNCTION__, FALSE); ++ crm_info("No more peers"); ++ terminate_cib(__FUNCTION__, 1); ++ } ++ break; + } + } + +@@ -455,10 +452,10 @@ cib_ha_connection_destroy(gpointer user_data) + { + if (cib_shutdown_flag) { + crm_info("Heartbeat disconnection complete... exiting"); +- terminate_cib(__FUNCTION__, FALSE); ++ terminate_cib(__FUNCTION__, 0); + } else { + crm_err("Heartbeat connection lost! Exiting."); +- terminate_cib(__FUNCTION__, TRUE); ++ terminate_cib(__FUNCTION__, -1); + } + } + #endif +@@ -541,8 +538,12 @@ cib_init(void) + /* Create the mainloop and run it... */ + mainloop = g_main_new(FALSE); + crm_info("Starting %s mainloop", crm_system_name); +- + g_main_run(mainloop); ++ ++ /* If main loop returned, clean up and exit. We disconnect in case ++ * terminate_cib() was called with fast=1. 
++ */
++    crm_cluster_disconnect(&crm_cluster);
+     cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm);
+
+     return crm_exit(pcmk_ok);
+diff --git a/cib/messages.c b/cib/messages.c
+index 363562c..eca63b9 100644
+--- a/cib/messages.c
++++ b/cib/messages.c
+@@ -87,7 +87,7 @@ cib_process_shutdown_req(const char *op, int options, const char *section, xmlNo
+
+     } else if (cib_shutdown_flag) {
+         crm_info("Shutdown ACK from %s", host);
+-        terminate_cib(__FUNCTION__, FALSE);
++        terminate_cib(__FUNCTION__, 0);
+         return pcmk_ok;
+
+     } else {
+diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
+index 78ccad2..78214bf 100644
+--- a/crmd/crmd_utils.h
++++ b/crmd/crmd_utils.h
+@@ -102,11 +102,14 @@ gboolean too_many_st_failures(void);
+ void st_fail_count_reset(const char * target);
+ void crmd_peer_down(crm_node_t *peer, bool full);
+
++/* Convenience macro for registering a CIB callback
++ * (assumes that data can be freed with free())
++ */
+ # define fsa_register_cib_callback(id, flag, data, fn) do {            \
+         CRM_ASSERT(fsa_cib_conn);                                      \
+-        fsa_cib_conn->cmds->register_callback(                         \
++        fsa_cib_conn->cmds->register_callback_full(                    \
+             fsa_cib_conn, id, 10 * (1 + crm_active_peers()),           \
+-            flag, data, #fn, fn);                                      \
++            flag, data, #fn, fn, free);                                \
+     } while(0)
+
+ # define start_transition(state) do {					\
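
The macro above now routes through register_callback_full(), whose extra argument is a destructor for the callback's user data (declared in the include/crm/cib.h hunk near the end of this patch). That is why the explicit free(user_data)/free(join_id) calls disappear from the callbacks below and in crmd/membership.c, crmd/pengine.c, crmd/te_callbacks.c, and crmd/utils.c. A sketch of the resulting pattern, reusing names visible in the patch (the allocation itself is illustrative):

    char *join_id = strdup("join-1");   /* illustrative heap-allocated user data */

    fsa_register_cib_callback(query_call_id, FALSE, join_id, join_query_callback);
    /* expands to:
     * fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, query_call_id,
     *     10 * (1 + crm_active_peers()), FALSE, join_id,
     *     "join_query_callback", join_query_callback, free);
     *
     * free(join_id) now happens inside the CIB library, both after the
     * callback runs and when it is cancelled with
     * remove_cib_op_callback(query_call_id, FALSE), so neither the callback
     * body nor the cancel path may free it again. */

+diff --git a/crmd/join_client.c b/crmd/join_client.c
+index 286cd92..65e3bed 100644
+--- a/crmd/join_client.c
++++ b/crmd/join_client.c
+@@ -116,8 +116,8 @@ do_cl_join_offer_respond(long long action,
+
+     /* we only ever want the last one */
+     if (query_call_id > 0) {
+-        /* Calling remove_cib_op_callback() would result in a memory leak of the data field */
+         crm_trace("Cancelling previous join query: %d", query_call_id);
++        remove_cib_op_callback(query_call_id, FALSE);
+         query_call_id = 0;
+     }
+
+@@ -173,7 +173,6 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *
+
+   done:
+     free_xml(generation);
+-    free(join_id);
+ }
+
+ /* A_CL_JOIN_RESULT */
+diff --git a/crmd/join_dc.c b/crmd/join_dc.c
+index f777296..5280b6e 100644
+--- a/crmd/join_dc.c
++++ b/crmd/join_dc.c
+@@ -452,8 +452,6 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi
+         crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s",
+                   AM_I_DC ?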
"DC" : "CRMd", fsa_state2string(fsa_state)); + } +- +- free(user_data); + } + + static void +diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c +index 162ad03..c03fa0b 100644 +--- a/crmd/lrm_state.c ++++ b/crmd/lrm_state.c +@@ -490,7 +490,7 @@ remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) + if (remote_proxy_new(lrm_state->node_name, session, channel) == NULL) { + remote_proxy_notify_destroy(lrmd, session); + } +- crm_info("new remote proxy client established to %s, session id %s", channel, session); ++ crm_trace("new remote proxy client established to %s, session id %s", channel, session); + } else if (safe_str_eq(op, "destroy")) { + remote_proxy_end_session(session); + +@@ -534,7 +534,16 @@ remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) + } + + } else if(is_set(flags, crm_ipc_proxied)) { +- int rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); ++ const char *type = crm_element_value(request, F_TYPE); ++ int rc = 0; ++ ++ if (safe_str_eq(type, T_ATTRD) ++ && crm_element_value(request, F_ATTRD_HOST) == NULL) { ++ crm_xml_add(request, F_ATTRD_HOST, proxy->node_name); ++ crm_xml_add_int(request, F_ATTRD_HOST_ID, get_local_nodeid(0)); ++ } ++ ++ rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); + + if(rc < 0) { + xmlNode *op_reply = create_xml_node(NULL, "nack"); +diff --git a/crmd/membership.c b/crmd/membership.c +index 447e6a8..27ae710 100644 +--- a/crmd/membership.c ++++ b/crmd/membership.c +@@ -200,7 +200,6 @@ remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc, + do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, + "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)", + node_uuid, pcmk_strerror(rc), rc); +- free(node_uuid); + } + + static void +@@ -215,11 +214,9 @@ search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, + crm_notice("Searching conflicting nodes for %s failed: %s (%d)", + new_node_uuid, pcmk_strerror(rc), rc); + } +- free(new_node_uuid); + return; + + } else if (output == NULL) { +- free(new_node_uuid); + return; + } + +@@ -283,8 +280,6 @@ search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, + free_xml(node_state_xml); + } + } +- +- free(new_node_uuid); + } + + static void +diff --git a/crmd/pengine.c b/crmd/pengine.c +index c9544a9..46df648 100644 +--- a/crmd/pengine.c ++++ b/crmd/pengine.c +@@ -77,8 +77,6 @@ save_cib_contents(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us + + free(filename); + } +- +- free(id); + } + + static void +@@ -320,9 +318,10 @@ do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state)); + return; + +- } else if (num_cib_op_callbacks() != 0) { +- crm_debug("Re-asking for the CIB: %d peer updates still pending", num_cib_op_callbacks()); +- ++ /* this callback counts as 1 */ ++ } else if (num_cib_op_callbacks() > 1) { ++ crm_debug("Re-asking for the CIB: %d other peer updates still pending", ++ (num_cib_op_callbacks() - 1)); + sleep(1); + register_fsa_action(A_PE_INVOKE); + return; +diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c +index 68742c2..c22b273 100644 +--- a/crmd/te_callbacks.c ++++ b/crmd/te_callbacks.c +@@ -294,6 +294,49 @@ static char *get_node_from_xpath(const char *xpath) + return nodeid; + } + ++static char *extract_node_uuid(const char *xpath) ++{ ++ char *mutable_path = strdup(xpath); ++ char *node_uuid = NULL; ++ char *search = NULL; ++ char *match = NULL; ++ ++ match = strstr(mutable_path, "node_state[@id=\'") 
+ strlen("node_state[@id=\'"); ++ search = strchr(match, '\''); ++ search[0] = 0; ++ ++ node_uuid = strdup(match); ++ free(mutable_path); ++ return node_uuid; ++} ++ ++static void abort_unless_down(const char *xpath, const char *op, xmlNode *change, const char *reason) ++{ ++ char *node_uuid = NULL; ++ crm_action_t *down = NULL; ++ ++ if(safe_str_neq(op, "delete")) { ++ abort_transition(INFINITY, tg_restart, reason, change); ++ return; ++ } ++ ++ node_uuid = extract_node_uuid(xpath); ++ if(node_uuid == NULL) { ++ crm_err("Could not extract node ID from %s", xpath); ++ abort_transition(INFINITY, tg_restart, reason, change); ++ return; ++ } ++ ++ down = match_down_event(0, node_uuid, NULL, FALSE); ++ if(down == NULL || down->executed == false) { ++ crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); ++ abort_transition(INFINITY, tg_restart, reason, change); ++ } else { ++ crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); ++ } ++ free(node_uuid); ++} ++ + void + te_update_diff(const char *event, xmlNode * msg) + { +@@ -388,27 +431,22 @@ te_update_diff(const char *event, xmlNode * msg) + break; /* Wont be packaged with any resource operations we may be waiting for */ + + } else if(strstr(xpath, "/"XML_TAG_TRANSIENT_NODEATTRS"[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) { +- abort_transition(INFINITY, tg_restart, "Transient attribute change", change); ++ abort_unless_down(xpath, op, change, "Transient attribute change"); + break; /* Wont be packaged with any resource operations we may be waiting for */ + + } else if(strstr(xpath, "/"XML_LRM_TAG_RSC_OP"[") && safe_str_eq(op, "delete")) { + crm_action_t *cancel = NULL; + char *mutable_key = strdup(xpath); +- char *mutable_node = strdup(xpath); + char *search = NULL; + + const char *key = NULL; +- const char *node_uuid = NULL; ++ char *node_uuid = extract_node_uuid(xpath); + + search = strrchr(mutable_key, '\''); + search[0] = 0; + + key = strrchr(mutable_key, '\'') + 1; + +- node_uuid = strstr(mutable_node, "node_state[@id=\'") + strlen("node_state[@id=\'"); +- search = strchr(node_uuid, '\''); +- search[0] = 0; +- + cancel = get_cancel_action(key, node_uuid); + if (cancel == NULL) { + abort_transition(INFINITY, tg_restart, "Resource operation removal", change); +@@ -422,14 +460,14 @@ te_update_diff(const char *event, xmlNode * msg) + trigger_graph(); + + } +- free(mutable_node); + free(mutable_key); ++ free(node_uuid); + + } else if(strstr(xpath, "/"XML_CIB_TAG_LRM"[") && safe_str_eq(op, "delete")) { +- abort_transition(INFINITY, tg_restart, "Resource state removal", change); ++ abort_unless_down(xpath, op, change, "Resource state removal"); + + } else if(strstr(xpath, "/"XML_CIB_TAG_STATE"[") && safe_str_eq(op, "delete")) { +- abort_transition(INFINITY, tg_restart, "Node state removal", change); ++ abort_unless_down(xpath, op, change, "Node state removal"); + + } else if(name == NULL) { + crm_debug("No result for %s operation to %s", op, xpath); +@@ -717,7 +755,6 @@ cib_fencing_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + } else { + crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); + } +- free(user_data); + } + + void +diff --git a/crmd/utils.c b/crmd/utils.c +index 5ca4b9d..4fe3a49 100644 +--- a/crmd/utils.c ++++ b/crmd/utils.c +@@ -999,7 +999,6 @@ erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + + do_crm_log_unlikely(rc == 0 ? 
LOG_DEBUG : LOG_NOTICE, + "Deletion of \"%s\": %s (rc=%d)", xpath, pcmk_strerror(rc), rc); +- free(xpath); + } + + void +diff --git a/cts/CIB.py b/cts/CIB.py +index 82d02d7..8fbba6c 100644 +--- a/cts/CIB.py ++++ b/cts/CIB.py +@@ -105,7 +105,7 @@ class CIB11(ConfigBase): + if not name: + name = "r%s%d" % (self.CM.Env["IPagent"], self.counter) + self.counter = self.counter + 1 +- r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard) ++ r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard) + + r.add_op("monitor", "5s") + return r +@@ -387,7 +387,7 @@ class ConfigFactory: + """register a constructor""" + _args = [constructor] + _args.extend(args) +- setattr(self, methodName, apply(ConfigFactoryItem,_args, kargs)) ++ setattr(self, methodName, ConfigFactoryItem(*_args, **kargs)) + + def unregister(self, methodName): + """unregister a constructor""" +@@ -415,7 +415,6 @@ class ConfigFactory: + + class ConfigFactoryItem: + def __init__(self, function, *args, **kargs): +- assert callable(function), "function should be a callable obj" + self._function = function + self._args = args + self._kargs = kargs +@@ -426,7 +425,7 @@ class ConfigFactoryItem: + _args.extend(args) + _kargs = self._kargs.copy() + _kargs.update(kargs) +- return apply(self._function,_args,_kargs) ++ return self._function(*_args,**_kargs) + + # Basic Sanity Testing + if __name__ == '__main__': +@@ -449,4 +448,4 @@ if __name__ == '__main__': + + CibFactory = ConfigFactory(manager) + cib = CibFactory.createConfig("pacemaker-1.1") +- print cib.contents() ++ print(cib.contents()) +diff --git a/cts/CM_ais.py b/cts/CM_ais.py +index a34f9b1..d2e2c1f 100644 +--- a/cts/CM_ais.py ++++ b/cts/CM_ais.py +@@ -80,7 +80,7 @@ class crm_ais(crm_lha): + # Processes running under valgrind can't be shot with "killall -9 processname", + # so don't include them in the returned list + vgrind = self.Env["valgrind-procs"].split() +- for key in self.fullcomplist.keys(): ++ for key in list(self.fullcomplist.keys()): + if self.Env["valgrind-tests"]: + if key in vgrind: + self.log("Filtering %s from the component list as it is being profiled by valgrind" % key) +diff --git a/cts/CM_lha.py b/cts/CM_lha.py +index b192272..28742d9 100755 +--- a/cts/CM_lha.py ++++ b/cts/CM_lha.py +@@ -92,7 +92,7 @@ class crm_lha(ClusterManager): + self.log("Node %s is not up." % node) + return None + +- if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: ++ if not node in self.CIBsync and self.Env["ClobberCIB"] == 1: + self.CIBsync[node] = 1 + self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") + +diff --git a/cts/CTS.py b/cts/CTS.py +index 9f9a291..634348a 100644 +--- a/cts/CTS.py ++++ b/cts/CTS.py +@@ -69,7 +69,7 @@ function status() { + function start() { + # Is it already running? + if +- status ++ status + then + return + fi +@@ -94,20 +94,20 @@ case $action in + nohup $0 $f start >/dev/null 2>&1 > $f +- echo " $*" >> $f ++ uptime | sed s/up.*:/,/ | tr '\\n' ',' >> $f ++ echo " $*" >> $f + start +- ;; ++ ;; + *) +- echo "Unknown action: $action." +- ;; ++ echo "Unknown action: $action." 
++ ;; + esac + """ + +@@ -157,7 +157,7 @@ class CtsLab: + self.Env.dump() + + def has_key(self, key): +- return self.Env.has_key(key) ++ return key in self.Env.keys() + + def __getitem__(self, key): + return self.Env[key] +@@ -275,7 +275,7 @@ class ClusterManager(UserDict): + None + + def _finalConditions(self): +- for key in self.keys(): ++ for key in list(self.keys()): + if self[key] == None: + raise ValueError("Improper derivation: self[" + key + "] must be overridden by subclass.") + +@@ -299,14 +299,14 @@ class ClusterManager(UserDict): + if key == "Name": + return self.name + +- print "FIXME: Getting %s from %s" % (key, repr(self)) +- if self.data.has_key(key): ++ print("FIXME: Getting %s from %s" % (key, repr(self))) ++ if key in self.data: + return self.data[key] + + return self.templates.get_patterns(self.Env["Name"], key) + + def __setitem__(self, key, value): +- print "FIXME: Setting %s=%s on %s" % (key, value, repr(self)) ++ print("FIXME: Setting %s=%s on %s" % (key, value, repr(self))) + self.data[key] = value + + def key_for_node(self, node): +@@ -333,7 +333,7 @@ class ClusterManager(UserDict): + def prepare(self): + '''Finish the Initialization process. Prepare to test...''' + +- print repr(self)+"prepare" ++ print(repr(self)+"prepare") + for node in self.Env["nodes"]: + if self.StataCM(node): + self.ShouldBeStatus[node] = "up" +@@ -387,11 +387,11 @@ class ClusterManager(UserDict): + return None + + if not self.templates["Pat:Fencing_start"]: +- print "No start pattern" ++ print("No start pattern") + return None + + if not self.templates["Pat:Fencing_ok"]: +- print "No ok pattern" ++ print("No ok pattern") + return None + + stonith = None +@@ -500,7 +500,7 @@ class ClusterManager(UserDict): + else: self.debug("Starting %s on node %s" % (self.templates["Name"], node)) + ret = 1 + +- if not self.ShouldBeStatus.has_key(node): ++ if not node in self.ShouldBeStatus: + self.ShouldBeStatus[node] = "down" + + if self.ShouldBeStatus[node] != "down": +@@ -871,13 +871,13 @@ class ClusterManager(UserDict): + + for host in self.Env["nodes"]: + log_stats_file = "%s/cts-stats.csv" % CTSvars.CRM_DAEMON_DIR +- if has_log_stats.has_key(host): ++ if host in has_log_stats: + self.rsh(host, '''bash %s %s stop''' % (log_stats_bin, log_stats_file)) + (rc, lines) = self.rsh(host, '''cat %s''' % log_stats_file, stdout=2) + self.rsh(host, '''bash %s %s delete''' % (log_stats_bin, log_stats_file)) + + fname = "cts-stats-%d-nodes-%s.csv" % (len(self.Env["nodes"]), host) +- print "Extracted stats: %s" % fname ++ print("Extracted stats: %s" % fname) + fd = open(fname, "a") + fd.writelines(lines) + fd.close() +@@ -891,7 +891,7 @@ class ClusterManager(UserDict): + + for host in self.Env["nodes"]: + log_stats_file = "%s/cts-stats.csv" % CTSvars.CRM_DAEMON_DIR +- if not has_log_stats.has_key(host): ++ if not host in has_log_stats: + + global log_stats + global log_stats_bin +@@ -986,7 +986,7 @@ class Process(Component): + self.CM = cm + self.badnews_ignore = badnews_ignore + self.badnews_ignore.extend(common_ignore) +- self.triggersreboot = triggersreboot ++ self.triggersreboot = triggersreboot + + if process: + self.proc = str(process) +diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py +index 8d52062..e8663f2 100755 +--- a/cts/CTSaudits.py ++++ b/cts/CTSaudits.py +@@ -108,7 +108,7 @@ class LogAudit(ClusterAudit): + self.CM.log ("ERROR: Cannot execute remote command [%s] on %s" % (cmd, node)) + + for k in self.kinds: +- if watch.has_key(k): ++ if k in watch: + w = watch[k] + if watch_pref == "any": 
self.CM.log("Testing for %s logs" % (k)) + w.lookforall(silent=True) +@@ -118,7 +118,7 @@ class LogAudit(ClusterAudit): + self.CM.Env["LogWatcher"] = w.kind + return 1 + +- for k in watch.keys(): ++ for k in list(watch.keys()): + w = watch[k] + if w.unmatched: + for regex in w.unmatched: +@@ -226,7 +226,7 @@ class FileAudit(ClusterAudit): + self.known.append(line) + self.CM.log("Warning: Corosync core file on %s: %s" % (node, line)) + +- if self.CM.ShouldBeStatus.has_key(node) and self.CM.ShouldBeStatus[node] == "down": ++ if node in self.CM.ShouldBeStatus and self.CM.ShouldBeStatus[node] == "down": + clean = 0 + (rc, lsout) = self.CM.rsh(node, "ls -al /dev/shm | grep qb-", None) + for line in lsout: +@@ -532,7 +532,7 @@ class CrmdStateAudit(ClusterAudit): + , "auditfail":0} + + def has_key(self, key): +- return self.Stats.has_key(key) ++ return key in self.Stats + + def __setitem__(self, key, value): + self.Stats[key] = value +@@ -542,7 +542,7 @@ class CrmdStateAudit(ClusterAudit): + + def incr(self, name): + '''Increment (or initialize) the value associated with the given name''' +- if not self.Stats.has_key(name): ++ if not name in self.Stats: + self.Stats[name] = 0 + self.Stats[name] = self.Stats[name]+1 + +@@ -601,7 +601,7 @@ class CIBAudit(ClusterAudit): + , "auditfail":0} + + def has_key(self, key): +- return self.Stats.has_key(key) ++ return key in self.Stats + + def __setitem__(self, key, value): + self.Stats[key] = value +@@ -611,7 +611,7 @@ class CIBAudit(ClusterAudit): + + def incr(self, name): + '''Increment (or initialize) the value associated with the given name''' +- if not self.Stats.has_key(name): ++ if not name in self.Stats: + self.Stats[name] = 0 + self.Stats[name] = self.Stats[name]+1 + +@@ -726,7 +726,7 @@ class PartitionAudit(ClusterAudit): + + def incr(self, name): + '''Increment (or initialize) the value associated with the given name''' +- if not self.Stats.has_key(name): ++ if not name in self.Stats: + self.Stats[name] = 0 + self.Stats[name] = self.Stats[name]+1 + +diff --git a/cts/CTSscenarios.py b/cts/CTSscenarios.py +index 2f3a69b..cc6e67e 100644 +--- a/cts/CTSscenarios.py ++++ b/cts/CTSscenarios.py +@@ -124,7 +124,7 @@ A partially set up scenario is torn down if it fails during setup. + + def incr(self, name): + '''Increment (or initialize) the value associated with the given name''' +- if not self.Stats.has_key(name): ++ if not name in self.Stats: + self.Stats[name] = 0 + self.Stats[name] = self.Stats[name]+1 + +@@ -176,7 +176,7 @@ A partially set up scenario is torn down if it fails during setup. + + elapsed_time = stoptime - starttime + test_time = stoptime - test.get_timer() +- if not test.has_key("min_time"): ++ if not test["min_time"]: + test["elapsed_time"] = elapsed_time + test["min_time"] = test_time + test["max_time"] = test_time +@@ -211,7 +211,7 @@ A partially set up scenario is torn down if it fails during setup. 
+ } + self.ClusterManager.log("Test Summary") + for test in self.Tests: +- for key in stat_filter.keys(): ++ for key in list(stat_filter.keys()): + stat_filter[key] = test.Stats[key] + self.ClusterManager.log(("Test %s: "%test.name).ljust(25) + " %s"%repr(stat_filter)) + +@@ -387,7 +387,7 @@ According to the manual page for ping: + '''Start the PingFest!''' + + self.PingSize = 1024 +- if CM.Env.has_key("PingSize"): ++ if "PingSize" in CM.Env.keys(): + self.PingSize = CM.Env["PingSize"] + + CM.log("Starting %d byte flood pings" % self.PingSize) +@@ -550,7 +550,7 @@ Test a rolling upgrade between two versions of the stack + return self.install(node, self.CM.Env["previous-version"]) + + def SetUp(self, CM): +- print repr(self)+"prepare" ++ print(repr(self)+"prepare") + CM.prepare() + + # Clear out the cobwebs +diff --git a/cts/CTStests.py b/cts/CTStests.py +index f817004..00fcd13 100644 +--- a/cts/CTStests.py ++++ b/cts/CTStests.py +@@ -97,13 +97,18 @@ class CTSTest: + self.logger.debug(args) + + def has_key(self, key): +- return self.Stats.has_key(key) ++ return key in self.Stats + + def __setitem__(self, key, value): + self.Stats[key] = value + + def __getitem__(self, key): +- return self.Stats[key] ++ if str(key) == "0": ++ raise ValueError("Bad call to 'foo in X', should reference 'foo in X.Stats' instead") ++ ++ if key in self.Stats: ++ return self.Stats[key] ++ return None + + def log_mark(self, msg): + self.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) +@@ -128,7 +133,7 @@ class CTSTest: + + def incr(self, name): + '''Increment (or initialize) the value associated with the given name''' +- if not self.Stats.has_key(name): ++ if not name in self.Stats: + self.Stats[name] = 0 + self.Stats[name] = self.Stats[name]+1 + +@@ -534,7 +539,7 @@ class StonithdTest(CTSTest): + if not self.is_applicable_common(): + return 0 + +- if self.Env.has_key("DoFencing"): ++ if "DoFencing" in self.Env.keys(): + return self.Env["DoFencing"] + + return 1 +@@ -1048,7 +1053,7 @@ class BandwidthTest(CTSTest): + T1 = linesplit[0] + timesplit = string.split(T1,":") + time2split = string.split(timesplit[2],".") +- time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 ++ time1 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001 + break + + while count < 100: +@@ -1070,7 +1075,7 @@ class BandwidthTest(CTSTest): + T2 = linessplit[0] + timesplit = string.split(T2,":") + time2split = string.split(timesplit[2],".") +- time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 ++ time2 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001 + time = time2-time1 + if (time <= 0): + return 0 +@@ -1105,7 +1110,7 @@ class MaintenanceMode(CTSTest): + # fail the resource right after turning Maintenance mode on + # verify it is not recovered until maintenance mode is turned off + if action == "On": +- pats.append("pengine.*: warning:.* Processing failed op %s for %s on" % (self.action, self.rid)) ++ pats.append(r"pengine.*:\s+warning:.*Processing failed op %s for %s on" % (self.action, self.rid)) + else: + pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) + pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0")) +@@ -1314,7 +1319,7 @@ class ResourceRecover(CTSTest): + self.debug("Shooting %s aka. 
%s" % (rsc.clone_id, rsc.id)) + + pats = [] +- pats.append(r"pengine.*: warning:.* Processing failed op %s for (%s|%s) on" % (self.action, ++ pats.append(r"pengine.*:\s+warning:.*Processing failed op %s for (%s|%s) on" % (self.action, + rsc.id, rsc.clone_id)) + + if rsc.managed(): +@@ -1574,7 +1579,7 @@ class SplitBrainTest(CTSTest): + p_max = len(self.Env["nodes"]) + for node in self.Env["nodes"]: + p = self.Env.RandomGen.randint(1, p_max) +- if not partitions.has_key(p): ++ if not p in partitions: + partitions[p] = [] + partitions[p].append(node) + p_max = len(partitions.keys()) +@@ -1583,13 +1588,13 @@ class SplitBrainTest(CTSTest): + # else, try again + + self.debug("Created %d partitions" % p_max) +- for key in partitions.keys(): ++ for key in list(partitions.keys()): + self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) + + # Disabling STONITH to reduce test complexity for now + self.rsh(node, "crm_attribute -V -n stonith-enabled -v false") + +- for key in partitions.keys(): ++ for key in list(partitions.keys()): + self.isolate_partition(partitions[key]) + + count = 30 +@@ -1612,7 +1617,7 @@ class SplitBrainTest(CTSTest): + self.CM.partitions_expected = 1 + + # And heal them again +- for key in partitions.keys(): ++ for key in list(partitions.keys()): + self.heal_partition(partitions[key]) + + # Wait for a single partition to form +@@ -2247,11 +2252,11 @@ class RollingUpgradeTest(CTSTest): + if not self.is_applicable_common(): + return None + +- if not self.Env.has_key("rpm-dir"): ++ if not "rpm-dir" in self.Env.keys(): + return None +- if not self.Env.has_key("current-version"): ++ if not "current-version" in self.Env.keys(): + return None +- if not self.Env.has_key("previous-version"): ++ if not "previous-version" in self.Env.keys(): + return None + + return 1 +@@ -2305,7 +2310,7 @@ class BSC_AddResource(CTSTest): + if ":" in ip: + fields = ip.rpartition(":") + fields[2] = str(hex(int(fields[2], 16)+1)) +- print str(hex(int(f[2], 16)+1)) ++ print(str(hex(int(f[2], 16)+1))) + else: + fields = ip.rpartition('.') + fields[2] = str(int(fields[2])+1) +@@ -3109,7 +3114,7 @@ class RemoteStonithd(CTSTest): + if not self.driver.is_applicable(): + return False + +- if self.Env.has_key("DoFencing"): ++ if "DoFencing" in self.Env.keys(): + return self.Env["DoFencing"] + + return True +diff --git a/cts/OCFIPraTest.py b/cts/OCFIPraTest.py +index 9900a62..03d964b 100755 +--- a/cts/OCFIPraTest.py ++++ b/cts/OCFIPraTest.py +@@ -28,13 +28,13 @@ from cts.CTSvars import * + + + def usage(): +- print "usage: " + sys.argv[0] \ ++ print("usage: " + sys.argv[0] \ + + " [-2]"\ + + " [--ipbase|-i first-test-ip]"\ + + " [--ipnum|-n test-ip-num]"\ + + " [--help|-h]"\ + + " [--perform|-p op]"\ +- + " [number-of-iterations]" ++ + " [number-of-iterations]") + sys.exit(1) + + +@@ -71,7 +71,7 @@ def log(towrite): + t = time.strftime("%Y/%m/%d_%H:%M:%S\t", time.localtime(time.time())) + logstr = t + " "+str(towrite) + syslog.syslog(logstr) +- print logstr ++ print(logstr) + + if __name__ == '__main__': + ra = "IPaddr" +diff --git a/cts/cib_xml.py b/cts/cib_xml.py +index 0bd963b..3d8f8d4 100644 +--- a/cts/cib_xml.py ++++ b/cts/cib_xml.py +@@ -19,7 +19,7 @@ class XmlBase(CibBase): + text = '''<%s''' % self.tag + if self.name: + text += ''' id="%s"''' % (self.name) +- for k in self.kwargs.keys(): ++ for k in list(self.kwargs.keys()): + text += ''' %s="%s"''' % (k, self.kwargs[k]) + + if not self.children: +@@ -149,22 +149,22 @@ class Resource(XmlBase): + def constraints(self): + text = "" + +- for k in 
self.scores.keys(): ++ for k in list(self.scores.keys()): + text += '''''' % (k, self.name) + text += self.scores[k].show() + text += '''''' + +- for k in self.needs.keys(): ++ for k in list(self.needs.keys()): + text += '''''' + +- for k in self.coloc.keys(): ++ for k in list(self.coloc.keys()): + text += '''''' + +@@ -179,13 +179,13 @@ class Resource(XmlBase): + + if len(self.meta) > 0: + text += '''''' % self.name +- for p in self.meta.keys(): ++ for p in list(self.meta.keys()): + text += '''''' % (self.name, p, p, self.meta[p]) + text += '''''' + + if len(self.param) > 0: + text += '''''' % self.name +- for p in self.param.keys(): ++ for p in list(self.param.keys()): + text += '''''' % (self.name, p, p, self.param[p]) + text += '''''' + +@@ -219,7 +219,7 @@ class Group(Resource): + + if len(self.meta) > 0: + text += '''''' % self.name +- for p in self.meta.keys(): ++ for p in list(self.meta.keys()): + text += '''''' % (self.name, p, p, self.meta[p]) + text += '''''' + +diff --git a/cts/environment.py b/cts/environment.py +index 61d4211..4ed5ced 100644 +--- a/cts/environment.py ++++ b/cts/environment.py +@@ -92,7 +92,7 @@ class Environment: + + def dump(self): + keys = [] +- for key in self.data.keys(): ++ for key in list(self.data.keys()): + keys.append(key) + + keys.sort() +@@ -106,16 +106,19 @@ class Environment: + if key == "nodes": + return True + +- return self.data.has_key(key) ++ return key in self.data + + def __getitem__(self, key): ++ if str(key) == "0": ++ raise ValueError("Bad call to 'foo in X', should reference 'foo in X.keys()' instead") ++ + if key == "nodes": + return self.Nodes + + elif key == "Name": + return self.get_stack_short() + +- elif self.data.has_key(key): ++ elif key in self.data: + return self.data[key] + + else: +@@ -175,12 +178,12 @@ class Environment: + self.data["Stack"] = "corosync (plugin v0)" + + else: +- print "Unknown stack: "+name ++ raise ValueError("Unknown stack: "+name) + sys.exit(1) + + def get_stack_short(self): + # Create the Cluster Manager object +- if not self.data.has_key("Stack"): ++ if not "Stack" in self.data: + return "unknown" + + elif self.data["Stack"] == "heartbeat": +@@ -202,12 +205,12 @@ class Environment: + return "crm-plugin-v0" + + else: +- LogFactory().log("Unknown stack: "+self.data["stack"]) +- sys.exit(1) ++ LogFactory().log("Unknown stack: "+self["stack"]) ++ raise ValueError("Unknown stack: "+self["stack"]) + + def detect_syslog(self): + # Detect syslog variant +- if not self.has_key("syslogd"): ++ if not "syslogd" in self.data: + if self["have_systemd"]: + # Systemd + self["syslogd"] = self.rsh(self.target, "systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", stdout=1).strip() +@@ -215,13 +218,13 @@ class Environment: + # SYS-V + self["syslogd"] = self.rsh(self.target, "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1", stdout=1).strip() + +- if not self.has_key("syslogd") or not self["syslogd"]: ++ if not "syslogd" in self.data or not self["syslogd"]: + # default + self["syslogd"] = "rsyslog" + + def detect_at_boot(self): + # Detect if the cluster starts at boot +- if not self.has_key("at-boot"): ++ if not "at-boot" in self.data: + atboot = 0 + + if self["have_systemd"]: +@@ -237,7 +240,7 @@ class Environment: + + def detect_ip_offset(self): + # Try to determin an offset for IPaddr resources +- if self["CIBResource"] and not self.has_key("IPBase"): ++ if self["CIBResource"] and not "IPBase" in self.data: + network=self.rsh(self.target, "ip addr | grep inet | grep 
-v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip() + self["IPBase"] = self.rsh(self.target, "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network, stdout=1).strip() + if not self["IPBase"]: +@@ -261,7 +264,7 @@ class Environment: + + def validate(self): + if len(self["nodes"]) < 1: +- print "No nodes specified!" ++ print("No nodes specified!") + sys.exit(1) + + def discover(self): +@@ -276,7 +279,7 @@ class Environment: + break; + self["cts-master"] = master + +- if not self.has_key("have_systemd"): ++ if not "have_systemd" in self.data: + self["have_systemd"] = not self.rsh(self.target, "systemctl list-units") + + self.detect_syslog() +@@ -390,7 +393,7 @@ class Environment: + self["DoStonith"]=1 + self["stonith-type"] = "fence_openstack" + +- print "Obtaining OpenStack credentials from the current environment" ++ print("Obtaining OpenStack credentials from the current environment") + self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % ( + os.environ['OS_REGION_NAME'], + os.environ['OS_TENANT_NAME'], +@@ -403,7 +406,7 @@ class Environment: + self["DoStonith"]=1 + self["stonith-type"] = "fence_rhevm" + +- print "Obtaining RHEV-M credentials from the current environment" ++ print("Obtaining RHEV-M credentials from the current environment") + self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % ( + os.environ['RHEVM_USERNAME'], + os.environ['RHEVM_PASSWORD'], +@@ -442,7 +445,7 @@ class Environment: + try: + float(args[i+1]) + except ValueError: +- print ("--xmit-loss parameter should be float") ++ print("--xmit-loss parameter should be float") + self.usage(args[i+1]) + skipthis=1 + self["XmitLoss"] = args[i+1] +@@ -451,7 +454,7 @@ class Environment: + try: + float(args[i+1]) + except ValueError: +- print ("--recv-loss parameter should be float") ++ print("--recv-loss parameter should be float") + self.usage(args[i+1]) + skipthis=1 + self["RecvLoss"] = args[i+1] +@@ -503,7 +506,7 @@ class Environment: + self["DoStonith"]=1 + self["stonith-type"] = "fence_rhevm" + +- print "Obtaining RHEV-M credentials from the current environment" ++ print("Obtaining RHEV-M credentials from the current environment") + self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % ( + os.environ['RHEVM_USERNAME'], + os.environ['RHEVM_PASSWORD'], +@@ -605,7 +608,7 @@ class Environment: + skipthis=1 + (name, value) = args[i+1].split('=') + self[name] = value +- print "Setting %s = %s" % (name, value) ++ print("Setting %s = %s" % (name, value)) + + elif args[i] == "--help": + self.usage(args[i], 0) +@@ -622,52 +625,52 @@ class Environment: + + def usage(self, arg, status=1): + if status: +- print "Illegal argument %s" % arg +- print "usage: " + sys.argv[0] +" [options] number-of-iterations" +- print "\nCommon options: " +- print "\t [--nodes 'node list'] list of cluster nodes separated by whitespace" +- print "\t [--group | -g 'name'] use the nodes listed in the named DSH group (~/.dsh/groups/$name)" +- print "\t [--limit-nodes max] only use the first 'max' cluster nodes supplied with --nodes" +- print "\t [--stack (v0|v1|cman|corosync|heartbeat|openais)] which cluster stack is installed" +- print "\t [--list-tests] list the valid tests" +- print "\t [--benchmark] add the timing information" +- print "\t " +- print "Options that CTS will usually auto-detect correctly: " +- print "\t [--logfile path] where should the test software look 
for logs from cluster nodes" +- print "\t [--syslog-facility name] which syslog facility should the test software log to" +- print "\t [--at-boot (1|0)] does the cluster software start at boot time" +- print "\t [--test-ip-base ip] offset for generated IP address resources" +- print "\t " +- print "Options for release testing: " +- print "\t [--populate-resources | -r] generate a sample configuration" +- print "\t [--choose name] run only the named test" +- print "\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]" +- print "\t [--once] run all valid tests once" +- print "\t " +- print "Additional (less common) options: " +- print "\t [--clobber-cib | -c ] erase any existing configuration" +- print "\t [--outputfile path] optional location for the test software to write logs to" +- print "\t [--trunc] truncate logfile before starting" +- print "\t [--xmit-loss lost-rate(0.0-1.0)]" +- print "\t [--recv-loss lost-rate(0.0-1.0)]" +- print "\t [--standby (1 | 0 | yes | no)]" +- print "\t [--fencing (1 | 0 | yes | no | rhcs | lha | openstack )]" +- print "\t [--stonith-type type]" +- print "\t [--stonith-args name=value]" +- print "\t [--bsc]" +- print "\t [--no-loop-tests] dont run looping/time-based tests" +- print "\t [--no-unsafe-tests] dont run tests that are unsafe for use with ocfs2/drbd" +- print "\t [--valgrind-tests] include tests using valgrind" +- print "\t [--experimental-tests] include experimental tests" +- print "\t [--container-tests] include pacemaker_remote tests that run in lxc container resources" +- print "\t [--oprofile 'node list'] list of cluster nodes to run oprofile on]" +- print "\t [--qarsh] use the QARSH backdoor to access nodes instead of SSH" +- print "\t [--docker] Indicates nodes are docker nodes." +- print "\t [--seed random_seed]" +- print "\t [--set option=value]" +- print "\t " +- print "\t Example: " +- print "\t python sys.argv[0] -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500" ++ print("Illegal argument %s" % arg) ++ print("usage: " + sys.argv[0] +" [options] number-of-iterations") ++ print("\nCommon options: ") ++ print("\t [--nodes 'node list'] list of cluster nodes separated by whitespace") ++ print("\t [--group | -g 'name'] use the nodes listed in the named DSH group (~/.dsh/groups/$name)") ++ print("\t [--limit-nodes max] only use the first 'max' cluster nodes supplied with --nodes") ++ print("\t [--stack (v0|v1|cman|corosync|heartbeat|openais)] which cluster stack is installed") ++ print("\t [--list-tests] list the valid tests") ++ print("\t [--benchmark] add the timing information") ++ print("\t ") ++ print("Options that CTS will usually auto-detect correctly: ") ++ print("\t [--logfile path] where should the test software look for logs from cluster nodes") ++ print("\t [--syslog-facility name] which syslog facility should the test software log to") ++ print("\t [--at-boot (1|0)] does the cluster software start at boot time") ++ print("\t [--test-ip-base ip] offset for generated IP address resources") ++ print("\t ") ++ print("Options for release testing: ") ++ print("\t [--populate-resources | -r] generate a sample configuration") ++ print("\t [--choose name] run only the named test") ++ print("\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]") ++ print("\t [--once] run all valid tests once") ++ print("\t ") ++ print("Additional (less common) options: ") ++ print("\t [--clobber-cib | -c ] erase any existing configuration") ++ print("\t [--outputfile path] optional location for the test software to write logs to") ++ print("\t [--trunc] 
truncate logfile before starting") ++ print("\t [--xmit-loss lost-rate(0.0-1.0)]") ++ print("\t [--recv-loss lost-rate(0.0-1.0)]") ++ print("\t [--standby (1 | 0 | yes | no)]") ++ print("\t [--fencing (1 | 0 | yes | no | rhcs | lha | openstack )]") ++ print("\t [--stonith-type type]") ++ print("\t [--stonith-args name=value]") ++ print("\t [--bsc]") ++ print("\t [--no-loop-tests] dont run looping/time-based tests") ++ print("\t [--no-unsafe-tests] dont run tests that are unsafe for use with ocfs2/drbd") ++ print("\t [--valgrind-tests] include tests using valgrind") ++ print("\t [--experimental-tests] include experimental tests") ++ print("\t [--container-tests] include pacemaker_remote tests that run in lxc container resources") ++ print("\t [--oprofile 'node list'] list of cluster nodes to run oprofile on]") ++ print("\t [--qarsh] use the QARSH backdoor to access nodes instead of SSH") ++ print("\t [--docker] Indicates nodes are docker nodes.") ++ print("\t [--seed random_seed]") ++ print("\t [--set option=value]") ++ print("\t ") ++ print("\t Example: ") ++ print("\t python sys.argv[0] -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500") + + sys.exit(status) + +diff --git a/cts/logging.py b/cts/logging.py +index 8afa611..08da44a 100644 +--- a/cts/logging.py ++++ b/cts/logging.py +@@ -22,7 +22,7 @@ Licensed under the GNU GPL. + # along with this program; if not, write to the Free Software + # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + +-import types, string, sys, time, os ++import string, sys, time, os + + class Logger: + TimeFormat = "%b %d %H:%M:%S\t" +@@ -47,7 +47,7 @@ class StdErrLog(Logger): + + def __call__(self, lines): + t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) +- if isinstance(lines, types.StringType): ++ if isinstance(lines, basestring): + sys.__stderr__.writelines([t, lines, "\n"]) + else: + for line in lines: +@@ -71,7 +71,7 @@ class FileLog(Logger): + fd = open(self.logfile, "a") + t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) + +- if isinstance(lines, types.StringType): ++ if isinstance(lines, basestring): + fd.writelines([t, self.hostname, self.source, lines, "\n"]) + else: + for line in lines: +diff --git a/cts/patterns.py b/cts/patterns.py +index 493b690..3cdce2f 100644 +--- a/cts/patterns.py ++++ b/cts/patterns.py +@@ -67,9 +67,9 @@ class BasePatterns: + } + + def get_component(self, key): +- if self.components.has_key(key): ++ if key in self.components: + return self.components[key] +- print "Unknown component '%s' for %s" % (key, self.name) ++ print("Unknown component '%s' for %s" % (key, self.name)) + return [] + + def get_patterns(self, key): +@@ -87,12 +87,12 @@ class BasePatterns: + def __getitem__(self, key): + if key == "Name": + return self.name +- elif self.commands.has_key(key): ++ elif key in self.commands: + return self.commands[key] +- elif self.search.has_key(key): ++ elif key in self.search: + return self.search[key] + else: +- print "Unknown template '%s' for %s" % (key, self.name) ++ print("Unknown template '%s' for %s" % (key, self.name)) + return None + + class crm_lha(BasePatterns): +@@ -489,9 +489,9 @@ class PatternSelector: + crm_mcp_docker(name) + + def get_variant(self, variant): +- if patternvariants.has_key(variant): ++ if variant in patternvariants: + return patternvariants[variant] +- print "defaulting to crm-base for %s" % variant ++ print("defaulting to crm-base for %s" % variant) + return self.base + + def get_patterns(self, variant, kind): +@@ 
-532,7 +532,7 @@ if __name__ == '__main__': + template = args[i+1] + + else: +- print "Illegal argument " + args[i] ++ print("Illegal argument " + args[i]) + + +- print PatternSelector(kind)[template] ++ print(PatternSelector(kind)[template]) +diff --git a/cts/remote.py b/cts/remote.py +index b32b028..040b48a 100644 +--- a/cts/remote.py ++++ b/cts/remote.py +@@ -147,7 +147,7 @@ class RemoteExec: + sysname = args[0] + command = args[1] + +- #print "sysname: %s, us: %s" % (sysname, self.OurNode) ++ #print("sysname: %s, us: %s" % (sysname, self.OurNode)) + if sysname == None or string.lower(sysname) == self.OurNode or sysname == "localhost": + ret = command + else: +@@ -164,7 +164,7 @@ class RemoteExec: + self.logger.debug(args) + + def call_async(self, node, command, completionDelegate=None): +- #if completionDelegate: print "Waiting for %d on %s: %s" % (proc.pid, node, command) ++ #if completionDelegate: print("Waiting for %d on %s: %s" % (proc.pid, node, command)) + aproc = AsyncRemoteCmd(node, self._cmd([node, command]), completionDelegate=completionDelegate) + aproc.start() + return aproc +@@ -186,7 +186,7 @@ class RemoteExec: + proc = Popen(self._cmd([node, command]), + stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) + +- #if completionDelegate: print "Waiting for %d on %s: %s" % (proc.pid, node, command) ++ #if completionDelegate: print("Waiting for %d on %s: %s" % (proc.pid, node, command)) + if not synchronous and proc.pid > 0 and not self.silent: + aproc = AsyncWaitProc(proc, node, command, completionDelegate=completionDelegate) + aproc.start() +@@ -257,14 +257,14 @@ class RemoteFactory: + return RemoteExec(RemoteFactory.rsh, silent) + + def enable_docker(self): +- print "Using DOCKER backend for connections to cluster nodes" ++ print("Using DOCKER backend for connections to cluster nodes") + + RemoteFactory.rsh.Command = "/usr/libexec/phd/docker/phd_docker_remote_cmd " + RemoteFactory.rsh.CpCommand = "/usr/libexec/phd/docker/phd_docker_cp" + + def enable_qarsh(self): + # http://nstraz.wordpress.com/2008/12/03/introducing-qarsh/ +- print "Using QARSH for connections to cluster nodes" ++ print("Using QARSH for connections to cluster nodes") + + RemoteFactory.rsh.Command = "qarsh -t 300 -l root" + RemoteFactory.rsh.CpCommand = "qacp -q" +diff --git a/cts/watcher.py b/cts/watcher.py +index 1182c8b..de032f7 100644 +--- a/cts/watcher.py ++++ b/cts/watcher.py +@@ -73,7 +73,7 @@ for i in range(0, len(args)): + skipthis=1 + + if not os.access(filename, os.R_OK): +- print prefix + 'Last read: %d, limit=%d, count=%d - unreadable' % (0, limit, 0) ++ print(prefix + 'Last read: %d, limit=%d, count=%d - unreadable' % (0, limit, 0)) + sys.exit(1) + + logfile=open(filename, 'r') +@@ -85,7 +85,7 @@ if offset != 'EOF': + if newsize >= offset: + logfile.seek(offset) + else: +- print prefix + ('File truncated from %d to %d' % (offset, newsize)) ++ print(prefix + ('File truncated from %d to %d' % (offset, newsize))) + if (newsize*1.05) < offset: + logfile.seek(0) + # else: we probably just lost a few logs after a fencing op +@@ -103,10 +103,10 @@ while True: + line = logfile.readline() + if not line: break + +- print line.strip() ++ print(line.strip()) + count += 1 + +-print prefix + 'Last read: %d, limit=%d, count=%d' % (logfile.tell(), limit, count) ++print(prefix + 'Last read: %d, limit=%d, count=%d' % (logfile.tell(), limit, count)) + logfile.close() + """ + +@@ -158,7 +158,7 @@ class FileObj(SearchObj): + SearchObj.__init__(self, filename, host, name) + + if host is not None: +- if 
not has_log_watcher.has_key(host):
++        if not host in has_log_watcher:
+
+                 global log_watcher
+                 global log_watcher_bin
+@@ -381,7 +381,7 @@ class LogWatcher(RemoteExec):
+         else:
+             self.file_list.append(FileObj(self.filename))
+
+-        # print "%s now has %d files" % (self.name, len(self.file_list))
++        # print("%s now has %d files" % (self.name, len(self.file_list)))
+
+     def __del__(self):
+         if self.debug_level > 1: self.debug("Destroy")
+@@ -406,7 +406,7 @@ class LogWatcher(RemoteExec):
+             raise ValueError("No sources to read from")
+
+         pending = []
+-        #print "%s waiting for %d operations" % (self.name, self.pending)
++        #print("%s waiting for %d operations" % (self.name, self.pending))
+         for f in self.file_list:
+             t = f.harvest_async(self)
+             if t:
+@@ -418,7 +418,7 @@ class LogWatcher(RemoteExec):
+                 self.logger.log("%s: Aborting after 20s waiting for %s logging commands" % (self.name, repr(t)))
+                 return
+
+-        #print "Got %d lines" % len(self.line_cache)
++        #print("Got %d lines" % len(self.line_cache))
+
+     def end(self):
+         for f in self.file_list:
+diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt
+index 5d5fa33..b0115fb 100644
+--- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt
++++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt
+@@ -643,6 +643,16 @@ indexterm:[Action,Property,on-fail]
+ indexterm:[enabled,Action Property]
+ indexterm:[Action,Property,enabled]
+
++|role
++|
++|This option only makes sense for recurring operations. It restricts
++ the operation to a specific role. The truly paranoid can even
++ specify +role=Stopped+, which allows the cluster to detect an
++ administrator who manually started cluster services.
++ Allowed values: +Stopped+, +Started+, +Slave+, +Master+.
++ indexterm:[role,Action Property]
++ indexterm:[Action,Property,role]
++
+ |=========================================================
+
+ [[s-operation-defaults]]
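
The fencing changes starting below (and continuing in fencing/internal.h and fencing/remote.c) collapse the old per-device required_actions string into a single automatic_unfencing flag: only the "on" action can be automatic, whether that is requested through the agent metadata attribute automatic, its deprecated synonym required, or rsc_provides="unfencing". A condensed sketch of that decision, using only names the patch itself introduces:

    /* How a device ends up with automatic_unfencing = TRUE after this patch
     * (condensed from read_action_metadata() and build_device_from_xml()): */
    if (safe_str_eq(action, "on")) {
        /* "automatic" means the cluster will unfence the node when it joins */
        const char *automatic = crm_element_value(match, "automatic");

        /* "required" is a deprecated synonym for "automatic" */
        const char *required = crm_element_value(match, "required");

        if (crm_is_true(automatic) || crm_is_true(required)) {
            device->automatic_unfencing = TRUE;
        }
    }
    if (safe_str_eq(crm_element_value(dev, "rsc_provides"), "unfencing")) {
        device->automatic_unfencing = TRUE;
    }
    /* ...after which is_action_required() reduces to:
     *     device && device->automatic_unfencing && safe_str_eq(action, "on") */

+diff --git a/fencing/commands.c b/fencing/commands.c
+index 0d2d614..bd3b27d 100644
+--- a/fencing/commands.c
++++ b/fencing/commands.c
+@@ -124,17 +124,7 @@ static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char
+ static gboolean
+ is_action_required(const char *action, stonith_device_t *device)
+ {
+-    if(device == NULL) {
+-        return FALSE;
+-
+-    } else if (device->required_actions == NULL) {
+-        return FALSE;
+-
+-    } else if (strstr(device->required_actions, action)) {
+-        return TRUE;
+-    }
+-
+-    return FALSE;
++    return device && device->automatic_unfencing && safe_str_eq(action, "on");
+ }
+
+ static int
+@@ -449,7 +439,6 @@ free_device(gpointer data)
+     free_xml(device->agent_metadata);
+     free(device->namespace);
+     free(device->on_target_actions);
+-    free(device->required_actions);
+     free(device->agent);
+     free(device->id);
+     free(device);
+@@ -713,8 +702,6 @@ read_action_metadata(stonith_device_t *device)
+     for (lpc = 0; lpc < max; lpc++) {
+         const char *on_target = NULL;
+         const char *action = NULL;
+-        const char *automatic = NULL;
+-        const char *required = NULL;
+         xmlNode *match = getXpathResult(xpath, lpc);
+
+         CRM_LOG_ASSERT(match != NULL);
+@@ -722,8 +709,6 @@ read_action_metadata(stonith_device_t *device)
+
+         on_target = crm_element_value(match, "on_target");
+         action = crm_element_value(match, "name");
+-        automatic = crm_element_value(match, "automatic");
+-        required = crm_element_value(match, "required");
+
+         if(safe_str_eq(action, "list")) {
+             set_bit(device->flags, st_device_supports_list);
+         } else if(safe_str_eq(action, "status")) {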
set_bit(device->flags, st_device_supports_status); + } else if(safe_str_eq(action, "reboot")) { + set_bit(device->flags, st_device_supports_reboot); +- } else if(safe_str_eq(action, "on") && (crm_is_true(automatic))) { +- /* this setting implies required=true for unfencing */ +- required = "true"; ++ } else if (safe_str_eq(action, "on")) { ++ /* "automatic" means the cluster will unfence node when it joins */ ++ const char *automatic = crm_element_value(match, "automatic"); ++ ++ /* "required" is a deprecated synonym for "automatic" */ ++ const char *required = crm_element_value(match, "required"); ++ ++ if (crm_is_true(automatic) || crm_is_true(required)) { ++ device->automatic_unfencing = TRUE; ++ } + } + + if (action && crm_is_true(on_target)) { + device->on_target_actions = add_action(device->on_target_actions, action); + } +- if (action && crm_is_true(required)) { +- device->required_actions = add_action(device->required_actions, action); +- } + } + + freeXpathObject(xpath); +@@ -778,8 +767,7 @@ build_device_from_xml(xmlNode * msg) + + value = crm_element_value(dev, "rsc_provides"); + if (safe_str_eq(value, "unfencing")) { +- /* if this agent requires unfencing, 'on' is considered a required action */ +- device->required_actions = add_action(device->required_actions, "on"); ++ device->automatic_unfencing = TRUE; + } + + if (is_action_required("on", device)) { +@@ -1224,7 +1212,6 @@ stonith_device_action(xmlNode * msg, char **output) + } else if (device) { + cmd = create_async_command(msg); + if (cmd == NULL) { +- free_device(device); + return -EPROTO; + } + +diff --git a/fencing/internal.h b/fencing/internal.h +index 5fb8f9c..0f418ec 100644 +--- a/fencing/internal.h ++++ b/fencing/internal.h +@@ -26,12 +26,13 @@ typedef struct stonith_device_s { + + /*! list of actions that must execute on the target node. Used for unfencing */ + char *on_target_actions; +- char *required_actions; + GListPtr targets; + time_t targets_age; + gboolean has_attr_map; + /* should nodeid parameter for victim be included in agent arguments */ + gboolean include_nodeid; ++ /* whether the cluster should automatically unfence nodes with the device */ ++ gboolean automatic_unfencing; + guint priority; + guint active_pid; + +@@ -59,7 +60,8 @@ typedef struct stonith_device_s { + enum st_remap_phase { + st_phase_requested = 0, + st_phase_off = 1, +- st_phase_on = 2 ++ st_phase_on = 2, ++ st_phase_max = 3 + }; + + typedef struct remote_fencing_op_s { +@@ -128,15 +130,9 @@ typedef struct remote_fencing_op_s { + /*! The current operation phase being executed */ + enum st_remap_phase phase; + +- /* For phase 0 or 1 (requested action or a remapped "off"), required devices +- * will be executed regardless of what topology level is being executed +- * currently. For phase 1 (remapped "on"), required devices will not be +- * attempted, because the cluster will execute them automatically when the +- * node next joins the cluster. +- */ +- /*! Lists of devices marked as required for each phase */ +- GListPtr required_list[3]; +- /*! The device list of all the devices at the current executing topology level. */ ++ /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ ++ GListPtr automatic_list; ++ /*! List of all devices at the currently executing topology level */ + GListPtr devices_list; + /*! 
Current entry in the topology device list */ + GListPtr devices; +diff --git a/fencing/main.c b/fencing/main.c +index 46d7352..c48e12d 100644 +--- a/fencing/main.c ++++ b/fencing/main.c +@@ -553,7 +553,7 @@ remove_fencing_topology(xmlXPathObjectPtr xpathObj) + } + + static void +-register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) ++register_fencing_topology(xmlXPathObjectPtr xpathObj) + { + int max = numXpathResults(xpathObj), lpc = 0; + +@@ -584,7 +584,7 @@ register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) + */ + + static void +-fencing_topology_init(xmlNode * msg) ++fencing_topology_init() + { + xmlXPathObjectPtr xpathObj = NULL; + const char *xpath = "//" XML_TAG_FENCING_LEVEL; +@@ -598,7 +598,7 @@ fencing_topology_init(xmlNode * msg) + + /* Grab everything */ + xpathObj = xpath_search(local_cib, xpath); +- register_fencing_topology(xpathObj, TRUE); ++ register_fencing_topology(xpathObj); + + freeXpathObject(xpathObj); + } +@@ -931,7 +931,7 @@ update_fencing_topology(const char *event, xmlNode * msg) + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + +- register_fencing_topology(xpathObj, FALSE); ++ register_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + } else if(format == 2) { +@@ -969,7 +969,7 @@ update_fencing_topology(const char *event, xmlNode * msg) + /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ + crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); +- fencing_topology_init(NULL); ++ fencing_topology_init(); + return; + } + +@@ -977,7 +977,7 @@ update_fencing_topology(const char *event, xmlNode * msg) + /* Change to the topology in general */ + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); +- fencing_topology_init(NULL); ++ fencing_topology_init(); + return; + + } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { +@@ -989,7 +989,7 @@ update_fencing_topology(const char *event, xmlNode * msg) + } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", + op, add[0], add[1], add[2], xpath); +- fencing_topology_init(NULL); ++ fencing_topology_init(); + return; + } + +@@ -1098,7 +1098,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg) + } else if (stonith_enabled_saved == FALSE) { + crm_info("Updating stonith device and topology lists now that stonith is enabled"); + stonith_enabled_saved = TRUE; +- fencing_topology_init(NULL); ++ fencing_topology_init(); + cib_devices_update(); + + } else { +@@ -1114,7 +1114,7 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us + have_cib_devices = TRUE; + local_cib = copy_xml(output); + +- fencing_topology_init(msg); ++ fencing_topology_init(); + cib_devices_update(); + } + +@@ -1239,7 +1239,7 @@ st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void + * This is a hack until we can send to a nodeid and/or we fix node name lookups + * These messages are ignored in stonith_peer_callback() + */ +- xmlNode *query = query = create_xml_node(NULL, "stonith_command"); ++ xmlNode *query = create_xml_node(NULL, "stonith_command"); + + crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); + crm_xml_add(query, F_TYPE, T_STONITH_NG); +diff --git a/fencing/remote.c 
b/fencing/remote.c +index 2c00b5f..d741672 100644 +--- a/fencing/remote.c ++++ b/fencing/remote.c +@@ -60,13 +60,13 @@ typedef struct device_properties_s { + /* The remaining members are indexed by the operation's "phase" */ + + /* Whether this device has been executed in each phase */ +- gboolean executed[3]; ++ gboolean executed[st_phase_max]; + /* Whether this device is disallowed from executing in each phase */ +- gboolean disallowed[3]; ++ gboolean disallowed[st_phase_max]; + /* Action-specific timeout for each phase */ +- int custom_action_timeout[3]; ++ int custom_action_timeout[st_phase_max]; + /* Action-specific maximum random delay for each phase */ +- int delay_max[3]; ++ int delay_max[st_phase_max]; + } device_properties_t; + + typedef struct st_query_result_s { +@@ -207,22 +207,6 @@ grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer, + return TRUE; + } + +-/* +- * \internal +- * \brief Free the list of required devices for a particular phase +- * +- * \param[in,out] op Operation to modify +- * \param[in] phase Phase to modify +- */ +-static void +-free_required_list(remote_fencing_op_t *op, enum st_remap_phase phase) +-{ +- if (op->required_list[phase]) { +- g_list_free_full(op->required_list[phase], free); +- op->required_list[phase] = NULL; +- } +-} +- + static void + clear_remote_op_timers(remote_fencing_op_t * op) + { +@@ -268,9 +252,7 @@ free_remote_op(gpointer data) + g_list_free_full(op->devices_list, free); + op->devices_list = NULL; + } +- free_required_list(op, st_phase_requested); +- free_required_list(op, st_phase_off); +- free_required_list(op, st_phase_on); ++ g_list_free_full(op->automatic_list, free); + free(op); + } + +@@ -323,10 +305,10 @@ op_phase_on(remote_fencing_op_t *op) + op->phase = st_phase_on; + strcpy(op->action, "on"); + +- /* Any devices that are required for "on" will be automatically executed by +- * the cluster when the node next joins, so we skip them here. ++ /* Skip devices with automatic unfencing, because the cluster will handle it ++ * when the node rejoins. + */ +- for (iter = op->required_list[op->phase]; iter != NULL; iter = iter->next) { ++ for (iter = op->automatic_list; iter != NULL; iter = iter->next) { + GListPtr match = g_list_find_custom(op->devices_list, iter->data, + sort_strings); + +@@ -334,12 +316,8 @@ op_phase_on(remote_fencing_op_t *op) + op->devices_list = g_list_remove(op->devices_list, match->data); + } + } +- +- /* We know this level will succeed, because phase 1 completed successfully +- * and we ignore any errors from phase 2. So we can free the required list, +- * which will keep them from being executed after the device list is done. 
+- */ +- free_required_list(op, op->phase); ++ g_list_free_full(op->automatic_list, free); ++ op->automatic_list = NULL; + + /* Rewind device list pointer */ + op->devices = op->devices_list; +@@ -659,28 +637,25 @@ topology_is_empty(stonith_topology_t *tp) + + /* + * \internal +- * \brief Add a device to the required list for a particular phase ++ * \brief Add a device to an operation's automatic unfencing list + * + * \param[in,out] op Operation to modify +- * \param[in] phase Phase to modify + * \param[in] device Device ID to add + */ + static void +-add_required_device(remote_fencing_op_t *op, enum st_remap_phase phase, +- const char *device) ++add_required_device(remote_fencing_op_t *op, const char *device) + { +- GListPtr match = g_list_find_custom(op->required_list[phase], device, ++ GListPtr match = g_list_find_custom(op->automatic_list, device, + sort_strings); + + if (!match) { +- op->required_list[phase] = g_list_prepend(op->required_list[phase], +- strdup(device)); ++ op->automatic_list = g_list_prepend(op->automatic_list, strdup(device)); + } + } + + /* + * \internal +- * \brief Remove a device from the required list for the current phase ++ * \brief Remove a device from the automatic unfencing list + * + * \param[in,out] op Operation to modify + * \param[in] device Device ID to remove +@@ -688,12 +663,11 @@ add_required_device(remote_fencing_op_t *op, enum st_remap_phase phase, + static void + remove_required_device(remote_fencing_op_t *op, const char *device) + { +- GListPtr match = g_list_find_custom(op->required_list[op->phase], device, ++ GListPtr match = g_list_find_custom(op->automatic_list, device, + sort_strings); + + if (match) { +- op->required_list[op->phase] = g_list_remove(op->required_list[op->phase], +- match->data); ++ op->automatic_list = g_list_remove(op->automatic_list, match->data); + } + } + +@@ -938,7 +912,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) + + op = calloc(1, sizeof(remote_fencing_op_t)); + +- crm_element_value_int(request, F_STONITH_TIMEOUT, (int *)&(op->base_timeout)); ++ crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); + + if (peer && dev) { + op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); +@@ -974,7 +948,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + op->call_options = call_options; + +- crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid)); ++ crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid)); + + crm_trace("%s new stonith op: %s - %s of %s for %s", + (peer +@@ -1352,14 +1326,17 @@ advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg, + op->devices = op->devices->next; + } + +- /* If this device was required, it's not anymore */ +- remove_required_device(op, device); ++ /* Handle automatic unfencing if an "on" action was requested */ ++ if ((op->phase == st_phase_requested) && safe_str_eq(op->action, "on")) { ++ /* If the device we just executed was required, it's not anymore */ ++ remove_required_device(op, device); + +- /* If there are no more devices at this topology level, +- * run through any required devices not already executed +- */ +- if (op->devices == NULL) { +- op->devices = op->required_list[op->phase]; ++ /* If there are no more devices at this topology level, run through any ++ * remaining devices with automatic unfencing ++ */ ++ if (op->devices == NULL) { ++ op->devices = 
op->automatic_list; ++ } + } + + if ((op->devices == NULL) && (op->phase == st_phase_off)) { +@@ -1613,8 +1590,6 @@ parse_action_specific(xmlNode *xml, const char *peer, const char *device, + const char *action, remote_fencing_op_t *op, + enum st_remap_phase phase, device_properties_t *props) + { +- int required; +- + props->custom_action_timeout[phase] = 0; + crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, + &props->custom_action_timeout[phase]); +@@ -1630,20 +1605,16 @@ parse_action_specific(xmlNode *xml, const char *peer, const char *device, + peer, device, props->delay_max[phase], action); + } + +- required = 0; +- crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); +- if (required) { +- /* If the action is marked as required, add the device to the +- * operation's list of required devices for this phase. We use this +- * for unfencing when executing a topology. In phase 0 (requested +- * action) or phase 1 (remapped "off"), required devices get executed +- * regardless of their topology level; in phase 2 (remapped "on"), +- * required devices are not attempted, because the cluster will +- * execute them automatically later. +- */ +- crm_trace("Peer %s requires device %s to execute for action %s", +- peer, device, action); +- add_required_device(op, phase, device); ++ /* Handle devices with automatic unfencing */ ++ if (safe_str_eq(action, "on")) { ++ int required = 0; ++ ++ crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); ++ if (required) { ++ crm_trace("Peer %s requires device %s to execute for action %s", ++ peer, device, action); ++ add_required_device(op, device); ++ } + } + + /* If a reboot is remapped to off+on, it's possible that a node is allowed +diff --git a/include/crm/cib.h b/include/crm/cib.h +index cb465bf..306706e 100644 +--- a/include/crm/cib.h ++++ b/include/crm/cib.h +@@ -136,6 +136,13 @@ typedef struct cib_api_operations_s { + void *user_data, const char *callback_name, + void (*callback) (xmlNode *, int, int, xmlNode *, void *)); + ++ gboolean (*register_callback_full)(cib_t *cib, int call_id, int timeout, ++ gboolean only_success, void *user_data, ++ const char *callback_name, ++ void (*callback)(xmlNode *, int, int, ++ xmlNode *, void *), ++ void (*free_func)(void *)); ++ + } cib_api_operations_t; + + struct cib_s { +diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h +index 431a2bd..adc2faf 100644 +--- a/include/crm/cib/internal.h ++++ b/include/crm/cib/internal.h +@@ -106,7 +106,7 @@ typedef struct cib_callback_client_s { + void *user_data; + gboolean only_success; + struct timer_rec_s *timer; +- ++ void (*free_func)(void *); + } cib_callback_client_t; + + struct timer_rec_s { +@@ -137,6 +137,13 @@ int cib_native_register_notification(cib_t * cib, const char *callback, int enab + gboolean cib_client_register_callback(cib_t * cib, int call_id, int timeout, gboolean only_success, + void *user_data, const char *callback_name, + void (*callback) (xmlNode *, int, int, xmlNode *, void *)); ++gboolean cib_client_register_callback_full(cib_t *cib, int call_id, ++ int timeout, gboolean only_success, ++ void *user_data, ++ const char *callback_name, ++ void (*callback)(xmlNode *, int, int, ++ xmlNode *, void *), ++ void (*free_func)(void *)); + + int cib_process_query(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, +diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h +index db83b09..d6ceda2 100644 +--- 
a/include/crm/common/ipc.h ++++ b/include/crm/common/ipc.h +@@ -75,7 +75,7 @@ long crm_ipc_read(crm_ipc_t * client); + const char *crm_ipc_buffer(crm_ipc_t * client); + uint32_t crm_ipc_buffer_flags(crm_ipc_t * client); + const char *crm_ipc_name(crm_ipc_t * client); +-int crm_ipc_default_buffer_size(void); ++unsigned int crm_ipc_default_buffer_size(void); + + /* Utils */ + xmlNode *create_hello_message(const char *uuid, const char *client_name, +diff --git a/include/crm/common/ipcs.h b/include/crm/common/ipcs.h +index b43fc53..d825912 100644 +--- a/include/crm/common/ipcs.h ++++ b/include/crm/common/ipcs.h +@@ -110,7 +110,7 @@ void crm_ipcs_send_ack(crm_client_t * c, uint32_t request, uint32_t flags, + const char *tag, const char *function, int line); + + /* when max_send_size is 0, default ipc buffer size is used */ +-ssize_t crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec **result, int32_t max_send_size); ++ssize_t crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, uint32_t max_send_size); + ssize_t crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message, enum crm_ipc_flags flags); + ssize_t crm_ipcs_sendv(crm_client_t * c, struct iovec *iov, enum crm_ipc_flags flags); + xmlNode *crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t * flags); +diff --git a/lib/cib/cib_client.c b/lib/cib/cib_client.c +index b13323e..f7a19b8 100644 +--- a/lib/cib/cib_client.c ++++ b/lib/cib/cib_client.c +@@ -198,6 +198,11 @@ cib_destroy_op_callback(gpointer data) + g_source_remove(blob->timer->ref); + } + free(blob->timer); ++ ++ if (blob->user_data && blob->free_func) { ++ blob->free_func(blob->user_data); ++ } ++ + free(blob); + } + +@@ -327,10 +332,15 @@ cib_new(void) + return cib_native_new(); + } + +-/* this is backwards... +- cib_*_new should call this not the other way around ++/* ++ * \internal ++ * \brief Create a generic CIB connection instance ++ * ++ * \return Newly allocated and initialized cib_t instance ++ * ++ * \note This is called by each variant's cib_*_new() function before setting ++ * variant-specific values. 
+ */ +- + cib_t * + cib_new_variant(void) + { +@@ -364,6 +374,7 @@ cib_new_variant(void) + new_cib->cmds->add_notify_callback = cib_client_add_notify_callback; + new_cib->cmds->del_notify_callback = cib_client_del_notify_callback; + new_cib->cmds->register_callback = cib_client_register_callback; ++ new_cib->cmds->register_callback_full = cib_client_register_callback_full; + + new_cib->cmds->noop = cib_client_noop; + new_cib->cmds->ping = cib_client_ping; +@@ -545,6 +556,19 @@ cib_client_register_callback(cib_t * cib, int call_id, int timeout, gboolean onl + void *user_data, const char *callback_name, + void (*callback) (xmlNode *, int, int, xmlNode *, void *)) + { ++ return cib_client_register_callback_full(cib, call_id, timeout, ++ only_success, user_data, ++ callback_name, callback, NULL); ++} ++ ++gboolean ++cib_client_register_callback_full(cib_t *cib, int call_id, int timeout, ++ gboolean only_success, void *user_data, ++ const char *callback_name, ++ void (*callback)(xmlNode *, int, int, ++ xmlNode *, void *), ++ void (*free_func)(void *)) ++{ + cib_callback_client_t *blob = NULL; + + if (call_id < 0) { +@@ -553,6 +577,9 @@ cib_client_register_callback(cib_t * cib, int call_id, int timeout, gboolean onl + } else { + crm_warn("CIB call failed: %s", pcmk_strerror(call_id)); + } ++ if (user_data && free_func) { ++ free_func(user_data); ++ } + return FALSE; + } + +@@ -561,6 +588,7 @@ cib_client_register_callback(cib_t * cib, int call_id, int timeout, gboolean onl + blob->only_success = only_success; + blob->user_data = user_data; + blob->callback = callback; ++ blob->free_func = free_func; + + if (timeout > 0) { + struct timer_rec_s *async_timer = NULL; +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index d321517..4dc65aa 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -624,12 +624,6 @@ cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc) + { + xmlNode *output = NULL; + cib_callback_client_t *blob = NULL; +- cib_callback_client_t local_blob; +- +- local_blob.id = NULL; +- local_blob.callback = NULL; +- local_blob.user_data = NULL; +- local_blob.only_success = FALSE; + + if (msg != NULL) { + crm_element_value_int(msg, F_CIB_RC, &rc); +@@ -638,16 +632,8 @@ cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc) + } + + blob = g_hash_table_lookup(cib_op_callback_table, GINT_TO_POINTER(call_id)); +- +- if (blob != NULL) { +- local_blob = *blob; +- blob = NULL; +- +- remove_cib_op_callback(call_id, FALSE); +- +- } else { ++ if (blob == NULL) { + crm_trace("No callback found for call %d", call_id); +- local_blob.callback = NULL; + } + + if (cib == NULL) { +@@ -659,15 +645,20 @@ cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc) + rc = pcmk_ok; + } + +- if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { +- crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); +- local_blob.callback(msg, call_id, rc, output, local_blob.user_data); ++ if (blob && blob->callback && (rc == pcmk_ok || blob->only_success == FALSE)) { ++ crm_trace("Invoking callback %s for call %d", crm_str(blob->id), call_id); ++ blob->callback(msg, call_id, rc, output, blob->user_data); + + } else if (cib && cib->op_callback == NULL && rc != pcmk_ok) { + crm_warn("CIB command failed: %s", pcmk_strerror(rc)); + crm_log_xml_debug(msg, "Failed CIB Update"); + } + ++ /* This may free user_data, so do it after the callback */ ++ if (blob) { ++ remove_cib_op_callback(call_id, FALSE); ++ } ++ + if (cib 
&& cib->op_callback != NULL) { + crm_trace("Invoking global callback for call %d", call_id); + cib->op_callback(msg, call_id, rc, output); +diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c +index d93613d..e9905f6 100644 +--- a/lib/cluster/legacy.c ++++ b/lib/cluster/legacy.c +@@ -52,6 +52,21 @@ void *ais_ipc_ctx = NULL; + + hdb_handle_t ais_ipc_handle = 0; + ++static bool valid_cman_name(const char *name, uint32_t nodeid) ++{ ++ bool rc = TRUE; ++ ++ /* Yes, %d, because that's what CMAN does */ ++ char *fakename = crm_strdup_printf("Node%d", nodeid); ++ ++ if(crm_str_eq(fakename, name, TRUE)) { ++ rc = FALSE; ++ crm_notice("Ignoring inferred name from cman: %s", fakename); ++ } ++ free(fakename); ++ return rc; ++} ++ + static gboolean + plugin_get_details(uint32_t * id, char **uname) + { +@@ -361,6 +376,7 @@ cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) + arg ? "retained" : "still lost"); + } + ++ memset(cman_nodes, 0, MAX_NODES * sizeof(cman_node_t)); + rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); + if (rc < 0) { + crm_err("Couldn't query cman node list: %d %d", rc, errno); +@@ -369,6 +385,7 @@ cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) + + for (lpc = 0; lpc < node_count; lpc++) { + crm_node_t *peer = NULL; ++ const char *name = NULL; + + if (cman_nodes[lpc].cn_nodeid == 0) { + /* Never allow node ID 0 to be considered a member #315711 */ +@@ -376,7 +393,11 @@ cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) + continue; + } + +- peer = crm_get_peer(cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_name); ++ if(valid_cman_name(cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_nodeid)) { ++ name = cman_nodes[lpc].cn_name; ++ } ++ ++ peer = crm_get_peer(cman_nodes[lpc].cn_nodeid, name); + if(cman_nodes[lpc].cn_member) { + crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, crm_peer_seq); + +@@ -631,15 +652,17 @@ cman_node_name(uint32_t nodeid) + + cman = cman_init(NULL); + if (cman != NULL && cman_is_active(cman)) { +- us.cn_name[0] = 0; ++ ++ memset(&us, 0, sizeof(cman_node_t)); + cman_get_node(cman, nodeid, &us); +- name = strdup(us.cn_name); +- crm_info("Using CMAN node name %s for %u", name, nodeid); +- } ++ if(valid_cman_name(us.cn_name, nodeid)) { ++ name = strdup(us.cn_name); ++ crm_info("Using CMAN node name %s for %u", name, nodeid); ++ } ++ } + + cman_finish(cman); + # endif +- + if (name == NULL) { + crm_debug("Unable to get node name for nodeid %u", nodeid); + } +@@ -667,7 +690,6 @@ init_cs_connection_once(crm_cluster_t * cluster) + if (cluster_connect_cpg(cluster) == FALSE) { + return FALSE; + } +- cluster->uname = cman_node_name(0 /* CMAN_NODEID_US */ ); + break; + case pcmk_cluster_heartbeat: + crm_info("Could not find an active corosync based cluster"); +diff --git a/lib/common/ipc.c b/lib/common/ipc.c +index d71c54a..f4188ed 100644 +--- a/lib/common/ipc.c ++++ b/lib/common/ipc.c +@@ -46,8 +46,8 @@ struct crm_ipc_response_header { + }; + + static int hdr_offset = 0; +-static int ipc_buffer_max = 0; +-static unsigned int pick_ipc_buffer(int max); ++static unsigned int ipc_buffer_max = 0; ++static unsigned int pick_ipc_buffer(unsigned int max); + + static inline void + crm_ipc_init(void) +@@ -60,7 +60,7 @@ crm_ipc_init(void) + } + } + +-int ++unsigned int + crm_ipc_default_buffer_size(void) + { + return pick_ipc_buffer(0); +@@ -91,7 +91,7 @@ generateReference(const char *custom1, const char *custom2) + since_epoch = calloc(1, reference_len); + + 
if (since_epoch != NULL) { +- sprintf(since_epoch, "%s-%s-%ld-%u", ++ sprintf(since_epoch, "%s-%s-%lu-%u", + local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++); + } + +@@ -431,7 +431,7 @@ crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t + unsigned int size_u = 1 + header->size_uncompressed; + uncompressed = calloc(1, size_u); + +- crm_trace("Decompressing message data %d bytes into %d bytes", ++ crm_trace("Decompressing message data %u bytes into %u bytes", + header->size_compressed, size_u); + + rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); +@@ -531,9 +531,9 @@ crm_ipcs_flush_events(crm_client_t * c) + } + + ssize_t +-crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, int32_t max_send_size) ++crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, uint32_t max_send_size) + { +- static int biggest = 0; ++ static unsigned int biggest = 0; + struct iovec *iov; + unsigned int total = 0; + char *compressed = NULL; +@@ -579,20 +579,18 @@ crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, int + + free(buffer); + +- if (header->size_compressed > biggest) { +- biggest = 2 * QB_MAX(header->size_compressed, biggest); +- } ++ biggest = QB_MAX(header->size_compressed, biggest); + + } else { + ssize_t rc = -EMSGSIZE; + + crm_log_xml_trace(message, "EMSGSIZE"); +- biggest = 2 * QB_MAX(header->size_uncompressed, biggest); ++ biggest = QB_MAX(header->size_uncompressed, biggest); + + crm_err +- ("Could not compress the message into less than the configured ipc limit (%d bytes)." +- "Set PCMK_ipc_buffer to a higher value (%d bytes suggested)", max_send_size, +- biggest); ++ ("Could not compress the message (%u bytes) into less than the configured ipc limit (%u bytes). 
" ++ "Set PCMK_ipc_buffer to a higher value (%u bytes suggested)", ++ header->size_uncompressed, max_send_size, 4 * biggest); + + free(compressed); + free(buffer); +@@ -656,7 +654,7 @@ crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_flags flags) + + rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); + if (rc < header->qb.size) { +- crm_notice("Response %d to %p[%d] (%d bytes) failed: %s (%d)", ++ crm_notice("Response %d to %p[%d] (%u bytes) failed: %s (%d)", + header->qb.id, c->ipcs, c->pid, header->qb.size, pcmk_strerror(rc), rc); + + } else { +@@ -747,9 +745,9 @@ struct crm_ipc_s { + }; + + static unsigned int +-pick_ipc_buffer(int max) ++pick_ipc_buffer(unsigned int max) + { +- static int global_max = 0; ++ static unsigned int global_max = 0; + + if(global_max == 0) { + const char *env = getenv("PCMK_ipc_buffer"); +@@ -925,7 +923,7 @@ crm_ipc_decompress(crm_ipc_t * client) + unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size); + char *uncompressed = calloc(1, new_buf_size); + +- crm_trace("Decompressing message data %d bytes into %d bytes", ++ crm_trace("Decompressing message data %u bytes into %u bytes", + header->size_compressed, size_u); + + rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u, +@@ -986,7 +984,7 @@ crm_ipc_read(crm_ipc_t * client) + return -EBADMSG; + } + +- crm_trace("Received %s event %d, size=%d, rc=%d, text: %.100s", ++ crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s", + client->name, header->qb.id, header->qb.size, client->msg_size, + client->buffer + hdr_offset); + +@@ -1166,9 +1164,9 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in + + if(header->size_compressed) { + if(factor < 10 && (client->max_buf_size / 10) < (rc / factor)) { +- crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%d bytes), " +- "consider setting PCMK_ipc_buffer to %d or higher", +- factor, client->max_buf_size, 2*client->max_buf_size); ++ crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%u bytes), " ++ "consider setting PCMK_ipc_buffer to %u or higher", ++ factor, client->max_buf_size, 2 * client->max_buf_size); + factor++; + } + } +@@ -1211,7 +1209,7 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in + if (rc > 0) { + struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer; + +- crm_trace("Received response %d, size=%d, rc=%ld, text: %.200s", hdr->qb.id, hdr->qb.size, ++ crm_trace("Received response %d, size=%u, rc=%ld, text: %.200s", hdr->qb.id, hdr->qb.size, + rc, crm_ipc_buffer(client)); + + if (reply) { +diff --git a/lib/common/xml.c b/lib/common/xml.c +index 8eed245..299c7bf 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -3821,6 +3821,7 @@ crm_xml_dump(xmlNode * data, int options, char **buffer, int *offset, int *max, + if(data == NULL) { + *offset = 0; + *max = 0; ++ return; + } + #if 0 + if (is_not_set(options, xml_log_option_filtered)) { +@@ -5621,7 +5622,7 @@ update_validation(xmlNode ** xml_blob, int *best, int max, gboolean transform, g + break; + + } else if (known_schemas[lpc].transform == NULL) { +- crm_notice("%s-style configuration is also valid for %s", ++ crm_debug("%s-style configuration is also valid for %s", + known_schemas[lpc].name, known_schemas[next].name); + + if (validate_with(xml, next, to_logs)) { +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index f5e34ee..42bdf2b 100644 +--- a/lib/lrmd/lrmd_client.c ++++ 
b/lib/lrmd/lrmd_client.c +@@ -1369,7 +1369,7 @@ lrmd_api_disconnect(lrmd_t * lrmd) + { + lrmd_private_t *native = lrmd->private; + +- crm_info("Disconnecting from lrmd service"); ++ crm_info("Disconnecting from %d lrmd service", native->type); + switch (native->type) { + case CRM_CLIENT_IPC: + lrmd_ipc_disconnect(lrmd); +diff --git a/lib/services/dbus.c b/lib/services/dbus.c +index e2efecb..d42affe 100644 +--- a/lib/services/dbus.c ++++ b/lib/services/dbus.c +@@ -329,9 +329,6 @@ pcmk_dbus_lookup_cb(DBusPendingCall *pending, void *user_data) + + pcmk_dbus_lookup_result(reply, user_data); + +- if(pending) { +- dbus_pending_call_unref(pending); +- } + if(reply) { + dbus_message_unref(reply); + } +diff --git a/lib/services/services.c b/lib/services/services.c +index 7e2b9f7..3f40078 100644 +--- a/lib/services/services.c ++++ b/lib/services/services.c +@@ -150,6 +150,7 @@ resources_action_create(const char *name, const char *standard, const char *prov + + op = calloc(1, sizeof(svc_action_t)); + op->opaque = calloc(1, sizeof(svc_action_private_t)); ++ op->opaque->pending = NULL; + op->rsc = strdup(name); + op->action = strdup(action); + op->interval = interval; +@@ -158,6 +159,7 @@ resources_action_create(const char *name, const char *standard, const char *prov + op->agent = strdup(agent); + op->sequence = ++operations; + op->flags = flags; ++ + if (asprintf(&op->id, "%s_%s_%d", name, action, interval) == -1) { + goto return_error; + } +@@ -335,6 +337,7 @@ services_action_create_generic(const char *exec, const char *args[]) + + op->opaque->exec = strdup(exec); + op->opaque->args[0] = strdup(exec); ++ op->opaque->pending = NULL; + + for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { + op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); +@@ -361,17 +364,17 @@ services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) + { + if (op->opaque->pending && (op->opaque->pending != pending)) { + if (pending) { +- crm_info("Lost pending DBus call (%p)", op->opaque->pending); ++ crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending); + } else { +- crm_trace("Done with pending DBus call (%p)", op->opaque->pending); ++ crm_info("Done with pending %s DBus call (%p)", op->id, op->opaque->pending); + } + dbus_pending_call_unref(op->opaque->pending); + } + op->opaque->pending = pending; + if (pending) { +- crm_trace("Updated pending DBus call (%p)", pending); ++ crm_info("Updated pending %s DBus call (%p)", op->id, pending); + } else { +- crm_trace("Cleared pending DBus call"); ++ crm_info("Cleared pending %s DBus call", op->id); + } + } + #endif +@@ -457,7 +460,7 @@ services_action_free(svc_action_t * op) + gboolean + cancel_recurring_action(svc_action_t * op) + { +- crm_info("Cancelling operation %s", op->id); ++ crm_info("Cancelling %s operation %s", op->standard, op->id); + + if (recurring_actions) { + g_hash_table_remove(recurring_actions, op->id); +diff --git a/lib/services/systemd.c b/lib/services/systemd.c +index e1e1bc9..ca56915 100644 +--- a/lib/services/systemd.c ++++ b/lib/services/systemd.c +@@ -189,16 +189,13 @@ systemd_loadunit_cb(DBusPendingCall *pending, void *user_data) + reply = dbus_pending_call_steal_reply(pending); + } + +- if(op) { +- crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action); +- } else { +- crm_trace("Got result: %p for %p", reply, pending); +- } ++ crm_trace("Got result: %p for %p / %p for %s", reply, pending, op->opaque->pending, op->id); ++ ++ CRM_LOG_ASSERT(pending == op->opaque->pending); ++ 
services_set_op_pending(op, NULL); ++ + systemd_loadunit_result(reply, user_data); + +- if(pending) { +- dbus_pending_call_unref(pending); +- } + if(reply) { + dbus_message_unref(reply); + } +@@ -209,6 +206,7 @@ systemd_unit_by_name(const gchar * arg_name, svc_action_t *op) + { + DBusMessage *msg; + DBusMessage *reply = NULL; ++ DBusPendingCall* pending = NULL; + char *name = NULL; + + /* +@@ -249,7 +247,11 @@ systemd_unit_by_name(const gchar * arg_name, svc_action_t *op) + return munit; + } + +- pcmk_dbus_send(msg, systemd_proxy, systemd_loadunit_cb, op, op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT); ++ pending = pcmk_dbus_send(msg, systemd_proxy, systemd_loadunit_cb, op, op->timeout); ++ if(pending) { ++ services_set_op_pending(op, pending); ++ } ++ + dbus_message_unref(msg); + return NULL; + } +@@ -459,23 +461,12 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data) + reply = dbus_pending_call_steal_reply(pending); + } + +- if(op) { +- crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action); +- if (pending == op->opaque->pending) { +- op->opaque->pending = NULL; +- } else { +- crm_info("Received unexpected reply for pending DBus call (%p vs %p)", +- op->opaque->pending, pending); +- } +- systemd_exec_result(reply, op); ++ crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action); + +- } else { +- crm_trace("Got result: %p for %p", reply, pending); +- } ++ CRM_LOG_ASSERT(pending == op->opaque->pending); ++ services_set_op_pending(op, NULL); ++ systemd_exec_result(reply, op); + +- if(pending) { +- dbus_pending_call_unref(pending); +- } + if(reply) { + dbus_message_unref(reply); + } +@@ -536,7 +527,6 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) + free(state); + return op->rc == PCMK_OCF_OK; + } else if (pending) { +- dbus_pending_call_ref(pending); + services_set_op_pending(op, pending); + return TRUE; + } +diff --git a/lib/services/upstart.c b/lib/services/upstart.c +index 31b875b..eb8cfa8 100644 +--- a/lib/services/upstart.c ++++ b/lib/services/upstart.c +@@ -322,10 +322,7 @@ upstart_job_check(const char *name, const char *state, void *userdata) + } + + if (op->synchronous == FALSE) { +- if (op->opaque->pending) { +- dbus_pending_call_unref(op->opaque->pending); +- } +- op->opaque->pending = NULL; ++ services_set_op_pending(op, NULL); + operation_finalize(op); + } + } +@@ -392,6 +389,7 @@ upstart_async_dispatch(DBusPendingCall *pending, void *user_data) + if(pending) { + reply = dbus_pending_call_steal_reply(pending); + } ++ + if(pcmk_dbus_find_error(op->action, pending, reply, &error)) { + + /* ignore "already started" or "not running" errors */ +@@ -419,11 +417,10 @@ upstart_async_dispatch(DBusPendingCall *pending, void *user_data) + } + } + ++ CRM_LOG_ASSERT(pending == op->opaque->pending); ++ services_set_op_pending(op, NULL); + operation_finalize(op); + +- if(pending) { +- dbus_pending_call_unref(pending); +- } + if(reply) { + dbus_message_unref(reply); + } +@@ -483,8 +480,7 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) + free(state); + return op->rc == PCMK_OCF_OK; + } else if (pending) { +- dbus_pending_call_ref(pending); +- op->opaque->pending = pending; ++ services_set_op_pending(op, pending); + return TRUE; + } + return FALSE; +@@ -527,8 +523,7 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous) + free(job); + + if(pending) { +- dbus_pending_call_ref(pending); +- op->opaque->pending = pending; ++ services_set_op_pending(op, pending); + return TRUE; + } + return FALSE; 
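The systemd and upstart hunks above all converge on one ownership rule for asynchronous DBus calls: the reference returned by the send is handed straight to the operation, every store or clear goes through services_set_op_pending(), and that setter is the only place the previous reference is dropped. Below is a minimal sketch of that rule, using libdbus but with hypothetical names (async_op and op_set_pending are illustrative only, not Pacemaker's API):

    #include <dbus/dbus.h>
    #include <stddef.h>

    struct async_op {
        const char *id;           /* operation name, used only for logging */
        DBusPendingCall *pending; /* the one outstanding call, or NULL */
    };

    /* Record a new outstanding call (or NULL once its reply is handled).
     * Ownership of the caller's reference transfers to the operation, so
     * callers no longer take an extra ref before storing, and reply
     * dispatchers no longer unref by hand -- they just pass NULL here. */
    static void
    op_set_pending(struct async_op *op, DBusPendingCall *pending)
    {
        if (op->pending && (op->pending != pending)) {
            dbus_pending_call_unref(op->pending); /* drop the stale reference */
        }
        op->pending = pending;
    }

A reply dispatcher under this rule first asserts pending == op->pending, then calls op_set_pending(op, NULL) before processing the stolen reply, which is exactly the shape systemd_async_dispatch() and upstart_async_dispatch() take after this patch.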
+diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c +index 72d83c4..9427393 100644 +--- a/lrmd/ipc_proxy.c ++++ b/lrmd/ipc_proxy.c +@@ -165,14 +165,14 @@ ipc_proxy_forward_client(crm_client_t *ipc_proxy, xmlNode *xml) + */ + + if (safe_str_eq(msg_type, "event")) { +- crm_info("Sending event to %s", ipc_client->id); ++ crm_trace("Sending event to %s", ipc_client->id); + rc = crm_ipcs_send(ipc_client, 0, msg, crm_ipc_server_event); + + } else if (safe_str_eq(msg_type, "response")) { + int msg_id = 0; + + crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id); +- crm_info("Sending response to %d - %s", ipc_client->request_id, ipc_client->id); ++ crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id); + rc = crm_ipcs_send(ipc_client, msg_id, msg, FALSE); + + CRM_LOG_ASSERT(msg_id == ipc_client->request_id); +diff --git a/lrmd/pacemaker_remote.service.in b/lrmd/pacemaker_remote.service.in +index 7ec42b4..15e61fb 100644 +--- a/lrmd/pacemaker_remote.service.in ++++ b/lrmd/pacemaker_remote.service.in +@@ -9,7 +9,6 @@ WantedBy=multi-user.target + Type=simple + KillMode=process + NotifyAccess=none +-SysVStartPriority=99 + EnvironmentFile=-/etc/sysconfig/pacemaker + + ExecStart=@sbindir@/pacemaker_remoted +diff --git a/mcp/pacemaker.service.in b/mcp/pacemaker.service.in +index 2ef9454..9b0a824 100644 +--- a/mcp/pacemaker.service.in ++++ b/mcp/pacemaker.service.in +@@ -20,7 +20,6 @@ WantedBy=multi-user.target + Type=simple + KillMode=process + NotifyAccess=main +-SysVStartPriority=99 + EnvironmentFile=-@sysconfdir@/sysconfig/pacemaker + EnvironmentFile=-@sysconfdir@/sysconfig/sbd + SuccessExitStatus=100 +diff --git a/pengine/allocate.c b/pengine/allocate.c +index ec5a18d..c2e56f9 100644 +--- a/pengine/allocate.c ++++ b/pengine/allocate.c +@@ -1495,11 +1495,12 @@ stage6(pe_working_set_t * data_set) + } + } + +- if (last_stonith) { +- order_actions(last_stonith, done, pe_order_implies_then); + +- } else if (dc_fence) { ++ if (dc_fence) { + order_actions(dc_down, done, pe_order_implies_then); ++ ++ } else if (last_stonith) { ++ order_actions(last_stonith, done, pe_order_implies_then); + } + + order_actions(done, all_stopped, pe_order_implies_then); +diff --git a/pengine/test10/rec-node-14.dot b/pengine/test10/rec-node-14.dot +index 395fa89..5ceef92 100644 +--- a/pengine/test10/rec-node-14.dot ++++ b/pengine/test10/rec-node-14.dot +@@ -2,9 +2,9 @@ + "all_stopped" [ style=bold color="green" fontcolor="orange" ] + "stonith 'reboot' node1" -> "stonith 'reboot' node3" [ style = bold] + "stonith 'reboot' node1" [ style=bold color="green" fontcolor="black"] ++"stonith 'reboot' node2" -> "stonith_complete" [ style = bold] + "stonith 'reboot' node2" [ style=bold color="green" fontcolor="black"] + "stonith 'reboot' node3" -> "stonith 'reboot' node2" [ style = bold] +-"stonith 'reboot' node3" -> "stonith_complete" [ style = bold] + "stonith 'reboot' node3" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] + "stonith_complete" [ style=bold color="green" fontcolor="orange" ] +diff --git a/pengine/test10/rec-node-14.exp b/pengine/test10/rec-node-14.exp +index 58bb5ca..0e5e163 100644 +--- a/pengine/test10/rec-node-14.exp ++++ b/pengine/test10/rec-node-14.exp +@@ -39,7 +39,7 @@ + + + +- ++ + + + +diff --git a/pengine/test10/stonith-0.dot b/pengine/test10/stonith-0.dot +index 29cdd59..8ad32fd 100644 +--- a/pengine/test10/stonith-0.dot ++++ b/pengine/test10/stonith-0.dot +@@ -71,13 +71,13 @@ digraph "g" { + "stonith 'reboot' c001n03" -> 
"ocf_192.168.100.181_stop_0 c001n03" [ style = bold] + "stonith 'reboot' c001n03" -> "ocf_192.168.100.183_stop_0 c001n03" [ style = bold] + "stonith 'reboot' c001n03" -> "rsc_c001n07_stop_0 c001n03" [ style = bold] ++"stonith 'reboot' c001n03" -> "stonith_complete" [ style = bold] + "stonith 'reboot' c001n03" [ style=bold color="green" fontcolor="black"] + "stonith 'reboot' c001n05" -> "group-1_stop_0" [ style = bold] + "stonith 'reboot' c001n05" -> "ocf_192.168.100.181_stop_0 c001n05" [ style = bold] + "stonith 'reboot' c001n05" -> "ocf_192.168.100.183_stop_0 c001n05" [ style = bold] + "stonith 'reboot' c001n05" -> "rsc_c001n05_stop_0 c001n05" [ style = bold] + "stonith 'reboot' c001n05" -> "stonith 'reboot' c001n03" [ style = bold] +-"stonith 'reboot' c001n05" -> "stonith_complete" [ style = bold] + "stonith 'reboot' c001n05" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] + "stonith_complete" -> "heartbeat_192.168.100.182_start_0 c001n02" [ style = bold] +diff --git a/pengine/test10/stonith-0.exp b/pengine/test10/stonith-0.exp +index 9d47215..a6695c9 100644 +--- a/pengine/test10/stonith-0.exp ++++ b/pengine/test10/stonith-0.exp +@@ -394,7 +394,7 @@ + + + +- ++ + + + +diff --git a/pengine/test10/systemhealth1.dot b/pengine/test10/systemhealth1.dot +index 28841b7..a29f519 100644 +--- a/pengine/test10/systemhealth1.dot ++++ b/pengine/test10/systemhealth1.dot +@@ -1,8 +1,8 @@ + digraph "g" { + "all_stopped" [ style=bold color="green" fontcolor="orange" ] ++"stonith 'reboot' hs21c" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21c" [ style=bold color="green" fontcolor="black"] + "stonith 'reboot' hs21d" -> "stonith 'reboot' hs21c" [ style = bold] +-"stonith 'reboot' hs21d" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21d" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] + "stonith_complete" [ style=bold color="green" fontcolor="orange" ] +diff --git a/pengine/test10/systemhealth1.exp b/pengine/test10/systemhealth1.exp +index 80a2329..aa2afe1 100644 +--- a/pengine/test10/systemhealth1.exp ++++ b/pengine/test10/systemhealth1.exp +@@ -27,7 +27,7 @@ + + + +- ++ + + + +diff --git a/pengine/test10/systemhealthm1.dot b/pengine/test10/systemhealthm1.dot +index 28841b7..a29f519 100644 +--- a/pengine/test10/systemhealthm1.dot ++++ b/pengine/test10/systemhealthm1.dot +@@ -1,8 +1,8 @@ + digraph "g" { + "all_stopped" [ style=bold color="green" fontcolor="orange" ] ++"stonith 'reboot' hs21c" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21c" [ style=bold color="green" fontcolor="black"] + "stonith 'reboot' hs21d" -> "stonith 'reboot' hs21c" [ style = bold] +-"stonith 'reboot' hs21d" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21d" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] + "stonith_complete" [ style=bold color="green" fontcolor="orange" ] +diff --git a/pengine/test10/systemhealthm1.exp b/pengine/test10/systemhealthm1.exp +index 80a2329..aa2afe1 100644 +--- a/pengine/test10/systemhealthm1.exp ++++ b/pengine/test10/systemhealthm1.exp +@@ -27,7 +27,7 @@ + + + +- ++ + + + +diff --git a/pengine/test10/systemhealthn1.dot b/pengine/test10/systemhealthn1.dot +index 28841b7..a29f519 100644 +--- a/pengine/test10/systemhealthn1.dot ++++ b/pengine/test10/systemhealthn1.dot +@@ -1,8 +1,8 @@ + digraph "g" { + "all_stopped" [ style=bold color="green" fontcolor="orange" ] ++"stonith 'reboot' hs21c" -> 
"stonith_complete" [ style = bold] + "stonith 'reboot' hs21c" [ style=bold color="green" fontcolor="black"] + "stonith 'reboot' hs21d" -> "stonith 'reboot' hs21c" [ style = bold] +-"stonith 'reboot' hs21d" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21d" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] + "stonith_complete" [ style=bold color="green" fontcolor="orange" ] +diff --git a/pengine/test10/systemhealthn1.exp b/pengine/test10/systemhealthn1.exp +index 80a2329..aa2afe1 100644 +--- a/pengine/test10/systemhealthn1.exp ++++ b/pengine/test10/systemhealthn1.exp +@@ -27,7 +27,7 @@ + + + +- ++ + + + +diff --git a/pengine/test10/systemhealtho1.dot b/pengine/test10/systemhealtho1.dot +index 28841b7..a29f519 100644 +--- a/pengine/test10/systemhealtho1.dot ++++ b/pengine/test10/systemhealtho1.dot +@@ -1,8 +1,8 @@ + digraph "g" { + "all_stopped" [ style=bold color="green" fontcolor="orange" ] ++"stonith 'reboot' hs21c" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21c" [ style=bold color="green" fontcolor="black"] + "stonith 'reboot' hs21d" -> "stonith 'reboot' hs21c" [ style = bold] +-"stonith 'reboot' hs21d" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21d" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] + "stonith_complete" [ style=bold color="green" fontcolor="orange" ] +diff --git a/pengine/test10/systemhealtho1.exp b/pengine/test10/systemhealtho1.exp +index 80a2329..aa2afe1 100644 +--- a/pengine/test10/systemhealtho1.exp ++++ b/pengine/test10/systemhealtho1.exp +@@ -27,7 +27,7 @@ + + + +- ++ + + + +diff --git a/pengine/test10/systemhealthp1.dot b/pengine/test10/systemhealthp1.dot +index 28841b7..a29f519 100644 +--- a/pengine/test10/systemhealthp1.dot ++++ b/pengine/test10/systemhealthp1.dot +@@ -1,8 +1,8 @@ + digraph "g" { + "all_stopped" [ style=bold color="green" fontcolor="orange" ] ++"stonith 'reboot' hs21c" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21c" [ style=bold color="green" fontcolor="black"] + "stonith 'reboot' hs21d" -> "stonith 'reboot' hs21c" [ style = bold] +-"stonith 'reboot' hs21d" -> "stonith_complete" [ style = bold] + "stonith 'reboot' hs21d" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] + "stonith_complete" [ style=bold color="green" fontcolor="orange" ] +diff --git a/pengine/test10/systemhealthp1.exp b/pengine/test10/systemhealthp1.exp +index 80a2329..aa2afe1 100644 +--- a/pengine/test10/systemhealthp1.exp ++++ b/pengine/test10/systemhealthp1.exp +@@ -27,7 +27,7 @@ + + + +- ++ + + + +diff --git a/tools/1node2heartbeat b/tools/1node2heartbeat +deleted file mode 100755 +index b63a0c8..0000000 +--- a/tools/1node2heartbeat ++++ /dev/null +@@ -1,326 +0,0 @@ +-#!/usr/bin/python +-# +-# Program to determine current list of enabled services for init state 3 +-# and create heartbeat CRM configuration for heartbeat to manage them +-# +-__copyright__=''' +-Author: Alan Robertson +-Copyright (C) 2006 International Business Machines +-''' +- +-# This program is free software; you can redistribute it and/or +-# modify it under the terms of the GNU General Public License +-# as published by the Free Software Foundation; either version 2 +-# of the License, or (at your option) any later version. 
+-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, write to the Free Software +-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +-import os,re +-# +-# Here's the plan: +-# Find out the default run level +-# Find out what (additional?) services are enabled in that run level +-# Figure out which of them start after the network (or heartbeat?) +-# Ignore heartbeat :-) +-# Figure out which services supply the $services +-# Look to see if the SUSE /etc/insserv.conf file exists +-# If so, then scan it for who provides the $services +-# defined by the LSB +-# If we're on Red Hat, then make some Red Hat type assumptions +-# (whatever those might be) +-# If we're not, then make some generic assumptions... +-# Scan the init scripts for their dependencies... +-# Eliminate anything at or before 'network'. +-# Create resources corresponding to all active services +-# Include monitor actions for those services +-# that can be started after 'network' +-# Add the start-after dependencies +-# +-# Things to consider doing in the future: +-# Constrain them to only run on the local system? +-# Put them all in a convenience group (no colocation, no ordering) +-# Add start and stop timeouts +- +-ServiceKeywords = {} +-ServiceMap = {} +-ProvidesMap = {} +-RequiresMap = {} +-SkipMap = {'heartbeat': None, 'random': None} +-NoMonitor = {'microcode': None} +-PreReqs = ['network'] +-IgnoreList = [] +-sysname = os.uname()[1] +-InitDir = "/etc/init.d" +- +-def service_is_hb_compatible(service): +- scriptname = os.path.join(InitDir, service) +- command=scriptname + " status >/dev/null 2>&1"; +- rc = os.system(command) +- return rc == 0 +- +-def find_ordered_services(dir): +- allscripts = os.listdir(dir) +- allscripts.sort() +- services = [] +- for entry in allscripts: +- matchobj = re.match("S[0-9]+(.*)", entry) +- if not matchobj: +- continue +- service = matchobj.group(1) +- if SkipMap.has_key(service): +- continue +- if service_is_hb_compatible(service): +- services.append(service) +- else: +- IgnoreList.append(service) +- return services +- +- +-def register_services(initdir, services): +- for service in services: +- if not ServiceMap.has_key(service): +- ServiceMap[service] = os.path.join(initdir, service) +- for service in services: +- script_dependency_scan(service, os.path.join(initdir, service), ServiceMap) +- +-# +-# From the LSB version 3.1: "Comment Conventions for Init Scripts" +-# +-### BEGIN INIT INFO +-### END INIT INFO +-# +-# The delimiter lines may contain trailing whitespace, which shall be ignored. +-# All lines inside the block shall begin with a hash character '#' in the +-# first column, so the shell interprets them as comment lines which do not +-# affect operation of the script. The lines shall be of the form: +-# {keyword}: arg1 [arg2...] +-# with exactly one space character between the '#' and the keyword, with a +-# single exception. In lines following a line containing the Description +-# keyword, and until the next keyword or block ending delimiter is seen, +-# a line where the '#' is followed by more than one space or a tab +-# character shall be treated as a continuation of the previous line. 
+-# +- +-# Make this a class to avoid recompiling it for each script we scan. +-class pats: +- begin=re.compile("###\s+BEGIN\s+INIT\s+INFO") +- end=re.compile("###\s+END\s+INIT\s+INFO") +- desc=re.compile("# Description:\s*(.*)", re.IGNORECASE) +- desc_continue=re.compile("#( +|\t)\s*(.*)") +- keyword=re.compile("# ([^\s:]+):\s*(.*)\s*\Z") +- +-def script_keyword_scan(filename, servicename): +- keywords = {} +- ST_START=0 +- ST_INITINFO=1 +- ST_DESCRIPTION=1 +- description="" +- state=ST_START +- +- try: +- fd = open(filename) +- except IOError: +- return keywords +- +- while 1: +- line = fd.readline() +- if not line: +- break +- +- if state == ST_START: +- if pats.begin.match(line): +- state = ST_INITINFO +- continue +- if pats.end.match(line): +- break +- +- if state == ST_DESCRIPTION: +- match = pats.desc_continue.match(line) +- if match: +- description += ("\n" + match.group(2)) +- continue +- state = ST_INITINFO +- +- match = pats.desc.match(line) +- if match: +- state = ST_DESCRIPTION +- description = match.group(1) +- continue +- +- match = pats.keyword.match(line) +- if match: +- keywords[match.group(1)] = match.group(2) +- +- # Clean up and return +- fd.close() +- if description != "": +- keywords["Description"] = description +- keywords["_PATHNAME_"] = filename +- keywords["_RESOURCENAME_"] = "R_" + sysname + "_" + servicename +- return keywords +- +-def script_dependency_scan(service, script, servicemap): +- keywords=script_keyword_scan(script, service) +- ServiceKeywords[service] = keywords +- +-SysServiceGuesses = { +- '$local_fs': ['boot.localfs'], +- '$network': ['network'], +- '$named': ['named'], +- '$portmap': ['portmap'], +- '$remote_fs': ['nfs'], +- '$syslog': ['syslog'], +- '$netdaemons': ['portmap', 'inetd'], +- '$time': ['ntp'], +-} +- +-# +-# For specific versions of Linux, there are often better ways +-# to do this... +-# +-# (e.g., for SUSE Linux, one should look at /etc/insserv.conf file) +-# +-def map_sys_services(servicemap): +- sysservicemap = {} +- for sysserv in SysServiceGuesses.keys(): +- servlist = SysServiceGuesses[sysserv] +- result = [] +- for service in servlist: +- if servicemap.has_key(service): +- result.append(service) +- +- sysservicemap[sysserv] = result +- return sysservicemap +- +-# +-# +-# +-def create_service_dependencies(servicekeywords, systemservicemap): +- dependencies = {} +- for service in servicekeywords.keys(): +- if not dependencies.has_key(service): +- dependencies[service] = {} +- for key in ('Required-Start', 'Should-Start'): +- if not servicekeywords[service].has_key(key): +- continue +- for depserv in servicekeywords[service][key].split(): +- if systemservicemap.has_key(depserv): +- sysserv = systemservicemap[depserv] +- for serv in sysserv: +- dependencies[service][serv] = None +- else: +- if servicekeywords.has_key(depserv): +- dependencies[service][depserv] = None +- if len(dependencies[service]) == 0: +- del dependencies[service] +- return dependencies +- +-# +-# Modify the service name map to include all the mappings from +-# 'Provides' services to real service script names... 
+-# +-def map_script_services(sysservmap, servicekeywords): +- for service in servicekeywords.keys(): +- if not servicekeywords[service].has_key('Provides'): +- continue +- for provided in servicekeywords[service]['Provides'].split(): +- if not sysservmap.has_key(provided): +- sysservmap[provided] = [] +- sysservmap[provided].append(service) +- return sysservmap +- +-def create_cib_update(keywords, depmap): +- services = keywords.keys() +- services.sort() +- result = "" +- # Create the XML for the resources +- result += '\n' +- result += '\n' +- result += '\n' +- result += '\n' +- result += '\n' +- groupname="G_" + sysname + "_localinit" +- result += ' \n' +- for service in services: +- rid = keywords[service]["_RESOURCENAME_"] +- monid = "OPmon_" + sysname + '_' + service +- result += \ +- ' \n' + \ +- ' \n' + \ +- ' \n' +- if not NoMonitor.has_key(service): +- result += \ +- ' \n' +- result += \ +- ' \n' \ +- ' \n' +- result += ' \n' +- result += '\n' +- services = depmap.keys() +- services.sort() +- result += '\n' +- for service in services: +- rid = keywords[service]["_RESOURCENAME_"] +- deps = depmap[service].keys() +- deps.sort() +- for dep in deps: +- if not keywords.has_key(dep): +- continue +- depid = keywords[dep]["_RESOURCENAME_"] +- orderid='O_' + sysname + '_' + service + '_' + dep +- result += ' \n' +- loc_id="Loc_" + sysname + "_localinit" +- rule_id="LocRule_" + sysname + "_localinit" +- expr_id="LocExp_" + sysname + "_localinit" +- +- result += ' \n' +- result += ' \n' +- result += ' \n' +- result += ' \n' +- result += ' \n' +- result += '\n' +- result += '\n' +- result += '\n' +- result += '\n' +- return result +- +- +- +-def remove_a_prereq(service, servicemap, keywords, deps): +- if deps.has_key(service): +- parents = deps[service].keys() +- del deps[service] +- else: +- parents = [] +- if servicemap.has_key(service): +- del servicemap[service] +- if keywords.has_key(service): +- del keywords[service] +- for parent in parents: +- if not deps.has_key(parent): +- continue +- remove_a_prereq(parent, servicemap, keywords, deps) +- +- +-def remove_important_prereqs(prereqs, servicemap, keywords, deps): +- # Find everything these important prereqs need and get rid of them... +- for service in prereqs: +- remove_a_prereq(service, servicemap, keywords, deps) +- +-ServiceList = find_ordered_services(os.path.join(InitDir, "rc3.d")) +-register_services(InitDir, ServiceList) +-SysServiceMap = map_sys_services(ServiceMap) +-map_script_services(SysServiceMap, ServiceKeywords) +-ServiceDependencies = create_service_dependencies(ServiceKeywords,SysServiceMap) +-remove_important_prereqs(PreReqs, SysServiceMap, ServiceKeywords, ServiceDependencies) +- +-print create_cib_update(ServiceKeywords, ServiceDependencies) +diff --git a/tools/crm_commands.py.in b/tools/crm_commands.py.in +deleted file mode 100644 +index c48d82c..0000000 +--- a/tools/crm_commands.py.in ++++ /dev/null +@@ -1,132 +0,0 @@ +-# +-# +-# pingd OCF Resource Agent +-# Records (in the CIB) the current number of ping nodes a +-# cluster node can connect to. +-# +-# Copyright (c) 2006 Andrew Beekhof +-# All Rights Reserved. +-# +-# This program is free software; you can redistribute it and/or modify +-# it under the terms of version 2 of the GNU General Public License as +-# published by the Free Software Foundation. +-# +-# This program is distributed in the hope that it would be useful, but +-# WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+-# +-# Further, this software is distributed without any warranty that it is +-# free of the rightful claim of any third person regarding infringement +-# or the like. Any license provided herein, whether implied or +-# otherwise, applies only to this software file. Patent licenses, if +-# any, provided herein do not apply to combinations of this program with +-# other software, or any other product whatsoever. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, write the Free Software Foundation, +-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +-# +-####################################################################### +- +-import crm_utils as utl +- +-class HelpRequest(Exception): +- """Exception raised when a help listing is required.""" +- +-class ReparseRequest(Exception): +- """Exception raised when a command changed the command-line.""" +- +-def up(*args, **cmdoptions): +- l = len(utl.topic_stack) +- if l > 1: +- utl.topic_stack.pop() +- utl.set_topic(utl.topic_stack[-1]) +- else: +- utl.log_debug("Already at the top of the stack") +- +-def toggle_flag(*args, **cmdoptions): +- flag = cmdoptions["flag"] +- if utl.global_opts[flag]: +- utl.global_opts[flag] = 0 +- else: +- utl.global_opts[flag] = 1 +- +- return utl.global_opts[flag] +- +-def cd_(*args, **cmdoptions): +- utl.log_dev("args: %s\nopts: %s" % (repr(args), repr(cmdoptions))) +- if not cmdoptions["topic"]: +- utl.log_err("No topic specified") +- return 1 +- +- if cmdoptions["topic"]: +- utl.set_topic(cmdoptions["topic"]) +- if args: +- raise ReparseRequest() +- if utl.crm_topic not in utl.topic_stack: +- utl.topic_stack.append(cmdoptions["topic"]) +- if not utl.global_opts["interactive"]: +- help(cmdoptions["topic"]) +- return 0 +- +-def exit(*args, **cmdoptions): +- sys.exit(0) +- +-def help(*args, **cmdoptions): +- if args: +- raise HelpRequest(args[0]) +- raise HelpRequest(utl.crm_topic) +- +-def debugstate(*args, **cmdoptions): +- utl.log_info("Global Options: ") +- for opt in utl.global_opts.keys(): +- utl.log_info(" * %s:\t%s" % (opt, utl.global_opts[opt])) +- utl.log_info("Stack: "+repr(utl.topic_stack)) +- utl.log_info("Stack Head: "+utl.crm_topic) +- return 0 +- +-def do_list(*args, **cmdoptions): +- topic = utl.crm_topic +- if cmdoptions.has_key("topic") and cmdoptions["topic"]: +- topic = cmdoptions["topic"] +- +- utl.log_debug("Complete '%s' listing" % topic) +- if topic == "resources": +- utl.os_system("crm_resource -l", True) +- elif topic == "nodes": +- lines = utl.os_system("cibadmin -Q -o nodes", False) +- for line in lines: +- if line.find("node ") >= 0: +- print line.rstrip() +- else: +- utl.log_err("%s: Topic %s is not (yet) supported" % ("list", topic)) +- return 1 +- return 0 +- +-def do_status(*args, **cmdoptions): +- topic = utl.crm_topic +- if cmdoptions.has_key("topic") and cmdoptions["topic"]: +- topic = cmdoptions["topic"] +- +- if topic == "resources": +- if not args: +- utl.os_system("crm_resource -L", True) +- for rsc in args: +- utl.os_system("crm_resource -W -r %s"%rsc, True) +- +- elif topic == "nodes": +- lines = utl.os_system("cibadmin -Q -o status", False) +- for line in lines: +- line = line.rstrip() +- utl.log_dev("status line: "+line) +- if line.find("node_state ") >= 0: +- if not args: +- print line +- for node in args: +- if line.find(node) >= 0: +- print line +- else: +- utl.log_err("Topic %s is not (yet) supported" % topic) +- return 1 +- +- return 0 +diff --git a/tools/crm_mon.c b/tools/crm_mon.c +index 
0b71275..46a59d6 100644 +--- a/tools/crm_mon.c ++++ b/tools/crm_mon.c +@@ -2715,6 +2715,7 @@ print_status(pe_working_set_t * data_set) + } else { + online_nodes = add_list_element(online_nodes, node_name); + } ++ free(node_name); + continue; + } + } else { +@@ -2727,6 +2728,7 @@ print_status(pe_working_set_t * data_set) + } else { + offline_nodes = add_list_element(offline_nodes, node_name); + } ++ free(node_name); + continue; + } + } +@@ -3078,6 +3080,7 @@ print_html_status(pe_working_set_t * data_set, const char *filename) + fprintf(stream, "\n"); + } + fprintf(stream, "\n"); ++ free(node_name); + } + fprintf(stream, "\n"); + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index c484e17..d0195e3 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -470,6 +470,7 @@ try_cman(int command, enum cluster_type_e stack) + + case 'l': + case 'p': ++ memset(cman_nodes, 0, MAX_NODES * sizeof(cman_node_t)); + rc = cman_get_nodes(cman_handle, MAX_NODES, &node_count, cman_nodes); + if (rc != 0) { + fprintf(stderr, "Couldn't query cman node list: %d %d", rc, errno); +@@ -489,6 +490,7 @@ try_cman(int command, enum cluster_type_e stack) + break; + + case 'i': ++ memset(&node, 0, sizeof(cman_node_t)); + rc = cman_get_node(cman_handle, CMAN_NODEID_US, &node); + if (rc != 0) { + fprintf(stderr, "Couldn't query cman node id: %d %d", rc, errno); +diff --git a/tools/crm_primitive.py.in b/tools/crm_primitive.py.in +deleted file mode 100644 +index cfe0b5c..0000000 +--- a/tools/crm_primitive.py.in ++++ /dev/null +@@ -1,268 +0,0 @@ +-#!@PYTHON@ +- +-'''Create an XML fragment describing a new resource +-''' +- +-__copyright__=''' +-Author: Andrew Beekhof +-Copyright (C) 2005 Andrew Beekhof +-''' +- +-# +-# This program is free software; you can redistribute it and/or +-# modify it under the terms of the GNU General Public License +-# as published by the Free Software Foundation; either version 2 +-# of the License, or (at your option) any later version. +-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, write to the Free Software +-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+- +-import sys,string,os +-import xml.dom.minidom +- +-print_rsc_only = 0 +-rsc_name = None +-rsc_class = None +-rsc_type = None +-rsc_provider = None +-start_timeout = None +-stop_timeout = None +-monitor_interval = None +-monitor_timeout = None +-rsc_options = [] +-rsc_location = [] +-rsc_colocation = [] +- +-def create_cib() : +- doc = xml.dom.minidom.Document() +- cib = doc.createElement("cib") +- doc.appendChild(cib) +- +- configuration = doc.createElement("configuration") +- cib.appendChild(configuration) +- +- #crm_config = doc.createElement("crm_config") +- #configuration.appendChild(crm_config) +- +- resources = doc.createElement("resources") +- configuration.appendChild(resources) +- constraints = doc.createElement("constraints") +- configuration.appendChild(constraints) +- +- return doc, resources, constraints +- +-def cib_resource(doc, id, ra_class, type, provider): +- +- params = None +- +- resource = doc.createElement("primitive") +- +- resource.setAttribute("id", id) +- resource.setAttribute("type", type) +- resource.setAttribute("class", ra_class) +- +- if ra_class == "ocf": +- if not provider: +- provider = "heartbeat" +- resource.setAttribute("provider", provider) +- +- elif ra_class != "lsb" and ra_class != "heartbeat": +- print "Unknown resource class: "+ ra_class +- return None +- +- operations = doc.createElement("operations") +- resource.appendChild(operations) +- +- if monitor_interval != None: +- op = doc.createElement("op") +- operations.appendChild(op) +- op.setAttribute("id", id + "_mon_" + monitor_interval) +- op.setAttribute("name", "monitor") +- op.setAttribute("interval", monitor_interval) +- if monitor_timeout != None: +- op.setAttribute("timeout", monitor_timeout) +- +- if start_timeout != None: +- op = doc.createElement("op") +- operations.appendChild(op) +- op.setAttribute("id", id + "_start") +- op.setAttribute("name", "start") +- op.setAttribute("timeout", start_timeout) +- +- if stop_timeout != None: +- op = doc.createElement("op") +- operations.appendChild(op) +- op.setAttribute("id", id + "_stop") +- op.setAttribute("name", "stop") +- op.setAttribute("timeout", stop_timeout) +- +- instance_attributes = doc.createElement("instance_attributes") +- instance_attributes.setAttribute("id", id) +- resource.appendChild(instance_attributes) +- attributes = doc.createElement("attributes") +- instance_attributes.appendChild(attributes) +- for i in range(0,len(rsc_options)) : +- if rsc_options[i] == None : +- continue +- +- param = string.split(rsc_options[i], "=") +- nvpair = doc.createElement("nvpair") +- nvpair.setAttribute("id", id + "_" + param[0]) +- nvpair.setAttribute("name", param[0]) +- nvpair.setAttribute("value", param[1]) +- attributes.appendChild(nvpair) +- +- return resource +- +-def cib_rsc_location(doc, id, node, score): +- rule = doc.createElement("rule") +- rule.setAttribute("id", id+"_prefer_"+node+"_rule") +- rule.setAttribute("score", score) +- expression = doc.createElement("expression") +- expression.setAttribute("id",id+"_prefer_"+node+"_expr") +- expression.setAttribute("attribute","#uname") +- expression.setAttribute("operation","eq") +- expression.setAttribute("value", node) +- rule.appendChild(expression) +- return rule +- +-def cib_rsc_colocation(doc, id, other_resource, score): +- rsc_colocation = doc.createElement("rsc_colocation") +- rsc_colocation.setAttribute("id", id+"_colocate_with_"+other_resource) +- rsc_colocation.setAttribute("from", id) +- rsc_colocation.setAttribute("to", other_resource) +- 
rsc_colocation.setAttribute("score", score) +- return rsc_colocation +- +-def print_usage(): +- print "usage: " \ +- + sys.argv[0] \ +- + " --name "\ +- + " --class "\ +- + " --type "\ +- + " [--provider ]"\ +- + "\n\t"\ +- + " [--start-timeout ]"\ +- + " [--stop-timeout ]"\ +- + " [--monitor ]"\ +- + " [--monitor-timeout ]"\ +- + "\n\t"\ +- + " [--rsc-option name=value]*"\ +- + " [--rsc-location uname=score]*"\ +- + " [--rsc-colocation resource=score]*" +- print "Example:\n\t" + sys.argv[0] \ +- + " --name cluster_ip_1 --type IPaddr --provider heartbeat --class ocf "\ +- + "--rsc-option ip=192.168.1.101 --rsc-location node1=500 | cibadmin -C -p" +- sys.exit(1) +- +-if __name__=="__main__" : +- +- # Process arguments... +- skipthis = None +- args = sys.argv[1:] +- if len(args) == 0: +- print_usage() +- +- for i in range(0, len(args)) : +- if skipthis : +- skipthis = None +- continue +- +- elif args[i] == "--name" : +- skipthis = True +- rsc_name = args[i+1] +- +- elif args[i] == "--class" : +- skipthis = True +- rsc_class = args[i+1] +- +- elif args[i] == "--type" : +- skipthis = True +- rsc_type = args[i+1] +- +- elif args[i] == "--provider" : +- skipthis = True +- rsc_provider = args[i+1] +- +- elif args[i] == "--start-timeout" : +- skipthis = True +- start_timeout = args[i+1] +- +- elif args[i] == "--stop-timeout" : +- skipthis = True +- stop_timeout = args[i+1] +- +- elif args[i] == "--monitor" : +- skipthis = True +- monitor_interval = args[i+1] +- +- elif args[i] == "--monitor-timeout" : +- skipthis = True +- monitor_timeout = args[i+1] +- +- elif args[i] == "--rsc-option" : +- skipthis = True +- params = string.split(args[i+1], "=") +- if params[1] != None: +- rsc_options.append(args[i+1]) +- else: +- print "option '"+args[i+1]+"' must be of the form name=value" +- +- elif args[i] == "--rsc-location" : +- skipthis = True +- params = string.split(args[i+1], "=") +- if params[1] != None: +- rsc_location.append(args[i+1]) +- else: +- print "option '"+args[i+1]+"' must be of the form host=score" +- +- elif args[i] == "--rsc-colocation" : +- skipthis = True +- params = string.split(args[i+1], "=") +- if params[1] != None: +- rsc_colocation.append(args[i+1]) +- else: +- print "option '"+args[i+1]+"' must be of the form resource=score" +- +- elif args[i] == "--rsc-only" : +- print_rsc_only = 1 +- else: +- print "Unknown argument: "+ args[i] +- print_usage() +- +- cib = create_cib() +- pre_line = "" +- id_index = 1 +- resource = cib_resource(cib[0], rsc_name, rsc_class, rsc_type, rsc_provider) +- +- if print_rsc_only: +- print resource.toprettyxml() +- sys.exit(0) +- +- cib[1].appendChild(resource) +- +- if rsc_location != None : +- rsc_loc = cib[0].createElement("rsc_location") +- rsc_loc.setAttribute("id", rsc_name+"_preferences") +- rsc_loc.setAttribute("rsc", rsc_name) +- for i in range(0, len(rsc_location)) : +- param = string.split(rsc_location[i], "=") +- location_rule = cib_rsc_location(cib[0], rsc_name, param[0], param[1]) +- rsc_loc.appendChild(location_rule) +- cib[2].appendChild(rsc_loc) +- +- for i in range(0, len(rsc_colocation)) : +- if rsc_location[i] == None : +- continue +- +- param = string.split(rsc_colocation[i], "=") +- colocation_rule = cib_rsc_colocation(cib[0], rsc_name, param[0], param[1]) +- cib[2].appendChild(colocation_rule) +- +- print cib[0].toprettyxml() +diff --git a/tools/crm_resource.c b/tools/crm_resource.c +index 31136ef..2fce3b7 100644 +--- a/tools/crm_resource.c ++++ b/tools/crm_resource.c +@@ -853,6 +853,7 @@ main(int argc, char **argv) + rc = -ENXIO; 
+ goto bail; + } ++ + rc = cli_resource_print_attribute(rsc_id, prop_name, &data_set); + + } else if (rsc_cmd == 'p') { +@@ -883,6 +884,10 @@ main(int argc, char **argv) + } else if (rsc_cmd == 'C' && rsc_id) { + resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); + ++ if(do_force == FALSE) { ++ rsc = uber_parent(rsc); ++ } ++ + crm_debug("Re-checking the state of %s on %s", rsc_id, host_uname); + if(rsc) { + crmd_replies_needed = 0; +@@ -891,6 +896,11 @@ main(int argc, char **argv) + rc = -ENODEV; + } + ++ if(rc == pcmk_ok && BE_QUIET == FALSE) { ++ /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ ++ cli_resource_check(cib_conn, rsc); ++ } ++ + if (rc == pcmk_ok) { + start_mainloop(); + } +diff --git a/tools/crm_resource.h b/tools/crm_resource.h +index 49b6138..5a206e0 100644 +--- a/tools/crm_resource.h ++++ b/tools/crm_resource.h +@@ -68,6 +68,7 @@ int cli_resource_print_property(const char *rsc, const char *attr, pe_working_se + int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bool active, pe_working_set_t * data_set); + + /* runtime */ ++void cli_resource_check(cib_t * cib, resource_t *rsc); + int cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set); + int cli_resource_search(const char *rsc, pe_working_set_t * data_set); + int cli_resource_delete(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_uname, resource_t * rsc, pe_working_set_t * data_set); +diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c +index 9c3711c..946b9e3 100644 +--- a/tools/crm_resource_print.c ++++ b/tools/crm_resource_print.c +@@ -352,8 +352,11 @@ cli_resource_print_attribute(const char *rsc, const char *attr, pe_working_set_t + + if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { + get_rsc_attributes(params, the_rsc, current, data_set); ++ + } else if (safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { ++ /* No need to redirect to the parent */ + get_meta_attributes(params, the_rsc, current, data_set); ++ + } else { + unpack_instance_attributes(data_set->input, the_rsc->xml, XML_TAG_UTILIZATION, NULL, + params, NULL, FALSE, data_set->now); +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 006ec08..a270cbf 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -198,6 +198,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + int rc = pcmk_ok; + static bool need_init = TRUE; + ++ char *lookup_id = NULL; + char *local_attr_id = NULL; + char *local_attr_set = NULL; + +@@ -212,14 +213,39 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + } + + if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { +- rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, XML_TAG_META_SETS, attr_set, attr_id, ++ rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, + attr_name, &local_attr_id); +- if (rc == pcmk_ok) { +- printf("WARNING: There is already a meta attribute called %s (id=%s)\n", attr_name, +- local_attr_id); ++ if(rc == pcmk_ok && do_force == FALSE) { ++ if (BE_QUIET == FALSE) { ++ printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", ++ uber_parent(rsc)->id, attr_name, local_attr_id); ++ printf(" Delete '%s' first or use --force to override\n", local_attr_id); ++ } ++ return -ENOTUNIQ; ++ } ++ ++ } else if(rsc->parent) { ++ ++ switch(rsc->parent->variant) { ++ case pe_group: ++ 
if (BE_QUIET == FALSE) { ++ printf("Updating '%s' for '%s' will not apply to its peers in '%s'\n", attr_name, rsc_id, rsc->parent->id); ++ } ++ break; ++ case pe_master: ++ case pe_clone: ++ rsc = rsc->parent; ++ if (BE_QUIET == FALSE) { ++ printf("Updating '%s' for '%s'...\n", rsc->id, rsc_id); ++ } ++ break; ++ default: ++ break; + } + } +- rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, ++ ++ lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ ++ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, + &local_attr_id); + + if (rc == pcmk_ok) { +@@ -227,6 +253,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + attr_id = local_attr_id; + + } else if (rc != -ENXIO) { ++ free(lookup_id); + free(local_attr_id); + return rc; + +@@ -250,7 +277,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + free_xml(cib_top); + + if (attr_set == NULL) { +- local_attr_set = crm_concat(rsc_id, attr_set_type, '-'); ++ local_attr_set = crm_concat(lookup_id, attr_set_type, '-'); + attr_set = local_attr_set; + } + if (attr_id == NULL) { +@@ -263,7 +290,7 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + } + + xml_top = create_xml_node(NULL, tag); +- crm_xml_add(xml_top, XML_ATTR_ID, rsc_id); ++ crm_xml_add(xml_top, XML_ATTR_ID, lookup_id); + + xml_obj = create_xml_node(xml_top, attr_set_type); + crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); +@@ -285,7 +312,15 @@ cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const ch + crm_log_xml_debug(xml_top, "Update"); + + rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); ++ if (rc == pcmk_ok && BE_QUIET == FALSE) { ++ printf("Set '%s' option: id=%s%s%s%s%s=%s\n", lookup_id, local_attr_id, ++ attr_set ? " set=" : "", attr_set ? attr_set : "", ++ attr_name ? " name=" : "", attr_name ? 
attr_name : "", attr_value); ++ } ++ + free_xml(xml_top); ++ ++ free(lookup_id); + free(local_attr_id); + free(local_attr_set); + +@@ -330,6 +365,7 @@ cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const ch + xmlNode *xml_obj = NULL; + + int rc = pcmk_ok; ++ char *lookup_id = NULL; + char *local_attr_id = NULL; + resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); + +@@ -337,7 +373,29 @@ cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const ch + return -ENXIO; + } + +- rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, ++ if(rsc->parent && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { ++ ++ switch(rsc->parent->variant) { ++ case pe_group: ++ if (BE_QUIET == FALSE) { ++ printf("Removing '%s' for '%s' will not apply to its peers in '%s'\n", attr_name, rsc_id, rsc->parent->id); ++ } ++ break; ++ case pe_master: ++ case pe_clone: ++ rsc = rsc->parent; ++ if (BE_QUIET == FALSE) { ++ printf("Removing '%s' from '%s' for '%s'...\n", attr_name, rsc->id, rsc_id); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ } ++ ++ lookup_id = clone_strip(rsc->id); ++ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, + &local_attr_id); + + if (rc == -ENXIO) { +@@ -360,8 +418,8 @@ cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const ch + CRM_ASSERT(cib); + rc = cib->cmds->delete(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); + +- if (rc == pcmk_ok) { +- printf("Deleted %s option: id=%s%s%s%s%s\n", rsc_id, local_attr_id, ++ if (rc == pcmk_ok && BE_QUIET == FALSE) { ++ printf("Deleted '%s' option: id=%s%s%s%s%s\n", lookup_id, local_attr_id, + attr_set ? " set=" : "", attr_set ? attr_set : "", + attr_name ? " name=" : "", attr_name ? attr_name : ""); + } +@@ -493,7 +551,10 @@ cli_resource_delete(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_ + for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { + resource_t *child = (resource_t *) lpc->data; + +- cli_resource_delete(cib_conn, crmd_channel, host_uname, child, data_set); ++ rc = cli_resource_delete(cib_conn, crmd_channel, host_uname, child, data_set); ++ if(rc != pcmk_ok || is_not_set(rsc->flags, pe_rsc_unique)) { ++ return rc; ++ } + } + return pcmk_ok; + +@@ -514,31 +575,78 @@ cli_resource_delete(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_ + node = pe_find_node(data_set->nodes, host_uname); + + if (node && node->details->rsc_discovery_enabled) { +- printf("Cleaning up %s on %s\n", rsc->id, host_uname); ++ printf("Cleaning up %s on %s", rsc->id, host_uname); + rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, TRUE, data_set); + } else { + printf("Resource discovery disabled on %s. 
Unable to delete lrm state.\n", host_uname); ++ rc = -EOPNOTSUPP; + } + + if (rc == pcmk_ok) { + char *attr_name = NULL; +- const char *id = rsc->id; + + if(node && node->details->remote_rsc == NULL && node->details->rsc_discovery_enabled) { + crmd_replies_needed++; + } +- if (rsc->clone_name) { +- id = rsc->clone_name; ++ ++ if(is_not_set(rsc->flags, pe_rsc_unique)) { ++ char *id = clone_strip(rsc->id); ++ attr_name = crm_strdup_printf("fail-count-%s", id); ++ free(id); ++ ++ } else if (rsc->clone_name) { ++ attr_name = crm_strdup_printf("fail-count-%s", rsc->clone_name); ++ ++ } else { ++ attr_name = crm_strdup_printf("fail-count-%s", rsc->id); + } + +- attr_name = crm_concat("fail-count", id, '-'); ++ printf(", removing %s\n", attr_name); + rc = attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, + NULL, NULL, node ? is_remote_node(node) : FALSE); + free(attr_name); ++ ++ } else if(rc != -EOPNOTSUPP) { ++ printf(" - FAILED\n"); + } ++ + return rc; + } + ++void ++cli_resource_check(cib_t * cib_conn, resource_t *rsc) ++{ ++ ++ char *role_s = NULL; ++ char *managed = NULL; ++ resource_t *parent = uber_parent(rsc); ++ ++ find_resource_attr(cib_conn, XML_ATTR_ID, parent->id, ++ XML_TAG_META_SETS, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); ++ ++ find_resource_attr(cib_conn, XML_ATTR_ID, parent->id, ++ XML_TAG_META_SETS, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); ++ ++ if(managed == NULL) { ++ managed = strdup("1"); ++ } ++ if(crm_is_true(managed) == FALSE) { ++ printf("\n\t*Resource %s is configured to not be managed by the cluster\n", parent->id); ++ } ++ if(role_s) { ++ enum rsc_role_e role = text2role(role_s); ++ if(role == RSC_ROLE_UNKNOWN) { ++ // Treated as if unset ++ ++ } else if(role == RSC_ROLE_STOPPED) { ++ printf("\n\t* The configuration specifies that '%s' should remain stopped\n", parent->id); ++ ++ } else if(parent->variant > pe_clone && role != RSC_ROLE_MASTER) { ++ printf("\n\t* The configuration specifies that '%s' should not be promoted\n", parent->id); ++ } ++ } ++} ++ + int + cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, + const char *rsc_id, pe_working_set_t * data_set) +diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c +index 0051112..7d0a8eb 100644 +--- a/tools/crm_simulate.c ++++ b/tools/crm_simulate.c +@@ -59,8 +59,11 @@ char *use_date = NULL; + static void + get_date(pe_working_set_t * data_set) + { ++ int value = 0; + time_t original_date = 0; +- crm_element_value_int(data_set->input, "execution-date", (int*)&original_date); ++ ++ crm_element_value_int(data_set->input, "execution-date", &value); ++ original_date = value; + + if (use_date) { + data_set->now = crm_time_new(use_date); +diff --git a/tools/crm_utils.py.in b/tools/crm_utils.py.in +deleted file mode 100644 +index 67d6918..0000000 +--- a/tools/crm_utils.py.in ++++ /dev/null +@@ -1,188 +0,0 @@ +-#!/bin/env python +-# +-# +-# pingd OCF Resource Agent +-# Records (in the CIB) the current number of ping nodes a +-# cluster node can connect to. +-# +-# Copyright (c) 2006 Andrew Beekhof +-# All Rights Reserved. +-# +-# This program is free software; you can redistribute it and/or modify +-# it under the terms of version 2 of the GNU General Public License as +-# published by the Free Software Foundation. +-# +-# This program is distributed in the hope that it would be useful, but +-# WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+-# +-# Further, this software is distributed without any warranty that it is +-# free of the rightful claim of any third person regarding infringement +-# or the like. Any license provided herein, whether implied or +-# otherwise, applies only to this software file. Patent licenses, if +-# any, provided herein do not apply to combinations of this program with +-# other software, or any other product whatsoever. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, write the Free Software Foundation, +-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +-# +-####################################################################### +- +-import os +-import sys +-import getopt +-import readline +-import traceback +-from popen2 import Popen3 +- +-crm_topic = "crm" +-topic_stack = [ crm_topic ] +-hist_file = os.environ.get('HOME')+"/.crm_history" +-global_opts = {} +- +-def exit_(code=0): +- if global_opts["interactive"]: +- log_info("Exiting... ") +- try: +- readline.write_history_file(hist_file) +- log_debug("Wrote history to: "+hist_file) +- except: +- log_debug("Couldnt write history to: "+hist_file) +- sys.exit(code) +- +-def log_debug(log): +- if global_opts.has_key("debug") and global_opts["debug"]: +- print log +- +-def log_dev(log): +- if global_opts.has_key("devlog") and global_opts["devlog"]: +- print log +- +-def log_info(log): +- print log +- +-def log_err(log): +- print "ERROR: "+log +- +-def set_topic(name): +- global crm_topic +- if crm_topic != name: +- log_dev("topic: %s->%s" % (crm_topic, name)) +- crm_topic = name +- +-def os_system(cmd, print_raw=False): +- log_debug("Performing command: "+cmd) +- p = Popen3(cmd, None) +- p.tochild.close() +- result = p.fromchild.readlines() +- p.fromchild.close() +- p.wait() +- if print_raw: +- for line in result: +- print line.rstrip() +- return result +- +-# +-# Creates an argv-style array (that preserves quoting) for use in shell-mode +-# +-def create_argv(text): +- args = [] +- word = [] +- index = 0 +- total = len(text) +- +- in_word = False +- in_verbatum = False +- +- while index < total: +- finish_word = False +- append_word = False +- #log_debug("processing: "+text[index]) +- if text[index] == '\\': +- index = index +1 +- append_word = True +- +- elif text[index].isspace(): +- if in_verbatum or in_word: +- append_word = True +- else: +- finish_word = True +- +- elif text[index] == '"': +- if in_verbatum: +- append_word = True +- else: +- finish_word = True +- if in_word: +- in_word = False +- else: +- in_word = True +- +- elif text[index] == '\'': +- finish_word = True +- if in_verbatum: +- in_verbatum = False +- else: +- in_verbatum = True +- else: +- append_word = True +- +- if finish_word: +- if word: +- args.append(''.join(word)) +- word = [] +- elif append_word: +- word.append(text[index]) +- #log_debug("Added %s to word: %s" % (text[index], str(word))) +- +- index = index +1 +- +- if in_verbatum or in_word: +- text="" +- if word: +- text=" after: '%s'"%''.join(word) +- raise QuotingError("Un-matched quoting%s"%text, args) +- +- elif word: +- args.append(''.join(word)) +- +- return args +- +-def init_readline(func): +- readline.set_completer(func) +- readline.parse_and_bind("tab: complete") +- readline.set_history_length(100) +- +- try: +- readline.read_history_file(hist_file) +- except: +- pass +- +-def fancyopts(args, options, state): +- long = [] +- short = '' +- map = {} +- dt = {} +- +- for s, l, d, c in options: +- pl = l.replace('-', '_') +- map['-'+s] 
= map['--'+l] = pl +- state[pl] = d +- dt[pl] = type(d) +- if not d is None and not callable(d): +- if s: s += ':' +- if l: l += '=' +- if s: short = short + s +- if l: long.append(l) +- +- opts, args = getopt.getopt(args, short, long) +- +- for opt, arg in opts: +- if dt[map[opt]] is type(fancyopts): state[map[opt]](state,map[opt],arg) +- elif dt[map[opt]] is type(1): state[map[opt]] = int(arg) +- elif dt[map[opt]] is type(''): state[map[opt]] = arg +- elif dt[map[opt]] is type([]): state[map[opt]].append(arg) +- elif dt[map[opt]] is type(None): state[map[opt]] = 1 +- +- return args +diff --git a/tools/regression.acls.exp b/tools/regression.acls.exp +index ae6735a..ac7ae0c 100644 +--- a/tools/regression.acls.exp ++++ b/tools/regression.acls.exp +@@ -253,10 +253,10 @@ Error performing operation: Permission denied + =#=#=#= End test: unknownguy: Set stonith-enabled - Permission denied (13) =#=#=#= + * Passed: crm_attribute - unknownguy: Set stonith-enabled + =#=#=#= Begin test: unknownguy: Create a resource =#=#=#= +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs + Call failed: Permission denied + =#=#=#= End test: unknownguy: Create a resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - unknownguy: Create a resource +@@ -273,8 +273,8 @@ Error performing operation: Permission denied + =#=#=#= End test: l33t-haxor: Set stonith-enabled - Permission denied (13) =#=#=#= + * Passed: crm_attribute - l33t-haxor: Set stonith-enabled + =#=#=#= Begin test: l33t-haxor: Create a resource =#=#=#= +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: parent +-__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: parent ++__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] + Call failed: Permission denied + =#=#=#= End test: l33t-haxor: Create a resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - l33t-haxor: Create a resource +@@ -323,13 +323,13 @@ Call failed: Permission denied + =#=#=#= End test: niceguy: Query configuration - OK (0) =#=#=#= + * Passed: cibadmin - niceguy: Query configuration + =#=#=#= Begin test: niceguy: Set enable-acl =#=#=#= +-__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default ++__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default + Error performing operation: Permission denied + Error setting enable-acl=false (section=crm_config, set=<null>): Permission denied + =#=#=#= End test: niceguy: Set enable-acl
- Permission denied (13) =#=#=#= + * Passed: crm_attribute - niceguy: Set enable-acl + =#=#=#= Begin test: niceguy: Set stonith-enabled =#=#=#= +-__xml_acl_post_process: Creation of nvpair=cib-bootstrap-options-stonith-enabled is allowed ++__xml_acl_post_process: Creation of nvpair=cib-bootstrap-options-stonith-enabled is allowed + =#=#=#= Current cib after: niceguy: Set stonith-enabled =#=#=#= + + +@@ -376,8 +376,8 @@ __xml_acl_post_process: Creation of nvpair=cib-bootstrap-options-stonith-enable + =#=#=#= End test: niceguy: Set stonith-enabled - OK (0) =#=#=#= + * Passed: crm_attribute - niceguy: Set stonith-enabled + =#=#=#= Begin test: niceguy: Create a resource =#=#=#= +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: default +-__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: default ++__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] + Call failed: Permission denied + =#=#=#= End test: niceguy: Create a resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Create a resource +@@ -533,10 +533,11 @@ Error performing operation: Permission denied + =#=#=#= End test: l33t-haxor: Remove a resource meta attribute - Permission denied (13) =#=#=#= + * Passed: crm_resource - l33t-haxor: Remove a resource meta attribute + =#=#=#= Begin test: niceguy: Create a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity +-__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++Set 'dummy' option: id=dummy-meta_attributes-target-role set=dummy-meta_attributes name=target-role=Stopped + =#=#=#= Current cib after: niceguy: Create a resource meta attribute =#=#=#= + + +@@ -589,9 +590,9 @@ __xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is + =#=#=#= End test: niceguy: Create a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - niceguy: Create a resource meta attribute + =#=#=#= Begin test: niceguy: Query a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity + Stopped + =#=#=#= Current cib after: niceguy: Query a resource meta attribute =#=#=#= + +@@ 
-645,10 +646,10 @@ Stopped + =#=#=#= End test: niceguy: Query a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - niceguy: Query a resource meta attribute + =#=#=#= Begin test: niceguy: Remove a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity +-Deleted dummy option: id=dummy-meta_attributes-target-role name=target-role ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++Deleted 'dummy' option: id=dummy-meta_attributes-target-role name=target-role + =#=#=#= Current cib after: niceguy: Remove a resource meta attribute =#=#=#= + + +@@ -699,10 +700,11 @@ Deleted dummy option: id=dummy-meta_attributes-target-role name=target-role + =#=#=#= End test: niceguy: Remove a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - niceguy: Remove a resource meta attribute + =#=#=#= Begin test: niceguy: Create a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity +-__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++Set 'dummy' option: id=dummy-meta_attributes-target-role set=dummy-meta_attributes name=target-role=Started + =#=#=#= Current cib after: niceguy: Create a resource meta attribute =#=#=#= + + +@@ -804,8 +806,8 @@ __xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is + + + =#=#=#= Begin test: niceguy: Replace - remove acls =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/acls: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/acls: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - remove acls - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - remove acls +@@ -859,9 +861,9 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - create resource =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy2']: default +-__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy2'] ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy2']: default 
++__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy2'] + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - create resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - create resource +@@ -914,8 +916,8 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - modify attribute (deny) =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - modify attribute (deny) - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - modify attribute (deny) +@@ -968,8 +970,8 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - delete attribute (deny) =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl']: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl']: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - delete attribute (deny) - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - delete attribute (deny) +@@ -1022,8 +1024,8 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - create attribute (deny) =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy'][@description]: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy'][@description]: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - create attribute (deny) - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - create attribute (deny) +@@ -1180,28 +1182,28 @@ Call failed: Permission denied + + !#!#!#!#! Upgrading to pacemaker-2.0 and retesting !#!#!#!#! 
+ =#=#=#= Begin test: root: Upgrade to pacemaker-2.0 =#=#=#= +-__xml_acl_post_process: Creation of acl_permission=observer-read-1 is allowed +-__xml_acl_post_process: Creation of acl_permission=observer-write-1 is allowed +-__xml_acl_post_process: Creation of acl_permission=observer-write-2 is allowed +-__xml_acl_post_process: Creation of acl_permission=admin-read-1 is allowed +-__xml_acl_post_process: Creation of acl_permission=admin-write-1 is allowed +-__xml_acl_post_process: Creation of acl_target=l33t-haxor is allowed +-__xml_acl_post_process: Creation of role=auto-l33t-haxor is allowed +-__xml_acl_post_process: Creation of acl_role=auto-l33t-haxor is allowed +-__xml_acl_post_process: Creation of acl_permission=crook-nothing is allowed +-__xml_acl_post_process: Creation of acl_target=niceguy is allowed +-__xml_acl_post_process: Creation of role=observer is allowed +-__xml_acl_post_process: Creation of acl_target=bob is allowed +-__xml_acl_post_process: Creation of role=admin is allowed +-__xml_acl_post_process: Creation of acl_target=badidea is allowed +-__xml_acl_post_process: Creation of role=auto-badidea is allowed +-__xml_acl_post_process: Creation of acl_role=auto-badidea is allowed +-__xml_acl_post_process: Creation of acl_permission=badidea-resources is allowed +-__xml_acl_post_process: Creation of acl_target=betteridea is allowed +-__xml_acl_post_process: Creation of role=auto-betteridea is allowed +-__xml_acl_post_process: Creation of acl_role=auto-betteridea is allowed +-__xml_acl_post_process: Creation of acl_permission=betteridea-nothing is allowed +-__xml_acl_post_process: Creation of acl_permission=betteridea-resources is allowed ++__xml_acl_post_process: Creation of acl_permission=observer-read-1 is allowed ++__xml_acl_post_process: Creation of acl_permission=observer-write-1 is allowed ++__xml_acl_post_process: Creation of acl_permission=observer-write-2 is allowed ++__xml_acl_post_process: Creation of acl_permission=admin-read-1 is allowed ++__xml_acl_post_process: Creation of acl_permission=admin-write-1 is allowed ++__xml_acl_post_process: Creation of acl_target=l33t-haxor is allowed ++__xml_acl_post_process: Creation of role=auto-l33t-haxor is allowed ++__xml_acl_post_process: Creation of acl_role=auto-l33t-haxor is allowed ++__xml_acl_post_process: Creation of acl_permission=crook-nothing is allowed ++__xml_acl_post_process: Creation of acl_target=niceguy is allowed ++__xml_acl_post_process: Creation of role=observer is allowed ++__xml_acl_post_process: Creation of acl_target=bob is allowed ++__xml_acl_post_process: Creation of role=admin is allowed ++__xml_acl_post_process: Creation of acl_target=badidea is allowed ++__xml_acl_post_process: Creation of role=auto-badidea is allowed ++__xml_acl_post_process: Creation of acl_role=auto-badidea is allowed ++__xml_acl_post_process: Creation of acl_permission=badidea-resources is allowed ++__xml_acl_post_process: Creation of acl_target=betteridea is allowed ++__xml_acl_post_process: Creation of role=auto-betteridea is allowed ++__xml_acl_post_process: Creation of acl_role=auto-betteridea is allowed ++__xml_acl_post_process: Creation of acl_permission=betteridea-nothing is allowed ++__xml_acl_post_process: Creation of acl_permission=betteridea-resources is allowed + =#=#=#= Current cib after: root: Upgrade to pacemaker-2.0 =#=#=#= + + +@@ -1271,10 +1273,10 @@ Error performing operation: Permission denied + =#=#=#= End test: unknownguy: Set stonith-enabled - Permission denied (13) =#=#=#= + * Passed: crm_attribute - 
unknownguy: Set stonith-enabled + =#=#=#= Begin test: unknownguy: Create a resource =#=#=#= +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs +-__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs ++__xml_acl_check: Ordinary user unknownguy cannot access the CIB without any defined ACLs + Call failed: Permission denied + =#=#=#= End test: unknownguy: Create a resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - unknownguy: Create a resource +@@ -1291,8 +1293,8 @@ Error performing operation: Permission denied + =#=#=#= End test: l33t-haxor: Set stonith-enabled - Permission denied (13) =#=#=#= + * Passed: crm_attribute - l33t-haxor: Set stonith-enabled + =#=#=#= Begin test: l33t-haxor: Create a resource =#=#=#= +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: parent +-__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: parent ++__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] + Call failed: Permission denied + =#=#=#= End test: l33t-haxor: Create a resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - l33t-haxor: Create a resource +@@ -1351,7 +1353,7 @@ Call failed: Permission denied + =#=#=#= End test: niceguy: Query configuration - OK (0) =#=#=#= + * Passed: cibadmin - niceguy: Query configuration + =#=#=#= Begin test: niceguy: Set enable-acl =#=#=#= +-__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default ++__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default + Error performing operation: Permission denied + Error setting enable-acl=false (section=crm_config, set=<null>): Permission denied + =#=#=#= End test: niceguy: Set enable-acl - Permission denied (13) =#=#=#= +@@ -1412,8 +1414,8 @@ Error setting enable-acl=false (section=crm_config, set=<null>): Permission deni + =#=#=#= End test: niceguy: Set stonith-enabled - OK (0) =#=#=#= + * Passed: crm_attribute - niceguy: Set stonith-enabled + =#=#=#= Begin test: niceguy: Create a resource =#=#=#= +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: default +-__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy']: default ++__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy'] + Call failed: Permission denied + =#=#=#= End test: niceguy: Create a resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Create a resource +@@ -1596,10 +1598,11 @@ Error
performing operation: Permission denied + =#=#=#= End test: l33t-haxor: Remove a resource meta attribute - Permission denied (13) =#=#=#= + * Passed: crm_resource - l33t-haxor: Remove a resource meta attribute + =#=#=#= Begin test: niceguy: Create a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity +-__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++Set 'dummy' option: id=dummy-meta_attributes-target-role set=dummy-meta_attributes name=target-role=Stopped + =#=#=#= Current cib after: niceguy: Create a resource meta attribute =#=#=#= + + +@@ -1661,9 +1664,9 @@ __xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is + =#=#=#= End test: niceguy: Create a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - niceguy: Create a resource meta attribute + =#=#=#= Begin test: niceguy: Query a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity + Stopped + =#=#=#= Current cib after: niceguy: Query a resource meta attribute =#=#=#= + +@@ -1726,10 +1729,10 @@ Stopped + =#=#=#= End test: niceguy: Query a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - niceguy: Query a resource meta attribute + =#=#=#= Begin test: niceguy: Remove a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity +-Deleted dummy option: id=dummy-meta_attributes-target-role name=target-role ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++Deleted 'dummy' option: id=dummy-meta_attributes-target-role name=target-role + =#=#=#= Current cib after: niceguy: Remove a resource meta attribute =#=#=#= + + +@@ -1789,10 +1792,11 @@ Deleted dummy option: id=dummy-meta_attributes-target-role name=target-role + =#=#=#= End test: niceguy: Remove a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - niceguy: 
Remove a resource meta attribute + =#=#=#= Begin test: niceguy: Create a resource meta attribute =#=#=#= +-error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined +-error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option +-error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity +-__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined ++error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option ++error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity ++__xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is allowed ++Set 'dummy' option: id=dummy-meta_attributes-target-role set=dummy-meta_attributes name=target-role=Started + =#=#=#= Current cib after: niceguy: Create a resource meta attribute =#=#=#= + + +@@ -1903,8 +1907,8 @@ __xml_acl_post_process: Creation of nvpair=dummy-meta_attributes-target-role is + + + =#=#=#= Begin test: niceguy: Replace - remove acls =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/acls: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/acls: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - remove acls - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - remove acls +@@ -1967,9 +1971,9 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - create resource =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy2']: default +-__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy2'] ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy2']: default ++__xml_acl_post_process: Cannot add new node primitive at /cib/configuration/resources/primitive[@id='dummy2'] + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - create resource - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - create resource +@@ -2031,8 +2035,8 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - modify attribute (deny) =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl'][@value]: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - modify attribute (deny) - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - modify attribute (deny) +@@ -2094,8 +2098,8 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - delete attribute (deny) =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default 
+-__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl']: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options']/nvpair[@id='cib-bootstrap-options-enable-acl']: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - delete attribute (deny) - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - delete attribute (deny) +@@ -2157,8 +2161,8 @@ Call failed: Permission denied + + + =#=#=#= Begin test: niceguy: Replace - create attribute (deny) =#=#=#= +-__xml_acl_check: 400 access denied to /cib[@epoch]: default +-__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy'][@description]: default ++__xml_acl_check: 400 access denied to /cib[@epoch]: default ++__xml_acl_check: 400 access denied to /cib/configuration/resources/primitive[@id='dummy'][@description]: default + Call failed: Permission denied + =#=#=#= End test: niceguy: Replace - create attribute (deny) - Permission denied (13) =#=#=#= + * Passed: cibadmin - niceguy: Replace - create attribute (deny) +diff --git a/tools/regression.tools.exp b/tools/regression.tools.exp +index 287caf9..b2f4df1 100644 +--- a/tools/regression.tools.exp ++++ b/tools/regression.tools.exp +@@ -626,6 +626,7 @@ Deleted nodes attribute: id=nodes-node1-standby name=standby + =#=#=#= End test: Create a resource - OK (0) =#=#=#= + * Passed: cibadmin - Create a resource + =#=#=#= Begin test: Create a resource meta attribute =#=#=#= ++Set 'dummy' option: id=dummy-meta_attributes-is-managed set=dummy-meta_attributes name=is-managed=false + =#=#=#= Current cib after: Create a resource meta attribute =#=#=#= + + +@@ -695,7 +696,7 @@ false + =#=#=#= End test: Query a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - Query a resource meta attribute + =#=#=#= Begin test: Remove a resource meta attribute =#=#=#= +-Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed ++Deleted 'dummy' option: id=dummy-meta_attributes-is-managed name=is-managed + =#=#=#= Current cib after: Remove a resource meta attribute =#=#=#= + + +@@ -728,6 +729,7 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed + =#=#=#= End test: Remove a resource meta attribute - OK (0) =#=#=#= + * Passed: crm_resource - Remove a resource meta attribute + =#=#=#= Begin test: Create a resource attribute =#=#=#= ++Set 'dummy' option: id=dummy-instance_attributes-delay set=dummy-instance_attributes name=delay=10s + =#=#=#= Current cib after: Create a resource attribute =#=#=#= + + +@@ -763,7 +765,7 @@ Deleted dummy option: id=dummy-meta_attributes-is-managed name=is-managed + =#=#=#= End test: Create a resource attribute - OK (0) =#=#=#= + * Passed: crm_resource - Create a resource attribute + =#=#=#= Begin test: List the configured resources =#=#=#= +- dummy (ocf::pacemaker:Dummy): Stopped ++ dummy (ocf::pacemaker:Dummy): Stopped + =#=#=#= Current cib after: List the configured resources =#=#=#= + + +@@ -973,8 +975,8 @@ Error performing operation: No such device or address + Current cluster status: + Online: [ node1 ] + +- dummy (ocf::pacemaker:Dummy): Stopped +- Fence (stonith:fence_true): Stopped ++ dummy (ocf::pacemaker:Dummy): Stopped ++ Fence (stonith:fence_true): Stopped + + Transition Summary: + * Start dummy (node1) +@@ -990,8 
+992,8 @@ Executing cluster transition: + Revised cluster status: + Online: [ node1 ] + +- dummy (ocf::pacemaker:Dummy): Started node1 +- Fence (stonith:fence_true): Started node1 ++ dummy (ocf::pacemaker:Dummy): Started node1 ++ Fence (stonith:fence_true): Started node1 + + =#=#=#= Current cib after: Bring resources online =#=#=#= + +@@ -1710,8 +1712,8 @@ Error performing operation: No such device or address + Current cluster status: + Online: [ node1 ] + +- dummy (ocf::pacemaker:Dummy): Started node1 +- Fence (stonith:fence_true): Started node1 ++ dummy (ocf::pacemaker:Dummy): Started node1 ++ Fence (stonith:fence_true): Started node1 + + Performing requested modifications + + Bringing node node2 online +@@ -1733,8 +1735,8 @@ Executing cluster transition: + Revised cluster status: + Online: [ node1 node2 node3 ] + +- dummy (ocf::pacemaker:Dummy): Started node1 +- Fence (stonith:fence_true): Started node2 ++ dummy (ocf::pacemaker:Dummy): Started node1 ++ Fence (stonith:fence_true): Started node2 + + =#=#=#= Current cib after: Create two more nodes and bring them online =#=#=#= + +@@ -1996,8 +1998,8 @@ WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node2' with a score + Current cluster status: + Online: [ node1 node2 node3 ] + +- dummy (ocf::pacemaker:Dummy): Started node1 +- Fence (stonith:fence_true): Started node2 ++ dummy (ocf::pacemaker:Dummy): Started node1 ++ Fence (stonith:fence_true): Started node2 + + Transition Summary: + * Move dummy (Started node1 -> node3) +@@ -2010,8 +2012,8 @@ Executing cluster transition: + Revised cluster status: + Online: [ node1 node2 node3 ] + +- dummy (ocf::pacemaker:Dummy): Started node3 +- Fence (stonith:fence_true): Started node2 ++ dummy (ocf::pacemaker:Dummy): Started node3 ++ Fence (stonith:fence_true): Started node2 + + =#=#=#= Current cib after: Relocate resources due to ban =#=#=#= + +diff --git a/valgrind-pcmk.suppressions b/valgrind-pcmk.suppressions +index 2e382df..0a47096 100644 +--- a/valgrind-pcmk.suppressions ++++ b/valgrind-pcmk.suppressions +@@ -1,4 +1,4 @@ +-# Valgrind suppressions for PE testing ++# Valgrind suppressions for Pacemaker testing + { + Valgrind bug + Memcheck:Addr8 +@@ -57,6 +57,15 @@ + } + + { ++ Cman - Who cares if unused bytes are uninitialized ++ Memcheck:Param ++ sendmsg(msg) ++ fun:__sendmsg_nocancel ++ obj:*/libcman.so.3.0 ++ obj:*/libcman.so.3.0 ++} ++ ++{ + Cman - Jump or move depends on uninitialized values + Memcheck:Cond + obj:*/libcman.so.3.0 diff --git a/pacemaker-rollup-7-1-3d781d3.patch b/pacemaker-rollup-7-1-3d781d3.patch new file mode 100644 index 0000000..30afd6d --- /dev/null +++ b/pacemaker-rollup-7-1-3d781d3.patch @@ -0,0 +1,7989 @@ +diff --git a/cib/io.c b/cib/io.c +index e2873a8..4e2b24a 100644 +--- a/cib/io.c ++++ b/cib/io.c +@@ -254,9 +254,7 @@ readCibXmlFile(const char *dir, const char *file, gboolean discard_status) + if (cib_writes_enabled && use_valgrind) { + if (crm_is_true(use_valgrind) || strstr(use_valgrind, "cib")) { + cib_writes_enabled = FALSE; +- crm_err("*********************************************************"); + crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); +- crm_err("*********************************************************"); + } + } + +diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h +index 81a53c5..78432df 100644 +--- a/crmd/crmd_lrm.h ++++ b/crmd/crmd_lrm.h +@@ -37,6 +37,8 @@ typedef struct resource_history_s { + GHashTable *stop_params; + } rsc_history_t; + ++void history_free(gpointer data); ++ + /* TODO - Replace this with
lrmd_event_data_t */ + struct recurring_op_s { + int call_id; +diff --git a/crmd/lrm.c b/crmd/lrm.c +index 062f769..418e7cf 100644 +--- a/crmd/lrm.c ++++ b/crmd/lrm.c +@@ -103,6 +103,80 @@ copy_meta_keys(gpointer key, gpointer value, gpointer user_data) + } + } + ++/* ++ * \internal ++ * \brief Remove a recurring operation from a resource's history ++ * ++ * \param[in,out] history Resource history to modify ++ * \param[in] op Operation to remove ++ * ++ * \return TRUE if the operation was found and removed, FALSE otherwise ++ */ ++static gboolean ++history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) ++{ ++ GList *iter; ++ ++ for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { ++ lrmd_event_data_t *existing = iter->data; ++ ++ if ((op->interval == existing->interval) ++ && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) ++ && safe_str_eq(op->op_type, existing->op_type)) { ++ ++ history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); ++ lrmd_free_event(existing); ++ return TRUE; ++ } ++ } ++ return FALSE; ++} ++ ++/* ++ * \internal ++ * \brief Free all recurring operations in resource history ++ * ++ * \param[in,out] history Resource history to modify ++ */ ++static void ++history_free_recurring_ops(rsc_history_t *history) ++{ ++ GList *iter; ++ ++ for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { ++ lrmd_free_event(iter->data); ++ } ++ g_list_free(history->recurring_op_list); ++ history->recurring_op_list = NULL; ++} ++ ++/* ++ * \internal ++ * \brief Free resource history ++ * ++ * \param[in,out] history Resource history to free ++ */ ++void ++history_free(gpointer data) ++{ ++ rsc_history_t *history = (rsc_history_t*)data; ++ ++ if (history->stop_params) { ++ g_hash_table_destroy(history->stop_params); ++ } ++ ++ /* Don't need to free history->rsc.id because it's set to history->id */ ++ free(history->rsc.type); ++ free(history->rsc.class); ++ free(history->rsc.provider); ++ ++ lrmd_free_event(history->failed); ++ lrmd_free_event(history->last); ++ free(history->id); ++ history_free_recurring_ops(history); ++ free(history); ++} ++ + static void + update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) + { +@@ -145,25 +219,10 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ + target_rc = rsc_op_expected_rc(op); + if (op->op_status == PCMK_LRM_OP_CANCELLED) { + if (op->interval > 0) { +- GList *gIter, *gIterNext; +- + crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, + op->interval); +- +- for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIterNext) { +- lrmd_event_data_t *existing = gIter->data; +- +- gIterNext = gIter->next; +- +- if (crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) +- && safe_str_eq(op->op_type, existing->op_type) +- && op->interval == existing->interval) { +- lrmd_free_event(existing); +- entry->recurring_op_list = g_list_delete_link(entry->recurring_op_list, gIter); +- } +- } ++ history_remove_recurring_op(entry, op); + return; +- + } else { + crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, + op->rc, op->op_status); +@@ -201,32 +260,17 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ + } + + if (op->interval > 0) { +- GListPtr iter = NULL; +- +- for(iter = entry->recurring_op_list; iter; iter = iter->next) { +- lrmd_event_data_t *o = iter->data; +- +- /* op->rsc_id is implied */ +- 
if(op->interval == o->interval && strcmp(op->op_type, o->op_type) == 0) { +- crm_trace("Removing existing recurring op entry: %s_%s_%d", op->rsc_id, op->op_type, op->interval); +- entry->recurring_op_list = g_list_remove(entry->recurring_op_list, o); +- break; +- } +- } ++ /* Ensure there are no duplicates */ ++ history_remove_recurring_op(entry, op); + + crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); + entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); + + } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { +- GList *gIter = entry->recurring_op_list; +- + crm_trace("Dropping %d recurring ops because of: %s_%s_%d", +- g_list_length(gIter), op->rsc_id, op->op_type, op->interval); +- for (; gIter != NULL; gIter = gIter->next) { +- lrmd_free_event(gIter->data); +- } +- g_list_free(entry->recurring_op_list); +- entry->recurring_op_list = NULL; ++ g_list_length(entry->recurring_op_list), op->rsc_id, ++ op->op_type, op->interval); ++ history_free_recurring_ops(entry); + } + } + +diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c +index 374c806..162ad03 100644 +--- a/crmd/lrm_state.c ++++ b/crmd/lrm_state.c +@@ -32,24 +32,6 @@ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); + void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); + + static void +-history_cache_destroy(gpointer data) +-{ +- rsc_history_t *entry = data; +- +- if (entry->stop_params) { +- g_hash_table_destroy(entry->stop_params); +- } +- +- free(entry->rsc.type); +- free(entry->rsc.class); +- free(entry->rsc.provider); +- +- lrmd_free_event(entry->failed); +- lrmd_free_event(entry->last); +- free(entry->id); +- free(entry); +-} +-static void + free_rsc_info(gpointer value) + { + lrmd_rsc_info_t *rsc_info = value; +@@ -155,7 +137,7 @@ lrm_state_create(const char *node_name) + g_str_equal, g_hash_destroy_str, free_recurring_op); + + state->resource_history = g_hash_table_new_full(crm_str_hash, +- g_str_equal, NULL, history_cache_destroy); ++ g_str_equal, NULL, history_free); + + g_hash_table_insert(lrm_state_table, (char *)state->node_name, state); + return state; +diff --git a/cts/CM_ais.py b/cts/CM_ais.py +index 44f91cd..a34f9b1 100644 +--- a/cts/CM_ais.py ++++ b/cts/CM_ais.py +@@ -49,42 +49,46 @@ class crm_ais(crm_lha): + def NodeUUID(self, node): + return node + +- def ais_components(self): ++ def ais_components(self, extra={}): + + complist = [] + if not len(self.fullcomplist.keys()): + for c in ["cib", "lrmd", "crmd", "attrd" ]: +- self.fullcomplist[c] = Process( +- self, c, +- pats = self.templates.get_component(self.name, c), +- badnews_ignore = self.templates.get_component(self.name, "%s-ignore"%c), +- common_ignore = self.templates.get_component(self.name, "common-ignore")) +- +- self.fullcomplist["pengine"] = Process( +- self, "pengine", +- dc_pats = self.templates.get_component(self.name, "pengine"), +- badnews_ignore = self.templates.get_component(self.name, "pengine-ignore"), +- common_ignore = self.templates.get_component(self.name, "common-ignore")) +- +- self.fullcomplist["stonith-ng"] = Process( +- self, "stonith-ng", process="stonithd", +- pats = self.templates.get_component(self.name, "stonith"), +- badnews_ignore = self.templates.get_component(self.name, "stonith-ignore"), +- common_ignore = self.templates.get_component(self.name, "common-ignore")) +- ++ self.fullcomplist[c] = Process( ++ self, c, ++ pats = 
self.templates.get_component(self.name, c), ++ badnews_ignore = self.templates.get_component(self.name, "%s-ignore" % c), ++ common_ignore = self.templates.get_component(self.name, "common-ignore")) ++ ++ # pengine uses dc_pats instead of pats ++ self.fullcomplist["pengine"] = Process( ++ self, "pengine", ++ dc_pats = self.templates.get_component(self.name, "pengine"), ++ badnews_ignore = self.templates.get_component(self.name, "pengine-ignore"), ++ common_ignore = self.templates.get_component(self.name, "common-ignore")) ++ ++ # stonith-ng's process name is different from its component name ++ self.fullcomplist["stonith-ng"] = Process( ++ self, "stonith-ng", process="stonithd", ++ pats = self.templates.get_component(self.name, "stonith"), ++ badnews_ignore = self.templates.get_component(self.name, "stonith-ignore"), ++ common_ignore = self.templates.get_component(self.name, "common-ignore")) ++ ++ # add (or replace) any extra components passed in ++ self.fullcomplist.update(extra) ++ ++ # Processes running under valgrind can't be shot with "killall -9 processname", ++ # so don't include them in the returned list + vgrind = self.Env["valgrind-procs"].split() + for key in self.fullcomplist.keys(): + if self.Env["valgrind-tests"]: +- if key in vgrind: +- # Processes running under valgrind can't be shot with "killall -9 processname" ++ if key in vgrind: + self.log("Filtering %s from the component list as it is being profiled by valgrind" % key) + continue + if key == "stonith-ng" and not self.Env["DoFencing"]: + continue +- + complist.append(self.fullcomplist[key]) + +- #self.complist = [ fullcomplist["pengine"] ] + return complist + + +@@ -100,17 +104,14 @@ class crm_cs_v0(crm_ais): + crm_ais.__init__(self, Environment, randseed=randseed, name=name) + + def Components(self): +- self.ais_components() +- c = "corosync" +- +- self.fullcomplist[c] = Process( +- self, c, +- pats = self.templates.get_component(self.name, c), +- badnews_ignore = self.templates.get_component(self.name, "%s-ignore"%c), ++ extra = {} ++ extra["corosync"] = Process( ++ self, "corosync", ++ pats = self.templates.get_component(self.name, "corosync"), ++ badnews_ignore = self.templates.get_component(self.name, "corosync-ignore"), + common_ignore = self.templates.get_component(self.name, "common-ignore") + ) +- +- return self.ais_components() ++ return self.ais_components(extra=extra) + + + class crm_cs_v1(crm_cs_v0): +diff --git a/cts/environment.py b/cts/environment.py +index a3399c3..61d4211 100644 +--- a/cts/environment.py ++++ b/cts/environment.py +@@ -59,7 +59,7 @@ class Environment: + self["stonith-params"] = "hostlist=all,livedangerously=yes" + self["loop-minutes"] = 60 + self["valgrind-prefix"] = None +- self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng" ++ self["valgrind-procs"] = "attrd cib crmd lrmd pengine stonith-ng" + self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" + + self["experimental-tests"] = 0 +@@ -578,6 +578,10 @@ class Environment: + elif args[i] == "--valgrind-tests": + self["valgrind-tests"] = 1 + ++ elif args[i] == "--valgrind-procs": ++ self["valgrind-procs"] = args[i+1] ++ skipthis = 1 ++ + elif args[i] == "--no-loop-tests": + self["loop-tests"] = 0 + +diff --git a/cts/patterns.py b/cts/patterns.py +index 1bc05a6..493b690 100644 +--- a/cts/patterns.py ++++ b/cts/patterns.py +@@ -7,7 +7,9 @@ class BasePatterns: + def __init__(self, name): + self.name = name + 
patternvariants[name] = self +- self.ignore = [] ++ self.ignore = [ ++ "avoid confusing Valgrind", ++ ] + self.BadNews = [] + self.components = {} + self.commands = { +@@ -140,7 +142,7 @@ class crm_lha(BasePatterns): + r"Parameters to .* changed", + ] + +- self.ignore = [ ++ self.ignore = self.ignore + [ + r"(ERROR|error):.*\s+assert\s+at\s+crm_glib_handler:" + "(ERROR|error): Message hist queue is filling up", + "stonithd.*CRIT: external_hostlist:.*'vmware gethosts' returned an empty hostlist", +@@ -177,7 +179,7 @@ class crm_cs_v0(BasePatterns): + "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", + }) + +- self.ignore = [ ++ self.ignore = self.ignore + [ + r"crm_mon:", + r"crmadmin:", + r"update_trace_data", +diff --git a/extra/ansible/docker/group_vars/all b/extra/ansible/docker/group_vars/all +new file mode 100644 +index 0000000..935e88a +--- /dev/null ++++ b/extra/ansible/docker/group_vars/all +@@ -0,0 +1,5 @@ ++max: 4 ++prefix: ansible-pcmk ++base_image: centos:centos7 ++subnet: 172.17.200 ++pacemaker_authkey: this_is_very_insecure +\ No newline at end of file +diff --git a/extra/ansible/docker/hosts b/extra/ansible/docker/hosts +new file mode 100644 +index 0000000..5b0fb71 +--- /dev/null ++++ b/extra/ansible/docker/hosts +@@ -0,0 +1,7 @@ ++[controllers] ++oss-uk-1.clusterlabs.org ++ ++[containers] ++ansible-1 ++ansible-2 ++ansible-3 +diff --git a/extra/ansible/docker/roles/docker-host/files/docker-enter b/extra/ansible/docker/roles/docker-host/files/docker-enter +new file mode 100644 +index 0000000..04c4822 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/files/docker-enter +@@ -0,0 +1,29 @@ ++#! /bin/sh -e ++ ++case "$1" in ++ -h|--help) ++ echo "Usage: docker-enter CONTAINER [COMMAND]" ++ exit 0 ++ ;; ++esac ++ ++if [ $(id -ru) -ne 0 ]; then ++ echo "You have to be root." ++ exit 1 ++fi ++ ++if [ $# -eq 0 ]; then ++ echo "Usage: docker-enter CONTAINER [COMMAND]" ++ exit 1 ++fi ++ ++container=$1; shift ++PID=$(docker inspect --format {{.State.Pid}} "$container") ++ ++if [ $# -ne 0 ]; then ++ nsenter --target $PID --mount --uts --ipc --net --pid -- $* ++ exit $? ++fi ++ ++nsenter --target $PID --mount --uts --ipc --net --pid ++exit 0 +diff --git a/extra/ansible/docker/roles/docker-host/files/fence_docker_cts b/extra/ansible/docker/roles/docker-host/files/fence_docker_cts +new file mode 100644 +index 0000000..6d6f025 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/files/fence_docker_cts +@@ -0,0 +1,202 @@ ++#!/bin/bash ++# ++# Copyright (c) 2014 David Vossel ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. 
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write the Free Software Foundation,
++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
++#
++#######################################################################
++
++port=""
++action="list"          # Default fence action
++
++function usage()
++{
++cat <<EOF
++<?xml version="1.0" ?>
++<resource-agent name="fence_docker_cts" shortdesc="Fence agent for docker containers used by CTS">
++<longdesc>
++fence_docker_cts fences docker containers for testing purposes.
++</longdesc>
++<parameters>
++<parameter name="action" unique="1" required="1">
++<getopt mixed="-o, --action" />
++<content type="string" default="list" />
++<shortdesc lang="en">Fencing Action</shortdesc>
++</parameter>
++<parameter name="port" unique="1" required="1">
++<getopt mixed="-n, --port" />
++<content type="string" />
++<shortdesc lang="en">The name/id of docker container to control/check</shortdesc>
++</parameter>
++</parameters>
++<actions>
++<action name="on" />
++<action name="off" />
++<action name="reboot" />
++<action name="status" />
++<action name="list" />
++<action name="monitor" />
++<action name="metadata" />
++</actions>
++</resource-agent>
++EOF
++    exit 0;
++}
++
++function docker_log() {
++    if ! [ "$action" = "list" ]; then
++        printf "$*\n" 1>&2
++    fi
++}
++
++# stdin option processing
++if [ -z $1 ]; then
++    # If there are no command line args, look for options from stdin
++    while read line; do
++        for word in $(echo "$line"); do
++            case $word in
++                option=*|action=*) action=`echo $word | sed s/.*=//`;;
++                port=*)            port=`echo $word | sed s/.*=//`;;
++                node=*)            port=`echo $word | sed s/.*=//`;;
++                nodename=*)        port=`echo $word | sed s/.*=//`;;
++                --);;
++                *) docker_log "Invalid command: $word";;
++            esac
++        done
++    done
++fi
++
++# Command line option processing
++while true ; do
++    if [ -z "$1" ]; then
++        break;
++    fi
++    case "$1" in
++        -o|--action|--option) action=$2; shift; shift;;
++        -n|--port) port=$2; shift; shift;;
++        -V|--version) echo "1.0.0"; exit 0;;
++        --help|-h)
++            usage;
++            exit 0;;
++        --) shift ; break ;;
++        *) docker_log "Unknown option: $1. See --help for details."; exit 1;;
++    esac
++done
++
++action=`echo $action | tr 'A-Z' 'a-z'`
++case $action in
++    hostlist|list) action=list;;
++    stat|status) action=status;;
++    restart|reboot|reset) action=reboot;;
++    poweron|on) action=start;;
++    poweroff|off) action=stop;;
++esac
++
++function fence_done()
++{
++    if [ $1 -eq 0 ]; then
++        docker_log "Operation $action (port=$port) passed"
++    else
++        docker_log "Operation $action (port=$port) failed: $1"
++    fi
++    if [ -n "$returnfile" ]; then
++        rm -f $returnfile
++    fi
++    if [ -n "$helper_script" ]; then
++        rm -f $helper_script
++    fi
++    exit $1
++}
++
++case $action in
++    metadata) usage;;
++esac
++
++returnfile=$(mktemp /tmp/fence_docker_cts_returnfileXXXX)
++returnstring=""
++helper_script=$(mktemp /tmp/fence_docker_cts_helperXXXX)
++
++exec_action()
++{
++    echo "#!/bin/bash" > $helper_script
++    echo "sleep 10000" >> $helper_script
++    chmod 755 $helper_script
++    src="$(uname -n)"
++
++    $helper_script "$src" "$action" "$returnfile" "$port" > /dev/null 2>&1 &
++    pid=$!
++    docker_log "waiting on pid $pid"
++    wait $pid > /dev/null 2>&1
++    returnstring=$(cat $returnfile)
++
++    if [ -z "$returnstring" ]; then
++        docker_log "fencing daemon did not respond"
++        fence_done 1
++    fi
++
++    if [ "$returnstring" == "fail" ]; then
++        docker_log "fencing daemon failed to execute action [$action on port $port]"
++        fence_done 1
++    fi
++
++    return 0
++}
++
++exec_action
++case $action in
++    list)
++        cat $returnfile
++        fence_done 0
++        ;;
++
++    status)
++        # 0 if container is on
++        # 1 if container can not be contacted or unknown
++        # 2 if container is off
++        if [ "$returnstring" = "true" ]; then
++            fence_done 0
++        else
++            fence_done 2
++        fi
++        ;;
++    monitor|stop|start|reboot) : ;;
++    *) docker_log "Unknown action: $action"; fence_done 1;;
++esac
++
++fence_done $?
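The fence_docker_cts agent above follows the usual fence-agent calling convention: it takes options either on the command line or as key=value pairs on stdin, the form in which stonithd feeds parameters to fence agents. A minimal invocation sketch, illustrative only, using a container name of the shape the Ansible role below creates ({{ prefix }}-{{ item }}):

    # command-line style: reboot one container and report the result
    fence_docker_cts --action reboot --port ansible-pcmk-1

    # stdin style, as a fencing daemon would drive the agent
    printf 'action=reboot\nport=ansible-pcmk-1\n' | fence_docker_cts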
+diff --git a/extra/ansible/docker/roles/docker-host/files/launch.sh b/extra/ansible/docker/roles/docker-host/files/launch.sh +new file mode 100644 +index 0000000..66bebf4 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/files/launch.sh +@@ -0,0 +1,4 @@ ++#!/bin/bash ++while true; do ++ sleep 1 ++done +diff --git a/extra/ansible/docker/roles/docker-host/files/pcmk_remote_start b/extra/ansible/docker/roles/docker-host/files/pcmk_remote_start +new file mode 100644 +index 0000000..1bf0320 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/files/pcmk_remote_start +@@ -0,0 +1,18 @@ ++#!/bin/bash ++/usr/sbin/ip_start ++pid=$(pidof pacemaker_remoted) ++if [ "$?" -ne 0 ]; then ++ mkdir -p /var/run ++ ++ export PCMK_debugfile=$pcmklogs ++ (pacemaker_remoted &) & > /dev/null 2>&1 ++ sleep 5 ++ ++ pid=$(pidof pacemaker_remoted) ++ if [ "$?" -ne 0 ]; then ++ echo "startup of pacemaker failed" ++ exit 1 ++ fi ++ echo "$pid" > /var/run/pacemaker_remoted.pid ++fi ++exit 0 +diff --git a/extra/ansible/docker/roles/docker-host/files/pcmk_remote_stop b/extra/ansible/docker/roles/docker-host/files/pcmk_remote_stop +new file mode 100644 +index 0000000..074cd59 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/files/pcmk_remote_stop +@@ -0,0 +1,36 @@ ++#!/bin/bash ++status() ++{ ++ pid=$(pidof $1 2>/dev/null) ++ rtrn=$? ++ if [ $rtrn -ne 0 ]; then ++ echo "$1 is stopped" ++ else ++ echo "$1 (pid $pid) is running..." ++ fi ++ return $rtrn ++} ++stop() ++{ ++ desc="Pacemaker Remote" ++ prog=$1 ++ shutdown_prog=$prog ++ ++ if status $shutdown_prog > /dev/null 2>&1; then ++ kill -TERM $(pidof $prog) > /dev/null 2>&1 ++ ++ while status $prog > /dev/null 2>&1; do ++ sleep 1 ++ echo -n "." ++ done ++ else ++ echo -n "$desc is already stopped" ++ fi ++ ++ rm -f /var/lock/subsystem/pacemaker ++ rm -f /var/run/${prog}.pid ++ killall -q -9 'crmd stonithd attrd cib lrmd pacemakerd pacemaker_remoted' ++} ++ ++stop "pacemaker_remoted" ++exit 0 +diff --git a/extra/ansible/docker/roles/docker-host/files/pcmk_start b/extra/ansible/docker/roles/docker-host/files/pcmk_start +new file mode 100644 +index 0000000..d8b2ba8 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/files/pcmk_start +@@ -0,0 +1,23 @@ ++#!/bin/bash ++ ++/usr/sbin/ip_start ++sed -i 's@to_syslog:.*yes@to_logfile: yes\nlogfile: /var/log/pacemaker.log@g' /etc/corosync/corosync.conf ++ ++/usr/share/corosync/corosync start > /dev/null 2>&1 ++ ++pid=$(pidof pacemakerd) ++if [ "$?" -ne 0 ]; then ++ mkdir -p /var/run ++ ++ export PCMK_debugfile=$pcmklogs ++ (pacemakerd &) & > /dev/null 2>&1 ++ sleep 5 ++ ++ pid=$(pidof pacemakerd) ++ if [ "$?" -ne 0 ]; then ++ echo "startup of pacemaker failed" ++ exit 1 ++ fi ++ echo "$pid" > /var/run/pacemakerd.pid ++fi ++exit 0 +diff --git a/extra/ansible/docker/roles/docker-host/files/pcmk_stop b/extra/ansible/docker/roles/docker-host/files/pcmk_stop +new file mode 100644 +index 0000000..a8f395a +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/files/pcmk_stop +@@ -0,0 +1,45 @@ ++#!/bin/bash ++status() ++{ ++ pid=$(pidof $1 2>/dev/null) ++ rtrn=$? ++ if [ $rtrn -ne 0 ]; then ++ echo "$1 is stopped" ++ else ++ echo "$1 (pid $pid) is running..." ++ fi ++ return $rtrn ++} ++stop() ++{ ++ desc="Pacemaker Cluster Manager" ++ prog=$1 ++ shutdown_prog=$prog ++ ++ if ! 
status $prog > /dev/null 2>&1; then ++ shutdown_prog="crmd" ++ fi ++ ++ cname=$(crm_node --name) ++ crm_attribute -N $cname -n standby -v true -l reboot ++ ++ if status $shutdown_prog > /dev/null 2>&1; then ++ kill -TERM $(pidof $prog) > /dev/null 2>&1 ++ ++ while status $prog > /dev/null 2>&1; do ++ sleep 1 ++ echo -n "." ++ done ++ else ++ echo -n "$desc is already stopped" ++ fi ++ ++ rm -f /var/lock/subsystem/pacemaker ++ rm -f /var/run/${prog}.pid ++ killall -q -9 'crmd stonithd attrd cib lrmd pacemakerd pacemaker_remoted' ++} ++ ++stop "pacemakerd" ++/usr/share/corosync/corosync stop > /dev/null 2>&1 ++killall -q -9 'corosync' ++exit 0 +diff --git a/extra/ansible/docker/roles/docker-host/tasks/main.yml b/extra/ansible/docker/roles/docker-host/tasks/main.yml +new file mode 100644 +index 0000000..ce69adf +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/tasks/main.yml +@@ -0,0 +1,77 @@ ++--- ++#local_action: command /usr/bin/take_out_of_pool {{ inventory_hostname }} ++- name: Update docker ++ yum: pkg=docker state=latest ++- name: Start docker ++ service: name=docker state=started enabled=yes ++- name: Install helper ++ copy: src=docker-enter dest=/usr/sbin/ mode=0755 ++- name: Download image ++ shell: docker pull {{ base_image }} ++- name: Cleanup kill ++ shell: docker kill $(docker ps -a | grep {{ prefix }} | awk '{print $1}') || echo "Nothing to kill" ++- name: Cleanup remove ++ shell: docker rm $(docker ps -a | grep {{ prefix }} | awk '{print $1}') || echo "Nothing to remove" ++- name: Cleanup docker skeleton ++ file: path={{ prefix }} state=absent ++- name: Create docker skeleton ++ file: path={{ prefix }}/{{ item }} state=directory recurse=yes ++ with_items: ++ - rpms ++ - repos ++ - bin_files ++ - launch_scripts ++- name: Create IP helper ++ template: src=ip_start.j2 dest={{ prefix }}/bin_files/ip_start mode=0755 ++- name: Copy helper scripts ++ copy: src={{ item }} dest={{ prefix }}/bin_files/{{ item }} mode=0755 ++ with_items: ++ - pcmk_stop ++ - pcmk_start ++ - pcmk_remote_stop ++ - pcmk_remote_start ++ - fence_docker_cts ++- name: Copy launch script ++ copy: src=launch.sh dest={{ prefix }}/launch_scripts/launch.sh mode=0755 ++- name: Copy authorized keys ++ shell: cp /root/.ssh/authorized_keys {{ prefix }} ++- name: Create docker file ++ template: src=Dockerfile.j2 dest={{ prefix }}/Dockerfile ++- name: Making image ++ shell: docker build -t {{ prefix }} {{ prefix }} ++- name: Launch images ++ shell: docker run -d -i -t -P -h {{ prefix }}-{{ item }} --name={{ prefix }}-{{ item }} -p 2200{{ item }}:22 $(docker images | grep {{ prefix }}.*latest | awk '{print $3}') /bin/bash ++ with_sequence: count={{ max }} ++- name: Calculate IPs ++ shell: for n in $(seq {{ max }} ); do echo {{ subnet }}.${n}; done | tr '\n' ' ' ++ register: node_ips ++- name: Start the IP ++ shell: docker-enter {{ prefix }}-{{ item }} ip_start ++ with_sequence: count={{ max }} ++- name: Configure cluster ++ shell: docker-enter {{ prefix }}-{{ item }} pcs cluster setup --local --name {{ prefix }} {{ node_ips.stdout }} ++ with_sequence: count={{ max }} ++- name: Start the cluster ++ shell: docker-enter {{ prefix }}-{{ item }} pcmk_start ++ with_sequence: count={{ max }} ++- name: Set cluster options ++ shell: docker-enter {{ prefix }}-1 pcs property set stonith-enabled=false ++- name: Configure VIP ++ shell: docker-enter {{ prefix }}-1 pcs resource create ClusterIP ocf:heartbeat:IPaddr2 ip={{ subnet }}.100 cidr_netmask=32 op monitor interval=30s ++- name: Configure ++ shell: docker-enter {{ prefix 
}}-1 pcs resource defaults resource-stickiness=100 ++- name: Configure ++ shell: docker-enter {{ prefix }}-1 pcs resource create WebSite apache configfile=/etc/httpd/conf/httpd.conf statusurl="http://localhost/server-status" op monitor interval=1min ++- name: Configure ++ shell: docker-enter {{ prefix }}-1 pcs constraint colocation add WebSite with ClusterIP INFINITY ++- name: Configure ++ shell: docker-enter {{ prefix }}-1 pcs constraint order ClusterIP then WebSite ++- name: Configure ++ shell: docker-enter {{ prefix }}-1 pcs constraint location WebSite prefers {{ prefix }}-1=50 ++# TODO: Enable fencing ++# TODO: Make this a full LAMP stack similar to https://github.com/ansible/ansible-examples/tree/master/lamp_simple ++# TODO: Create a Pacemaker module? ++ ++# run_once: true ++# delegate_to: web01.example.org ++ +diff --git a/extra/ansible/docker/roles/docker-host/templates/Dockerfile.j2 b/extra/ansible/docker/roles/docker-host/templates/Dockerfile.j2 +new file mode 100644 +index 0000000..1d57175 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/templates/Dockerfile.j2 +@@ -0,0 +1,16 @@ ++FROM {{ base_image }} ++ADD /repos /etc/yum.repos.d/ ++#ADD /rpms /root/ ++#RUN yum install -y /root/*.rpm ++ADD /launch_scripts /root/ ++ADD /bin_files /usr/sbin/ ++ ++RUN mkdir -p /root/.ssh; chmod 700 /root/.ssh ++ADD authorized_keys /root/.ssh/ ++ ++RUN yum install -y openssh-server net-tools pacemaker pacemaker-cts resource-agents pcs corosync which fence-agents-common sysvinit-tools ++RUN mkdir -p /etc/pacemaker/ ++RUN echo {{ pacemaker_authkey }} > /etc/pacemaker/authkey ++RUN /usr/sbin/sshd ++ ++ENTRYPOINT ["/root/launch.sh"] +diff --git a/extra/ansible/docker/roles/docker-host/templates/ip_start.j2 b/extra/ansible/docker/roles/docker-host/templates/ip_start.j2 +new file mode 100755 +index 0000000..edbd392 +--- /dev/null ++++ b/extra/ansible/docker/roles/docker-host/templates/ip_start.j2 +@@ -0,0 +1,3 @@ ++offset=$(hostname | sed s/.*-//) ++export OCF_ROOT=/usr/lib/ocf/ OCF_RESKEY_ip={{ subnet }}.${offset} OCF_RESKEY_cidr_netmask=32 ++/usr/lib/ocf/resource.d/heartbeat/IPaddr2 start +diff --git a/extra/ansible/docker/site.yml b/extra/ansible/docker/site.yml +new file mode 100644 +index 0000000..0cc65e4 +--- /dev/null ++++ b/extra/ansible/docker/site.yml +@@ -0,0 +1,12 @@ ++--- ++# See /etc/ansible/hosts or -i hosts ++- hosts: controllers ++ remote_user: root ++ roles: ++ - docker-host ++ ++#- hosts: containers ++# gather_facts: no ++# remote_user: root ++# roles: ++# - docker-container +diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h +index 42f9003..15f1b3c 100644 +--- a/include/crm/msg_xml.h ++++ b/include/crm/msg_xml.h +@@ -194,6 +194,7 @@ + # define XML_RSC_ATTR_INTERLEAVE "interleave" + # define XML_RSC_ATTR_INCARNATION "clone" + # define XML_RSC_ATTR_INCARNATION_MAX "clone-max" ++# define XML_RSC_ATTR_INCARNATION_MIN "clone-min" + # define XML_RSC_ATTR_INCARNATION_NODEMAX "clone-node-max" + # define XML_RSC_ATTR_MASTER_MAX "master-max" + # define XML_RSC_ATTR_MASTER_NODEMAX "master-node-max" +diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h +index 4214959..b95b1e5 100644 +--- a/include/crm/pengine/status.h ++++ b/include/crm/pengine/status.h +@@ -256,7 +256,6 @@ struct resource_s { + int stickiness; + int sort_index; + int failure_timeout; +- int remote_reconnect_interval; + int effective_priority; + int migration_threshold; + +@@ -295,6 +294,7 @@ struct resource_s { + + const char *isolation_wrapper; + gboolean exclusive_discover; ++ int 
remote_reconnect_interval;
+ };
+ 
+ struct pe_action_s {
+@@ -324,6 +324,26 @@ struct pe_action_s {
+     GHashTable *meta;
+     GHashTable *extra;
+ 
++    /*
++     * These two variables are associated with the constraint logic
++     * that requires one or more 'first' actions to be runnable before
++     * this action is allowed to execute.
++     *
++     * These variables are used with features such as 'clone-min', which
++     * requires a minimum number of cloned instances to be running
++     * before an order dependency can run. Another option that uses
++     * this is 'require-all=false' in ordering constraints. That option
++     * says "only require one instance of a resource to start before
++     * allowing dependencies to start"; basically, require-all=false is
++     * the same as clone-min=1.
++     */
++
++    /* current number of known runnable actions in the before list. */
++    int runnable_before;
++    /* the number of "before" runnable actions required for this action
++     * to be considered runnable */
++    int required_runnable_before;
++
+     GListPtr actions_before;    /* action_warpper_t* */
+     GListPtr actions_after;     /* action_warpper_t* */
+ };
+diff --git a/lib/cib/Makefile.am b/lib/cib/Makefile.am
+index e84f4f7..1e50511 100644
+--- a/lib/cib/Makefile.am
++++ b/lib/cib/Makefile.am
+@@ -28,7 +28,7 @@ noinst_HEADERS =
+ libcib_la_SOURCES = cib_ops.c cib_utils.c cib_client.c cib_native.c cib_attrs.c
+ libcib_la_SOURCES += cib_file.c cib_remote.c
+ 
+-libcib_la_LDFLAGS = -version-info 4:1:0 -L$(top_builddir)/lib/pengine/.libs
++libcib_la_LDFLAGS = -version-info 4:2:0 -L$(top_builddir)/lib/pengine/.libs
+ libcib_la_LIBADD = $(CRYPTOLIB) $(top_builddir)/lib/pengine/libpe_rules.la $(top_builddir)/lib/common/libcrmcommon.la
+ libcib_la_CFLAGS = -I$(top_srcdir)
+ 
+diff --git a/lib/cluster/Makefile.am b/lib/cluster/Makefile.am
+index 29413ba..29daeb2 100644
+--- a/lib/cluster/Makefile.am
++++ b/lib/cluster/Makefile.am
+@@ -28,7 +28,7 @@ header_HEADERS =
+ lib_LTLIBRARIES = libcrmcluster.la
+ 
+ libcrmcluster_la_SOURCES = election.c cluster.c membership.c
+-libcrmcluster_la_LDFLAGS = -version-info 4:2:0 $(CLUSTERLIBS)
++libcrmcluster_la_LDFLAGS = -version-info 5:0:1 $(CLUSTERLIBS)
+ libcrmcluster_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la
+ libcrmcluster_la_DEPENDENCIES = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la
+ 
+diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am
+index a593f40..f5c0766 100644
+--- a/lib/common/Makefile.am
++++ b/lib/common/Makefile.am
+@@ -37,7 +37,7 @@ if BUILD_CIBSECRETS
+ libcrmcommon_la_SOURCES += cib_secrets.c
+ endif
+ 
+-libcrmcommon_la_LDFLAGS = -version-info 7:0:4
++libcrmcommon_la_LDFLAGS = -version-info 8:0:5
+ libcrmcommon_la_LIBADD = @LIBADD_DL@ $(GNUTLSLIBS)
+ libcrmcommon_la_SOURCES += $(top_builddir)/lib/gnu/md5.c
+ 
+diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am
+index 2bdcfeb..fbe02e4 100644
+--- a/lib/fencing/Makefile.am
++++ b/lib/fencing/Makefile.am
+@@ -25,7 +25,7 @@ AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+ lib_LTLIBRARIES = libstonithd.la
+ 
+ libstonithd_la_SOURCES = st_client.c
+-libstonithd_la_LDFLAGS = -version-info 3:2:1
++libstonithd_la_LDFLAGS = -version-info 3:3:1
+ libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la
+ 
+ AM_CFLAGS = $(AM_CPPFLAGS)
+diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am
+index f961ae1..820654c 100644
+--- a/lib/lrmd/Makefile.am
++++ b/lib/lrmd/Makefile.am
+@@ -25,7 +25,7 @@ AM_CPPFLAGS = 
-I$(top_builddir)/include -I$(top_srcdir)/include \ + lib_LTLIBRARIES = liblrmd.la + + liblrmd_la_SOURCES = lrmd_client.c proxy_common.c +-liblrmd_la_LDFLAGS = -version-info 3:0:2 ++liblrmd_la_LDFLAGS = -version-info 3:1:2 + liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/fencing/libstonithd.la +diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am +index 78da075..60d1770 100644 +--- a/lib/pengine/Makefile.am ++++ b/lib/pengine/Makefile.am +@@ -26,11 +26,11 @@ lib_LTLIBRARIES = libpe_rules.la libpe_status.la + ## SOURCES + noinst_HEADERS = unpack.h variant.h + +-libpe_rules_la_LDFLAGS = -version-info 2:4:0 ++libpe_rules_la_LDFLAGS = -version-info 2:5:0 + libpe_rules_la_SOURCES = rules.c common.c + libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la + +-libpe_status_la_LDFLAGS = -version-info 8:0:4 ++libpe_status_la_LDFLAGS = -version-info 9:0:5 + libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c group.c clone.c rules.c common.c + libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la + +diff --git a/lib/services/dbus.c b/lib/services/dbus.c +index 6341fc5..e2efecb 100644 +--- a/lib/services/dbus.c ++++ b/lib/services/dbus.c +@@ -64,11 +64,14 @@ pcmk_dbus_find_error(const char *method, DBusPendingCall* pending, DBusMessage * + } else { + DBusMessageIter args; + int dtype = dbus_message_get_type(reply); ++ char *sig; + + switch(dtype) { + case DBUS_MESSAGE_TYPE_METHOD_RETURN: + dbus_message_iter_init(reply, &args); +- crm_trace("Call to %s returned '%s'", method, dbus_message_iter_get_signature(&args)); ++ sig = dbus_message_iter_get_signature(&args); ++ crm_trace("Call to %s returned '%s'", method, sig); ++ dbus_free(sig); + break; + case DBUS_MESSAGE_TYPE_INVALID: + error.message = "Invalid reply"; +@@ -217,11 +220,14 @@ bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected + + if(dtype != expected) { + DBusMessageIter args; ++ char *sig; + + dbus_message_iter_init(msg, &args); ++ sig = dbus_message_iter_get_signature(&args); + do_crm_log_alias(LOG_ERR, __FILE__, function, line, +- "Unexepcted DBus type, expected %c in '%s' instead of %c", +- expected, dbus_message_iter_get_signature(&args), dtype); ++ "Unexpected DBus type, expected %c in '%s' instead of %c", ++ expected, sig, dtype); ++ dbus_free(sig); + return FALSE; + } + +diff --git a/lib/services/services.c b/lib/services/services.c +index 08bff88..7e2b9f7 100644 +--- a/lib/services/services.c ++++ b/lib/services/services.c +@@ -348,6 +348,34 @@ services_action_create_generic(const char *exec, const char *args[]) + return op; + } + ++#if SUPPORT_DBUS ++/* ++ * \internal ++ * \brief Update operation's pending DBus call, unreferencing old one if needed ++ * ++ * \param[in,out] op Operation to modify ++ * \param[in] pending Pending call to set ++ */ ++void ++services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) ++{ ++ if (op->opaque->pending && (op->opaque->pending != pending)) { ++ if (pending) { ++ crm_info("Lost pending DBus call (%p)", op->opaque->pending); ++ } else { ++ crm_trace("Done with pending DBus call (%p)", op->opaque->pending); ++ } ++ dbus_pending_call_unref(op->opaque->pending); ++ } ++ op->opaque->pending = pending; ++ if (pending) { ++ crm_trace("Updated pending DBus call (%p)", pending); ++ } else { ++ crm_trace("Cleared pending DBus call"); ++ } ++} ++#endif ++ + void + services_action_cleanup(svc_action_t * op) + { 
+diff --git a/lib/services/services_private.h b/lib/services/services_private.h +index 183afb5..a98cd91 100644 +--- a/lib/services/services_private.h ++++ b/lib/services/services_private.h +@@ -63,4 +63,8 @@ void handle_blocked_ops(void); + + gboolean is_op_blocked(const char *rsc); + ++#if SUPPORT_DBUS ++void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending); ++#endif ++ + #endif /* __MH_SERVICES_PRIVATE_H__ */ +diff --git a/lib/services/systemd.c b/lib/services/systemd.c +index 749d61c..e1e1bc9 100644 +--- a/lib/services/systemd.c ++++ b/lib/services/systemd.c +@@ -461,7 +461,12 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data) + + if(op) { + crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action); +- op->opaque->pending = NULL; ++ if (pending == op->opaque->pending) { ++ op->opaque->pending = NULL; ++ } else { ++ crm_info("Received unexpected reply for pending DBus call (%p vs %p)", ++ op->opaque->pending, pending); ++ } + systemd_exec_result(reply, op); + + } else { +@@ -499,10 +504,7 @@ systemd_unit_check(const char *name, const char *state, void *userdata) + } + + if (op->synchronous == FALSE) { +- if (op->opaque->pending) { +- dbus_pending_call_unref(op->opaque->pending); +- } +- op->opaque->pending = NULL; ++ services_set_op_pending(op, NULL); + operation_finalize(op); + } + } +@@ -535,7 +537,7 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) + return op->rc == PCMK_OCF_OK; + } else if (pending) { + dbus_pending_call_ref(pending); +- op->opaque->pending = pending; ++ services_set_op_pending(op, pending); + return TRUE; + } + +@@ -617,8 +619,7 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) + + dbus_message_unref(msg); + if(pending) { +- dbus_pending_call_ref(pending); +- op->opaque->pending = pending; ++ services_set_op_pending(op, pending); + return TRUE; + } + return FALSE; +diff --git a/lib/transition/Makefile.am b/lib/transition/Makefile.am +index 8ce7775..04d18fe 100644 +--- a/lib/transition/Makefile.am ++++ b/lib/transition/Makefile.am +@@ -27,7 +27,7 @@ lib_LTLIBRARIES = libtransitioner.la + noinst_HEADERS = + libtransitioner_la_SOURCES = unpack.c graph.c utils.c + +-libtransitioner_la_LDFLAGS = -version-info 2:3:0 ++libtransitioner_la_LDFLAGS = -version-info 2:4:0 + libtransitioner_la_CFLAGS = -I$(top_builddir) + libtransitioner_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la + +diff --git a/pengine/Makefile.am b/pengine/Makefile.am +index 31532cf..0e12a1f 100644 +--- a/pengine/Makefile.am ++++ b/pengine/Makefile.am +@@ -61,7 +61,7 @@ endif + noinst_HEADERS = allocate.h utils.h pengine.h + #utils.h pengine.h + +-libpengine_la_LDFLAGS = -version-info 8:0:4 ++libpengine_la_LDFLAGS = -version-info 9:0:5 + # -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version + libpengine_la_SOURCES = pengine.c allocate.c utils.c constraints.c + libpengine_la_SOURCES += native.c group.c clone.c master.c graph.c utilization.c +diff --git a/pengine/allocate.c b/pengine/allocate.c +index 68cafd4..ec5a18d 100644 +--- a/pengine/allocate.c ++++ b/pengine/allocate.c +@@ -1962,7 +1962,6 @@ expand_node_list(GListPtr list) + if(node_list) { + existing_len = strlen(node_list); + } +- + crm_trace("Adding %s (%dc) at offset %d", node->details->uname, len - 2, existing_len); + node_list = realloc_safe(node_list, len + existing_len); + sprintf(node_list + existing_len, "%s%s", existing_len == 0 ? 
"":" ", node->details->uname); +diff --git a/pengine/allocate.h b/pengine/allocate.h +index f6602c6..73f750e 100644 +--- a/pengine/allocate.h ++++ b/pengine/allocate.h +@@ -171,5 +171,6 @@ extern enum pe_graph_flags clone_update_actions(action_t * first, action_t * the + enum pe_action_flags filter, enum pe_ordering type); + + gboolean update_action_flags(action_t * action, enum pe_action_flags flags); ++gboolean update_action(action_t * action); + + #endif +diff --git a/pengine/clone.c b/pengine/clone.c +index 3840a0a..ebf53ed 100644 +--- a/pengine/clone.c ++++ b/pengine/clone.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #define VARIANT_CLONE 1 + #include +@@ -1338,6 +1339,8 @@ clone_update_actions(action_t * first, action_t * then, node_t * node, enum pe_a + changed |= native_update_actions(first, then, node, flags, filter, type); + + for (; gIter != NULL; gIter = gIter->next) { ++ enum pe_graph_flags child_changed = pe_graph_none; ++ GListPtr lpc = NULL; + resource_t *child = (resource_t *) gIter->data; + action_t *child_action = find_first_action(child->actions, NULL, then->task, node); + +@@ -1345,9 +1348,17 @@ clone_update_actions(action_t * first, action_t * then, node_t * node, enum pe_a + enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); + + if (is_set(child_flags, pe_action_runnable)) { +- changed |= ++ ++ child_changed |= + child->cmds->update_actions(first, child_action, node, flags, filter, type); + } ++ changed |= child_changed; ++ if (child_changed & pe_graph_updated_then) { ++ for (lpc = child_action->actions_after; lpc != NULL; lpc = lpc->next) { ++ action_wrapper_t *other = (action_wrapper_t *) lpc->data; ++ update_action(other->action); ++ } ++ } + } + } + } +diff --git a/pengine/constraints.c b/pengine/constraints.c +index 1f44811..7527aa6 100644 +--- a/pengine/constraints.c ++++ b/pengine/constraints.c +@@ -256,7 +256,7 @@ unpack_simple_rsc_order(xmlNode * xml_obj, pe_working_set_t * data_set) + resource_t *rsc_then = NULL; + resource_t *rsc_first = NULL; + gboolean invert_bool = TRUE; +- gboolean require_all = TRUE; ++ int min_required_before = 0; + enum pe_order_kind kind = pe_order_kind_mandatory; + enum pe_ordering cons_weight = pe_order_optional; + +@@ -351,7 +351,15 @@ unpack_simple_rsc_order(xmlNode * xml_obj, pe_working_set_t * data_set) + && crm_is_true(require_all_s) == FALSE + && rsc_first->variant >= pe_clone) { + +- require_all = FALSE; ++ /* require-all=false means only one instance of the clone is required */ ++ min_required_before = 1; ++ } else if (rsc_first->variant >= pe_clone) { ++ const char *min_clones_s = g_hash_table_lookup(rsc_first->meta, XML_RSC_ATTR_INCARNATION_MIN); ++ if (min_clones_s) { ++ /* if clone min is set, we require at a minimum X number of instances ++ * to be runnable before allowing dependencies to be runnable. */ ++ min_required_before = crm_parse_int(min_clones_s, "0"); ++ } + } + + cons_weight = pe_order_optional; +@@ -368,22 +376,31 @@ unpack_simple_rsc_order(xmlNode * xml_obj, pe_working_set_t * data_set) + cons_weight |= get_flags(id, kind, action_first, action_then, FALSE); + } + +- if (require_all == FALSE) { ++ /* If there is a minimum number of instances that must be runnable before ++ * the 'then' action is runnable, we use a pseudo action as an intermediate step ++ * start min number of clones -> pseudo action is runnable -> dependency runnable. 
*/
++    if (min_required_before) {
+         GListPtr rIter = NULL;
+         char *task = crm_concat(CRM_OP_RELAXED_CLONE, id, ':');
+         action_t *unordered_action = get_pseudo_op(task, data_set);
+         free(task);
+ 
++        /* require "min_required_before" of the pseudo action's 'before'
++         * actions to be runnable before the pseudo action itself is
++         * considered runnable. */
++        unordered_action->required_runnable_before = min_required_before;
+         update_action_flags(unordered_action, pe_action_requires_any);
+ 
+         for (rIter = rsc_first->children; id && rIter; rIter = rIter->next) {
+             resource_t *child = rIter->data;
+-
++            /* order each clone instance before the pseudo action */
+             custom_action_order(child, generate_op_key(child->id, action_first, 0), NULL,
+                                 NULL, NULL, unordered_action,
+                                 pe_order_one_or_more | pe_order_implies_then_printed, data_set);
+         }
+ 
++        /* order the "then" dependency to occur after the pseudo action only if
++         * the pseudo action is runnable */
+         order_id = custom_action_order(NULL, NULL, unordered_action,
+                        rsc_then, generate_op_key(rsc_then->id, action_then, 0), NULL,
+                        cons_weight | pe_order_runnable_left, data_set);
+diff --git a/pengine/graph.c b/pengine/graph.c
+index 9cfede6..3d832f0 100644
+--- a/pengine/graph.c
++++ b/pengine/graph.c
+@@ -29,7 +29,6 @@
+ #include
+ #include
+ #include
+ 
+-gboolean update_action(action_t * action);
+ void update_colo_start_chain(action_t * action);
+ gboolean rsc_update_action(action_t * first, action_t * then, enum pe_ordering type);
+ 
+@@ -261,8 +260,16 @@ graph_update_action(action_t * first, action_t * then, node_t * node, enum pe_ac
+                                             pe_action_runnable, pe_order_one_or_more);
+ 
+         } else if (is_set(flags, pe_action_runnable)) {
+-            if (update_action_flags(then, pe_action_runnable)) {
+-                changed |= pe_graph_updated_then;
++            /* a "first" action is considered runnable, so increment
++             * the 'runnable_before' counter */
++            then->runnable_before++;
++
++            /* if the runnable count for 'then' has reached the required number
++             * of runnable "before" actions, mark 'then' as runnable */
++            if (then->runnable_before >= then->required_runnable_before) {
++                if (update_action_flags(then, pe_action_runnable)) {
++                    changed |= pe_graph_updated_then;
++                }
+             }
+         }
+         if (changed) {
+@@ -456,6 +463,18 @@ update_action(action_t * then)
+               pe_action_pseudo) ? "pseudo" : then->node ? then->node->details->uname : "");
+ 
+     if (is_set(then->flags, pe_action_requires_any)) {
++        /* initialize the count of known runnable 'before' actions to 0;
++         * from here, graph_update_action() is called for each of
++         * then's before actions, and this count is incremented as
++         * runnable 'first' actions are encountered */
++        then->runnable_before = 0;
++
++        /* for backwards compatibility with previous options that use
++         * the 'requires_any' flag, initialize the requirement to 1 if it is
++         * not set. 
*/ ++ if (then->required_runnable_before == 0) { ++ then->required_runnable_before = 1; ++ } + clear_bit(then->flags, pe_action_runnable); + /* We are relying on the pe_order_one_or_more clause of + * graph_update_action(), called as part of the: +diff --git a/pengine/native.c b/pengine/native.c +index b93f8da..7d5f602 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -2817,8 +2817,7 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete, + } + + static void +-native_start_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, +- pe_working_set_t * data_set) ++native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) + { + node_t *target = stonith_op ? stonith_op->node : NULL; + +@@ -2893,14 +2892,24 @@ find_fence_target_node_actions(GListPtr search_list, const char *key, node_t *fe + } + + static void +-native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, +- pe_working_set_t * data_set) ++native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) + { + char *key = NULL; + GListPtr gIter = NULL; + GListPtr action_list = NULL; ++ ++ action_t *start = NULL; + resource_t *top = uber_parent(rsc); + ++ key = start_key(rsc); ++ action_list = find_actions(rsc->actions, key, NULL); ++ if(action_list) { ++ start = action_list->data; ++ } ++ ++ g_list_free(action_list); ++ free(key); ++ + key = stop_key(rsc); + action_list = find_fence_target_node_actions(rsc->actions, key, stonith_op->node, data_set); + free(key); +@@ -2932,7 +2941,7 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto + update_action_flags(action, pe_action_runnable); + update_action_flags(action, pe_action_implied_by_stonith); + +- { ++ if(start == NULL || start->needs > rsc_req_quorum) { + enum pe_ordering flags = pe_order_optional; + action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); + +@@ -3032,7 +3041,8 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto + crm_trace("here - 1"); + update_action_flags(action, pe_action_pseudo); + update_action_flags(action, pe_action_runnable); +- if (is_stonith == FALSE) { ++ ++ if (start == NULL || start->needs > rsc_req_quorum) { + order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); + } + } +@@ -3044,8 +3054,6 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_sto + void + rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) + { +- gboolean is_stonith = FALSE; +- + if (rsc->children) { + GListPtr gIter = NULL; + +@@ -3063,11 +3071,11 @@ rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * + } + + /* Start constraints */ +- native_start_constraints(rsc, stonith_op, is_stonith, data_set); ++ native_start_constraints(rsc, stonith_op, data_set); + + /* Stop constraints */ + if (stonith_op) { +- native_stop_constraints(rsc, stonith_op, is_stonith, data_set); ++ native_stop_constraints(rsc, stonith_op, data_set); + } + } + +diff --git a/pengine/regression.sh b/pengine/regression.sh +index d184798..7f73f92 100755 +--- a/pengine/regression.sh ++++ b/pengine/regression.sh +@@ -31,6 +31,20 @@ info Performing the following tests from $io_dir + create_mode="false" + + echo "" ++do_test cloned_start_one "order first clone then clone... first clone_min=2" ++do_test cloned_start_two "order first clone then clone... 
first clone_min=2" ++do_test cloned_stop_one "order first clone then clone... first clone_min=2" ++do_test cloned_stop_two "order first clone then clone... first clone_min=2" ++do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true" ++do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true" ++do_test clone_min_interleave_stop_one "order first clone then clone... first clone_min=2 and then has interleave=true" ++do_test clone_min_interleave_stop_two "order first clone then clone... first clone_min=2 and then has interleave=true" ++do_test clone_min_start_one "order first clone then primitive... first clone_min=2" ++do_test clone_min_start_two "order first clone then primitive... first clone_min=2" ++do_test clone_min_stop_all "order first clone then primitive... first clone_min=2" ++do_test clone_min_stop_one "order first clone then primitive... first clone_min=2" ++do_test clone_min_stop_two "order first clone then primitive... first clone_min=2" ++ + do_test simple1 "Offline " + do_test simple2 "Start " + do_test simple3 "Start 2 " +diff --git a/pengine/test10/bug-5186-partial-migrate.dot b/pengine/test10/bug-5186-partial-migrate.dot +index 033d41d..65f5616 100644 +--- a/pengine/test10/bug-5186-partial-migrate.dot ++++ b/pengine/test10/bug-5186-partial-migrate.dot +@@ -66,13 +66,10 @@ + "stonith 'reboot' bl460g1n7" -> "clnDiskd1_stop_0" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "clnDiskd2_stop_0" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "clnPing_stop_0" [ style = bold] +-"stonith 'reboot' bl460g1n7" -> "grpStonith8_stop_0" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "prmDiskd1_stop_0 bl460g1n7" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "prmDiskd2_stop_0 bl460g1n7" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "prmDummy_stop_0 bl460g1n7" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "prmPing_stop_0 bl460g1n7" [ style = bold] +-"stonith 'reboot' bl460g1n7" -> "prmStonith8-1_stop_0 bl460g1n7" [ style = bold] +-"stonith 'reboot' bl460g1n7" -> "prmStonith8-2_stop_0 bl460g1n7" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "prmVM2_stop_0 bl460g1n7" [ style = bold] + "stonith 'reboot' bl460g1n7" -> "stonith_complete" [ style = bold] + "stonith 'reboot' bl460g1n7" [ style=bold color="green" fontcolor="black"] +diff --git a/pengine/test10/bug-5186-partial-migrate.exp b/pengine/test10/bug-5186-partial-migrate.exp +index 216d962..bc058ea 100644 +--- a/pengine/test10/bug-5186-partial-migrate.exp ++++ b/pengine/test10/bug-5186-partial-migrate.exp +@@ -104,11 +104,7 @@ + + + +- +- +- +- +- ++ + + + +@@ -182,9 +178,6 @@ + + + +- +- +- + + + +@@ -229,9 +222,6 @@ + + + +- +- +- + + + +diff --git a/pengine/test10/bug-5186-partial-migrate.summary b/pengine/test10/bug-5186-partial-migrate.summary +index f848c97..5e62a23 100644 +--- a/pengine/test10/bug-5186-partial-migrate.summary ++++ b/pengine/test10/bug-5186-partial-migrate.summary +@@ -35,18 +35,22 @@ Transition Summary: + + Executing cluster transition: + * Resource action: prmVM2 stop on bl460g1n6 ++ * Pseudo action: grpStonith8_stop_0 ++ * Pseudo action: prmStonith8-2_stop_0 + * Fencing bl460g1n7 (reboot) + * Pseudo action: stonith_complete + * Pseudo action: prmDummy_stop_0 + * Pseudo action: prmVM2_stop_0 +- * Pseudo action: grpStonith8_stop_0 +- * Pseudo action: prmStonith8-2_stop_0 ++ * Pseudo action: prmStonith8-1_stop_0 + * Pseudo action: clnDiskd1_stop_0 + * Pseudo action: 
clnDiskd2_stop_0 + * Pseudo action: clnPing_stop_0 + * Resource action: prmDummy start on bl460g1n6 + * Resource action: prmVM2 start on bl460g1n8 +- * Pseudo action: prmStonith8-1_stop_0 ++ * Pseudo action: grpStonith8_stopped_0 ++ * Pseudo action: grpStonith8_start_0 ++ * Resource action: prmStonith8-1 start on bl460g1n6 ++ * Resource action: prmStonith8-2 start on bl460g1n6 + * Pseudo action: prmDiskd1_stop_0 + * Pseudo action: clnDiskd1_stopped_0 + * Pseudo action: prmDiskd2_stop_0 +@@ -55,10 +59,6 @@ Executing cluster transition: + * Pseudo action: clnPing_stopped_0 + * Pseudo action: all_stopped + * Resource action: prmVM2 monitor=10000 on bl460g1n8 +- * Pseudo action: grpStonith8_stopped_0 +- * Pseudo action: grpStonith8_start_0 +- * Resource action: prmStonith8-1 start on bl460g1n6 +- * Resource action: prmStonith8-2 start on bl460g1n6 + * Pseudo action: grpStonith8_running_0 + * Resource action: prmStonith8-1 monitor=10000 on bl460g1n6 + * Resource action: prmStonith8-2 monitor=3600000 on bl460g1n6 +diff --git a/pengine/test10/bug-lf-2551.dot b/pengine/test10/bug-lf-2551.dot +index ed80e15..18bca44 100644 +--- a/pengine/test10/bug-lf-2551.dot ++++ b/pengine/test10/bug-lf-2551.dot +@@ -56,7 +56,6 @@ digraph "g" { + "stonith 'reboot' hex-9" -> "cmirrord:3_stop_0 hex-9" [ style = bold] + "stonith 'reboot' hex-9" -> "dlm:3_stop_0 hex-9" [ style = bold] + "stonith 'reboot' hex-9" -> "dummy1_stop_0 hex-9" [ style = bold] +-"stonith 'reboot' hex-9" -> "fencing-sbd_stop_0 hex-9" [ style = bold] + "stonith 'reboot' hex-9" -> "o2cb:3_stop_0 hex-9" [ style = bold] + "stonith 'reboot' hex-9" -> "ocfs2-1:3_stop_0 hex-9" [ style = bold] + "stonith 'reboot' hex-9" -> "stonith_complete" [ style = bold] +diff --git a/pengine/test10/bug-lf-2551.exp b/pengine/test10/bug-lf-2551.exp +index 0af9010..d6266e1 100644 +--- a/pengine/test10/bug-lf-2551.exp ++++ b/pengine/test10/bug-lf-2551.exp +@@ -18,11 +18,7 @@ + + + +- +- +- +- +- ++ + + + +diff --git a/pengine/test10/bug-lf-2551.summary b/pengine/test10/bug-lf-2551.summary +index f8d861c..158eb73 100644 +--- a/pengine/test10/bug-lf-2551.summary ++++ b/pengine/test10/bug-lf-2551.summary +@@ -107,6 +107,7 @@ Transition Summary: + * Stop vm-61 (hex-9) + + Executing cluster transition: ++ * Pseudo action: fencing-sbd_stop_0 + * Resource action: dummy1 monitor=300000 on hex-8 + * Resource action: dummy1 monitor=300000 on hex-7 + * Fencing hex-9 (reboot) +@@ -114,7 +115,7 @@ Executing cluster transition: + * Pseudo action: load_stopped_hex-8 + * Pseudo action: load_stopped_hex-7 + * Pseudo action: load_stopped_hex-0 +- * Pseudo action: fencing-sbd_stop_0 ++ * Resource action: fencing-sbd start on hex-0 + * Pseudo action: dummy1_stop_0 + * Pseudo action: vm-03_stop_0 + * Pseudo action: vm-06_stop_0 +@@ -133,7 +134,6 @@ Executing cluster transition: + * Pseudo action: vm-57_stop_0 + * Pseudo action: vm-61_stop_0 + * Pseudo action: load_stopped_hex-9 +- * Resource action: fencing-sbd start on hex-0 + * Resource action: dummy1 start on hex-0 + * Pseudo action: base-clone_stop_0 + * Resource action: dummy1 monitor=30000 on hex-0 +diff --git a/pengine/test10/clone_min_interleave_start_one.dot b/pengine/test10/clone_min_interleave_start_one.dot +new file mode 100644 +index 0000000..15ac9be +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_one.dot +@@ -0,0 +1,50 @@ ++ digraph "g" { ++"FAKE1-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_start_0" -> "FAKE1-clone_running_0" [ style = bold] ++"FAKE1-clone_start_0" -> 
"FAKE1_start_0 c7auto1" [ style = bold] ++"FAKE1-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE1_start_0 c7auto1" -> "FAKE1-clone_running_0" [ style = bold] ++"FAKE1_start_0 c7auto1" -> "FAKE1_monitor_10000 c7auto1" [ style = bold] ++"FAKE1_start_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE2-clone_running_0" -> "FAKE3-clone_start_0" [ style = dashed] ++"FAKE2-clone_running_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE2-clone_start_0" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2-clone_start_0" -> "FAKE2:1_start_0 c7auto3" [ style = dashed] ++"FAKE2-clone_start_0" -> "FAKE2:2_start_0 c7auto1" [ style = dashed] ++"FAKE2-clone_start_0" -> "FAKE2_start_0 c7auto2" [ style = dashed] ++"FAKE2-clone_start_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE2:1_monitor_10000 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE2:1_start_0 c7auto3" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2:1_start_0 c7auto3" -> "FAKE2:1_monitor_10000 c7auto3" [ style = dashed] ++"FAKE2:1_start_0 c7auto3" -> "FAKE3:1_start_0 c7auto3" [ style = dashed] ++"FAKE2:1_start_0 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE2:2_monitor_10000 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE2:2_start_0 c7auto1" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2:2_start_0 c7auto1" -> "FAKE2:2_monitor_10000 c7auto1" [ style = dashed] ++"FAKE2:2_start_0 c7auto1" -> "FAKE3:2_start_0 c7auto1" [ style = dashed] ++"FAKE2:2_start_0 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_monitor_10000 c7auto2" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_start_0 c7auto2" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2_start_0 c7auto2" -> "FAKE2_monitor_10000 c7auto2" [ style = dashed] ++"FAKE2_start_0 c7auto2" -> "FAKE3_start_0 c7auto2" [ style = dashed] ++"FAKE2_start_0 c7auto2" [ style=dashed color="red" fontcolor="black"] ++"FAKE3-clone_running_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE3-clone_start_0" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3-clone_start_0" -> "FAKE3:1_start_0 c7auto3" [ style = dashed] ++"FAKE3-clone_start_0" -> "FAKE3:2_start_0 c7auto1" [ style = dashed] ++"FAKE3-clone_start_0" -> "FAKE3_start_0 c7auto2" [ style = dashed] ++"FAKE3-clone_start_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE3:1_monitor_10000 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE3:1_start_0 c7auto3" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3:1_start_0 c7auto3" -> "FAKE3:1_monitor_10000 c7auto3" [ style = dashed] ++"FAKE3:1_start_0 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE3:2_monitor_10000 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE3:2_start_0 c7auto1" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3:2_start_0 c7auto1" -> "FAKE3:2_monitor_10000 c7auto1" [ style = dashed] ++"FAKE3:2_start_0 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_monitor_10000 c7auto2" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_start_0 c7auto2" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3_start_0 c7auto2" -> "FAKE3_monitor_10000 c7auto2" [ style = dashed] ++"FAKE3_start_0 c7auto2" [ style=dashed color="red" fontcolor="black"] ++} +diff --git a/pengine/test10/clone_min_interleave_start_one.exp b/pengine/test10/clone_min_interleave_start_one.exp +new file mode 100644 +index 0000000..b6e0c5d +--- /dev/null 
++++ b/pengine/test10/clone_min_interleave_start_one.exp +@@ -0,0 +1,51 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_interleave_start_one.scores b/pengine/test10/clone_min_interleave_start_one.scores +new file mode 100644 +index 0000000..03de018 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_one.scores +@@ -0,0 +1,67 @@ ++Allocation scores: ++clone_color: FAKE1-clone allocation score on c7auto1: 0 ++clone_color: FAKE1-clone allocation score on c7auto2: -INFINITY ++clone_color: FAKE1-clone allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:0 allocation score on c7auto1: 0 ++clone_color: FAKE1:0 allocation score on c7auto2: -INFINITY ++clone_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:1 allocation score on c7auto1: 0 ++clone_color: FAKE1:1 allocation score on c7auto2: -INFINITY ++clone_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:2 allocation score on c7auto1: 0 ++clone_color: FAKE1:2 allocation score on c7auto2: -INFINITY ++clone_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++clone_color: FAKE2-clone allocation score on c7auto1: 0 ++clone_color: FAKE2-clone allocation score on c7auto2: 0 ++clone_color: FAKE2-clone allocation score on c7auto3: 0 ++clone_color: FAKE2:0 allocation score on c7auto1: 0 ++clone_color: FAKE2:0 allocation score on c7auto2: 0 ++clone_color: FAKE2:0 allocation score on c7auto3: 0 ++clone_color: FAKE2:1 allocation score on c7auto1: 0 ++clone_color: FAKE2:1 allocation score on c7auto2: 0 ++clone_color: FAKE2:1 allocation score on c7auto3: 0 ++clone_color: FAKE2:2 allocation score on c7auto1: 0 ++clone_color: FAKE2:2 allocation score on c7auto2: 0 ++clone_color: FAKE2:2 allocation score on c7auto3: 0 ++clone_color: FAKE3-clone allocation score on c7auto1: 0 ++clone_color: FAKE3-clone allocation score on c7auto2: 0 ++clone_color: FAKE3-clone allocation score on c7auto3: 0 ++clone_color: FAKE3:0 allocation score on c7auto1: 0 ++clone_color: FAKE3:0 allocation score on c7auto2: 0 ++clone_color: FAKE3:0 allocation score on c7auto3: 0 ++clone_color: FAKE3:1 allocation score on c7auto1: 0 ++clone_color: FAKE3:1 allocation score on c7auto2: 0 ++clone_color: FAKE3:1 allocation score on c7auto3: 0 ++clone_color: FAKE3:2 allocation score on c7auto1: 0 ++clone_color: FAKE3:2 allocation score on c7auto2: 0 ++clone_color: FAKE3:2 allocation score on c7auto3: 0 ++native_color: FAKE1:0 allocation score on c7auto1: 0 ++native_color: FAKE1:0 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto1: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto1: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++native_color: FAKE2:0 allocation score on c7auto1: 0 ++native_color: FAKE2:0 allocation score on c7auto2: 0 ++native_color: FAKE2:0 allocation score on c7auto3: 0 ++native_color: FAKE2:1 allocation score on c7auto1: 0 ++native_color: FAKE2:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE2:1 allocation score on c7auto3: 0 ++native_color: FAKE2:2 allocation score on c7auto1: 0 ++native_color: FAKE2:2 allocation score on 
c7auto2: -INFINITY ++native_color: FAKE2:2 allocation score on c7auto3: -INFINITY ++native_color: FAKE3:0 allocation score on c7auto1: 0 ++native_color: FAKE3:0 allocation score on c7auto2: 0 ++native_color: FAKE3:0 allocation score on c7auto3: 0 ++native_color: FAKE3:1 allocation score on c7auto1: 0 ++native_color: FAKE3:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE3:1 allocation score on c7auto3: 0 ++native_color: FAKE3:2 allocation score on c7auto1: 0 ++native_color: FAKE3:2 allocation score on c7auto2: -INFINITY ++native_color: FAKE3:2 allocation score on c7auto3: -INFINITY ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 +diff --git a/pengine/test10/clone_min_interleave_start_one.summary b/pengine/test10/clone_min_interleave_start_one.summary +new file mode 100644 +index 0000000..b15f68a +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_one.summary +@@ -0,0 +1,39 @@ ++ ++Current cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ ++Transition Summary: ++ * Start FAKE1:0 (c7auto1) ++ * Start FAKE2:0 (c7auto2 - blocked) ++ * Start FAKE2:1 (c7auto3 - blocked) ++ * Start FAKE2:2 (c7auto1 - blocked) ++ * Start FAKE3:0 (c7auto2 - blocked) ++ * Start FAKE3:1 (c7auto3 - blocked) ++ * Start FAKE3:2 (c7auto1 - blocked) ++ ++Executing cluster transition: ++ * Pseudo action: FAKE1-clone_start_0 ++ * Resource action: FAKE1 start on c7auto1 ++ * Pseudo action: FAKE1-clone_running_0 ++ * Resource action: FAKE1 monitor=10000 on c7auto1 ++ ++Revised cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Started: [ c7auto1 ] ++ Stopped: [ c7auto2 c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ +diff --git a/pengine/test10/clone_min_interleave_start_one.xml b/pengine/test10/clone_min_interleave_start_one.xml +new file mode 100644 +index 0000000..fbe99de +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_one.xml +@@ -0,0 +1,155 @@ ++ [... 155 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_interleave_start_two.dot b/pengine/test10/clone_min_interleave_start_two.dot +new file mode 100644 +index 0000000..f99ce32 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_two.dot +@@ -0,0 +1,59 @@ ++ digraph "g" { ++"FAKE1-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_start_0" -> "FAKE1-clone_running_0" [ style = bold] ++"FAKE1-clone_start_0" -> "FAKE1:1_start_0 c7auto1" [ style = bold] ++"FAKE1-clone_start_0" -> "FAKE1_start_0 c7auto2" [ style = bold] ++"FAKE1-clone_start_0" [ style=bold color="green"
fontcolor="orange"] ++"FAKE1:1_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE1:1_start_0 c7auto1" -> "FAKE1-clone_running_0" [ style = bold] ++"FAKE1:1_start_0 c7auto1" -> "FAKE1:1_monitor_10000 c7auto1" [ style = bold] ++"FAKE1:1_start_0 c7auto1" -> "clone-one-or-more:order-FAKE1-clone-FAKE2-clone-mandatory" [ style = bold] ++"FAKE1:1_start_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE1_monitor_10000 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE1_start_0 c7auto2" -> "FAKE1-clone_running_0" [ style = bold] ++"FAKE1_start_0 c7auto2" -> "FAKE1_monitor_10000 c7auto2" [ style = bold] ++"FAKE1_start_0 c7auto2" -> "clone-one-or-more:order-FAKE1-clone-FAKE2-clone-mandatory" [ style = bold] ++"FAKE1_start_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE2-clone_running_0" -> "FAKE3-clone_start_0" [ style = bold] ++"FAKE2-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE2-clone_start_0" -> "FAKE2-clone_running_0" [ style = bold] ++"FAKE2-clone_start_0" -> "FAKE2:1_start_0 c7auto2" [ style = bold] ++"FAKE2-clone_start_0" -> "FAKE2:2_start_0 c7auto1" [ style = bold] ++"FAKE2-clone_start_0" -> "FAKE2_start_0 c7auto3" [ style = bold] ++"FAKE2-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE2:1_monitor_10000 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE2:1_start_0 c7auto2" -> "FAKE2-clone_running_0" [ style = bold] ++"FAKE2:1_start_0 c7auto2" -> "FAKE2:1_monitor_10000 c7auto2" [ style = bold] ++"FAKE2:1_start_0 c7auto2" -> "FAKE3:1_start_0 c7auto2" [ style = bold] ++"FAKE2:1_start_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE2:2_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE2:2_start_0 c7auto1" -> "FAKE2-clone_running_0" [ style = bold] ++"FAKE2:2_start_0 c7auto1" -> "FAKE2:2_monitor_10000 c7auto1" [ style = bold] ++"FAKE2:2_start_0 c7auto1" -> "FAKE3:2_start_0 c7auto1" [ style = bold] ++"FAKE2:2_start_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE2_monitor_10000 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKE2_start_0 c7auto3" -> "FAKE2-clone_running_0" [ style = bold] ++"FAKE2_start_0 c7auto3" -> "FAKE2_monitor_10000 c7auto3" [ style = bold] ++"FAKE2_start_0 c7auto3" -> "FAKE3_start_0 c7auto3" [ style = bold] ++"FAKE2_start_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKE3-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE3-clone_start_0" -> "FAKE3-clone_running_0" [ style = bold] ++"FAKE3-clone_start_0" -> "FAKE3:1_start_0 c7auto2" [ style = bold] ++"FAKE3-clone_start_0" -> "FAKE3:2_start_0 c7auto1" [ style = bold] ++"FAKE3-clone_start_0" -> "FAKE3_start_0 c7auto3" [ style = bold] ++"FAKE3-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE3:1_monitor_10000 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE3:1_start_0 c7auto2" -> "FAKE3-clone_running_0" [ style = bold] ++"FAKE3:1_start_0 c7auto2" -> "FAKE3:1_monitor_10000 c7auto2" [ style = bold] ++"FAKE3:1_start_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE3:2_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE3:2_start_0 c7auto1" -> "FAKE3-clone_running_0" [ style = bold] ++"FAKE3:2_start_0 c7auto1" -> "FAKE3:2_monitor_10000 c7auto1" [ style = bold] ++"FAKE3:2_start_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE3_monitor_10000 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKE3_start_0 
c7auto3" -> "FAKE3-clone_running_0" [ style = bold] ++"FAKE3_start_0 c7auto3" -> "FAKE3_monitor_10000 c7auto3" [ style = bold] ++"FAKE3_start_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"clone-one-or-more:order-FAKE1-clone-FAKE2-clone-mandatory" -> "FAKE2-clone_start_0" [ style = bold] ++"clone-one-or-more:order-FAKE1-clone-FAKE2-clone-mandatory" [ style=bold color="green" fontcolor="orange"] ++} +diff --git a/pengine/test10/clone_min_interleave_start_two.exp b/pengine/test10/clone_min_interleave_start_two.exp +new file mode 100644 +index 0000000..9846072 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_two.exp +@@ -0,0 +1,326 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_interleave_start_two.scores b/pengine/test10/clone_min_interleave_start_two.scores +new file mode 100644 +index 0000000..d443c58 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_two.scores +@@ -0,0 +1,67 @@ ++Allocation scores: ++clone_color: FAKE1-clone allocation score on c7auto1: 0 ++clone_color: FAKE1-clone allocation score on c7auto2: 0 ++clone_color: FAKE1-clone allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:0 allocation score on c7auto1: 0 ++clone_color: FAKE1:0 allocation score on c7auto2: 0 ++clone_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:1 allocation score on c7auto1: 0 ++clone_color: FAKE1:1 allocation score on c7auto2: 0 ++clone_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:2 allocation score on c7auto1: 0 ++clone_color: FAKE1:2 allocation score on c7auto2: 0 ++clone_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++clone_color: FAKE2-clone allocation score on c7auto1: 0 ++clone_color: FAKE2-clone allocation score on c7auto2: 0 ++clone_color: FAKE2-clone allocation score on c7auto3: 0 ++clone_color: FAKE2:0 allocation score on c7auto1: 0 ++clone_color: FAKE2:0 allocation score on c7auto2: 0 ++clone_color: FAKE2:0 allocation score on c7auto3: 0 ++clone_color: FAKE2:1 allocation score on c7auto1: 0 ++clone_color: FAKE2:1 allocation score on c7auto2: 0 ++clone_color: FAKE2:1 allocation score on c7auto3: 0 ++clone_color: FAKE2:2 allocation score on c7auto1: 0 ++clone_color: FAKE2:2 allocation score on c7auto2: 0 ++clone_color: FAKE2:2 allocation score on c7auto3: 0 ++clone_color: FAKE3-clone allocation score on c7auto1: 0 ++clone_color: FAKE3-clone allocation score on c7auto2: 0 ++clone_color: FAKE3-clone allocation score on c7auto3: 0 ++clone_color: FAKE3:0 allocation score on c7auto1: 0 ++clone_color: FAKE3:0 allocation score on 
c7auto2: 0 ++clone_color: FAKE3:0 allocation score on c7auto3: 0 ++clone_color: FAKE3:1 allocation score on c7auto1: 0 ++clone_color: FAKE3:1 allocation score on c7auto2: 0 ++clone_color: FAKE3:1 allocation score on c7auto3: 0 ++clone_color: FAKE3:2 allocation score on c7auto1: 0 ++clone_color: FAKE3:2 allocation score on c7auto2: 0 ++clone_color: FAKE3:2 allocation score on c7auto3: 0 ++native_color: FAKE1:0 allocation score on c7auto1: 0 ++native_color: FAKE1:0 allocation score on c7auto2: 0 ++native_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto1: 0 ++native_color: FAKE1:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto1: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++native_color: FAKE2:0 allocation score on c7auto1: 0 ++native_color: FAKE2:0 allocation score on c7auto2: 0 ++native_color: FAKE2:0 allocation score on c7auto3: 0 ++native_color: FAKE2:1 allocation score on c7auto1: 0 ++native_color: FAKE2:1 allocation score on c7auto2: 0 ++native_color: FAKE2:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE2:2 allocation score on c7auto1: 0 ++native_color: FAKE2:2 allocation score on c7auto2: -INFINITY ++native_color: FAKE2:2 allocation score on c7auto3: -INFINITY ++native_color: FAKE3:0 allocation score on c7auto1: 0 ++native_color: FAKE3:0 allocation score on c7auto2: 0 ++native_color: FAKE3:0 allocation score on c7auto3: 0 ++native_color: FAKE3:1 allocation score on c7auto1: 0 ++native_color: FAKE3:1 allocation score on c7auto2: 0 ++native_color: FAKE3:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE3:2 allocation score on c7auto1: 0 ++native_color: FAKE3:2 allocation score on c7auto2: -INFINITY ++native_color: FAKE3:2 allocation score on c7auto3: -INFINITY ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 +diff --git a/pengine/test10/clone_min_interleave_start_two.summary b/pengine/test10/clone_min_interleave_start_two.summary +new file mode 100644 +index 0000000..9f928f2 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_two.summary +@@ -0,0 +1,59 @@ ++ ++Current cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ ++Transition Summary: ++ * Start FAKE1:0 (c7auto2) ++ * Start FAKE1:1 (c7auto1) ++ * Start FAKE2:0 (c7auto3) ++ * Start FAKE2:1 (c7auto2) ++ * Start FAKE2:2 (c7auto1) ++ * Start FAKE3:0 (c7auto3) ++ * Start FAKE3:1 (c7auto2) ++ * Start FAKE3:2 (c7auto1) ++ ++Executing cluster transition: ++ * Pseudo action: FAKE1-clone_start_0 ++ * Resource action: FAKE1 start on c7auto2 ++ * Resource action: FAKE1 start on c7auto1 ++ * Pseudo action: FAKE1-clone_running_0 ++ * Pseudo action: clone-one-or-more:order-FAKE1-clone-FAKE2-clone-mandatory ++ * Resource action: FAKE1 monitor=10000 on c7auto2 ++ * Resource action: FAKE1 monitor=10000 on c7auto1 ++ * Pseudo action: FAKE2-clone_start_0 ++ * Resource action: FAKE2 start on c7auto3 ++ * Resource action: FAKE2 start on c7auto2 ++ * Resource action: FAKE2 start 
on c7auto1 ++ * Pseudo action: FAKE2-clone_running_0 ++ * Pseudo action: FAKE3-clone_start_0 ++ * Resource action: FAKE2 monitor=10000 on c7auto3 ++ * Resource action: FAKE2 monitor=10000 on c7auto2 ++ * Resource action: FAKE2 monitor=10000 on c7auto1 ++ * Resource action: FAKE3 start on c7auto3 ++ * Resource action: FAKE3 start on c7auto2 ++ * Resource action: FAKE3 start on c7auto1 ++ * Pseudo action: FAKE3-clone_running_0 ++ * Resource action: FAKE3 monitor=10000 on c7auto3 ++ * Resource action: FAKE3 monitor=10000 on c7auto2 ++ * Resource action: FAKE3 monitor=10000 on c7auto1 ++ ++Revised cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Started: [ c7auto1 c7auto2 ] ++ Stopped: [ c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ +diff --git a/pengine/test10/clone_min_interleave_start_two.xml b/pengine/test10/clone_min_interleave_start_two.xml +new file mode 100644 +index 0000000..2507018 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_start_two.xml +@@ -0,0 +1,154 @@ ++ [... 154 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_interleave_stop_one.dot b/pengine/test10/clone_min_interleave_stop_one.dot +new file mode 100644 +index 0000000..a66ceb6 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_one.dot +@@ -0,0 +1,18 @@ ++ digraph "g" { ++"FAKE1-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_start_0" -> "FAKE1-clone_running_0" [ style = bold] ++"FAKE1-clone_start_0" -> "FAKE1_start_0 " [ style = dashed] ++"FAKE1-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_stop_0" -> "FAKE1-clone_stopped_0" [ style = bold] ++"FAKE1-clone_stop_0" -> "FAKE1_stop_0 c7auto3" [ style = bold] ++"FAKE1-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_stopped_0" -> "FAKE1-clone_start_0" [ style = bold] ++"FAKE1-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1_start_0 " -> "FAKE1-clone_running_0" [ style = dashed] ++"FAKE1_start_0 " [ style=dashed color="red" fontcolor="black"] ++"FAKE1_stop_0 c7auto3" -> "FAKE1-clone_stopped_0" [ style = bold] ++"FAKE1_stop_0 c7auto3" -> "FAKE1_start_0 " [ style = dashed] ++"FAKE1_stop_0 c7auto3" -> "all_stopped" [ style = bold] ++"FAKE1_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"all_stopped" [ style=bold color="green" fontcolor="orange"] ++} +diff --git a/pengine/test10/clone_min_interleave_stop_one.exp b/pengine/test10/clone_min_interleave_stop_one.exp +new file mode 100644 +index 0000000..31a15da +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_one.exp +@@ -0,0 +1,74 @@ ++ [... 74 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_interleave_stop_one.scores
b/pengine/test10/clone_min_interleave_stop_one.scores +new file mode 100644 +index 0000000..1a98230 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_one.scores +@@ -0,0 +1,67 @@ ++Allocation scores: ++clone_color: FAKE1-clone allocation score on c7auto1: 0 ++clone_color: FAKE1-clone allocation score on c7auto2: 0 ++clone_color: FAKE1-clone allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:0 allocation score on c7auto1: 0 ++clone_color: FAKE1:0 allocation score on c7auto2: 0 ++clone_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:1 allocation score on c7auto1: 1 ++clone_color: FAKE1:1 allocation score on c7auto2: 0 ++clone_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:2 allocation score on c7auto1: 0 ++clone_color: FAKE1:2 allocation score on c7auto2: 1 ++clone_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++clone_color: FAKE2-clone allocation score on c7auto1: 0 ++clone_color: FAKE2-clone allocation score on c7auto2: 0 ++clone_color: FAKE2-clone allocation score on c7auto3: 0 ++clone_color: FAKE2:0 allocation score on c7auto1: 0 ++clone_color: FAKE2:0 allocation score on c7auto2: 0 ++clone_color: FAKE2:0 allocation score on c7auto3: 1 ++clone_color: FAKE2:1 allocation score on c7auto1: 1 ++clone_color: FAKE2:1 allocation score on c7auto2: 0 ++clone_color: FAKE2:1 allocation score on c7auto3: 0 ++clone_color: FAKE2:2 allocation score on c7auto1: 0 ++clone_color: FAKE2:2 allocation score on c7auto2: 1 ++clone_color: FAKE2:2 allocation score on c7auto3: 0 ++clone_color: FAKE3-clone allocation score on c7auto1: 0 ++clone_color: FAKE3-clone allocation score on c7auto2: 0 ++clone_color: FAKE3-clone allocation score on c7auto3: 0 ++clone_color: FAKE3:0 allocation score on c7auto1: 0 ++clone_color: FAKE3:0 allocation score on c7auto2: 0 ++clone_color: FAKE3:0 allocation score on c7auto3: 1 ++clone_color: FAKE3:1 allocation score on c7auto1: 1 ++clone_color: FAKE3:1 allocation score on c7auto2: 0 ++clone_color: FAKE3:1 allocation score on c7auto3: 0 ++clone_color: FAKE3:2 allocation score on c7auto1: 0 ++clone_color: FAKE3:2 allocation score on c7auto2: 1 ++clone_color: FAKE3:2 allocation score on c7auto3: 0 ++native_color: FAKE1:0 allocation score on c7auto1: -INFINITY ++native_color: FAKE1:0 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto1: 1 ++native_color: FAKE1:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto1: 0 ++native_color: FAKE1:2 allocation score on c7auto2: 1 ++native_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++native_color: FAKE2:0 allocation score on c7auto1: 0 ++native_color: FAKE2:0 allocation score on c7auto2: 0 ++native_color: FAKE2:0 allocation score on c7auto3: 1 ++native_color: FAKE2:1 allocation score on c7auto1: 1 ++native_color: FAKE2:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE2:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE2:2 allocation score on c7auto1: 0 ++native_color: FAKE2:2 allocation score on c7auto2: 1 ++native_color: FAKE2:2 allocation score on c7auto3: -INFINITY ++native_color: FAKE3:0 allocation score on c7auto1: 0 ++native_color: FAKE3:0 allocation score on c7auto2: 0 ++native_color: FAKE3:0 allocation score on c7auto3: 1 ++native_color: FAKE3:1 allocation score on c7auto1: 1 ++native_color: FAKE3:1 allocation score on 
c7auto2: -INFINITY ++native_color: FAKE3:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE3:2 allocation score on c7auto1: 0 ++native_color: FAKE3:2 allocation score on c7auto2: 1 ++native_color: FAKE3:2 allocation score on c7auto3: -INFINITY ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 +diff --git a/pengine/test10/clone_min_interleave_stop_one.summary b/pengine/test10/clone_min_interleave_stop_one.summary +new file mode 100644 +index 0000000..9280b7e +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_one.summary +@@ -0,0 +1,35 @@ ++ ++Current cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ ++Transition Summary: ++ * Stop FAKE1:0 (c7auto3) ++ ++Executing cluster transition: ++ * Pseudo action: FAKE1-clone_stop_0 ++ * Resource action: FAKE1 stop on c7auto3 ++ * Pseudo action: FAKE1-clone_stopped_0 ++ * Pseudo action: FAKE1-clone_start_0 ++ * Pseudo action: all_stopped ++ * Pseudo action: FAKE1-clone_running_0 ++ ++Revised cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Started: [ c7auto1 c7auto2 ] ++ Stopped: [ c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ +diff --git a/pengine/test10/clone_min_interleave_stop_one.xml b/pengine/test10/clone_min_interleave_stop_one.xml +new file mode 100644 +index 0000000..31db5f3 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_one.xml +@@ -0,0 +1,153 @@ ++ [... 153 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_interleave_stop_two.dot b/pengine/test10/clone_min_interleave_stop_two.dot +new file mode 100644 +index 0000000..73f60dd +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_two.dot +@@ -0,0 +1,108 @@ ++ digraph "g" { ++"FAKE1-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_start_0" -> "FAKE1-clone_running_0" [ style = bold] ++"FAKE1-clone_start_0" -> "FAKE1_start_0 " [ style = dashed] ++"FAKE1-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_stop_0" -> "FAKE1-clone_stopped_0" [ style = bold] ++"FAKE1-clone_stop_0" -> "FAKE1_stop_0 c7auto2" [ style = bold] ++"FAKE1-clone_stop_0" -> "FAKE1_stop_0 c7auto3" [ style = bold] ++"FAKE1-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1-clone_stopped_0" -> "FAKE1-clone_start_0" [ style = bold] ++"FAKE1-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE1_start_0 " -> "FAKE1-clone_running_0" [ style = dashed] ++"FAKE1_start_0 " [ style=dashed color="red" fontcolor="black"] ++"FAKE1_stop_0 c7auto2" -> "FAKE1-clone_stopped_0" [
style = bold] ++"FAKE1_stop_0 c7auto2" -> "FAKE1_start_0 " [ style = dashed] ++"FAKE1_stop_0 c7auto2" -> "all_stopped" [ style = bold] ++"FAKE1_stop_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE1_stop_0 c7auto3" -> "FAKE1-clone_stopped_0" [ style = bold] ++"FAKE1_stop_0 c7auto3" -> "FAKE1_start_0 " [ style = dashed] ++"FAKE1_stop_0 c7auto3" -> "all_stopped" [ style = bold] ++"FAKE1_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKE2-clone_running_0" -> "FAKE3-clone_start_0" [ style = dashed] ++"FAKE2-clone_running_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE2-clone_start_0" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2-clone_start_0" -> "FAKE2_start_0 c7auto1" [ style = dashed] ++"FAKE2-clone_start_0" -> "FAKE2_start_0 c7auto2" [ style = dashed] ++"FAKE2-clone_start_0" -> "FAKE2_start_0 c7auto3" [ style = dashed] ++"FAKE2-clone_start_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE2-clone_stop_0" -> "FAKE2-clone_stopped_0" [ style = bold] ++"FAKE2-clone_stop_0" -> "FAKE2_stop_0 c7auto1" [ style = bold] ++"FAKE2-clone_stop_0" -> "FAKE2_stop_0 c7auto2" [ style = bold] ++"FAKE2-clone_stop_0" -> "FAKE2_stop_0 c7auto3" [ style = bold] ++"FAKE2-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE2-clone_stopped_0" -> "FAKE1-clone_stop_0" [ style = bold] ++"FAKE2-clone_stopped_0" -> "FAKE2-clone_start_0" [ style = dashed] ++"FAKE2-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE2_monitor_10000 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_monitor_10000 c7auto2" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_monitor_10000 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_start_0 c7auto1" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2_start_0 c7auto1" -> "FAKE2_monitor_10000 c7auto1" [ style = dashed] ++"FAKE2_start_0 c7auto1" -> "FAKE3_start_0 c7auto1" [ style = dashed] ++"FAKE2_start_0 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_start_0 c7auto2" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2_start_0 c7auto2" -> "FAKE2_monitor_10000 c7auto2" [ style = dashed] ++"FAKE2_start_0 c7auto2" -> "FAKE3_start_0 c7auto2" [ style = dashed] ++"FAKE2_start_0 c7auto2" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_start_0 c7auto3" -> "FAKE2-clone_running_0" [ style = dashed] ++"FAKE2_start_0 c7auto3" -> "FAKE2_monitor_10000 c7auto3" [ style = dashed] ++"FAKE2_start_0 c7auto3" -> "FAKE3_start_0 c7auto3" [ style = dashed] ++"FAKE2_start_0 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE2_stop_0 c7auto1" -> "FAKE2-clone_stopped_0" [ style = bold] ++"FAKE2_stop_0 c7auto1" -> "FAKE2_start_0 c7auto1" [ style = dashed] ++"FAKE2_stop_0 c7auto1" -> "all_stopped" [ style = bold] ++"FAKE2_stop_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE2_stop_0 c7auto2" -> "FAKE1_stop_0 c7auto2" [ style = bold] ++"FAKE2_stop_0 c7auto2" -> "FAKE2-clone_stopped_0" [ style = bold] ++"FAKE2_stop_0 c7auto2" -> "FAKE2_start_0 c7auto2" [ style = dashed] ++"FAKE2_stop_0 c7auto2" -> "all_stopped" [ style = bold] ++"FAKE2_stop_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE2_stop_0 c7auto3" -> "FAKE1_stop_0 c7auto3" [ style = bold] ++"FAKE2_stop_0 c7auto3" -> "FAKE2-clone_stopped_0" [ style = bold] ++"FAKE2_stop_0 c7auto3" -> "FAKE2_start_0 c7auto3" [ style = dashed] ++"FAKE2_stop_0 c7auto3" -> "all_stopped" [ style = bold] ++"FAKE2_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"] 
++"FAKE3-clone_running_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE3-clone_start_0" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3-clone_start_0" -> "FAKE3_start_0 c7auto1" [ style = dashed] ++"FAKE3-clone_start_0" -> "FAKE3_start_0 c7auto2" [ style = dashed] ++"FAKE3-clone_start_0" -> "FAKE3_start_0 c7auto3" [ style = dashed] ++"FAKE3-clone_start_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKE3-clone_stop_0" -> "FAKE3-clone_stopped_0" [ style = bold] ++"FAKE3-clone_stop_0" -> "FAKE3_stop_0 c7auto1" [ style = bold] ++"FAKE3-clone_stop_0" -> "FAKE3_stop_0 c7auto2" [ style = bold] ++"FAKE3-clone_stop_0" -> "FAKE3_stop_0 c7auto3" [ style = bold] ++"FAKE3-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE3-clone_stopped_0" -> "FAKE2-clone_stop_0" [ style = bold] ++"FAKE3-clone_stopped_0" -> "FAKE3-clone_start_0" [ style = dashed] ++"FAKE3-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"FAKE3_monitor_10000 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_monitor_10000 c7auto2" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_monitor_10000 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_start_0 c7auto1" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3_start_0 c7auto1" -> "FAKE3_monitor_10000 c7auto1" [ style = dashed] ++"FAKE3_start_0 c7auto1" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_start_0 c7auto2" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3_start_0 c7auto2" -> "FAKE3_monitor_10000 c7auto2" [ style = dashed] ++"FAKE3_start_0 c7auto2" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_start_0 c7auto3" -> "FAKE3-clone_running_0" [ style = dashed] ++"FAKE3_start_0 c7auto3" -> "FAKE3_monitor_10000 c7auto3" [ style = dashed] ++"FAKE3_start_0 c7auto3" [ style=dashed color="red" fontcolor="black"] ++"FAKE3_stop_0 c7auto1" -> "FAKE2_stop_0 c7auto1" [ style = bold] ++"FAKE3_stop_0 c7auto1" -> "FAKE3-clone_stopped_0" [ style = bold] ++"FAKE3_stop_0 c7auto1" -> "FAKE3_start_0 c7auto1" [ style = dashed] ++"FAKE3_stop_0 c7auto1" -> "all_stopped" [ style = bold] ++"FAKE3_stop_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKE3_stop_0 c7auto2" -> "FAKE2_stop_0 c7auto2" [ style = bold] ++"FAKE3_stop_0 c7auto2" -> "FAKE3-clone_stopped_0" [ style = bold] ++"FAKE3_stop_0 c7auto2" -> "FAKE3_start_0 c7auto2" [ style = dashed] ++"FAKE3_stop_0 c7auto2" -> "all_stopped" [ style = bold] ++"FAKE3_stop_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE3_stop_0 c7auto3" -> "FAKE2_stop_0 c7auto3" [ style = bold] ++"FAKE3_stop_0 c7auto3" -> "FAKE3-clone_stopped_0" [ style = bold] ++"FAKE3_stop_0 c7auto3" -> "FAKE3_start_0 c7auto3" [ style = dashed] ++"FAKE3_stop_0 c7auto3" -> "all_stopped" [ style = bold] ++"FAKE3_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"all_stopped" [ style=bold color="green" fontcolor="orange"] ++} +diff --git a/pengine/test10/clone_min_interleave_stop_two.exp b/pengine/test10/clone_min_interleave_stop_two.exp +new file mode 100644 +index 0000000..62fe1e6 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_two.exp +@@ -0,0 +1,270 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_interleave_stop_two.scores b/pengine/test10/clone_min_interleave_stop_two.scores +new file mode 100644 +index 0000000..ee7df92 +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_two.scores +@@ -0,0 +1,67 @@ ++Allocation scores: ++clone_color: FAKE1-clone allocation score on c7auto1: 0 ++clone_color: FAKE1-clone allocation score on c7auto2: -INFINITY ++clone_color: FAKE1-clone allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:0 allocation score on c7auto1: 0 ++clone_color: FAKE1:0 allocation score on c7auto2: -INFINITY ++clone_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:1 allocation score on c7auto1: 1 ++clone_color: FAKE1:1 allocation score on c7auto2: -INFINITY ++clone_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++clone_color: FAKE1:2 allocation score on c7auto1: 0 ++clone_color: FAKE1:2 allocation score on c7auto2: -INFINITY ++clone_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++clone_color: FAKE2-clone allocation score on c7auto1: 0 ++clone_color: FAKE2-clone allocation score on c7auto2: 0 ++clone_color: FAKE2-clone allocation score on c7auto3: 0 ++clone_color: FAKE2:0 allocation score on c7auto1: 0 ++clone_color: FAKE2:0 allocation score on c7auto2: 0 ++clone_color: FAKE2:0 allocation score on c7auto3: 1 ++clone_color: FAKE2:1 allocation score on c7auto1: 1 ++clone_color: FAKE2:1 allocation score on c7auto2: 0 ++clone_color: FAKE2:1 allocation score on c7auto3: 0 ++clone_color: FAKE2:2 allocation score on c7auto1: 0 ++clone_color: FAKE2:2 allocation score on c7auto2: 1 ++clone_color: FAKE2:2 allocation score on c7auto3: 0 ++clone_color: FAKE3-clone allocation score on c7auto1: 0 ++clone_color: FAKE3-clone allocation score on c7auto2: 0 ++clone_color: FAKE3-clone allocation score on c7auto3: 0 ++clone_color: FAKE3:0 allocation score on c7auto1: 0 ++clone_color: FAKE3:0 allocation score on c7auto2: 0 ++clone_color: FAKE3:0 allocation score on c7auto3: 1 ++clone_color: FAKE3:1 allocation score on c7auto1: 1 ++clone_color: FAKE3:1 allocation score on c7auto2: 0 ++clone_color: FAKE3:1 allocation score on c7auto3: 0 ++clone_color: FAKE3:2 allocation score on c7auto1: 0 ++clone_color: FAKE3:2 allocation score on c7auto2: 1 ++clone_color: FAKE3:2 allocation score on c7auto3: 0 ++native_color: FAKE1:0 allocation score on c7auto1: -INFINITY ++native_color: FAKE1:0 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:0 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto1: 1 ++native_color: FAKE1:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto1: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto2: -INFINITY ++native_color: FAKE1:2 allocation score on c7auto3: -INFINITY ++native_color: FAKE2:0 allocation score on c7auto1: 0 ++native_color: FAKE2:0 allocation score on c7auto2: -INFINITY ++native_color: FAKE2:0 allocation score on c7auto3: 1 ++native_color: FAKE2:1 allocation score on c7auto1: 1 ++native_color: 
FAKE2:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE2:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE2:2 allocation score on c7auto1: 0 ++native_color: FAKE2:2 allocation score on c7auto2: 1 ++native_color: FAKE2:2 allocation score on c7auto3: 0 ++native_color: FAKE3:0 allocation score on c7auto1: 0 ++native_color: FAKE3:0 allocation score on c7auto2: -INFINITY ++native_color: FAKE3:0 allocation score on c7auto3: 1 ++native_color: FAKE3:1 allocation score on c7auto1: 1 ++native_color: FAKE3:1 allocation score on c7auto2: -INFINITY ++native_color: FAKE3:1 allocation score on c7auto3: -INFINITY ++native_color: FAKE3:2 allocation score on c7auto1: 0 ++native_color: FAKE3:2 allocation score on c7auto2: 1 ++native_color: FAKE3:2 allocation score on c7auto3: 0 ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 +diff --git a/pengine/test10/clone_min_interleave_stop_two.summary b/pengine/test10/clone_min_interleave_stop_two.summary +new file mode 100644 +index 0000000..fb28e0d +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_two.summary +@@ -0,0 +1,53 @@ ++ ++Current cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ ++Transition Summary: ++ * Stop FAKE1:0 (c7auto3) ++ * Stop FAKE1:2 (c7auto2) ++ * Stop FAKE2:0 (Started c7auto3) ++ * Stop FAKE2:1 (Started c7auto1) ++ * Stop FAKE2:2 (Started c7auto2) ++ * Stop FAKE3:0 (Started c7auto3) ++ * Stop FAKE3:1 (Started c7auto1) ++ * Stop FAKE3:2 (Started c7auto2) ++ ++Executing cluster transition: ++ * Pseudo action: FAKE3-clone_stop_0 ++ * Resource action: FAKE3 stop on c7auto3 ++ * Resource action: FAKE3 stop on c7auto1 ++ * Resource action: FAKE3 stop on c7auto2 ++ * Pseudo action: FAKE3-clone_stopped_0 ++ * Pseudo action: FAKE2-clone_stop_0 ++ * Resource action: FAKE2 stop on c7auto3 ++ * Resource action: FAKE2 stop on c7auto1 ++ * Resource action: FAKE2 stop on c7auto2 ++ * Pseudo action: FAKE2-clone_stopped_0 ++ * Pseudo action: FAKE1-clone_stop_0 ++ * Resource action: FAKE1 stop on c7auto3 ++ * Resource action: FAKE1 stop on c7auto2 ++ * Pseudo action: FAKE1-clone_stopped_0 ++ * Pseudo action: FAKE1-clone_start_0 ++ * Pseudo action: all_stopped ++ * Pseudo action: FAKE1-clone_running_0 ++ ++Revised cluster status: ++Online: [ c7auto1 c7auto2 c7auto3 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKE1-clone [FAKE1] ++ Started: [ c7auto1 ] ++ Stopped: [ c7auto2 c7auto3 ] ++ Clone Set: FAKE2-clone [FAKE2] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ Clone Set: FAKE3-clone [FAKE3] ++ Stopped: [ c7auto1 c7auto2 c7auto3 ] ++ +diff --git a/pengine/test10/clone_min_interleave_stop_two.xml b/pengine/test10/clone_min_interleave_stop_two.xml +new file mode 100644 +index 0000000..32c2b3b +--- /dev/null ++++ b/pengine/test10/clone_min_interleave_stop_two.xml +@@ -0,0 +1,154 @@ ++ [... 154 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_start_one.dot b/pengine/test10/clone_min_start_one.dot +new file mode 100644 +index 0000000..3940361 +--- /dev/null ++++ b/pengine/test10/clone_min_start_one.dot +@@ -0,0 +1,20 @@ ++ digraph "g" { ++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold] ++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 c7auto3" [ style = bold] ++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE_monitor_10000 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKECLONE_start_0 c7auto3" -> "FAKECLONE-clone_running_0" [ style = bold] ++"FAKECLONE_start_0 c7auto3" -> "FAKECLONE_monitor_10000 c7auto3" [ style = bold] ++"FAKECLONE_start_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKE_monitor_10000 c7auto4" [ style=dashed color="red" fontcolor="black"] ++"FAKE_start_0 c7auto4" -> "FAKE_monitor_10000 c7auto4" [ style = dashed] ++"FAKE_start_0 c7auto4" [ style=dashed color="red" fontcolor="black"] ++"all_stopped" [ style=bold color="green" fontcolor="orange"] ++"shooter_monitor_60000 c7auto3" [ style=bold color="green" fontcolor="black"] ++"shooter_start_0 c7auto3" -> "shooter_monitor_60000 c7auto3" [ style = bold] ++"shooter_start_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"shooter_stop_0 c7auto1" -> "all_stopped" [ style = bold] ++"shooter_stop_0 c7auto1" -> "shooter_start_0 c7auto3" [ style = bold] ++"shooter_stop_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/pengine/test10/clone_min_start_one.exp b/pengine/test10/clone_min_start_one.exp +new file mode 100644 +index 0000000..a6868f6 +--- /dev/null ++++ b/pengine/test10/clone_min_start_one.exp +@@ -0,0 +1,98 @@ ++ [... 98 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_start_one.scores b/pengine/test10/clone_min_start_one.scores +new file mode 100644 +index 0000000..668689e +--- /dev/null ++++ b/pengine/test10/clone_min_start_one.scores +@@ -0,0 +1,45 @@ ++Allocation scores: ++clone_color: FAKECLONE-clone allocation score on c7auto1: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto2: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto3: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:0 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:1 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:2 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:3 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:3 allocation score on
c7auto2: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: FAKE allocation score on c7auto1: -INFINITY ++native_color: FAKE allocation score on c7auto2: -INFINITY ++native_color: FAKE allocation score on c7auto3: -INFINITY ++native_color: FAKE allocation score on c7auto4: 0 ++native_color: FAKECLONE:0 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto3: 0 ++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 ++native_color: shooter allocation score on c7auto4: 0 +diff --git a/pengine/test10/clone_min_start_one.summary b/pengine/test10/clone_min_start_one.summary +new file mode 100644 +index 0000000..ee33e01 +--- /dev/null ++++ b/pengine/test10/clone_min_start_one.summary +@@ -0,0 +1,37 @@ ++ ++Current cluster status: ++Node c7auto1 (1): standby ++Node c7auto2 (2): standby ++Online: [ c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Stopped: [ c7auto1 c7auto2 c7auto3 c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Stopped ++ ++Transition Summary: ++ * Move shooter (Started c7auto1 -> c7auto3) ++ * Start FAKECLONE:0 (c7auto3) ++ * Start FAKE (c7auto4 - blocked) ++ ++Executing cluster transition: ++ * Resource action: shooter stop on c7auto1 ++ * Pseudo action: FAKECLONE-clone_start_0 ++ * Pseudo action: all_stopped ++ * Resource action: shooter start on c7auto3 ++ * Resource action: FAKECLONE start on c7auto3 ++ * Pseudo action: FAKECLONE-clone_running_0 ++ * Resource action: shooter monitor=60000 on c7auto3 ++ * Resource action: FAKECLONE monitor=10000 on c7auto3 ++ ++Revised cluster status: ++Node c7auto1 (1): standby ++Node c7auto2 (2): standby ++Online: [ c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto3 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Started: [ c7auto3 ] ++ Stopped: [ c7auto1 c7auto2 c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Stopped ++ +diff --git a/pengine/test10/clone_min_start_one.xml b/pengine/test10/clone_min_start_one.xml +new file mode 100644 +index 0000000..dfb9379 +--- /dev/null ++++ b/pengine/test10/clone_min_start_one.xml +@@ -0,0 +1,155 @@ ++ [... 155 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_start_two.dot b/pengine/test10/clone_min_start_two.dot +new file mode 100644 +index 0000000..3fe0062 +--- /dev/null ++++ b/pengine/test10/clone_min_start_two.dot +@@ -0,0 +1,22 @@ ++ digraph "g" { ++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold] ++"FAKECLONE-clone_start_0" -> "FAKECLONE:1_start_0 c7auto1" [ style = bold] ++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 c7auto3" [ style = bold] ++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE:1_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKECLONE:1_start_0 c7auto1" -> "FAKECLONE-clone_running_0" [ style = bold] ++"FAKECLONE:1_start_0 c7auto1" -> "FAKECLONE:1_monitor_10000 c7auto1" [ style = bold] ++"FAKECLONE:1_start_0 c7auto1" -> "clone-one-or-more:order-FAKECLONE-clone-FAKE-mandatory" [ style = bold] ++"FAKECLONE:1_start_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKECLONE_monitor_10000 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKECLONE_start_0 c7auto3" -> "FAKECLONE-clone_running_0" [ style = bold] ++"FAKECLONE_start_0 c7auto3" -> "FAKECLONE_monitor_10000 c7auto3" [ style = bold] ++"FAKECLONE_start_0 c7auto3" -> "clone-one-or-more:order-FAKECLONE-clone-FAKE-mandatory" [ style = bold] ++"FAKECLONE_start_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKE_monitor_10000 c7auto4" [ style=bold color="green" fontcolor="black"] ++"FAKE_start_0 c7auto4" -> "FAKE_monitor_10000 c7auto4" [ style = bold] ++"FAKE_start_0 c7auto4" [ style=bold color="green" fontcolor="black"] ++"clone-one-or-more:order-FAKECLONE-clone-FAKE-mandatory" -> "FAKE_start_0 c7auto4" [ style = bold] ++"clone-one-or-more:order-FAKECLONE-clone-FAKE-mandatory" [ style=bold color="green" fontcolor="orange"] ++} +diff --git a/pengine/test10/clone_min_start_two.exp b/pengine/test10/clone_min_start_two.exp +new file mode 100644 +index 0000000..f7a053c +--- /dev/null ++++ b/pengine/test10/clone_min_start_two.exp +@@ -0,0 +1,121 @@ ++ [... 121 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_start_two.scores b/pengine/test10/clone_min_start_two.scores +new file mode 100644 +index 0000000..b3bcac0 +--- /dev/null ++++ b/pengine/test10/clone_min_start_two.scores +@@ -0,0 +1,45 @@ ++Allocation scores: ++clone_color: FAKECLONE-clone allocation score on c7auto1: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto2: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto3: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:0 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:1 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto2: 0 ++clone_color:
FAKECLONE:1 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:2 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:3 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: FAKE allocation score on c7auto1: -INFINITY ++native_color: FAKE allocation score on c7auto2: -INFINITY ++native_color: FAKE allocation score on c7auto3: -INFINITY ++native_color: FAKE allocation score on c7auto4: 0 ++native_color: FAKECLONE:0 allocation score on c7auto1: 0 ++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto3: 0 ++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto1: 0 ++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 ++native_color: shooter allocation score on c7auto4: 0 +diff --git a/pengine/test10/clone_min_start_two.summary b/pengine/test10/clone_min_start_two.summary +new file mode 100644 +index 0000000..f0c649c +--- /dev/null ++++ b/pengine/test10/clone_min_start_two.summary +@@ -0,0 +1,36 @@ ++ ++Current cluster status: ++Node c7auto2 (2): standby ++Online: [ c7auto1 c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Stopped: [ c7auto1 c7auto2 c7auto3 c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Stopped ++ ++Transition Summary: ++ * Start FAKECLONE:0 (c7auto3) ++ * Start FAKECLONE:1 (c7auto1) ++ * Start FAKE (c7auto4) ++ ++Executing cluster transition: ++ * Pseudo action: FAKECLONE-clone_start_0 ++ * Resource action: FAKECLONE start on c7auto3 ++ * Resource action: FAKECLONE start on c7auto1 ++ * Pseudo action: FAKECLONE-clone_running_0 ++ * Pseudo action: clone-one-or-more:order-FAKECLONE-clone-FAKE-mandatory ++ * Resource action: FAKECLONE monitor=10000 on c7auto3 ++ * Resource action: FAKECLONE monitor=10000 on c7auto1 ++ * Resource action: FAKE start on c7auto4 ++ * Resource action: FAKE monitor=10000 on c7auto4 ++ ++Revised cluster status: ++Node c7auto2 (2): standby ++Online: [ c7auto1 c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Started: [ c7auto1 c7auto3 ] ++ Stopped: [ c7auto2 c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Started c7auto4 ++ +diff --git 
a/pengine/test10/clone_min_start_two.xml b/pengine/test10/clone_min_start_two.xml +new file mode 100644 +index 0000000..ae84425 +--- /dev/null ++++ b/pengine/test10/clone_min_start_two.xml +@@ -0,0 +1,153 @@ ++ [... 153 lines of XML content not preserved ...] +diff --git a/pengine/test10/clone_min_stop_all.dot b/pengine/test10/clone_min_stop_all.dot +new file mode 100644 +index 0000000..254e889 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_all.dot +@@ -0,0 +1,41 @@ ++ digraph "g" { ++"FAKECLONE-clone_running_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = dashed] ++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE-clone_start_0" [ style=dashed color="red" fontcolor="orange"] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto1" [ style = bold] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto2" [ style = bold] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto3" [ style = bold] ++"FAKECLONE-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_stopped_0" -> "FAKECLONE-clone_start_0" [ style = dashed] ++"FAKECLONE-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE_start_0 " -> "FAKECLONE-clone_running_0" [ style = dashed] ++"FAKECLONE_start_0 " [ style=dashed color="red" fontcolor="black"] ++"FAKECLONE_stop_0 c7auto1" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE_stop_0 c7auto1" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE_stop_0 c7auto1" -> "all_stopped" [ style = bold] ++"FAKECLONE_stop_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE_stop_0 c7auto2" -> "all_stopped" [ style = bold] ++"FAKECLONE_stop_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKECLONE_stop_0 c7auto3" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE_stop_0 c7auto3" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE_stop_0 c7auto3" -> "all_stopped" [ style = bold] ++"FAKECLONE_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"FAKE_monitor_10000 c7auto4" [ style=dashed color="red" fontcolor="black"] ++"FAKE_start_0 c7auto4" -> "FAKE_monitor_10000 c7auto4" [ style = dashed] ++"FAKE_start_0 c7auto4" [ style=dashed color="red" fontcolor="black"] ++"FAKE_stop_0 c7auto4" -> "FAKECLONE-clone_stop_0" [ style = bold] ++"FAKE_stop_0 c7auto4" -> "FAKE_start_0 c7auto4" [ style = dashed] ++"FAKE_stop_0 c7auto4" -> "all_stopped" [ style = bold] ++"FAKE_stop_0 c7auto4" [ style=bold color="green" fontcolor="black"] ++"all_stopped" [ style=bold color="green" fontcolor="orange"] ++"shooter_monitor_60000 c7auto4" [ style=bold color="green" fontcolor="black"] ++"shooter_start_0 c7auto4" -> "shooter_monitor_60000 c7auto4" [ style = bold] ++"shooter_start_0 c7auto4" [ style=bold color="green" fontcolor="black"] ++"shooter_stop_0 c7auto1" -> "all_stopped" [ style = bold]
++"shooter_stop_0 c7auto1" -> "shooter_start_0 c7auto4" [ style = bold] ++"shooter_stop_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/pengine/test10/clone_min_stop_all.exp b/pengine/test10/clone_min_stop_all.exp +new file mode 100644 +index 0000000..1b8c9ce +--- /dev/null ++++ b/pengine/test10/clone_min_stop_all.exp +@@ -0,0 +1,142 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_stop_all.scores b/pengine/test10/clone_min_stop_all.scores +new file mode 100644 +index 0000000..0bcbb1f +--- /dev/null ++++ b/pengine/test10/clone_min_stop_all.scores +@@ -0,0 +1,45 @@ ++Allocation scores: ++clone_color: FAKECLONE-clone allocation score on c7auto1: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto2: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto3: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:0 allocation score on c7auto1: 1 ++clone_color: FAKECLONE:0 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:1 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto2: 1 ++clone_color: FAKECLONE:1 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:2 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto3: 1 ++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:3 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: FAKE allocation score on c7auto1: -INFINITY ++native_color: FAKE allocation score on c7auto2: -INFINITY ++native_color: FAKE allocation score on c7auto3: -INFINITY ++native_color: FAKE allocation score on c7auto4: 0 ++native_color: FAKECLONE:0 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto4: 
-INFINITY ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 ++native_color: shooter allocation score on c7auto4: 0 +diff --git a/pengine/test10/clone_min_stop_all.summary b/pengine/test10/clone_min_stop_all.summary +new file mode 100644 +index 0000000..eb2944f +--- /dev/null ++++ b/pengine/test10/clone_min_stop_all.summary +@@ -0,0 +1,43 @@ ++ ++Current cluster status: ++Node c7auto1 (1): standby ++Node c7auto2 (2): standby ++Node c7auto3 (3): standby ++Online: [ c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Stopped: [ c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Started c7auto4 ++ ++Transition Summary: ++ * Move shooter (Started c7auto1 -> c7auto4) ++ * Stop FAKECLONE:0 (c7auto1) ++ * Stop FAKECLONE:1 (c7auto2) ++ * Stop FAKECLONE:2 (c7auto3) ++ * Stop FAKE (Started c7auto4) ++ ++Executing cluster transition: ++ * Resource action: shooter stop on c7auto1 ++ * Resource action: FAKE stop on c7auto4 ++ * Resource action: shooter start on c7auto4 ++ * Pseudo action: FAKECLONE-clone_stop_0 ++ * Resource action: shooter monitor=60000 on c7auto4 ++ * Resource action: FAKECLONE stop on c7auto1 ++ * Resource action: FAKECLONE stop on c7auto2 ++ * Resource action: FAKECLONE stop on c7auto3 ++ * Pseudo action: FAKECLONE-clone_stopped_0 ++ * Pseudo action: all_stopped ++ ++Revised cluster status: ++Node c7auto1 (1): standby ++Node c7auto2 (2): standby ++Node c7auto3 (3): standby ++Online: [ c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto4 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Stopped: [ c7auto1 c7auto2 c7auto3 c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Stopped ++ +diff --git a/pengine/test10/clone_min_stop_all.xml b/pengine/test10/clone_min_stop_all.xml +new file mode 100644 +index 0000000..70e8a96 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_all.xml +@@ -0,0 +1,158 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_stop_one.dot b/pengine/test10/clone_min_stop_one.dot +new file mode 100644 +index 0000000..19f84cc +--- /dev/null ++++ b/pengine/test10/clone_min_stop_one.dot +@@ -0,0 +1,18 @@ ++ digraph "g" { ++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold] ++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto2" [ style = bold] ++"FAKECLONE-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_stopped_0" -> "FAKECLONE-clone_start_0" [ style = bold] ++"FAKECLONE-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE_start_0 " -> "FAKECLONE-clone_running_0" [ style = dashed] ++"FAKECLONE_start_0 " [ style=dashed color="red" fontcolor="black"] 
++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE_stop_0 c7auto2" -> "all_stopped" [ style = bold] ++"FAKECLONE_stop_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"all_stopped" [ style=bold color="green" fontcolor="orange"] ++} +diff --git a/pengine/test10/clone_min_stop_one.exp b/pengine/test10/clone_min_stop_one.exp +new file mode 100644 +index 0000000..4e6edb8 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_one.exp +@@ -0,0 +1,74 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_stop_one.scores b/pengine/test10/clone_min_stop_one.scores +new file mode 100644 +index 0000000..1f28932 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_one.scores +@@ -0,0 +1,45 @@ ++Allocation scores: ++clone_color: FAKECLONE-clone allocation score on c7auto1: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto2: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto3: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:0 allocation score on c7auto1: 1 ++clone_color: FAKECLONE:0 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:1 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto2: 1 ++clone_color: FAKECLONE:1 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:2 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto3: 1 ++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:3 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: FAKE allocation score on c7auto1: -INFINITY ++native_color: FAKE allocation score on c7auto2: -INFINITY ++native_color: FAKE allocation score on c7auto3: -INFINITY ++native_color: FAKE allocation score on c7auto4: 0 ++native_color: FAKECLONE:0 allocation score on c7auto1: 1 ++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto1: 0 ++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto3: 1 ++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: shooter 
allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 ++native_color: shooter allocation score on c7auto4: 0 +diff --git a/pengine/test10/clone_min_stop_one.summary b/pengine/test10/clone_min_stop_one.summary +new file mode 100644 +index 0000000..9206a0d +--- /dev/null ++++ b/pengine/test10/clone_min_stop_one.summary +@@ -0,0 +1,32 @@ ++ ++Current cluster status: ++Node c7auto2 (2): standby ++Online: [ c7auto1 c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Stopped: [ c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Started c7auto4 ++ ++Transition Summary: ++ * Stop FAKECLONE:1 (c7auto2) ++ ++Executing cluster transition: ++ * Pseudo action: FAKECLONE-clone_stop_0 ++ * Resource action: FAKECLONE stop on c7auto2 ++ * Pseudo action: FAKECLONE-clone_stopped_0 ++ * Pseudo action: FAKECLONE-clone_start_0 ++ * Pseudo action: all_stopped ++ * Pseudo action: FAKECLONE-clone_running_0 ++ ++Revised cluster status: ++Node c7auto2 (2): standby ++Online: [ c7auto1 c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Started: [ c7auto1 c7auto3 ] ++ Stopped: [ c7auto2 c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Started c7auto4 ++ +diff --git a/pengine/test10/clone_min_stop_one.xml b/pengine/test10/clone_min_stop_one.xml +new file mode 100644 +index 0000000..eb05803 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_one.xml +@@ -0,0 +1,152 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_stop_two.dot b/pengine/test10/clone_min_stop_two.dot +new file mode 100644 +index 0000000..11640f4 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_two.dot +@@ -0,0 +1,36 @@ ++ digraph "g" { ++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold] ++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto1" [ style = bold] ++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto2" [ style = bold] ++"FAKECLONE-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE-clone_stopped_0" -> "FAKECLONE-clone_start_0" [ style = bold] ++"FAKECLONE-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"FAKECLONE_start_0 " -> "FAKECLONE-clone_running_0" [ style = dashed] ++"FAKECLONE_start_0 " [ style=dashed color="red" fontcolor="black"] ++"FAKECLONE_stop_0 c7auto1" -> "FAKECLONE-clone_stopped_0" [ style = bold] ++"FAKECLONE_stop_0 c7auto1" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE_stop_0 c7auto1" -> "all_stopped" [ style = bold] ++"FAKECLONE_stop_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE-clone_stopped_0" [ style = bold] 
++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE_start_0 " [ style = dashed] ++"FAKECLONE_stop_0 c7auto2" -> "all_stopped" [ style = bold] ++"FAKECLONE_stop_0 c7auto2" [ style=bold color="green" fontcolor="black"] ++"FAKE_monitor_10000 c7auto4" [ style=dashed color="red" fontcolor="black"] ++"FAKE_start_0 c7auto4" -> "FAKE_monitor_10000 c7auto4" [ style = dashed] ++"FAKE_start_0 c7auto4" [ style=dashed color="red" fontcolor="black"] ++"FAKE_stop_0 c7auto4" -> "FAKECLONE-clone_stop_0" [ style = bold] ++"FAKE_stop_0 c7auto4" -> "FAKE_start_0 c7auto4" [ style = dashed] ++"FAKE_stop_0 c7auto4" -> "all_stopped" [ style = bold] ++"FAKE_stop_0 c7auto4" [ style=bold color="green" fontcolor="black"] ++"all_stopped" [ style=bold color="green" fontcolor="orange"] ++"shooter_monitor_60000 c7auto3" [ style=bold color="green" fontcolor="black"] ++"shooter_start_0 c7auto3" -> "shooter_monitor_60000 c7auto3" [ style = bold] ++"shooter_start_0 c7auto3" [ style=bold color="green" fontcolor="black"] ++"shooter_stop_0 c7auto1" -> "all_stopped" [ style = bold] ++"shooter_stop_0 c7auto1" -> "shooter_start_0 c7auto3" [ style = bold] ++"shooter_stop_0 c7auto1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/pengine/test10/clone_min_stop_two.exp b/pengine/test10/clone_min_stop_two.exp +new file mode 100644 +index 0000000..5697611 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_two.exp +@@ -0,0 +1,147 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/clone_min_stop_two.scores b/pengine/test10/clone_min_stop_two.scores +new file mode 100644 +index 0000000..ce43eb9 +--- /dev/null ++++ b/pengine/test10/clone_min_stop_two.scores +@@ -0,0 +1,45 @@ ++Allocation scores: ++clone_color: FAKECLONE-clone allocation score on c7auto1: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto2: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto3: 0 ++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:0 allocation score on c7auto1: 1 ++clone_color: FAKECLONE:0 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:1 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto2: 1 ++clone_color: FAKECLONE:1 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:2 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:2 allocation score on c7auto3: 1 ++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++clone_color: FAKECLONE:3 allocation score on c7auto1: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto2: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto3: 0 ++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: FAKE allocation score on c7auto1: -INFINITY ++native_color: FAKE allocation score on c7auto2: -INFINITY ++native_color: FAKE allocation score on c7auto3: -INFINITY ++native_color: FAKE allocation score on c7auto4: 0 
++native_color: FAKECLONE:0 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:2 allocation score on c7auto3: 1 ++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY ++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY ++native_color: shooter allocation score on c7auto1: 0 ++native_color: shooter allocation score on c7auto2: 0 ++native_color: shooter allocation score on c7auto3: 0 ++native_color: shooter allocation score on c7auto4: 0 +diff --git a/pengine/test10/clone_min_stop_two.summary b/pengine/test10/clone_min_stop_two.summary +new file mode 100644 +index 0000000..c009d7d +--- /dev/null ++++ b/pengine/test10/clone_min_stop_two.summary +@@ -0,0 +1,42 @@ ++ ++Current cluster status: ++Node c7auto1 (1): standby ++Node c7auto2 (2): standby ++Online: [ c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto1 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Started: [ c7auto1 c7auto2 c7auto3 ] ++ Stopped: [ c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Started c7auto4 ++ ++Transition Summary: ++ * Move shooter (Started c7auto1 -> c7auto3) ++ * Stop FAKECLONE:0 (c7auto1) ++ * Stop FAKECLONE:1 (c7auto2) ++ * Stop FAKE (Started c7auto4) ++ ++Executing cluster transition: ++ * Resource action: shooter stop on c7auto1 ++ * Resource action: FAKE stop on c7auto4 ++ * Resource action: shooter start on c7auto3 ++ * Pseudo action: FAKECLONE-clone_stop_0 ++ * Resource action: shooter monitor=60000 on c7auto3 ++ * Resource action: FAKECLONE stop on c7auto1 ++ * Resource action: FAKECLONE stop on c7auto2 ++ * Pseudo action: FAKECLONE-clone_stopped_0 ++ * Pseudo action: FAKECLONE-clone_start_0 ++ * Pseudo action: all_stopped ++ * Pseudo action: FAKECLONE-clone_running_0 ++ ++Revised cluster status: ++Node c7auto1 (1): standby ++Node c7auto2 (2): standby ++Online: [ c7auto3 c7auto4 ] ++ ++ shooter (stonith:fence_phd_kvm): Started c7auto3 ++ Clone Set: FAKECLONE-clone [FAKECLONE] ++ Started: [ c7auto3 ] ++ Stopped: [ c7auto1 c7auto2 c7auto4 ] ++ FAKE (ocf::heartbeat:Dummy): Stopped ++ +diff --git a/pengine/test10/clone_min_stop_two.xml b/pengine/test10/clone_min_stop_two.xml +new file mode 100644 +index 0000000..8d085ad +--- /dev/null ++++ b/pengine/test10/clone_min_stop_two.xml +@@ -0,0 +1,154 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git 
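The clone_min_* fixtures above all drive the same scenario: FAKE is ordered after FAKECLONE-clone and is only left running while a minimum number of FAKECLONE instances stays active (stopping one of three instances leaves FAKE alone; stopping two, or all three, forces FAKE to stop). The fixture XML itself was lost above, so the following is only a sketch: it assumes the tests exercise the clone-min clone meta-attribute, the value 2 is inferred from the stop_one/stop_two behaviour, and all ids are hypothetical.

    <clone id="FAKECLONE-clone">
      <primitive class="ocf" id="FAKECLONE" provider="heartbeat" type="Dummy"/>
      <meta_attributes id="FAKECLONE-clone-meta_attributes">
        <!-- assumption: a minimum of 2 active instances, inferred from the summaries above -->
        <nvpair id="FAKECLONE-clone-meta-clone-min" name="clone-min" value="2"/>
      </meta_attributes>
    </clone>
    <rsc_order first="FAKECLONE-clone" first-action="start"
               id="order-FAKECLONE-clone-FAKE-mandatory" then="FAKE" then-action="start"/>
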
+diff --git a/pengine/test10/cloned_start_one.dot b/pengine/test10/cloned_start_one.dot
+new file mode 100644
+index 0000000..b3c254c
+--- /dev/null
++++ b/pengine/test10/cloned_start_one.dot
+@@ -0,0 +1,32 @@
++ digraph "g" {
++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold]
++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 c7auto1" [ style = bold]
++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_running_0" [ style=dashed color="red" fontcolor="orange"]
++"FAKECLONE2-clone_start_0" -> "FAKECLONE2-clone_running_0" [ style = dashed]
++"FAKECLONE2-clone_start_0" -> "FAKECLONE2_start_0 c7auto4" [ style = dashed]
++"FAKECLONE2-clone_start_0" [ style=dashed color="red" fontcolor="orange"]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2_stop_0 c7auto3" [ style = bold]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2_stop_0 c7auto4" [ style = bold]
++"FAKECLONE2-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_stopped_0" -> "FAKECLONE2-clone_start_0" [ style = dashed]
++"FAKECLONE2-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2_monitor_10000 c7auto4" [ style=dashed color="red" fontcolor="black"]
++"FAKECLONE2_start_0 c7auto4" -> "FAKECLONE2-clone_running_0" [ style = dashed]
++"FAKECLONE2_start_0 c7auto4" -> "FAKECLONE2_monitor_10000 c7auto4" [ style = dashed]
++"FAKECLONE2_start_0 c7auto4" [ style=dashed color="red" fontcolor="black"]
++"FAKECLONE2_stop_0 c7auto3" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" -> "all_stopped" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE2_stop_0 c7auto4" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2_stop_0 c7auto4" -> "FAKECLONE2_start_0 c7auto4" [ style = dashed]
++"FAKECLONE2_stop_0 c7auto4" -> "all_stopped" [ style = bold]
++"FAKECLONE2_stop_0 c7auto4" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE_start_0 c7auto1" -> "FAKECLONE-clone_running_0" [ style = bold]
++"FAKECLONE_start_0 c7auto1" -> "FAKECLONE_monitor_10000 c7auto1" [ style = bold]
++"FAKECLONE_start_0 c7auto1" [ style=bold color="green" fontcolor="black"]
++"all_stopped" [ style=bold color="green" fontcolor="orange"]
++}
+diff --git a/pengine/test10/cloned_start_one.exp b/pengine/test10/cloned_start_one.exp
+new file mode 100644
+index 0000000..636ccd8
+--- /dev/null
++++ b/pengine/test10/cloned_start_one.exp
+@@ -0,0 +1,118 @@
++ [transition graph XML lost in extraction]
+diff --git a/pengine/test10/cloned_start_one.scores b/pengine/test10/cloned_start_one.scores
+new file mode 100644
+index 0000000..3dc6ab8
+--- /dev/null
++++ b/pengine/test10/cloned_start_one.scores
+@@ -0,0 +1,77 @@
++Allocation scores:
++clone_color: FAKECLONE-clone allocation score on c7auto1: 0
++clone_color: FAKECLONE-clone allocation score on c7auto2: 0
++clone_color: FAKECLONE-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE2-clone allocation score on c7auto4: 0
++clone_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto3: 1
++clone_color: FAKECLONE2:0 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto4: 1
++clone_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:2 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto4: 0
++clone_color: FAKECLONE:0 allocation score on c7auto1: 0
++clone_color: FAKECLONE:0 allocation score on c7auto2: 0
++clone_color: FAKECLONE:0 allocation score on c7auto3: 0
++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:1 allocation score on c7auto1: 0
++clone_color: FAKECLONE:1 allocation score on c7auto2: 0
++clone_color: FAKECLONE:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:2 allocation score on c7auto1: 0
++clone_color: FAKECLONE:2 allocation score on c7auto2: 0
++clone_color: FAKECLONE:2 allocation score on c7auto3: 0
++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:3 allocation score on c7auto1: 0
++clone_color: FAKECLONE:3 allocation score on c7auto2: 0
++clone_color: FAKECLONE:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto4: 1
++native_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto1: 0
++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: shooter allocation score on c7auto1: 0
++native_color: shooter allocation score on c7auto2: 0
++native_color: shooter allocation score on c7auto3: 0
++native_color: shooter allocation score on c7auto4: 0
+diff --git a/pengine/test10/cloned_start_one.summary b/pengine/test10/cloned_start_one.summary
+new file mode 100644
+index 0000000..20ac58f
+--- /dev/null
++++ b/pengine/test10/cloned_start_one.summary
+@@ -0,0 +1,41 @@
++
++Current cluster status:
++Node c7auto2 (2): standby
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Stopped: [ c7auto1 c7auto2 c7auto3 c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Started: [ c7auto3 c7auto4 ]
++ Stopped: [ c7auto1 c7auto2 ]
++
++Transition Summary:
++ * Start FAKECLONE:0 (c7auto1)
++ * Stop FAKECLONE2:0 (c7auto3)
++ * Stop FAKECLONE2:1 (Started c7auto4)
++
++Executing cluster transition:
++ * Pseudo action: FAKECLONE-clone_start_0
++ * Pseudo action: FAKECLONE2-clone_stop_0
++ * Resource action: FAKECLONE start on c7auto1
++ * Pseudo action: FAKECLONE-clone_running_0
++ * Resource action: FAKECLONE2 stop on c7auto3
++ * Resource action: FAKECLONE2 stop on c7auto4
++ * Pseudo action: FAKECLONE2-clone_stopped_0
++ * Pseudo action: all_stopped
++ * Resource action: FAKECLONE monitor=10000 on c7auto1
++
++Revised cluster status:
++Node c7auto2 (2): standby
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Started: [ c7auto1 ]
++ Stopped: [ c7auto2 c7auto3 c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Stopped: [ c7auto1 c7auto2 c7auto3 c7auto4 ]
++
+diff --git a/pengine/test10/cloned_start_one.xml b/pengine/test10/cloned_start_one.xml
+new file mode 100644
+index 0000000..6c2bfe1
+--- /dev/null
++++ b/pengine/test10/cloned_start_one.xml
+@@ -0,0 +1,154 @@
++ [CIB XML lost in extraction]
+diff --git a/pengine/test10/cloned_start_two.dot b/pengine/test10/cloned_start_two.dot
+new file mode 100644
+index 0000000..348d435
+--- /dev/null
++++ b/pengine/test10/cloned_start_two.dot
+@@ -0,0 +1,26 @@
++ digraph "g" {
++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold]
++"FAKECLONE-clone_start_0" -> "FAKECLONE:1_start_0 c7auto1" [ style = bold]
++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 c7auto2" [ style = bold]
++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2_stop_0 c7auto3" [ style = bold]
++"FAKECLONE2-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2_stop_0 c7auto3" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" -> "all_stopped" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE:1_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE:1_start_0 c7auto1" -> "FAKECLONE-clone_running_0" [ style = bold]
++"FAKECLONE:1_start_0 c7auto1" -> "FAKECLONE:1_monitor_10000 c7auto1" [ style = bold]
++"FAKECLONE:1_start_0 c7auto1" -> "clone-one-or-more:order-FAKECLONE-clone-FAKECLONE2-clone-mandatory" [ style = bold]
++"FAKECLONE:1_start_0 c7auto1" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE_monitor_10000 c7auto2" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE_start_0 c7auto2" -> "FAKECLONE-clone_running_0" [ style = bold]
++"FAKECLONE_start_0 c7auto2" -> "FAKECLONE_monitor_10000 c7auto2" [ style = bold]
++"FAKECLONE_start_0 c7auto2" -> "clone-one-or-more:order-FAKECLONE-clone-FAKECLONE2-clone-mandatory" [ style = bold]
++"FAKECLONE_start_0 c7auto2" [ style=bold color="green" fontcolor="black"]
++"all_stopped" [ style=bold color="green" fontcolor="orange"]
++"clone-one-or-more:order-FAKECLONE-clone-FAKECLONE2-clone-mandatory" [ style=bold color="green" fontcolor="orange"]
++}
+diff --git a/pengine/test10/cloned_start_two.exp b/pengine/test10/cloned_start_two.exp
+new file mode 100644
+index 0000000..ee82324
+--- /dev/null
++++ b/pengine/test10/cloned_start_two.exp
+@@ -0,0 +1,143 @@
++ [transition graph XML lost in extraction]
+diff --git a/pengine/test10/cloned_start_two.scores b/pengine/test10/cloned_start_two.scores
+new file mode 100644
+index 0000000..dae3b5d
+--- /dev/null
++++ b/pengine/test10/cloned_start_two.scores
+@@ -0,0 +1,77 @@
++Allocation scores:
++clone_color: FAKECLONE-clone allocation score on c7auto1: 0
++clone_color: FAKECLONE-clone allocation score on c7auto2: 0
++clone_color: FAKECLONE-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE2-clone allocation score on c7auto4: 0
++clone_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto3: 1
++clone_color: FAKECLONE2:0 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto4: 1
++clone_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:2 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto4: 0
++clone_color: FAKECLONE:0 allocation score on c7auto1: 0
++clone_color: FAKECLONE:0 allocation score on c7auto2: 0
++clone_color: FAKECLONE:0 allocation score on c7auto3: 0
++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:1 allocation score on c7auto1: 0
++clone_color: FAKECLONE:1 allocation score on c7auto2: 0
++clone_color: FAKECLONE:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:2 allocation score on c7auto1: 0
++clone_color: FAKECLONE:2 allocation score on c7auto2: 0
++clone_color: FAKECLONE:2 allocation score on c7auto3: 0
++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:3 allocation score on c7auto1: 0
++clone_color: FAKECLONE:3 allocation score on c7auto2: 0
++clone_color: FAKECLONE:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto4: 1
++native_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto1: 0
++native_color: FAKECLONE:0 allocation score on c7auto2: 0
++native_color: FAKECLONE:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto1: 0
++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: shooter allocation score on c7auto1: 0
++native_color: shooter allocation score on c7auto2: 0
++native_color: shooter allocation score on c7auto3: 0
++native_color: shooter allocation score on c7auto4: 0
+diff --git a/pengine/test10/cloned_start_two.summary b/pengine/test10/cloned_start_two.summary
+new file mode 100644
+index 0000000..bea4609
+--- /dev/null
++++ b/pengine/test10/cloned_start_two.summary
+@@ -0,0 +1,42 @@
++
++Current cluster status:
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto2 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Stopped: [ c7auto1 c7auto2 c7auto3 c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Started: [ c7auto3 c7auto4 ]
++ Stopped: [ c7auto1 c7auto2 ]
++
++Transition Summary:
++ * Start FAKECLONE:0 (c7auto2)
++ * Start FAKECLONE:1 (c7auto1)
++ * Stop FAKECLONE2:0 (c7auto3)
++
++Executing cluster transition:
++ * Pseudo action: FAKECLONE-clone_start_0
++ * Pseudo action: FAKECLONE2-clone_stop_0
++ * Resource action: FAKECLONE start on c7auto2
++ * Resource action: FAKECLONE start on c7auto1
++ * Pseudo action: FAKECLONE-clone_running_0
++ * Resource action: FAKECLONE2 stop on c7auto3
++ * Pseudo action: FAKECLONE2-clone_stopped_0
++ * Pseudo action: all_stopped
++ * Pseudo action: clone-one-or-more:order-FAKECLONE-clone-FAKECLONE2-clone-mandatory
++ * Resource action: FAKECLONE monitor=10000 on c7auto2
++ * Resource action: FAKECLONE monitor=10000 on c7auto1
++
++Revised cluster status:
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto2 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Started: [ c7auto1 c7auto2 ]
++ Stopped: [ c7auto3 c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Started: [ c7auto4 ]
++ Stopped: [ c7auto1 c7auto2 c7auto3 ]
++
+diff --git a/pengine/test10/cloned_start_two.xml b/pengine/test10/cloned_start_two.xml
+new file mode 100644
+index 0000000..be78317
+--- /dev/null
++++ b/pengine/test10/cloned_start_two.xml
+@@ -0,0 +1,152 @@
++ [CIB XML lost in extraction]
+diff --git a/pengine/test10/cloned_stop_one.dot b/pengine/test10/cloned_stop_one.dot
+new file mode 100644
+index 0000000..d181135
+--- /dev/null
++++ b/pengine/test10/cloned_stop_one.dot
+@@ -0,0 +1,26 @@
++ digraph "g" {
++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold]
++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 <none>" [ style = dashed]
++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_stop_0" -> "FAKECLONE-clone_stopped_0" [ style = bold]
++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto3" [ style = bold]
++"FAKECLONE-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_stopped_0" -> "FAKECLONE-clone_start_0" [ style = bold]
++"FAKECLONE-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2_stop_0 c7auto3" [ style = bold]
++"FAKECLONE2-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_stopped_0" -> "FAKECLONE-clone_stop_0" [ style = bold]
++"FAKECLONE2-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2_stop_0 c7auto3" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" -> "all_stopped" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE_start_0 <none>" -> "FAKECLONE-clone_running_0" [ style = dashed]
++"FAKECLONE_start_0 <none>" [ style=dashed color="red" fontcolor="black"]
++"FAKECLONE_stop_0 c7auto3" -> "FAKECLONE-clone_stopped_0" [ style = bold]
++"FAKECLONE_stop_0 c7auto3" -> "FAKECLONE_start_0 <none>" [ style = dashed]
++"FAKECLONE_stop_0 c7auto3" -> "all_stopped" [ style = bold]
++"FAKECLONE_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"]
++"all_stopped" [ style=bold color="green" fontcolor="orange"]
++}
+diff --git a/pengine/test10/cloned_stop_one.exp b/pengine/test10/cloned_stop_one.exp
+new file mode 100644
+index 0000000..9613d6f
+--- /dev/null
++++ b/pengine/test10/cloned_stop_one.exp
+@@ -0,0 +1,117 @@
++ [transition graph XML lost in extraction]
+diff --git a/pengine/test10/cloned_stop_one.scores b/pengine/test10/cloned_stop_one.scores
+new file mode 100644
+index 0000000..6d66638
+--- /dev/null
++++ b/pengine/test10/cloned_stop_one.scores
+@@ -0,0 +1,77 @@
++Allocation scores:
++clone_color: FAKECLONE-clone allocation score on c7auto1: 0
++clone_color: FAKECLONE-clone allocation score on c7auto2: 0
++clone_color: FAKECLONE-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE2-clone allocation score on c7auto4: 0
++clone_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto3: 1
++clone_color: FAKECLONE2:0 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto4: 1
++clone_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:2 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto4: 0
++clone_color: FAKECLONE:0 allocation score on c7auto1: 1
++clone_color: FAKECLONE:0 allocation score on c7auto2: 0
++clone_color: FAKECLONE:0 allocation score on c7auto3: 0
++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:1 allocation score on c7auto1: 0
++clone_color: FAKECLONE:1 allocation score on c7auto2: 1
++clone_color: FAKECLONE:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:2 allocation score on c7auto1: 0
++clone_color: FAKECLONE:2 allocation score on c7auto2: 0
++clone_color: FAKECLONE:2 allocation score on c7auto3: 1
++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:3 allocation score on c7auto1: 0
++clone_color: FAKECLONE:3 allocation score on c7auto2: 0
++clone_color: FAKECLONE:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto4: 1
++native_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto1: 1
++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto1: 0
++native_color: FAKECLONE:1 allocation score on c7auto2: 1
++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: shooter allocation score on c7auto1: 0
++native_color: shooter allocation score on c7auto2: 0
++native_color: shooter allocation score on c7auto3: 0
++native_color: shooter allocation score on c7auto4: 0
+diff --git a/pengine/test10/cloned_stop_one.summary b/pengine/test10/cloned_stop_one.summary
+new file mode 100644
+index 0000000..1a952a2
+--- /dev/null
++++ b/pengine/test10/cloned_stop_one.summary
+@@ -0,0 +1,40 @@
++
++Current cluster status:
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto2 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Started: [ c7auto1 c7auto2 c7auto3 ]
++ Stopped: [ c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Started: [ c7auto3 c7auto4 ]
++ Stopped: [ c7auto1 c7auto2 ]
++
++Transition Summary:
++ * Stop FAKECLONE:2 (c7auto3)
++ * Stop FAKECLONE2:0 (c7auto3)
++
++Executing cluster transition:
++ * Pseudo action: FAKECLONE2-clone_stop_0
++ * Resource action: FAKECLONE2 stop on c7auto3
++ * Pseudo action: FAKECLONE2-clone_stopped_0
++ * Pseudo action: FAKECLONE-clone_stop_0
++ * Resource action: FAKECLONE stop on c7auto3
++ * Pseudo action: FAKECLONE-clone_stopped_0
++ * Pseudo action: FAKECLONE-clone_start_0
++ * Pseudo action: all_stopped
++ * Pseudo action: FAKECLONE-clone_running_0
++
++Revised cluster status:
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto2 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Started: [ c7auto1 c7auto2 ]
++ Stopped: [ c7auto3 c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Started: [ c7auto4 ]
++ Stopped: [ c7auto1 c7auto2 c7auto3 ]
++
+diff --git a/pengine/test10/cloned_stop_one.xml b/pengine/test10/cloned_stop_one.xml
+new file mode 100644
+index 0000000..2e2fdfd
+--- /dev/null
++++ b/pengine/test10/cloned_stop_one.xml
+@@ -0,0 +1,153 @@
++ [CIB XML lost in extraction]
+diff --git a/pengine/test10/cloned_stop_two.dot b/pengine/test10/cloned_stop_two.dot
+new file mode 100644
+index 0000000..2c7fd3d
+--- /dev/null
++++ b/pengine/test10/cloned_stop_two.dot
+@@ -0,0 +1,45 @@
++ digraph "g" {
++"FAKECLONE-clone_running_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_start_0" -> "FAKECLONE-clone_running_0" [ style = bold]
++"FAKECLONE-clone_start_0" -> "FAKECLONE_start_0 <none>" [ style = dashed]
++"FAKECLONE-clone_start_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_stop_0" -> "FAKECLONE-clone_stopped_0" [ style = bold]
++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto2" [ style = bold]
++"FAKECLONE-clone_stop_0" -> "FAKECLONE_stop_0 c7auto3" [ style = bold]
++"FAKECLONE-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE-clone_stopped_0" -> "FAKECLONE-clone_start_0" [ style = bold]
++"FAKECLONE-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_running_0" [ style=dashed color="red" fontcolor="orange"]
++"FAKECLONE2-clone_start_0" -> "FAKECLONE2-clone_running_0" [ style = dashed]
++"FAKECLONE2-clone_start_0" -> "FAKECLONE2_start_0 c7auto4" [ style = dashed]
++"FAKECLONE2-clone_start_0" [ style=dashed color="red" fontcolor="orange"]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2_stop_0 c7auto3" [ style = bold]
++"FAKECLONE2-clone_stop_0" -> "FAKECLONE2_stop_0 c7auto4" [ style = bold]
++"FAKECLONE2-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2-clone_stopped_0" -> "FAKECLONE-clone_stop_0" [ style = bold]
++"FAKECLONE2-clone_stopped_0" -> "FAKECLONE2-clone_start_0" [ style = dashed]
++"FAKECLONE2-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"FAKECLONE2_monitor_10000 c7auto4" [ style=dashed color="red" fontcolor="black"]
++"FAKECLONE2_start_0 c7auto4" -> "FAKECLONE2-clone_running_0" [ style = dashed]
++"FAKECLONE2_start_0 c7auto4" -> "FAKECLONE2_monitor_10000 c7auto4" [ style = dashed]
++"FAKECLONE2_start_0 c7auto4" [ style=dashed color="red" fontcolor="black"]
++"FAKECLONE2_stop_0 c7auto3" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" -> "all_stopped" [ style = bold]
++"FAKECLONE2_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE2_stop_0 c7auto4" -> "FAKECLONE2-clone_stopped_0" [ style = bold]
++"FAKECLONE2_stop_0 c7auto4" -> "FAKECLONE2_start_0 c7auto4" [ style = dashed]
++"FAKECLONE2_stop_0 c7auto4" -> "all_stopped" [ style = bold]
++"FAKECLONE2_stop_0 c7auto4" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE_start_0 <none>" -> "FAKECLONE-clone_running_0" [ style = dashed]
++"FAKECLONE_start_0 <none>" [ style=dashed color="red" fontcolor="black"]
++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE-clone_stopped_0" [ style = bold]
++"FAKECLONE_stop_0 c7auto2" -> "FAKECLONE_start_0 <none>" [ style = dashed]
++"FAKECLONE_stop_0 c7auto2" -> "all_stopped" [ style = bold]
++"FAKECLONE_stop_0 c7auto2" [ style=bold color="green" fontcolor="black"]
++"FAKECLONE_stop_0 c7auto3" -> "FAKECLONE-clone_stopped_0" [ style = bold]
++"FAKECLONE_stop_0 c7auto3" -> "FAKECLONE_start_0 <none>" [ style = dashed]
++"FAKECLONE_stop_0 c7auto3" -> "all_stopped" [ style = bold]
++"FAKECLONE_stop_0 c7auto3" [ style=bold color="green" fontcolor="black"]
++"all_stopped" [ style=bold color="green" fontcolor="orange"]
++}
+diff --git a/pengine/test10/cloned_stop_two.exp b/pengine/test10/cloned_stop_two.exp
+new file mode 100644
+index 0000000..4aa0e58
+--- /dev/null
++++ b/pengine/test10/cloned_stop_two.exp
+@@ -0,0 +1,155 @@
++ [transition graph XML lost in extraction]
+diff --git a/pengine/test10/cloned_stop_two.scores b/pengine/test10/cloned_stop_two.scores
+new file mode 100644
+index 0000000..f6e9779
+--- /dev/null
++++ b/pengine/test10/cloned_stop_two.scores
+@@ -0,0 +1,77 @@
++Allocation scores:
++clone_color: FAKECLONE-clone allocation score on c7auto1: 0
++clone_color: FAKECLONE-clone allocation score on c7auto2: 0
++clone_color: FAKECLONE-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE-clone allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2-clone allocation score on c7auto3: 0
++clone_color: FAKECLONE2-clone allocation score on c7auto4: 0
++clone_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:0 allocation score on c7auto3: 1
++clone_color: FAKECLONE2:0 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:1 allocation score on c7auto4: 1
++clone_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:2 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:2 allocation score on c7auto4: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++clone_color: FAKECLONE2:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE2:3 allocation score on c7auto4: 0
++clone_color: FAKECLONE:0 allocation score on c7auto1: 1
++clone_color: FAKECLONE:0 allocation score on c7auto2: 0
++clone_color: FAKECLONE:0 allocation score on c7auto3: 0
++clone_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:1 allocation score on c7auto1: 0
++clone_color: FAKECLONE:1 allocation score on c7auto2: 1
++clone_color: FAKECLONE:1 allocation score on c7auto3: 0
++clone_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:2 allocation score on c7auto1: 0
++clone_color: FAKECLONE:2 allocation score on c7auto2: 0
++clone_color: FAKECLONE:2 allocation score on c7auto3: 1
++clone_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++clone_color: FAKECLONE:3 allocation score on c7auto1: 0
++clone_color: FAKECLONE:3 allocation score on c7auto2: 0
++clone_color: FAKECLONE:3 allocation score on c7auto3: 0
++clone_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:1 allocation score on c7auto4: 1
++native_color: FAKECLONE2:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE2:3 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto1: 1
++native_color: FAKECLONE:0 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:0 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:1 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:2 allocation score on c7auto4: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto1: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto2: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto3: -INFINITY
++native_color: FAKECLONE:3 allocation score on c7auto4: -INFINITY
++native_color: shooter allocation score on c7auto1: 0
++native_color: shooter allocation score on c7auto2: 0
++native_color: shooter allocation score on c7auto3: 0
++native_color: shooter allocation score on c7auto4: 0
+diff --git a/pengine/test10/cloned_stop_two.summary b/pengine/test10/cloned_stop_two.summary
+new file mode 100644
+index 0000000..531295f
+--- /dev/null
++++ b/pengine/test10/cloned_stop_two.summary
+@@ -0,0 +1,45 @@
++
++Current cluster status:
++Node c7auto2 (2): standby
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Started: [ c7auto1 c7auto2 c7auto3 ]
++ Stopped: [ c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Started: [ c7auto3 c7auto4 ]
++ Stopped: [ c7auto1 c7auto2 ]
++
++Transition Summary:
++ * Stop FAKECLONE:1 (c7auto2)
++ * Stop FAKECLONE:2 (c7auto3)
++ * Stop FAKECLONE2:0 (c7auto3)
++ * Stop FAKECLONE2:1 (Started c7auto4)
++
++Executing cluster transition:
++ * Pseudo action: FAKECLONE2-clone_stop_0
++ * Resource action: FAKECLONE2 stop on c7auto3
++ * Resource action: FAKECLONE2 stop on c7auto4
++ * Pseudo action: FAKECLONE2-clone_stopped_0
++ * Pseudo action: FAKECLONE-clone_stop_0
++ * Resource action: FAKECLONE stop on c7auto2
++ * Resource action: FAKECLONE stop on c7auto3
++ * Pseudo action: FAKECLONE-clone_stopped_0
++ * Pseudo action: FAKECLONE-clone_start_0
++ * Pseudo action: all_stopped
++ * Pseudo action: FAKECLONE-clone_running_0
++
++Revised cluster status:
++Node c7auto2 (2): standby
++Node c7auto3 (3): standby
++Online: [ c7auto1 c7auto4 ]
++
++ shooter (stonith:fence_phd_kvm): Started c7auto1
++ Clone Set: FAKECLONE-clone [FAKECLONE]
++ Started: [ c7auto1 ]
++ Stopped: [ c7auto2 c7auto3 c7auto4 ]
++ Clone Set: FAKECLONE2-clone [FAKECLONE2]
++ Stopped: [ c7auto1 c7auto2 c7auto3 c7auto4 ]
++
+diff --git a/pengine/test10/cloned_stop_two.xml b/pengine/test10/cloned_stop_two.xml
+new file mode 100644
+index 0000000..220dfc2
+--- /dev/null
++++ b/pengine/test10/cloned_stop_two.xml
+@@ -0,0 +1,157 @@
++ [CIB XML lost in extraction]
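The cloned_start_*/cloned_stop_* fixtures above order one clone after another while only requiring a single active instance of the first clone: the clone-one-or-more:order-FAKECLONE-clone-FAKECLONE2-clone-mandatory pseudo-action in the .dot graphs is what the scheduler creates for that kind of ordering. Their CIB content was likewise lost; assuming the ordering is expressed with a resource set carrying require-all="false" (the construct that produces clone-one-or-more pseudo-actions; the set ids here are hypothetical), the constraint would look roughly like:

    <rsc_order id="order-FAKECLONE-clone-FAKECLONE2-clone-mandatory">
      <!-- require-all="false": FAKECLONE2-clone may start once any one FAKECLONE instance is up -->
      <resource_set action="start" id="order-set-first" require-all="false">
        <resource_ref id="FAKECLONE-clone"/>
      </resource_set>
      <resource_set action="start" id="order-set-then">
        <resource_ref id="FAKECLONE2-clone"/>
      </resource_set>
    </rsc_order>

The stop direction of the same dependency is visible in the graphs above: FAKECLONE2-clone is always stopped before FAKECLONE-clone ("FAKECLONE2-clone_stopped_0" -> "FAKECLONE-clone_stop_0").
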
sles-3" -> "migrator_stop_0 sles-3" [ style = bold] + "stonith 'reboot' sles-3" -> "ocf_msdummy:2_stop_0 sles-3" [ style = bold] +diff --git a/pengine/test10/stonith-1.exp b/pengine/test10/stonith-1.exp +index 4d58afa..40b22cb 100644 +--- a/pengine/test10/stonith-1.exp ++++ b/pengine/test10/stonith-1.exp +@@ -210,9 +210,6 @@ + + + +- +- +- + + + +@@ -236,11 +233,7 @@ + + + +- +- +- +- +- ++ + + + +diff --git a/pengine/test10/stonith-1.summary b/pengine/test10/stonith-1.summary +index ef904fe..e99bb5e 100644 +--- a/pengine/test10/stonith-1.summary ++++ b/pengine/test10/stonith-1.summary +@@ -45,20 +45,22 @@ Executing cluster transition: + * Resource action: lsb_dummy monitor=5000 on sles-2 + * Resource action: rsc_sles-2 monitor=5000 on sles-2 + * Resource action: rsc_sles-4 monitor=5000 on sles-4 ++ * Pseudo action: DoFencing_stop_0 + * Fencing sles-3 (reboot) + * Pseudo action: stonith_complete + * Resource action: r192.168.100.183 start on sles-1 + * Pseudo action: migrator_stop_0 + * Pseudo action: rsc_sles-3_stop_0 +- * Pseudo action: DoFencing_stop_0 ++ * Pseudo action: child_DoFencing:2_stop_0 ++ * Pseudo action: DoFencing_stopped_0 ++ * Pseudo action: DoFencing_start_0 + * Pseudo action: master_rsc_1_stop_0 + * Pseudo action: group-1_running_0 + * Resource action: r192.168.100.183 monitor=5000 on sles-1 + * Resource action: migrator start on sles-4 + * Resource action: rsc_sles-3 start on sles-4 +- * Pseudo action: child_DoFencing:2_stop_0 +- * Pseudo action: DoFencing_stopped_0 +- * Pseudo action: DoFencing_start_0 ++ * Resource action: child_DoFencing:2 start on sles-4 ++ * Pseudo action: DoFencing_running_0 + * Pseudo action: ocf_msdummy:2_stop_0 + * Pseudo action: ocf_msdummy:5_stop_0 + * Pseudo action: master_rsc_1_stopped_0 +@@ -66,8 +68,7 @@ Executing cluster transition: + * Pseudo action: all_stopped + * Resource action: migrator monitor=10000 on sles-4 + * Resource action: rsc_sles-3 monitor=5000 on sles-4 +- * Resource action: child_DoFencing:2 start on sles-4 +- * Pseudo action: DoFencing_running_0 ++ * Resource action: child_DoFencing:2 monitor=60000 on sles-4 + * Resource action: ocf_msdummy:0 start on sles-4 + * Resource action: ocf_msdummy:1 start on sles-1 + * Resource action: ocf_msdummy:2 start on sles-2 +@@ -75,7 +76,6 @@ Executing cluster transition: + * Resource action: ocf_msdummy:4 start on sles-1 + * Resource action: ocf_msdummy:5 start on sles-2 + * Pseudo action: master_rsc_1_running_0 +- * Resource action: child_DoFencing:2 monitor=60000 on sles-4 + * Resource action: ocf_msdummy:0 monitor=5000 on sles-4 + * Resource action: ocf_msdummy:1 monitor=5000 on sles-1 + * Resource action: ocf_msdummy:2 monitor=5000 on sles-2 +diff --git a/pengine/test10/ticket-master-21.dot b/pengine/test10/ticket-master-21.dot +index 60386a8..3f94948 100644 +--- a/pengine/test10/ticket-master-21.dot ++++ b/pengine/test10/ticket-master-21.dot +@@ -23,7 +23,6 @@ digraph "g" { + "stonith 'reboot' node1" -> "ms1_stop_0" [ style = bold] + "stonith 'reboot' node1" -> "rsc1:1_demote_0 node1" [ style = bold] + "stonith 'reboot' node1" -> "rsc1:1_stop_0 node1" [ style = bold] +-"stonith 'reboot' node1" -> "rsc_stonith_stop_0 node1" [ style = bold] + "stonith 'reboot' node1" -> "stonith_complete" [ style = bold] + "stonith 'reboot' node1" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] +diff --git a/pengine/test10/ticket-master-21.exp b/pengine/test10/ticket-master-21.exp +index cc8df2f..c32bac5 100644 +--- 
a/pengine/test10/ticket-master-21.exp ++++ b/pengine/test10/ticket-master-21.exp +@@ -18,11 +18,7 @@ + + + +- +- +- +- +- ++ + + + +diff --git a/pengine/test10/ticket-master-21.summary b/pengine/test10/ticket-master-21.summary +index 64a9cbe..b228696 100644 +--- a/pengine/test10/ticket-master-21.summary ++++ b/pengine/test10/ticket-master-21.summary +@@ -12,14 +12,14 @@ Transition Summary: + * Demote rsc1:0 (Master -> Stopped node1) + + Executing cluster transition: ++ * Pseudo action: rsc_stonith_stop_0 + * Pseudo action: ms1_demote_0 + * Fencing node1 (reboot) + * Pseudo action: stonith_complete +- * Pseudo action: rsc_stonith_stop_0 ++ * Resource action: rsc_stonith start on node2 + * Pseudo action: rsc1:1_demote_0 + * Pseudo action: ms1_demoted_0 + * Pseudo action: ms1_stop_0 +- * Resource action: rsc_stonith start on node2 + * Pseudo action: rsc1:1_stop_0 + * Pseudo action: ms1_stopped_0 + * Pseudo action: all_stopped +diff --git a/pengine/test10/ticket-master-9.dot b/pengine/test10/ticket-master-9.dot +index 3a29836..c648feb 100644 +--- a/pengine/test10/ticket-master-9.dot ++++ b/pengine/test10/ticket-master-9.dot +@@ -23,7 +23,6 @@ digraph "g" { + "stonith 'reboot' node1" -> "ms1_stop_0" [ style = bold] + "stonith 'reboot' node1" -> "rsc1:1_demote_0 node1" [ style = bold] + "stonith 'reboot' node1" -> "rsc1:1_stop_0 node1" [ style = bold] +-"stonith 'reboot' node1" -> "rsc_stonith_stop_0 node1" [ style = bold] + "stonith 'reboot' node1" -> "stonith_complete" [ style = bold] + "stonith 'reboot' node1" [ style=bold color="green" fontcolor="black"] + "stonith_complete" -> "all_stopped" [ style = bold] +diff --git a/pengine/test10/ticket-master-9.exp b/pengine/test10/ticket-master-9.exp +index cc8df2f..c32bac5 100644 +--- a/pengine/test10/ticket-master-9.exp ++++ b/pengine/test10/ticket-master-9.exp +@@ -18,11 +18,7 @@ + + + +- +- +- +- +- ++ + + + +diff --git a/pengine/test10/ticket-master-9.summary b/pengine/test10/ticket-master-9.summary +index 64a9cbe..b228696 100644 +--- a/pengine/test10/ticket-master-9.summary ++++ b/pengine/test10/ticket-master-9.summary +@@ -12,14 +12,14 @@ Transition Summary: + * Demote rsc1:0 (Master -> Stopped node1) + + Executing cluster transition: ++ * Pseudo action: rsc_stonith_stop_0 + * Pseudo action: ms1_demote_0 + * Fencing node1 (reboot) + * Pseudo action: stonith_complete +- * Pseudo action: rsc_stonith_stop_0 ++ * Resource action: rsc_stonith start on node2 + * Pseudo action: rsc1:1_demote_0 + * Pseudo action: ms1_demoted_0 + * Pseudo action: ms1_stop_0 +- * Resource action: rsc_stonith start on node2 + * Pseudo action: rsc1:1_stop_0 + * Pseudo action: ms1_stopped_0 + * Pseudo action: all_stopped +diff --git a/pengine/test10/whitebox-imply-stop-on-fence.dot b/pengine/test10/whitebox-imply-stop-on-fence.dot +index 66700b8..b3fd40b 100644 +--- a/pengine/test10/whitebox-imply-stop-on-fence.dot ++++ b/pengine/test10/whitebox-imply-stop-on-fence.dot +@@ -69,7 +69,6 @@ + "stonith 'reboot' kiff-01" -> "clvmd_stop_0 kiff-01" [ style = bold] + "stonith 'reboot' kiff-01" -> "dlm-clone_stop_0" [ style = bold] + "stonith 'reboot' kiff-01" -> "dlm_stop_0 kiff-01" [ style = bold] +-"stonith 'reboot' kiff-01" -> "fence-kiff-02_stop_0 kiff-01" [ style = bold] + "stonith 'reboot' kiff-01" -> "lxc-01_kiff-01_stop_0 kiff-01" [ style = bold] + "stonith 'reboot' kiff-01" -> "lxc-02_kiff-01_stop_0 kiff-01" [ style = bold] + "stonith 'reboot' kiff-01" -> "shared0-clone_stop_0" [ style = bold] +diff --git a/pengine/test10/whitebox-imply-stop-on-fence.exp 
b/pengine/test10/whitebox-imply-stop-on-fence.exp +index d13c25f..4a3e757 100644 +--- a/pengine/test10/whitebox-imply-stop-on-fence.exp ++++ b/pengine/test10/whitebox-imply-stop-on-fence.exp +@@ -31,11 +31,7 @@ + + + +- +- +- +- +- ++ + + + +diff --git a/pengine/test10/whitebox-imply-stop-on-fence.summary b/pengine/test10/whitebox-imply-stop-on-fence.summary +index 3bb1572..3ee9570 100644 +--- a/pengine/test10/whitebox-imply-stop-on-fence.summary ++++ b/pengine/test10/whitebox-imply-stop-on-fence.summary +@@ -36,16 +36,16 @@ Transition Summary: + * Move lxc-02_kiff-01 (Started kiff-01 -> kiff-02) + + Executing cluster transition: ++ * Pseudo action: fence-kiff-02_stop_0 + * Fencing kiff-01 (reboot) + * Pseudo action: stonith_complete +- * Pseudo action: fence-kiff-02_stop_0 ++ * Resource action: fence-kiff-02 start on kiff-02 + * Pseudo action: vm-fs_stop_0 + * Pseudo action: lxc-01_kiff-01_stop_0 + * Pseudo action: lxc-02_kiff-01_stop_0 +- * Resource action: fence-kiff-02 start on kiff-02 ++ * Resource action: fence-kiff-02 monitor=60000 on kiff-02 + * Pseudo action: R-lxc-01_kiff-01_stop_0 + * Pseudo action: R-lxc-02_kiff-01_stop_0 +- * Resource action: fence-kiff-02 monitor=60000 on kiff-02 + * Pseudo action: shared0-clone_stop_0 + * Resource action: R-lxc-01_kiff-01 start on kiff-02 + * Resource action: R-lxc-02_kiff-01 start on kiff-02 diff --git a/pacemaker-systemd.patch b/pacemaker-systemd.patch deleted file mode 100644 index 6398fc0..0000000 --- a/pacemaker-systemd.patch +++ /dev/null @@ -1,665 +0,0 @@ -diff --git a/include/crm/common/logging.h b/include/crm/common/logging.h -index 5b8e47f..642fa92 100644 ---- a/include/crm/common/logging.h -+++ b/include/crm/common/logging.h -@@ -157,7 +157,7 @@ unsigned int get_crm_log_level(void); - } \ - } while(0) - --# define do_crm_log_always(level, fmt, args...) qb_log(level, "%s: " fmt, __FUNCTION__ , ##args) -+# define do_crm_log_always(level, fmt, args...) qb_log(level, fmt , ##args) - - # define crm_perror(level, fmt, args...) 
- const char *err = strerror(errno); \
-diff --git a/include/crm/services.h b/include/crm/services.h
-index 5310709..1a02656 100644
---- a/include/crm/services.h
-+++ b/include/crm/services.h
-@@ -262,8 +262,9 @@ enum nagios_exitcode {
- */
- svc_action_t *services_action_create_generic(const char *exec, const char *args[]);
-
-- void
-- services_action_free(svc_action_t * op);
-+ void services_action_cleanup(svc_action_t * op);
-+
-+ void services_action_free(svc_action_t * op);
-
- gboolean services_action_sync(svc_action_t * op);
-
-diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c
-index 15b354b..ea37c4b 100644
---- a/lib/cib/cib_utils.c
-+++ b/lib/cib/cib_utils.c
-@@ -112,8 +112,13 @@ get_cib_copy(cib_t * cib)
- {
- xmlNode *xml_cib;
- int options = cib_scope_local | cib_sync_call;
-- int rc = cib->cmds->query(cib, NULL, &xml_cib, options);
-+ int rc = pcmk_ok;
-+
-+ if (cib->state == cib_disconnected) {
-+ return NULL;
-+ }
-
-+ rc = cib->cmds->query(cib, NULL, &xml_cib, options);
- if (rc == -EACCES) {
- return NULL;
-
-diff --git a/lib/services/dbus.c b/lib/services/dbus.c
-index c0153b5..f44b590 100644
---- a/lib/services/dbus.c
-+++ b/lib/services/dbus.c
-@@ -145,7 +145,7 @@ DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, D
- return reply;
- }
-
--bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
-+DBusPendingCall* pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
- void(*done)(DBusPendingCall *pending, void *user_data), void *user_data)
- {
- DBusError error;
-@@ -161,27 +161,30 @@ bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
- // send message and get a handle for a reply
- if (!dbus_connection_send_with_reply (connection, msg, &pending, -1/* aka. DBUS_TIMEOUT_USE_DEFAULT */)) { // -1 is default timeout
- crm_err("Send with reply failed for %s", method);
-- return FALSE;
-+ return NULL;
-
- } else if (pending == NULL) {
- crm_err("No pending call found for %s", method);
-- return FALSE;
--
-+ return NULL;
- }
-
-+ crm_trace("DBus %s call sent", method);
- if (dbus_pending_call_get_completed(pending)) {
-- crm_info("DBus %s call completed too soon");
--#if 1
-+ crm_info("DBus %s call completed too soon", method);
-+ if(done) {
-+#if 0
- /* This sounds like a good idea, but allegedly it breaks things */
- done(pending, user_data);
-+ pending = NULL;
- #else
- CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL));
- #endif
-+ }
-
-- } else {
-+ } else if(done) {
- CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL));
- }
-- return TRUE;
-+ return pending;
- }
-
- bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line)
-@@ -286,6 +289,11 @@ pcmk_dbus_lookup_result(DBusMessage *reply, struct db_getall_data *data)
- dbus_message_iter_next (&dict);
- }
-
-+ if(data->name && data->callback) {
-+ crm_trace("No value for property %s[%s]", data->object, data->name);
-+ data->callback(data->name, NULL, data->userdata);
-+ }
-+
- cleanup:
- free(data->target);
- free(data->object);
-@@ -306,6 +314,9 @@ pcmk_dbus_lookup_cb(DBusPendingCall *pending, void *user_data)
-
- pcmk_dbus_lookup_result(reply, user_data);
-
-+ if(pending) {
-+ dbus_pending_call_unref(pending);
-+ }
- if(reply) {
- dbus_message_unref(reply);
- }
-@@ -375,20 +386,59 @@ static void pcmk_dbus_connection_dispatch(DBusConnection *connection, DBusDispat
- crm_trace("status %d for %p", new_status, data);
- if (new_status == DBUS_DISPATCH_DATA_REMAINS){
- dbus_connection_dispatch(connection);
-+
-+ while (dbus_connection_get_dispatch_status(connection) == DBUS_DISPATCH_DATA_REMAINS) {
-+ dbus_connection_dispatch(connection);
-+ }
- }
- }
-
-+/* Copied from dbus-watch.c */
-+
-+static const char*
-+dbus_watch_flags_to_string (int flags)
-+{
-+ const char *watch_type;
-+
-+ if ((flags & DBUS_WATCH_READABLE) &&
-+ (flags & DBUS_WATCH_WRITABLE))
-+ watch_type = "readwrite";
-+ else if (flags & DBUS_WATCH_READABLE)
-+ watch_type = "read";
-+ else if (flags & DBUS_WATCH_WRITABLE)
-+ watch_type = "write";
-+ else
-+ watch_type = "not read or write";
-+ return watch_type;
-+}
-+
- static int
- pcmk_dbus_watch_dispatch(gpointer userdata)
- {
-+ bool oom = FALSE;
- DBusWatch *watch = userdata;
- int flags = dbus_watch_get_flags(watch);
-+ bool enabled = dbus_watch_get_enabled (watch);
-+ mainloop_io_t *client = dbus_watch_get_data(watch);
-
-- crm_trace("Dispatching %p with flags %d", watch, flags);
-- if(flags & DBUS_WATCH_READABLE) {
-- dbus_watch_handle(watch, DBUS_WATCH_READABLE);
-- } else {
-- dbus_watch_handle(watch, DBUS_WATCH_ERROR);
-+ crm_trace("Dispatching client %p: %s", client, dbus_watch_flags_to_string(flags));
-+ if (enabled && is_set(flags, DBUS_WATCH_READABLE)) {
-+ oom = !dbus_watch_handle(watch, flags);
-+
-+ } else if (enabled && is_set(flags, DBUS_WATCH_READABLE)) {
-+ oom = !dbus_watch_handle(watch, flags);
-+
-+ } else if(enabled) {
-+ oom = !dbus_watch_handle(watch, DBUS_WATCH_ERROR);
-+ }
-+
-+ if(flags != dbus_watch_get_flags(watch)) {
-+ flags = dbus_watch_get_flags(watch);
-+ crm_trace("Dispatched client %p: %s (%d)", client, dbus_watch_flags_to_string(flags), flags);
-+ }
-+
-+ if(oom) {
-+ crm_err("DBus encountered OOM while attempting to dispatch %p (%s)", client, dbus_watch_flags_to_string(flags));
- }
- return 0;
- }
-@@ -396,7 +446,8 @@ pcmk_dbus_watch_dispatch(gpointer userdata)
- static void
- pcmk_dbus_watch_destroy(gpointer userdata)
- {
-- crm_trace("Destroyed %p", userdata);
-+ mainloop_io_t *client = dbus_watch_get_data(userdata);
-+ crm_trace("Destroyed %p", client);
- }
-
-
-@@ -412,7 +463,7 @@ pcmk_dbus_watch_add(DBusWatch *watch, void *data){
- mainloop_io_t *client = mainloop_add_fd(
- "dbus", G_PRIORITY_DEFAULT, fd, watch, &pcmk_dbus_cb);
-
-- crm_trace("Added %p with fd=%d", watch, fd);
-+ crm_trace("Added watch %p with fd=%d to client %p", watch, fd, client);
- dbus_watch_set_data(watch, client, NULL);
- return TRUE;
- }
-@@ -429,14 +480,14 @@ static void
- pcmk_dbus_watch_remove(DBusWatch *watch, void *data){
- mainloop_io_t *client = dbus_watch_get_data(watch);
-
-- crm_trace("Removed %p", watch);
-+ crm_trace("Removed client %p (%p)", client, data);
- mainloop_del_fd(client);
- }
-
- static gboolean
- pcmk_dbus_timeout_dispatch(gpointer data)
- {
-- crm_trace("Timeout for %p");
-+ crm_info("Timeout %p expired", data);
- dbus_timeout_handle(data);
- return FALSE;
- }
-@@ -445,6 +496,8 @@ static dbus_bool_t
- pcmk_dbus_timeout_add(DBusTimeout *timeout, void *data){
- guint id = g_timeout_add(dbus_timeout_get_interval(timeout), pcmk_dbus_timeout_dispatch, timeout);
-
-+ crm_trace("Adding timeout %p (%ld)", timeout, dbus_timeout_get_interval(timeout));
-+
- if(id) {
- dbus_timeout_set_data(timeout, GUINT_TO_POINTER(id), NULL);
- }
-@@ -456,6 +509,8 @@ pcmk_dbus_timeout_remove(DBusTimeout *timeout, void *data){
- void *vid = dbus_timeout_get_data(timeout);
- guint id = GPOINTER_TO_UINT(vid);
-
-+ crm_trace("Removing timeout %p (%p)", timeout, data);
-+
- if(id) {
- g_source_remove(id);
- dbus_timeout_set_data(timeout, 0, NULL);
-@@ -464,7 +519,11 @@ pcmk_dbus_timeout_remove(DBusTimeout *timeout, void *data){
-
- static void
- pcmk_dbus_timeout_toggle(DBusTimeout *timeout, void *data){
-- if(dbus_timeout_get_enabled(timeout)) {
-+ bool enabled = dbus_timeout_get_enabled(timeout);
-+
-+ crm_trace("Toggling timeout for %p to %s", timeout, enabled?"off":"on");
-+
-+ if(enabled) {
- pcmk_dbus_timeout_add(timeout, data);
- } else {
- pcmk_dbus_timeout_remove(timeout, data);
-diff --git a/lib/services/pcmk-dbus.h b/lib/services/pcmk-dbus.h
-index ed80c5f..468020e 100644
---- a/lib/services/pcmk-dbus.h
-+++ b/lib/services/pcmk-dbus.h
-@@ -2,7 +2,7 @@ DBusConnection *pcmk_dbus_connect(void);
- void pcmk_dbus_connection_setup_with_select(DBusConnection *c);
- void pcmk_dbus_disconnect(DBusConnection *connection);
-
--bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
-+DBusPendingCall *pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
- void(*done)(DBusPendingCall *pending, void *user_data), void *user_data);
- DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, DBusError *error);
- bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line);
-diff --git a/lib/services/services.c b/lib/services/services.c
-index 9936c72..582fbe1 100644
---- a/lib/services/services.c
-+++ b/lib/services/services.c
-@@ -303,18 +303,24 @@ services_action_create_generic(const char *exec, const char *args[])
- }
-
- void
--services_action_free(svc_action_t * op)
-+services_action_cleanup(svc_action_t * op)
- {
-- unsigned int i;
--
-- if (op == NULL) {
-- return;
-+ if(op->opaque->timerid != 0) {
-+ crm_trace("Removing timer for call %s to %s", op->action, op->rsc);
-+ g_source_remove(op->opaque->timerid);
-+ op->opaque->timerid = 0;
- }
-
-- if (op->opaque->repeat_timer) {
-- g_source_remove(op->opaque->repeat_timer);
-- op->opaque->repeat_timer = 0;
-+ if(op->opaque->pending) {
-+ crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc);
-+ if(dbus_pending_call_get_completed(op->opaque->pending)) {
-+ crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc);
-+ }
-+ dbus_pending_call_cancel(op->opaque->pending);
-+ dbus_pending_call_unref(op->opaque->pending);
-+ op->opaque->pending = NULL;
- }
-+
- if (op->opaque->stderr_gsource) {
- mainloop_del_fd(op->opaque->stderr_gsource);
- op->opaque->stderr_gsource = NULL;
-@@ -324,6 +330,23 @@ services_action_free(svc_action_t * op)
- mainloop_del_fd(op->opaque->stdout_gsource);
- op->opaque->stdout_gsource = NULL;
- }
-+}
-+
-+void
-+services_action_free(svc_action_t * op)
-+{
-+ unsigned int i;
-+
-+ if (op == NULL) {
-+ return;
-+ }
-+
-+ services_action_cleanup(op);
-+
-+ if (op->opaque->repeat_timer) {
-+ g_source_remove(op->opaque->repeat_timer);
-+ op->opaque->repeat_timer = 0;
-+ }
-
- free(op->id);
- free(op->opaque->exec);
-diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
-index 2279e4e..8d6f450 100644
---- a/lib/services/services_linux.c
-+++ b/lib/services/services_linux.c
-@@ -264,6 +264,8 @@ operation_finalize(svc_action_t * op)
- services_action_free(op);
- return TRUE;
- }
-+
-+ services_action_cleanup(op);
- return FALSE;
- }
-
-diff --git a/lib/services/services_private.h b/lib/services/services_private.h
-index dd759e3..bcf882c 100644
---- a/lib/services/services_private.h
-+++ b/lib/services/services_private.h
-@@ -19,6 +19,10 @@
- #ifndef __MH_SERVICES_PRIVATE_H__
- # define __MH_SERVICES_PRIVATE_H__
-
-+#if SUPPORT_DBUS
-+# include <dbus/dbus.h>
-+#endif
-+
- struct svc_action_private_s {
- char *exec;
- char *args[255];
-@@ -31,6 +35,10 @@ struct svc_action_private_s {
-
- int stdout_fd;
- mainloop_io_t *stdout_gsource;
-+#if SUPPORT_DBUS
-+ DBusPendingCall* pending;
-+ unsigned timerid;
-+#endif
- };
-
- GList *services_os_get_directory_list(const char *root, gboolean files, gboolean executable);
-diff --git a/lib/services/systemd.c b/lib/services/systemd.c
-index 9a7b078..51ade44 100644
---- a/lib/services/systemd.c
-+++ b/lib/services/systemd.c
-@@ -110,20 +110,48 @@ systemd_service_name(const char *name)
- return g_strdup_printf("%s.service", name);
- }
-
--static bool
--systemd_daemon_reload(void)
-+static void
-+systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
- {
-- /* TODO: Make this asynchronous */
-- const char *method = "Reload";
-+ DBusError error;
- DBusMessage *reply = NULL;
-- DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);
-+ unsigned int reload_count = GPOINTER_TO_UINT(user_data);
-
-- CRM_ASSERT(msg != NULL);
-- reply = pcmk_dbus_send_recv(msg, systemd_proxy, NULL);
-- dbus_message_unref(msg);
-+ dbus_error_init(&error);
-+ if(pending) {
-+ reply = dbus_pending_call_steal_reply(pending);
-+ }
-+
-+ if(pcmk_dbus_find_error("Reload", pending, reply, &error)) {
-+ crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);
-+
-+ } else {
-+ crm_trace("Reload %d complete", reload_count);
-+ }
-+
-+ if(pending) {
-+ dbus_pending_call_unref(pending);
-+ }
- if(reply) {
- dbus_message_unref(reply);
- }
-+}
-+
-+static bool
-+systemd_daemon_reload(void)
-+{
-+ static unsigned int reload_count = 0;
-+ const char *method = "Reload";
-+
-+
-+ reload_count++;
-+ if(reload_count % 10 == 0) {
-+ DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);
-+
-+ CRM_ASSERT(msg != NULL);
-+ pcmk_dbus_send(msg, systemd_proxy, systemd_daemon_reload_complete, GUINT_TO_POINTER(reload_count));
-+ dbus_message_unref(msg);
-+ }
- return TRUE;
- }
-
-@@ -155,13 +183,22 @@ static void
- systemd_loadunit_cb(DBusPendingCall *pending, void *user_data)
- {
- DBusMessage *reply = NULL;
-+ svc_action_t * op = user_data;
-
- if(pending) {
- reply = dbus_pending_call_steal_reply(pending);
- }
-
-+ if(op) {
-+ crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action);
-+ } else {
-+ crm_trace("Got result: %p for %p", reply, pending);
-+ }
- systemd_loadunit_result(reply, user_data);
-
-+ if(pending) {
-+ dbus_pending_call_unref(pending);
-+ }
- if(reply) {
- dbus_message_unref(reply);
- }
-@@ -213,6 +250,7 @@ systemd_unit_by_name(const gchar * arg_name, svc_action_t *op)
- }
-
- pcmk_dbus_send(msg, systemd_proxy, systemd_loadunit_cb, op);
-+ dbus_message_unref(msg);
- return NULL;
- }
-
-@@ -421,6 +459,12 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
- reply = dbus_pending_call_steal_reply(pending);
- }
-
-+ if(op) {
-+ crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action);
-+ } else {
-+ crm_trace("Got result: %p for %p", reply, pending);
-+ }
-+ op->opaque->pending = NULL;
- systemd_exec_result(reply, op);
-
- if(pending) {
-@@ -437,10 +481,13 @@ static void
- systemd_unit_check(const char *name, const char *state, void *userdata)
- {
- svc_action_t * op = userdata;
--
-- CRM_ASSERT(state != NULL);
-
-- if (g_strcmp0(state, "active") == 0) {
-+ crm_trace("Resource %s has %s='%s'", op->rsc, name, state);
-+
-+ if(state == NULL) {
-+ op->rc = PCMK_OCF_NOT_RUNNING;
-+
-+ } else if (g_strcmp0(state, "active") == 0) {
- op->rc = PCMK_OCF_OK;
- } else if (g_strcmp0(state, "activating") == 0) {
- op->rc = PCMK_OCF_PENDING;
-@@ -449,6 +496,7 @@ systemd_unit_check(const char *name, const char *state, void *userdata)
- }
-
- if (op->synchronous == FALSE) {
-+ op->opaque->pending = NULL;
- operation_finalize(op);
- }
- }
-@@ -539,28 +587,29 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
- }
-
- if (op->synchronous == FALSE) {
-- return pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);
-+ DBusPendingCall* pending = pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);
-+
-+ dbus_message_unref(msg);
-+ if(pending) {
-+ dbus_pending_call_ref(pending);
-+ op->opaque->pending = pending;
-+ return TRUE;
-+ }
-+ return FALSE;
-
- } else {
- DBusError error;
-
- reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
-+ dbus_message_unref(msg);
- systemd_exec_result(reply, op);
-
- if(reply) {
- dbus_message_unref(reply);
- }
-- if(msg) {
-- dbus_message_unref(msg);
-- }
--
- return FALSE;
- }
-
-- if(msg) {
-- dbus_message_unref(msg);
-- }
--
- cleanup:
- if (op->synchronous == FALSE) {
- operation_finalize(op);
-@@ -570,6 +619,18 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
- return op->rc == PCMK_OCF_OK;
- }
-
-+static gboolean
-+systemd_timeout_callback(gpointer p)
-+{
-+ svc_action_t * op = p;
-+
-+ op->opaque->timerid = 0;
-+ crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc);
-+ operation_finalize(op);
-+
-+ return FALSE;
-+}
-+
- gboolean
- systemd_unit_exec(svc_action_t * op)
- {
-@@ -596,6 +657,7 @@ systemd_unit_exec(svc_action_t * op)
- free(unit);
-
- if (op->synchronous == FALSE) {
-+ op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op);
- return TRUE;
- }
-
-diff --git a/lib/services/upstart.c b/lib/services/upstart.c
-index 4c7211d..01ff817 100644
---- a/lib/services/upstart.c
-+++ b/lib/services/upstart.c
-@@ -513,8 +513,15 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
- CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_BOOLEAN, &arg_wait, DBUS_TYPE_INVALID));
-
- if (op->synchronous == FALSE) {
-+ DBusPendingCall* pending = pcmk_dbus_send(msg, upstart_proxy, upstart_async_dispatch, op);
- free(job);
-- return pcmk_dbus_send(msg, upstart_proxy, upstart_async_dispatch, op);
-+
-+ if(pending) {
-+ dbus_pending_call_ref(pending);
-+ op->opaque->pending = pending;
-+ return TRUE;
-+ }
-+ return FALSE;
- }
-
- dbus_error_init(&error);
-diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in
-index a9a32ef..649c984 100755
---- a/lrmd/regression.py.in
-+++ b/lrmd/regression.py.in
-@@ -27,12 +27,12 @@ build_dir="@abs_top_builddir@"
- test_dir=sys.path[0]
-
- new_path=os.environ['PATH']
--
- if os.path.exists("%s/regression.py.in" % test_dir):
- print "Running tests from the source tree: %s (%s)" % (build_dir, test_dir)
- new_path = "%s/lrmd:%s" % (build_dir, new_path) # For lrmd, lrmd_test and pacemaker_remoted
- new_path = "%s/tools:%s" % (build_dir, new_path) # For crm_resource
- new_path = "%s/fencing:%s" % (build_dir, new_path) # For stonithd
-+
- else:
- print "Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % test_dir
- new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) # For stonithd, lrmd, lrmd_test and pacemaker_remoted
-@@ -434,13 +434,15 @@ if __name__ == "__main__":
- for ra in [ "Dummy", "Stateful", "ping" ]:
- os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (build_dir, ra, ra))
- os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (ra))
-- else:
-- # Assume it's installed
-- print "Using @datadir@/@PACKAGE@/tests/cts/LSBDummy"
-- os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy")
-
-- os.system("chmod a+x /etc/init.d/LSBDummy")
-- os.system("ls -al /etc/init.d/LSBDummy")
-+ else:
-+ # Assume it's installed
-+ print "Using @datadir@/@PACKAGE@/tests/cts/LSBDummy"
-+ os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy")
-+
-+ os.system("chmod a+x /etc/init.d/LSBDummy")
-+ os.system("ls -al /etc/init.d/LSBDummy")
-+
- os.system("mkdir -p @CRM_CORE_DIR@/root")
-
- if os.path.exists("/bin/systemctl"):
-@@ -747,6 +749,33 @@ if __name__ == "__main__":
- test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
- "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
-
-+ ### stress tests ###
-+ def build_stress_tests(self):
-+ timeout = "-t 20000"
-+ iterations = 25
-+
-+ test = self.new_test("ocf_stress", "Verify systemd dbus connection works under load")
-+ for i in range(iterations):
-+ test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
-+ test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i))
-+ test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i))
-+ for i in range(iterations):
-+ test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i))
-+ test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
-+
-+
-+ if "systemd" in self.rsc_classes:
-+ test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load")
-+ for i in range(iterations):
-+ test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T lrmd_dummy_daemon -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
-+ test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i))
-+ test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i))
-+
-+ for i in range(iterations):
-+ test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i))
-+ test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
-+
-+
- ### These are tests that target specific cases ###
- def build_custom_tests(self):
-
-@@ -1016,6 +1045,7 @@ def main(argv):
- tests.build_multi_rsc_tests()
- tests.build_negative_tests()
- tests.build_custom_tests()
-+ tests.build_stress_tests()
-
- tests.setup_test_environment()
-
diff --git a/pacemaker.spec b/pacemaker.spec
index 6b58786..f0fefd5 100644
--- a/pacemaker.spec
+++ b/pacemaker.spec
@@ -2,11 +2,14 @@
 %global uname hacluster
 %global pcmk_docdir %{_docdir}/%{name}
 
-%global specversion 2.rev14
-%global commit 6052cd16c2f455809f8088af76ce86483bf98353
+%global specversion 3
+%global commit 44eb2ddf8d4f8fc05256aae2abc9fbf3ae4d1fbc
 %global shortcommit %(c=%{commit}; echo ${c:0:7})
 %global github_owner ClusterLabs
+%global nagios_name nagios-agents-metadata
+%global nagios_hash 105ab8a7b2c16b9a29cf1c1596b80136eeef332b
+
 # Turn off the auto compilation of python files not in the site-packages directory
 # Needed so that the -devel package is multilib compliant
 # py_auto_byte_compile macro: https://bugzilla.redhat.com/574437
@@ -59,7 +62,47 @@ License: GPLv2+ and LGPLv2+
 Url: http://www.clusterlabs.org
 Group: System Environment/Daemons
 
+# eg. https://github.com/ClusterLabs/pacemaker/archive/8ae45302394b039fb098e150f156df29fc0cb576/pacemaker-8ae45302394b039fb098e150f156df29fc0cb576.tar.gz
 Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz
+Source1: https://github.com/%{github_owner}/%{nagios_name}/archive/%{nagios_hash}/%{nagios_name}-%{nagios_hash}.tar.gz
+Patch1: pacemaker-63f8e9a-rollup.patch
+Patch2: pacemaker-rollup-7-1-3d781d3.patch
+Patch3: pacemaker-rollup-3a7715d.patch
+Patch4: 0004-Fix-crm_resource-Correctly-check-if-a-resource-is-un.patch
+Patch5: 0005-Fix-PE-Bug-cl-5247-Imply-resources-running-on-a-cont.patch
+Patch6: 0006-Fix-Date-Correctly-set-time-from-seconds-since-epoch.patch
+Patch7: 0007-Test-PE-Bug-cl-5247-Imply-resources-running-on-a-con.patch
+Patch8: 0008-Fix-tools-memory-leak-in-crm_resource.patch
+Patch9: 0009-Fix-pengine-The-failed-action-of-the-resource-that-o.patch
+Patch10: 0010-Log-services-Reduce-severity-of-noisy-log-messages.patch
+Patch11: 0011-Fix-xml-Mark-xml-nodes-as-dirty-if-any-children-move.patch
+Patch12: 0012-Feature-crmd-Implement-reliable-event-notifications.patch
+Patch13: 0013-Fix-cman-Suppress-implied-node-names.patch
+Patch14: 0014-Fix-crmd-Choose-more-appropriate-names-for-notificat.patch
+Patch15: 0015-Fix-crmd-Correctly-enable-disable-notifications.patch
+Patch16: 0016-Fix-crmd-Report-the-completion-status-and-output-of-.patch
+Patch17: 0017-Fix-cman-Print-the-nodeid-of-nodes-with-fake-names.patch
+Patch18: 0018-Refactor-Tools-Isolate-the-paths-which-truely-requir.patch
+Patch19: 0019-Fix-corosync-Display-node-state-and-quorum-data-if-a.patch
+Patch20: 0020-Fix-pacemakerd-Do-not-forget-about-nodes-that-leave-.patch
+Patch21: 0021-Fix-pacemakerd-Track-node-state-in-pacemakerd.patch
+Patch22: 0022-Fix-PE-Resolve-memory-leak.patch
+Patch23: 0023-Fix-cman-Purge-all-node-caches-for-crm_node-R.patch
+Patch24: 0024-Refactor-membership-Safely-autoreap-nodes-without-co.patch
+Patch25: 0025-Fix-crmd-Prevent-segfault-by-correctly-detecting-whe.patch
+Patch26: 0026-Fix-crmd-don-t-add-node-ID-to-proxied-remote-node-re.patch
+Patch27: 0027-Fix-pacemaker_remote-memory-leak-in-ipc_proxy_dispat.patch
+Patch28: 0028-Log-The-package-version-is-more-informative.patch
+Patch29: 0029-Fix-crm_resource-Allow-the-resource-configuration-to.patch
+Patch30: 0030-Log-lrmd-Improved-logging-when-no-pacemaker-remote-a.patch
+Patch31: 0031-Fix-liblrmd-don-t-print-error-if-remote-key-environm.patch
+Patch32: 0032-Fix-Tools-Repair-the-logging-of-interesting-command-.patch
+Patch33: 0033-Feature-Tools-Do-not-send-command-lines-to-syslog.patch
+Patch34: 0034-Log-cibadmin-Default-once-again-to-LOG_CRIT.patch
+Patch35: 0035-Fix-crm_resource-Correctly-update-existing-meta-attr.patch
+Patch36: 0036-Log-crm_resource-restart-Improved-user-feedback-on-f.patch
+Patch37: 0037-Fix-crm_resource-Correctly-delete-existing-meta-attr.patch
+Patch38: 0038-Fix-crm_resource-Correctly-observe-force-when-deleti.patch
 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 AutoReqProv: on
@@ -69,6 +112,7 @@ Requires: resource-agents
 Requires: %{name}-libs = %{version}-%{release}
 Requires: %{name}-cluster-libs = %{version}-%{release}
 Requires: %{name}-cli = %{version}-%{release}
+Provides: pcmk-cluster-manager
 
 %if %{defined systemd_requires}
 %systemd_requires
@@ -169,6 +213,7 @@ Group: System Environment/Daemons
 Requires: %{name}-libs = %{version}-%{release}
 Requires: %{name}-cli = %{version}-%{release}
 Requires: resource-agents
+Provides: pcmk-cluster-manager
 %if %{defined systemd_requires}
 %systemd_requires
 %endif
@@ -220,8 +265,26 @@ Documentation for Pacemaker.
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
+%package nagios-plugins-metadata
+License: GPLv2+ and LGPLv2+
+Summary: Pacemaker Nagios Metadata
+Group: System Environment/Daemons
+# NOTE below are the plugins this metadata uses.
+Requires: nagios-plugins-http
+Requires: nagios-plugins-ldap
+Requires: nagios-plugins-mysql
+Requires: nagios-plugins-pgsql
+Requires: nagios-plugins-tcp
+Requires: pcmk-cluster-manager
+
+%description nagios-plugins-metadata
+The metadata files required for Pacemaker to execute the nagios plugin
+monitor resources.
+
 %prep
-%autosetup -n %{name}-%{commit} -p1
+%setup -q -a 0 -n %{name}-%{commit}
+%setup -q -a 1 -n %{name}-%{commit}
+%autopatch -p1
 
 # Force the local time
 #
@@ -240,7 +303,10 @@ docdir=%{pcmk_docdir} %{configure} \
 %{?with_coverage: --with-coverage} \
 --with-initdir=%{_initrddir} \
 --localstatedir=%{_var} \
- --with-version=%{version}-%{release}
+ --with-version=%{version}-%{release} \
+ --with-nagios \
+ --with-nagios-metadata-dir=%{_datadir}/pacemaker/nagios/plugins-metadata/ \
+ --with-nagios-plugin-dir=%{_libdir}/nagios/plugins/
 
 make %{_smp_mflags} V=1 docdir=%{pcmk_docdir} all
@@ -256,6 +322,11 @@ mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig
 mkdir -p ${RPM_BUILD_ROOT}%{_var}/lib/pacemaker/cores
 install -m 644 mcp/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker
 
+mkdir -p %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata
+for file in $(find %{nagios_name}-%{nagios_hash}/metadata -type f); do
+ install -m 644 $file %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata
+done
+
 %if %{with upstart_job}
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init
 install -m 644 mcp/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf
@@ -356,6 +427,7 @@ exit 0
 %exclude %{_datadir}/pacemaker/report.common
 %exclude %{_datadir}/pacemaker/report.collector
+%exclude %{_datadir}/pacemaker/nagios/plugins-metadata/*
 %{_datadir}/pacemaker
 %{_datadir}/snmp/mibs/PCMK-MIB.txt
@@ -502,8 +574,22 @@ exit 0
 %license COPYING.LIB
 %doc AUTHORS
 
+%files nagios-plugins-metadata
+%defattr(-,root,root)
+%dir %{_datadir}/pacemaker/nagios/plugins-metadata
+%attr(0644,root,root) %{_datadir}/pacemaker/nagios/plugins-metadata/*
+
 %changelog
-* Thu Aug 20 2015 Andrew Beekhof - 1.1.13-1
+* Wed Oct 14 2015 Jan Pokorný - 1.1.13-3
+- Update to Pacemaker-1.1.13 post-release + patches (sync)
+- Add nagios-plugins-metadata subpackage enabling support of selected
+ Nagios plugins as resources recognized by Pacemaker
+- Several specfile improvements: drop irrelevant stuff, rehash the
+ included/excluded files + dependencies, add check scriptlet,
+ reflect current packaging practice, do minor cleanups
+ (mostly adopted from another spec)
+
+* Thu Aug 20 2015 Andrew Beekhof - 1.1.13-2
 - Update for new upstream tarball: Pacemaker-1.1.13
 - See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details
diff --git a/sources b/sources
index c226133..13427ea 100644
--- a/sources
+++ b/sources
@@ -1 +1,2 @@
-43529bc70e86c4a747c135eb9cf8ce8f pacemaker-6052cd16c2f455809f8088af76ce86483bf98353.tar.gz
+64bfe54641ba416b459955932d7bcab5 pacemaker-44eb2ddf8d4f8fc05256aae2abc9fbf3ae4d1fbc.tar.gz
+b914b3c0f16d2ba21339fb54e166500e nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz
diff --git a/stonith-manpage.patch b/stonith-manpage.patch
deleted file mode 100644
index 20416f2..0000000
--- a/stonith-manpage.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-diff -r 39bbb85636f1 fencing/Makefile.am
---- a/fencing/Makefile.am Tue Apr 26 09:43:57 2011 +0200
-+++ b/fencing/Makefile.am Wed Apr 27 12:00:39 2011 +0200
-@@ -26,14 +26,19 @@ halib_PROGRAMS = stonithd stonith-test
- sbin_PROGRAMS = stonith_admin
- sbin_SCRIPTS = fence_legacy
-
--if BUILD_HELP
--man8_MANS = $(sbin_PROGRAMS:%=%.8) fence_legacy.8 stonithd.8
-+man8_MANS =
-+
-+if BUILD_XML_HELP
-+man8_MANS += stonithd.8
- stonithd.xml: stonithd
-- $(top_builddir)/fencing/$< metadata | $(XSLTPROC) --nonet --novalid --stringparam man.name $< $(top_srcdir)/xml/ocf-meta2man.xsl - > $(top_builddir)/crmd/$@
-+ $(top_builddir)/fencing/$< metadata | $(XSLTPROC) --nonet --novalid --stringparam man.name $< $(top_srcdir)/xml/ocf-meta2man.xsl - > $(top_builddir)/fencing/$@
-
- stonithd.8: stonithd.xml
-- $(XSLTPROC) $(MANPAGE_XSLT) $(top_builddir)/crmd/$<
-+ $(XSLTPROC) $(MANPAGE_XSLT) $(top_builddir)/fencing/$<
-+endif
-
-+if BUILD_HELP
-+man8_MANS += $(sbin_PROGRAMS:%=%.8) fence_legacy.8
- %.8: %
- echo Creating $@
- chmod a+x $<