pacemaker 2.1.7 - Scalable High-Availability cluster resource manager
unpack.c
/*
 * Copyright 2004-2023 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <stdio.h>
#include <string.h>
#include <glib.h>
#include <time.h>

#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>

#include <crm/common/util.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <pe_status_private.h>

CRM_TRACE_INIT_DATA(pe_status);
// A (parsed) resource action history entry
struct action_history {
    pcmk_resource_t *rsc;       // Resource that history is for
    pcmk_node_t *node;          // Node that history is for
    xmlNode *xml;               // History entry XML

    // Parsed from entry XML
    const char *id;             // XML ID of history entry
    const char *key;            // Operation key of action
    const char *task;           // Action name
    const char *exit_reason;    // Exit reason given for result
    guint interval_ms;          // Action interval
    int call_id;                // Call ID of action
    int expected_exit_status;   // Expected exit status of action
    int exit_status;            // Actual exit status of action
    int execution_status;       // Execution status of action
};

47
48/* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
49 * use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the
50 * flag is stringified more readably in log messages.
51 */
52#define set_config_flag(scheduler, option, flag) do { \
53 const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \
54 if (scf_value != NULL) { \
55 if (crm_is_true(scf_value)) { \
56 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
57 LOG_TRACE, "Scheduler", \
58 crm_system_name, (scheduler)->flags, \
59 (flag), #flag); \
60 } else { \
61 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
62 LOG_TRACE, "Scheduler", \
63 crm_system_name, (scheduler)->flags, \
64 (flag), #flag); \
65 } \
66 } \
67 } while(0)
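/* For example, a call later in this file,
 *
 *     set_config_flag(scheduler, "stop-all-resources", pcmk_sched_stop_all);
 *
 * looks up the "stop-all-resources" cluster property and sets or clears
 * pcmk_sched_stop_all in scheduler->flags accordingly.
 */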

static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
                          xmlNode *xml_op, xmlNode **last_failure,
                          enum action_fail_response *failed);
static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
                                           pcmk_node_t *this_node);
static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
                           bool overwrite, pcmk_scheduler_t *scheduler);
static void determine_online_status(const xmlNode *node_state,
                                    pcmk_node_t *this_node,
                                    pcmk_scheduler_t *scheduler);

static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
                            pcmk_scheduler_t *scheduler);

static gboolean
is_dangling_guest_node(pcmk_node_t *node)
{
    /* We are looking for a remote node that was supposed to be mapped to a
     * container resource, but all traces of that container have disappeared
     * from both the config and the status section.
     */
    if (pe__is_guest_or_remote_node(node) &&
        node->details->remote_rsc &&
        (node->details->remote_rsc->container == NULL) &&
        pcmk_is_set(node->details->remote_rsc->flags,
                    pcmk_rsc_removed_filler)) {
        return TRUE;
    }

    return FALSE;
}

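/* Schedule a fence action for a node, recording the reason. Guest nodes are
 * "fenced" by scheduling recovery of their container resource, remote nodes
 * by scheduling a reset of their connection, and cluster nodes by a true
 * fencing operation. priority_delay indicates whether priority-fencing-delay
 * should be considered for the operation.
 */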
void
pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
              const char *reason, bool priority_delay)
{
    CRM_CHECK(node, return);

    /* A guest node is fenced by marking its container as failed */
    if (pe__is_guest_node(node)) {
        pcmk_resource_t *rsc = node->details->remote_rsc->container;

        if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
            if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
                crm_notice("Not fencing guest node %s "
                           "(otherwise would because %s): "
                           "its guest resource %s is unmanaged",
                           pe__node_name(node), reason, rsc->id);
            } else {
                crm_warn("Guest node %s will be fenced "
                         "(by recovering its guest resource %s): %s",
                         pe__node_name(node), rsc->id, reason);

                /* We don't mark the node as unclean because that would prevent
                 * the node from running resources. We want to allow it to run
                 * resources in this transition if the recovery succeeds.
                 */
                node->details->remote_requires_reset = TRUE;
                pe__set_resource_flags(rsc,
                                       pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
            }
        }

    } else if (is_dangling_guest_node(node)) {
        crm_info("Cleaning up dangling connection for guest node %s: "
                 "fencing was already done because %s, "
                 "and guest resource no longer exists",
                 pe__node_name(node), reason);
        pe__set_resource_flags(node->details->remote_rsc,
                               pcmk_rsc_failed|pcmk_rsc_stop_if_failed);

    } else if (pe__is_remote_node(node)) {
        pcmk_resource_t *rsc = node->details->remote_rsc;

        if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
            crm_notice("Not fencing remote node %s "
                       "(otherwise would because %s): connection is unmanaged",
                       pe__node_name(node), reason);
        } else if (!node->details->remote_requires_reset) {
            node->details->remote_requires_reset = TRUE;
            crm_warn("Remote node %s %s: %s",
                     pe__node_name(node),
                     pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
                     reason);
        }
        node->details->unclean = TRUE;
        // No need to apply `priority-fencing-delay` for remote nodes
        pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);

    } else if (node->details->unclean) {
        crm_trace("Cluster node %s %s because %s",
                  pe__node_name(node),
                  pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
                  reason);

    } else {
        crm_warn("Cluster node %s %s: %s",
                 pe__node_name(node),
                 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
                 reason);
        node->details->unclean = TRUE;
        pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
    }
}

// @TODO xpaths can't handle templates, rules, or id-refs

// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR                       \
    "[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'"           \
    "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') "        \
    "and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']"

// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
    "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES   \
    "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR                         \
    "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG  \
    "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR

static void
set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
{
    xmlXPathObjectPtr result = NULL;

    if (!pcmk_is_set(scheduler->flags, flag)) {
        result = xpath_search(scheduler->input, xpath);
        if (result && (numXpathResults(result) > 0)) {
            pe__set_working_set_flags(scheduler, flag);
        }
        freeXpathObject(result);
    }
}

gboolean
unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
{
    const char *value = NULL;
    GHashTable *config_hash = pcmk__strkey_table(free, free);

    pe_rule_eval_data_t rule_data = {
        .node_hash = NULL,
        .role = pcmk_role_unknown,
        .now = scheduler->now,
        .match_data = NULL,
        .rsc_data = NULL,
        .op_data = NULL
    };

    scheduler->config_hash = config_hash;

    pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash,
                               CIB_OPTIONS_FIRST, FALSE, scheduler);

    verify_pe_options(scheduler->config_hash);

    set_config_flag(scheduler, "enable-startup-probes",
                    pcmk_sched_probe_resources);
    if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
        crm_info("Startup probes: disabled (dangerous)");
    }

    value = pe_pref(scheduler->config_hash, XML_ATTR_HAVE_WATCHDOG);
    if (value && crm_is_true(value)) {
        crm_info("Watchdog-based self-fencing will be performed via SBD if "
                 "fencing is required and stonith-watchdog-timeout is nonzero");
        pe__set_working_set_flags(scheduler, pcmk_sched_have_fencing);
    }

    /* Set certain flags via xpath here, so they can be used before the relevant
     * configuration sections are unpacked.
     */
    set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
                 scheduler);

    value = pe_pref(scheduler->config_hash, "stonith-timeout");
    scheduler->stonith_timeout = (int) crm_parse_interval_spec(value);
    crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);

    set_config_flag(scheduler, "stonith-enabled", pcmk_sched_fencing_enabled);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
        crm_debug("STONITH of failed nodes is enabled");
    } else {
        crm_debug("STONITH of failed nodes is disabled");
    }

    scheduler->stonith_action = pe_pref(scheduler->config_hash,
                                        "stonith-action");
    if (!strcmp(scheduler->stonith_action, "poweroff")) {
        pe_warn_once(pcmk__wo_poweroff,
                     "Support for stonith-action of 'poweroff' is deprecated "
                     "and will be removed in a future release (use 'off' instead)");
        scheduler->stonith_action = PCMK_ACTION_OFF;
    }
    crm_trace("STONITH will %s nodes", scheduler->stonith_action);

    set_config_flag(scheduler, "concurrent-fencing",
                    pcmk_sched_concurrent_fencing);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
        crm_debug("Concurrent fencing is enabled");
    } else {
        crm_debug("Concurrent fencing is disabled");
    }

    value = pe_pref(scheduler->config_hash,
                    XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
    if (value) {
        scheduler->priority_fencing_delay = crm_parse_interval_spec(value)
                                            / 1000;
        crm_trace("Priority fencing delay is %ds",
                  scheduler->priority_fencing_delay);
    }

    set_config_flag(scheduler, "stop-all-resources", pcmk_sched_stop_all);
    crm_debug("Stop all active resources: %s",
              pcmk__btoa(pcmk_is_set(scheduler->flags, pcmk_sched_stop_all)));

    set_config_flag(scheduler, "symmetric-cluster",
                    pcmk_sched_symmetric_cluster);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
        crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
    }

    value = pe_pref(scheduler->config_hash, "no-quorum-policy");

    if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) {
        scheduler->no_quorum_policy = pcmk_no_quorum_ignore;

    } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) {
        scheduler->no_quorum_policy = pcmk_no_quorum_freeze;

    } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
        scheduler->no_quorum_policy = pcmk_no_quorum_demote;

    } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) {
        if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
            int do_panic = 0;

            crm_element_value_int(scheduler->input, XML_ATTR_QUORUM_PANIC,
                                  &do_panic);
            if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
                scheduler->no_quorum_policy = pcmk_no_quorum_fence;
            } else {
                crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
                scheduler->no_quorum_policy = pcmk_no_quorum_stop;
            }
        } else {
            pcmk__config_err("Resetting no-quorum-policy to 'stop' because "
                             "fencing is disabled");
            scheduler->no_quorum_policy = pcmk_no_quorum_stop;
        }

    } else {
        scheduler->no_quorum_policy = pcmk_no_quorum_stop;
    }

    switch (scheduler->no_quorum_policy) {
        case pcmk_no_quorum_freeze:
            crm_debug("On loss of quorum: Freeze resources");
            break;
        case pcmk_no_quorum_stop:
            crm_debug("On loss of quorum: Stop ALL resources");
            break;
        case pcmk_no_quorum_demote:
            crm_debug("On loss of quorum: "
                      "Demote promotable resources and stop other resources");
            break;
        case pcmk_no_quorum_fence:
            crm_notice("On loss of quorum: Fence all remaining nodes");
            break;
        case pcmk_no_quorum_ignore:
            crm_notice("On loss of quorum: Ignore");
            break;
    }

    set_config_flag(scheduler, "stop-orphan-resources",
                    pcmk_sched_stop_removed_resources);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
        crm_trace("Orphan resources are stopped");
    } else {
        crm_trace("Orphan resources are ignored");
    }

    set_config_flag(scheduler, "stop-orphan-actions",
                    pcmk_sched_cancel_removed_actions);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
        crm_trace("Orphan resource actions are stopped");
    } else {
        crm_trace("Orphan resource actions are ignored");
    }

    value = pe_pref(scheduler->config_hash, "remove-after-stop");
    if (value != NULL) {
        if (crm_is_true(value)) {
            pe__set_working_set_flags(scheduler, pcmk_sched_remove_after_stop);
#ifndef PCMK__COMPAT_2_0
            pe_warn_once(pcmk__wo_remove_after,
                         "Support for the remove-after-stop cluster property is"
                         " deprecated and will be removed in a future release");
#endif
        } else {
            pe__clear_working_set_flags(scheduler,
                                        pcmk_sched_remove_after_stop);
        }
    }

    set_config_flag(scheduler, "maintenance-mode", pcmk_sched_in_maintenance);
    crm_trace("Maintenance mode: %s",
              pcmk__btoa(pcmk_is_set(scheduler->flags,
                                     pcmk_sched_in_maintenance)));

    set_config_flag(scheduler, "start-failure-is-fatal",
                    pcmk_sched_start_failure_fatal);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
        crm_trace("Start failures are always fatal");
    } else {
        crm_trace("Start failures are handled by failcount");
    }

    if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
        set_config_flag(scheduler, "startup-fencing",
                        pcmk_sched_startup_fencing);
    }
    if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
        crm_trace("Unseen nodes will be fenced");
    } else {
        pe_warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes");
    }

    pe__unpack_node_health_scores(scheduler);

    scheduler->placement_strategy = pe_pref(scheduler->config_hash,
                                            "placement-strategy");
    crm_trace("Placement strategy: %s", scheduler->placement_strategy);

    set_config_flag(scheduler, "shutdown-lock", pcmk_sched_shutdown_lock);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
        value = pe_pref(scheduler->config_hash,
                        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
        scheduler->shutdown_lock = crm_parse_interval_spec(value) / 1000;
        crm_trace("Resources will be locked to nodes that were cleanly "
                  "shut down (locks expire after %s)",
                  pcmk__readable_interval(scheduler->shutdown_lock * 1000));
    } else {
        crm_trace("Resources will not be locked to nodes that were cleanly "
                  "shut down");
    }

    value = pe_pref(scheduler->config_hash,
                    XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT);
    scheduler->node_pending_timeout = crm_parse_interval_spec(value) / 1000;
    if (scheduler->node_pending_timeout == 0) {
        crm_trace("Do not fence pending nodes");
    } else {
        crm_trace("Fence pending nodes after %s",
                  pcmk__readable_interval(scheduler->node_pending_timeout
                                          * 1000));
    }

    return TRUE;
}
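/* For illustration, unpack_config() above reads cluster properties from CIB
 * XML like the following (IDs here are hypothetical):
 *
 *   <crm_config>
 *     <cluster_property_set id="cib-bootstrap-options">
 *       <nvpair id="opt-fence" name="stonith-enabled" value="true"/>
 *       <nvpair id="opt-quorum" name="no-quorum-policy" value="stop"/>
 *     </cluster_property_set>
 *   </crm_config>
 */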

pcmk_node_t *
pe_create_node(const char *id, const char *uname, const char *type,
               const char *score, pcmk_scheduler_t *scheduler)
{
    pcmk_node_t *new_node = NULL;

    if (pe_find_node(scheduler->nodes, uname) != NULL) {
        pcmk__config_warn("More than one node entry has name '%s'", uname);
    }

    new_node = calloc(1, sizeof(pcmk_node_t));
    if (new_node == NULL) {
        return NULL;
    }

    new_node->weight = char2score(score);
    new_node->details = calloc(1, sizeof(struct pe_node_shared_s));

    if (new_node->details == NULL) {
        free(new_node);
        return NULL;
    }

    crm_trace("Creating node for entry %s/%s", uname, id);
    new_node->details->id = id;
    new_node->details->uname = uname;
    new_node->details->online = FALSE;
    new_node->details->shutdown = FALSE;
    new_node->details->rsc_discovery_enabled = TRUE;
    new_node->details->running_rsc = NULL;
    new_node->details->data_set = scheduler;

    if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) {
        new_node->details->type = pcmk_node_variant_cluster;

    } else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) {
        new_node->details->type = pcmk_node_variant_remote;
        pe__set_working_set_flags(scheduler, pcmk_sched_have_remote_nodes);

    } else {
        /* @COMPAT 'ping' is the default for backward compatibility, but it
         * should be changed to 'member' at a compatibility break
         */
        if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) {
            pcmk__config_warn("Node %s has unrecognized type '%s', "
                              "assuming 'ping'", pcmk__s(uname, "without name"),
                              type);
        }
        pe_warn_once(pcmk__wo_ping_node,
                     "Support for nodes of type 'ping' (such as %s) is "
                     "deprecated and will be removed in a future release",
                     pcmk__s(uname, "unnamed node"));
        new_node->details->type = node_ping;
    }

    new_node->details->attrs = pcmk__strkey_table(free, free);

    if (pe__is_guest_or_remote_node(new_node)) {
        g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
                            strdup("remote"));
    } else {
        g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
                            strdup("cluster"));
    }

    new_node->details->utilization = pcmk__strkey_table(free, free);
    new_node->details->digest_cache = pcmk__strkey_table(free,
                                                         pe__free_digests);

    scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
                                            pe__cmp_node_name);
    return new_node;
}

static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
{
    xmlNode *attr_set = NULL;
    xmlNode *attr = NULL;

    const char *container_id = ID(xml_obj);
    const char *remote_name = NULL;
    const char *remote_server = NULL;
    const char *remote_port = NULL;
    const char *connect_timeout = "60s";
    const char *remote_allow_migrate = NULL;
    const char *is_managed = NULL;

    for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL;
         attr_set = pcmk__xe_next(attr_set)) {

        if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS,
                          pcmk__str_casei)) {
            continue;
        }

        for (attr = pcmk__xe_first_child(attr_set); attr != NULL;
             attr = pcmk__xe_next(attr)) {
            const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
            const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);

            if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) {
                remote_name = value;
            } else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) {
                remote_server = value;
            } else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) {
                remote_port = value;
            } else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) {
                connect_timeout = value;
            } else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) {
                remote_allow_migrate = value;
            } else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) {
                is_managed = value;
            }
        }
    }

    if (remote_name == NULL) {
        return NULL;
    }

    if (pe_find_resource(data->resources, remote_name) != NULL) {
        return NULL;
    }

    pe_create_remote_xml(parent, remote_name, container_id,
                         remote_allow_migrate, is_managed,
                         connect_timeout, remote_server, remote_port);
    return remote_name;
}
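/* For illustration, expand_remote_rsc_meta() above turns a guest node
 * definition like the following (resource and node names are hypothetical)
 * into an ocf:pacemaker:remote primitive named "guest1":
 *
 *   <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *     <meta_attributes id="vm1-meta">
 *       <nvpair id="vm1-remote" name="remote-node" value="guest1"/>
 *     </meta_attributes>
 *   </primitive>
 */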

static void
handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
{
    if ((new_node->details->type == pcmk_node_variant_remote)
        && (new_node->details->remote_rsc == NULL)) {
        /* Ignore fencing for remote nodes that don't have a connection resource
         * associated with them. This happens when remote node entries get left
         * in the nodes section after the connection resource is removed.
         */
        return;
    }

    if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
        // All nodes are unclean until we've seen their status entry
        new_node->details->unclean = TRUE;

    } else {
        // Blind faith ...
        new_node->details->unclean = FALSE;
    }

    /* We need to be able to determine if a node's status section
     * exists or not separate from whether the node is unclean. */
    new_node->details->unseen = TRUE;
}

gboolean
unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_obj = NULL;
    pcmk_node_t *new_node = NULL;
    const char *id = NULL;
    const char *uname = NULL;
    const char *type = NULL;
    const char *score = NULL;

    for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL;
         xml_obj = pcmk__xe_next(xml_obj)) {

        if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) {
            new_node = NULL;

            id = crm_element_value(xml_obj, XML_ATTR_ID);
            uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
            type = crm_element_value(xml_obj, XML_ATTR_TYPE);
            score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
            crm_trace("Processing node %s/%s", uname, id);

            if (id == NULL) {
                pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE
                                 "> entry in configuration without id");
                continue;
            }
            new_node = pe_create_node(id, uname, type, score, scheduler);

            if (new_node == NULL) {
                return FALSE;
            }

            handle_startup_fencing(scheduler, new_node);

            add_node_attrs(xml_obj, new_node, FALSE, scheduler);

            crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
        }
    }

    if (scheduler->localhost
        && (pe_find_node(scheduler->nodes, scheduler->localhost) == NULL)) {
        crm_info("Creating a fake local node");
        pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0,
                       scheduler);
    }

    return TRUE;
}

static void
setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
    const char *container_id = NULL;

    if (rsc->children) {
        g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
        return;
    }

    container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
    if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
        pcmk_resource_t *container = pe_find_resource(scheduler->resources,
                                                      container_id);

        if (container) {
            rsc->container = container;
            pe__set_resource_flags(container, pcmk_rsc_has_filler);
            container->fillers = g_list_append(container->fillers, rsc);
            pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
        } else {
            pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
        }
    }
}

gboolean
unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_obj = NULL;

    /* Create remote nodes and guest nodes from the resource configuration
     * before unpacking resources.
     */
    for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
         xml_obj = pcmk__xe_next(xml_obj)) {

        const char *new_node_id = NULL;

        /* Check for remote nodes, which are defined by ocf:pacemaker:remote
         * primitives.
         */
        if (xml_contains_remote_node(xml_obj)) {
            new_node_id = ID(xml_obj);
            /* The "pe_find_node" check is here to make sure we don't iterate over
             * an expanded node that has already been added to the node list. */
            if (new_node_id
                && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
                crm_trace("Found remote node %s defined by resource %s",
                          new_node_id, ID(xml_obj));
                pe_create_node(new_node_id, new_node_id, "remote", NULL,
                               scheduler);
            }
            continue;
        }

        /* Check for guest nodes, which are defined by special meta-attributes
         * of a primitive of any type (for example, VirtualDomain or Xen).
         */
        if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) {
            /* This will add an ocf:pacemaker:remote primitive to the
             * configuration for the guest node's connection, to be unpacked
             * later.
             */
            new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
                                                 scheduler);
            if (new_node_id
                && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
                crm_trace("Found guest node %s in resource %s",
                          new_node_id, ID(xml_obj));
                pe_create_node(new_node_id, new_node_id, "remote", NULL,
                               scheduler);
            }
            continue;
        }

        /* Check for guest nodes inside a group. Clones are currently not
         * supported as guest nodes.
         */
        if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) {
            xmlNode *xml_obj2 = NULL;
            for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL;
                 xml_obj2 = pcmk__xe_next(xml_obj2)) {

                new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
                                                     scheduler);

                if (new_node_id
                    && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
                    crm_trace("Found guest node %s in resource %s inside group %s",
                              new_node_id, ID(xml_obj2), ID(xml_obj));
                    pe_create_node(new_node_id, new_node_id, "remote", NULL,
                                   scheduler);
                }
            }
        }
    }
    return TRUE;
}

/* Call this after all the nodes and resources have been
 * unpacked, but before the status section is read.
 *
 * A remote node's online status is reflected by the state
 * of the remote node's connection resource. We need to link
 * the remote node to this connection resource so we can have
 * easy access to the connection resource during the scheduler calculations.
 */
static void
link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
{
    pcmk_node_t *remote_node = NULL;

    if (new_rsc->is_remote_node == FALSE) {
        return;
    }

    if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
        /* remote_nodes and remote_resources are not linked in quick location calculations */
        return;
    }

    remote_node = pe_find_node(scheduler->nodes, new_rsc->id);
    CRM_CHECK(remote_node != NULL, return);

    pe_rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
                 new_rsc->id, pe__node_name(remote_node));
    remote_node->details->remote_rsc = new_rsc;

    if (new_rsc->container == NULL) {
        /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
         * the same as is done for cluster nodes.
         */
        handle_startup_fencing(scheduler, remote_node);

    } else {
        /* pe_create_node() marks the new node as "remote" or "cluster"; now
         * that we know the node is a guest node, update it correctly.
         */
        g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
                             strdup("container"));
    }
}

static void
destroy_tag(gpointer data)
{
    pcmk_tag_t *tag = data;

    if (tag) {
        free(tag->id);
        g_list_free_full(tag->refs, free);
        free(tag);
    }
}

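/* Parse configuration XML for resource information, adding a resource object
 * for each valid resource entry and recording the IDs of any templates. The
 * remote and guest nodes created by unpack_remote_nodes() must already exist
 * when this is called, so that resources can be linked to them.
 */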
gboolean
unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_obj = NULL;
    GList *gIter = NULL;

    scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);

    for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
         xml_obj = pcmk__xe_next(xml_obj)) {

        pcmk_resource_t *new_rsc = NULL;
        const char *id = ID(xml_obj);

        if (pcmk__str_empty(id)) {
            pcmk__config_err("Ignoring <%s> resource without ID",
                             xml_obj->name);
            continue;
        }

        if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE,
                         pcmk__str_none)) {
            if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
                                             NULL, NULL) == FALSE) {
                /* Record the template's ID for the knowledge of its existence anyway. */
                g_hash_table_insert(scheduler->template_rsc_sets, strdup(id),
                                    NULL);
            }
            continue;
        }

        crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>",
                  xml_obj->name, id);
        if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
                                scheduler) == pcmk_rc_ok) {
            scheduler->resources = g_list_append(scheduler->resources, new_rsc);
            pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);

        } else {
            pcmk__config_err("Ignoring <%s> resource '%s' "
                             "because configuration is invalid",
                             xml_obj->name, id);
        }
    }

    for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
        pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;

        setup_container(rsc, scheduler);
        link_rsc2remotenode(scheduler, rsc);
    }

    scheduler->resources = g_list_sort(scheduler->resources,
                                       pe__cmp_rsc_priority);
    if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
        /* Ignore */

    } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
               && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {

        pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
        pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option");
        pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
    }

    return TRUE;
}

gboolean
unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_tag = NULL;

    scheduler->tags = pcmk__strkey_table(free, destroy_tag);

    for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL;
         xml_tag = pcmk__xe_next(xml_tag)) {

        xmlNode *xml_obj_ref = NULL;
        const char *tag_id = ID(xml_tag);

        if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) {
            continue;
        }

        if (tag_id == NULL) {
            pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID,
                             (const char *) xml_tag->name);
            continue;
        }

        for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL;
             xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {

            const char *obj_ref = ID(xml_obj_ref);

            if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) {
                continue;
            }

            if (obj_ref == NULL) {
                pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID,
                                 xml_obj_ref->name, tag_id);
                continue;
            }

            if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
                return FALSE;
            }
        }
    }

    return TRUE;
}

/* The ticket state section:
 * "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
{
    const char *ticket_id = NULL;
    const char *granted = NULL;
    const char *last_granted = NULL;
    const char *standby = NULL;
    xmlAttrPtr xIter = NULL;

    pcmk_ticket_t *ticket = NULL;

    ticket_id = ID(xml_ticket);
    if (pcmk__str_empty(ticket_id)) {
        return FALSE;
    }

    crm_trace("Processing ticket state for %s", ticket_id);

    ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
    if (ticket == NULL) {
        ticket = ticket_new(ticket_id, scheduler);
        if (ticket == NULL) {
            return FALSE;
        }
    }

    for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
        const char *prop_name = (const char *)xIter->name;
        const char *prop_value = pcmk__xml_attr_value(xIter);

        if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) {
            continue;
        }
        g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
    }

    granted = g_hash_table_lookup(ticket->state, "granted");
    if (granted && crm_is_true(granted)) {
        ticket->granted = TRUE;
        crm_info("We have ticket '%s'", ticket->id);
    } else {
        ticket->granted = FALSE;
        crm_info("We do not have ticket '%s'", ticket->id);
    }

    last_granted = g_hash_table_lookup(ticket->state, "last-granted");
    if (last_granted) {
        long long last_granted_ll;

        pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
        ticket->last_granted = (time_t) last_granted_ll;
    }

    standby = g_hash_table_lookup(ticket->state, "standby");
    if (standby && crm_is_true(standby)) {
        ticket->standby = TRUE;
        if (ticket->granted) {
            crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
        }
    } else {
        ticket->standby = FALSE;
    }

    crm_trace("Done with ticket state for %s", ticket_id);

    return TRUE;
}
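/* For illustration, a ticket_state entry handled above looks like the
 * following (the ticket ID is hypothetical):
 *
 *   <ticket_state id="ticketA" granted="true" last-granted="1700000000"/>
 */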

static gboolean
unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_obj = NULL;

    for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL;
         xml_obj = pcmk__xe_next(xml_obj)) {

        if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) {
            continue;
        }
        unpack_ticket_state(xml_obj, scheduler);
    }

    return TRUE;
}

static void
unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
                           pcmk_scheduler_t *scheduler)
{
    const char *resource_discovery_enabled = NULL;
    const xmlNode *attrs = NULL;
    pcmk_resource_t *rsc = NULL;

    if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
        return;
    }

    if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
        return;
    }
    crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node));

    pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE),
                       &(this_node->details->remote_maintenance), 0);

    rsc = this_node->details->remote_rsc;
    if (this_node->details->remote_requires_reset == FALSE) {
        this_node->details->unclean = FALSE;
        this_node->details->unseen = FALSE;
    }
    attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
    add_node_attrs(attrs, this_node, TRUE, scheduler);

    if (pe__shutdown_requested(this_node)) {
        crm_info("%s is shutting down", pe__node_name(this_node));
        this_node->details->shutdown = TRUE;
    }

    if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
        crm_info("%s is in standby mode", pe__node_name(this_node));
        this_node->details->standby = TRUE;
    }

    if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
        ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
        crm_info("%s is in maintenance mode", pe__node_name(this_node));
        this_node->details->maintenance = TRUE;
    }

    resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
    if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
        if (pe__is_remote_node(this_node)
            && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
            crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
                     " attribute on Pacemaker Remote node %s"
                     " because fencing is disabled",
                     pe__node_name(this_node));
        } else {
            /* This is either a remote node with fencing enabled, or a guest
             * node. We don't care whether fencing is enabled when fencing guest
             * nodes, because they are "fenced" by recovering their containing
             * resource.
             */
            crm_info("%s has resource discovery disabled",
                     pe__node_name(this_node));
            this_node->details->rsc_discovery_enabled = FALSE;
        }
    }
}

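/* Unpack a cluster node's transient attributes from its node_state entry,
 * honoring the "standby" and "maintenance" node attributes.
 */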
static void
unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
                            pcmk_scheduler_t *scheduler)
{
    const char *discovery = NULL;
    const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS,
                                         FALSE);

    add_node_attrs(attrs, node, TRUE, scheduler);

    if (crm_is_true(pe_node_attribute_raw(node, "standby"))) {
        crm_info("%s is in standby mode", pe__node_name(node));
        node->details->standby = TRUE;
    }

    if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) {
        crm_info("%s is in maintenance mode", pe__node_name(node));
        node->details->maintenance = TRUE;
    }

    discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY);
    if ((discovery != NULL) && !crm_is_true(discovery)) {
        crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
                 " attribute for %s because disabling resource discovery "
                 "is not allowed for cluster nodes", pe__node_name(node));
    }
}

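/* Unpack a node_state entry: look up the configured node, unpack its
 * transient attributes, and determine its online status. For Pacemaker
 * Remote nodes, only record whether the node has been fenced here; their
 * online status is determined later, after resource history is unpacked.
 */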
static void
unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
{
    const char *id = NULL;
    const char *uname = NULL;
    pcmk_node_t *this_node = NULL;

    id = crm_element_value(state, XML_ATTR_ID);
    if (id == NULL) {
        crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without "
                 XML_ATTR_ID);
        return;
    }

    uname = crm_element_value(state, XML_ATTR_UNAME);
    if (uname == NULL) {
        /* If a joining peer makes the cluster acquire the quorum from corosync
         * before it has joined the CPG membership of pacemaker-controld, it's
         * possible that the created node_state entry doesn't have an uname
         * yet. We should recognize the node as `pending` and wait for it to
         * join CPG.
         */
        crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without "
                  XML_ATTR_UNAME, id);
    }

    this_node = pe_find_node_any(scheduler->nodes, id, uname);
    if (this_node == NULL) {
        pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) "
                          "because it is no longer in the configuration",
                          id, pcmk__s(uname, "uname unknown"));
        return;
    }

    if (pe__is_guest_or_remote_node(this_node)) {
        /* We can't determine the online status of Pacemaker Remote nodes until
         * after all resource history has been unpacked. In this first pass, we
         * do need to mark whether the node has been fenced, as this plays a
         * role during unpacking cluster node resource state.
         */
        pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED),
                           &(this_node->details->remote_was_fenced), 0);
        return;
    }

    unpack_transient_attributes(state, this_node, scheduler);

    /* Provisionally mark this cluster node as clean. We have at least seen it
     * in the current cluster's lifetime.
     */
    this_node->details->unclean = FALSE;
    this_node->details->unseen = FALSE;

    crm_trace("Determining online status of cluster node %s (id %s)",
              pe__node_name(this_node), id);
    determine_online_status(state, this_node, scheduler);

    if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
        && this_node->details->online
        && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
        /* Everything else should flow from this automatically
         * (at least until the scheduler becomes able to migrate off
         * healthy resources)
         */
        pe_fence_node(scheduler, this_node, "cluster does not have quorum",
                      FALSE);
    }
}

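/* Unpack resource history from node_state entries, skipping any node whose
 * history cannot be unpacked yet (for example, a guest node whose connection
 * and container are not yet known to be up). Returns EAGAIN if any history
 * was unpacked, because that may unblock other nodes on a later pass;
 * otherwise returns pcmk_rc_ok. If fence is true, all remaining nodes are
 * processed regardless.
 */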
static int
unpack_node_history(const xmlNode *status, bool fence,
                    pcmk_scheduler_t *scheduler)
{
    int rc = pcmk_rc_ok;

    // Loop through all node_state entries in CIB status
    for (const xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE);
         state != NULL; state = crm_next_same_xml(state)) {

        const char *id = ID(state);
        const char *uname = crm_element_value(state, XML_ATTR_UNAME);
        pcmk_node_t *this_node = NULL;

        if ((id == NULL) || (uname == NULL)) {
            // Warning already logged in first pass through status section
            crm_trace("Not unpacking resource history from malformed "
                      XML_CIB_TAG_STATE " without id and/or uname");
            continue;
        }

        this_node = pe_find_node_any(scheduler->nodes, id, uname);
        if (this_node == NULL) {
            // Warning already logged in first pass through status section
            crm_trace("Not unpacking resource history for node %s because "
                      "no longer in configuration", id);
            continue;
        }

        if (this_node->details->unpacked) {
            crm_trace("Not unpacking resource history for node %s because "
                      "already unpacked", id);
            continue;
        }

        if (fence) {
            // We're processing all remaining nodes

        } else if (pe__is_guest_node(this_node)) {
            /* We can unpack a guest node's history only after we've unpacked
             * other resource history to the point that we know that the node's
             * connection and containing resource are both up.
             */
            pcmk_resource_t *rsc = this_node->details->remote_rsc;

            if ((rsc == NULL) || (rsc->role != pcmk_role_started)
                || (rsc->container->role != pcmk_role_started)) {
                crm_trace("Not unpacking resource history for guest node %s "
                          "because container and connection are not known to "
                          "be up", id);
                continue;
            }

        } else if (pe__is_remote_node(this_node)) {
            /* We can unpack a remote node's history only after we've unpacked
             * other resource history to the point that we know that the node's
             * connection is up, with the exception of when shutdown locks are
             * in use.
             */
            pcmk_resource_t *rsc = this_node->details->remote_rsc;

            if ((rsc == NULL)
                || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
                    && (rsc->role != pcmk_role_started))) {
                crm_trace("Not unpacking resource history for remote node %s "
                          "because connection is not known to be up", id);
                continue;
            }

        /* If fencing and shutdown locks are disabled and we're not processing
         * unseen nodes, then we don't want to unpack offline nodes until online
         * nodes have been unpacked. This allows us to number active clone
         * instances first.
         */
        } else if (!pcmk_any_flags_set(scheduler->flags,
                                       pcmk_sched_fencing_enabled
                                       |pcmk_sched_shutdown_lock)
                   && !this_node->details->online) {
            crm_trace("Not unpacking resource history for offline "
                      "cluster node %s", id);
            continue;
        }

        if (pe__is_guest_or_remote_node(this_node)) {
            determine_remote_online_status(scheduler, this_node);
            unpack_handle_remote_attrs(this_node, state, scheduler);
        }

        crm_trace("Unpacking resource history for %snode %s",
                  (fence? "unseen " : ""), id);

        this_node->details->unpacked = TRUE;
        unpack_node_lrm(this_node, state, scheduler);

        rc = EAGAIN; // Other node histories might depend on this one
    }
    return rc;
}

/* remove nodes that are down, stopping */
/* create positive rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
{
    xmlNode *state = NULL;

    crm_trace("Beginning unpack");

    if (scheduler->tickets == NULL) {
        scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
    }

    for (state = pcmk__xe_first_child(status); state != NULL;
         state = pcmk__xe_next(state)) {

        if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
            unpack_tickets_state((xmlNode *) state, scheduler);

        } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
            unpack_node_state(state, scheduler);
        }
    }

    while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
        crm_trace("Another pass through node resource histories is needed");
    }

    // Now catch any nodes we didn't see
    unpack_node_history(status,
                        !pcmk_is_set(scheduler->flags,
                                     pcmk_sched_fencing_enabled),
                        scheduler);

    /* Now that we know where resources are, we can schedule stops of containers
     * with failed bundle connections
     */
    if (scheduler->stop_needed != NULL) {
        for (GList *item = scheduler->stop_needed; item; item = item->next) {
            pcmk_resource_t *container = item->data;
            pcmk_node_t *node = pe__current_node(container);

            if (node) {
                stop_action(container, node, FALSE);
            }
        }
        g_list_free(scheduler->stop_needed);
        scheduler->stop_needed = NULL;
    }

    /* Now that we know status of all Pacemaker Remote connections and nodes,
     * we can stop connections for node shutdowns, and check the online status
     * of remote/guest nodes that didn't have any node history to unpack.
     */
    for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
        pcmk_node_t *this_node = gIter->data;

        if (!pe__is_guest_or_remote_node(this_node)) {
            continue;
        }
        if (this_node->details->shutdown
            && (this_node->details->remote_rsc != NULL)) {
            pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
                              "remote shutdown");
        }
        if (!this_node->details->unpacked) {
            determine_remote_online_status(scheduler, this_node);
        }
    }

    return TRUE;
}

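/* Unpack a node_state entry's PCMK__XA_IN_CCM value: the time the node
 * became a cluster member (or, for entries recorded by DCs older than 2.1.7,
 * a boolean). Returns the membership time, 0 if the node is not a member, or
 * -1 if unknown.
 */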
static long long
unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
{
    const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
    int member = 0;

    if (member_time == NULL) {
        return -1LL;

    } else if (crm_str_to_boolean(member_time, &member) == 1) {
        /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
         * recorded as a boolean for a DC < 2.1.7, or the node is pending
         * shutdown and has left the CPG, in which case it was set to 1 to avoid
         * fencing for node-pending-timeout.
         *
         * We return the effective time for in_ccm=1 because what's important to
         * avoid fencing is that effective time minus this value is less than
         * the pending node timeout.
         */
        return member? (long long) get_effective_time(scheduler) : 0LL;

    } else {
        long long when_member = 0LL;

        if ((pcmk__scan_ll(member_time, &when_member,
                           0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
            crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
                     " in " XML_CIB_TAG_STATE " entry", member_time);
            return -1LL;
        }
        return when_member;
    }
}

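/* Unpack a node_state entry's PCMK__XA_CRMD value: the time the node's
 * controller came online in CPG (or "online"/"offline" for entries recorded
 * by DCs older than 2.1.7). Returns a positive value if online, otherwise 0.
 */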
static long long
unpack_node_online(const xmlNode *node_state)
{
    const char *peer_time = crm_element_value(node_state, PCMK__XA_CRMD);

    // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
    if (pcmk__str_eq(peer_time, OFFLINESTATUS,
                     pcmk__str_casei|pcmk__str_null_matches)) {
        return 0LL;

    } else if (pcmk__str_eq(peer_time, ONLINESTATUS, pcmk__str_casei)) {
        return 1LL;

    } else {
        long long when_online = 0LL;

        if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
            || (when_online < 0)) {
            crm_warn("Unrecognized value '%s' for " PCMK__XA_CRMD " in "
                     XML_CIB_TAG_STATE " entry, assuming offline", peer_time);
            return 0LL;
        }
        return when_online;
    }
}

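/* Check a node's "terminate" node attribute, which may be a boolean or an
 * epoch time, and return true if termination was requested.
 */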
static bool
unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
{
    long long value = 0LL;
    int value_i = 0;
    const char *value_s = pe_node_attribute_raw(node, PCMK_NODE_ATTR_TERMINATE);

    // Value may be boolean or an epoch time
    if (crm_str_to_boolean(value_s, &value_i) == 1) {
        return (value_i != 0);
    }
    if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
        return (value > 0);
    }
    crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
             " node attribute for %s", value_s, pe__node_name(node));
    return false;
}

static gboolean
determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
                                   const xmlNode *node_state,
                                   pcmk_node_t *this_node)
{
    gboolean online = FALSE;
    const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
    const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
    long long when_member = unpack_node_member(node_state, scheduler);
    long long when_online = unpack_node_online(node_state);

    if (when_member <= 0) {
        crm_trace("Node %s is %sdown", pe__node_name(this_node),
                  ((when_member < 0)? "presumed " : ""));

    } else if (when_online > 0) {
        if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
            online = TRUE;
        } else {
            crm_debug("Node %s is not ready to run resources: %s",
                      pe__node_name(this_node), join);
        }

    } else if (this_node->details->expected_up == FALSE) {
        crm_trace("Node %s controller is down: "
                  "member@%lld online@%lld join=%s expected=%s",
                  pe__node_name(this_node), when_member, when_online,
                  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));

    } else {
        /* mark it unclean */
        pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
        crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
                 pe__node_name(this_node), when_member, when_online,
                 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
    }
    return online;
}

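/* Check whether a node has been pending (a cluster member whose controller
 * has not joined) longer than node-pending-timeout. If the node is pending
 * but still within the timeout, schedule a recheck for when the timeout
 * would expire.
 */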
static inline bool
pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
                 long long when_member, long long when_online)
{
    if ((scheduler->node_pending_timeout > 0)
        && (when_member > 0) && (when_online <= 0)) {
        // There is a timeout on pending nodes, and node is pending

        time_t timeout = when_member + scheduler->node_pending_timeout;

        if (get_effective_time(node->details->data_set) >= timeout) {
            return true; // Node has timed out
        }

        // Node is pending, but still has time
        pe__update_recheck_time(timeout, scheduler, "pending node timeout");
    }
    return false;
}

static bool
determine_online_status_fencing(pcmk_scheduler_t *scheduler,
                                const xmlNode *node_state,
                                pcmk_node_t *this_node)
{
    bool termination_requested = unpack_node_terminate(this_node, node_state);
    const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
    const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
    long long when_member = unpack_node_member(node_state, scheduler);
    long long when_online = unpack_node_online(node_state);

/*
  - PCMK__XA_JOIN ::= member|down|pending|banned
  - PCMK__XA_EXPECTED ::= member|down

  @COMPAT with entries recorded for DCs < 2.1.7
  - PCMK__XA_IN_CCM ::= true|false
  - PCMK__XA_CRMD ::= online|offline

  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
  - PCMK__XA_IN_CCM ::= <timestamp>|0
  Since when the node has been a cluster member. A value of 0 means the node
  is not a cluster member.

  - PCMK__XA_CRMD ::= <timestamp>|0
  Since when the peer has been online in CPG. A value of 0 means the peer is
  offline in CPG.
*/

    crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
              pe__node_name(this_node), when_member, when_online,
              pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
              (termination_requested? " (termination requested)" : ""));

    if (this_node->details->shutdown) {
        crm_debug("%s is shutting down", pe__node_name(this_node));

        /* Slightly different criteria since we can't shut down a dead peer */
        return (when_online > 0);
    }

    if (when_member < 0) {
        pe_fence_node(scheduler, this_node,
                      "peer has not been seen by the cluster", FALSE);
        return false;
    }

    if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
        pe_fence_node(scheduler, this_node,
                      "peer failed Pacemaker membership criteria", FALSE);

    } else if (termination_requested) {
        if ((when_member <= 0) && (when_online <= 0)
            && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
            crm_info("%s was fenced as requested", pe__node_name(this_node));
            return false;
        }
        pe_fence_node(scheduler, this_node, "fencing was requested", false);

    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
                            pcmk__str_null_matches)) {

        if (pending_too_long(scheduler, this_node, when_member, when_online)) {
            pe_fence_node(scheduler, this_node,
                          "peer pending timed out on joining the process group",
                          FALSE);

        } else if ((when_member > 0) || (when_online > 0)) {
            crm_info("- %s is not ready to run resources",
                     pe__node_name(this_node));
            this_node->details->standby = TRUE;
            this_node->details->pending = TRUE;

        } else {
            crm_trace("%s is down or still coming up",
                      pe__node_name(this_node));
        }

    } else if (when_member <= 0) {
        // Consider `priority-fencing-delay` for lost nodes
        pe_fence_node(scheduler, this_node,
                      "peer is no longer part of the cluster", TRUE);

    } else if (when_online <= 0) {
        pe_fence_node(scheduler, this_node,
                      "peer process is no longer available", FALSE);

        /* Everything is running at this point, now check join state */

    } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
        crm_info("%s is active", pe__node_name(this_node));

    } else if (pcmk__strcase_any_of(join, CRMD_JOINSTATE_PENDING,
                                    CRMD_JOINSTATE_DOWN, NULL)) {
        crm_info("%s is not ready to run resources", pe__node_name(this_node));
        this_node->details->standby = TRUE;
        this_node->details->pending = TRUE;

    } else {
        pe_fence_node(scheduler, this_node, "peer was in an unknown state",
                      FALSE);
    }

    return (when_member > 0);
}

static void
determine_remote_online_status(pcmk_scheduler_t *scheduler,
                               pcmk_node_t *this_node)
{
    pcmk_resource_t *rsc = this_node->details->remote_rsc;
    pcmk_resource_t *container = NULL;
    pcmk_node_t *host = NULL;

    /* If there is a node state entry for a (former) Pacemaker Remote node
     * but no resource creating that node, the node's connection resource will
     * be NULL. Consider it an offline remote node in that case.
     */
    if (rsc == NULL) {
        this_node->details->online = FALSE;
        goto remote_online_done;
    }

    container = rsc->container;

    if (container && pcmk__list_of_1(rsc->running_on)) {
        host = rsc->running_on->data;
    }

    /* If the resource is currently started, mark it online. */
    if (rsc->role == pcmk_role_started) {
        crm_trace("%s node %s presumed ONLINE because connection resource is started",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->online = TRUE;
    }

    /* consider this node shutting down if transitioning start->stop */
    if ((rsc->role == pcmk_role_started)
        && (rsc->next_role == pcmk_role_stopped)) {

        crm_trace("%s node %s shutting down because connection resource is stopping",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->shutdown = TRUE;
    }

    /* Now check all the failure conditions. */
    if (container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
        crm_trace("Guest node %s UNCLEAN because guest resource failed",
                  this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = TRUE;

    } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
        crm_trace("%s node %s OFFLINE because connection resource failed",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->online = FALSE;

    } else if ((rsc->role == pcmk_role_stopped)
               || ((container != NULL)
                   && (container->role == pcmk_role_stopped))) {

        crm_trace("%s node %s OFFLINE because its resource is stopped",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = FALSE;

    } else if (host && (host->details->online == FALSE)
               && host->details->unclean) {
        crm_trace("Guest node %s UNCLEAN because host is unclean",
                  this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = TRUE;
    }

remote_online_done:
    crm_trace("Remote node %s online=%s",
              this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
}

static void
determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
                        pcmk_scheduler_t *scheduler)
{
    gboolean online = FALSE;
    const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);

    CRM_CHECK(this_node != NULL, return);

    this_node->details->shutdown = FALSE;
    this_node->details->expected_up = FALSE;

    if (pe__shutdown_requested(this_node)) {
        this_node->details->shutdown = TRUE;

    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
        this_node->details->expected_up = TRUE;
    }

    if (this_node->details->type == node_ping) {
        this_node->details->unclean = FALSE;
        online = FALSE;  /* As far as resource management is concerned,
                          * the node is safely offline.
                          * Anyone caught abusing this logic will be shot
                          */

    } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
        online = determine_online_status_no_fencing(scheduler, node_state,
                                                    this_node);

    } else {
        online = determine_online_status_fencing(scheduler, node_state,
                                                 this_node);
    }

    if (online) {
        this_node->details->online = TRUE;

    } else {
        /* remove node from contention */
        this_node->fixed = TRUE; // @COMPAT deprecated and unused
        this_node->weight = -INFINITY;
    }

    if (online && this_node->details->shutdown) {
        /* don't run resources here */
        this_node->fixed = TRUE; // @COMPAT deprecated and unused
        this_node->weight = -INFINITY;
    }

    if (this_node->details->type == node_ping) {
        crm_info("%s is not a Pacemaker node", pe__node_name(this_node));

    } else if (this_node->details->unclean) {
        pe_proc_warn("%s is unclean", pe__node_name(this_node));

    } else if (this_node->details->online) {
        crm_info("%s is %s", pe__node_name(this_node),
                 this_node->details->shutdown ? "shutting down" :
                 this_node->details->pending ? "pending" :
                 this_node->details->standby ? "standby" :
                 this_node->details->maintenance ? "maintenance" : "online");

    } else {
        crm_trace("%s is offline", pe__node_name(this_node));
    }
}

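/* Find the last character of a resource ID's base name, ignoring any numeric
 * clone instance suffix such as ":0".
 */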
const char *
pe_base_name_end(const char *id)
{
    if (!pcmk__str_empty(id)) {
        const char *end = id + strlen(id) - 1;

        for (const char *s = end; s > id; --s) {
            switch (*s) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    break;
                case ':':
                    return (s == end)? s : (s - 1);
                default:
                    return end;
            }
        }
        return end;
    }
    return NULL;
}

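/* For example (with a hypothetical ID), pe_base_name_end("rsc:42") returns a
 * pointer to the 'c' in "rsc", and pe_base_name_end("rsc") returns a pointer
 * to that same final character.
 */

/* Strip any clone instance suffix from a resource ID: for a hypothetical ID,
 * clone_strip("rsc:42") returns newly allocated memory containing "rsc".
 * The caller is responsible for freeing the result.
 */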
char *
clone_strip(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    char *basename = NULL;

    CRM_ASSERT(end);
    basename = strndup(last_rsc_id, end - last_rsc_id + 1);
    CRM_ASSERT(basename);
    return basename;
}

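/* Replace a resource ID's clone instance suffix with ":0": for a
 * hypothetical ID, clone_zero("rsc:42") returns newly allocated memory
 * containing "rsc:0". The caller is responsible for freeing the result.
 */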
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = end - last_rsc_id + 1;
    char *zero = NULL;

    CRM_ASSERT(end);
    zero = calloc(base_name_len + 3, sizeof(char));
    CRM_ASSERT(zero);
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}

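/* Create an "orphan" resource object from a resource history entry whose
 * resource no longer exists in the configuration, and add it to the
 * scheduler's resource list.
 */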
static pcmk_resource_t *
create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
                     pcmk_scheduler_t *scheduler)
{
    pcmk_resource_t *rsc = NULL;
    xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);

    copy_in_properties(xml_rsc, rsc_entry);
    crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
    crm_log_xml_debug(xml_rsc, "Orphan resource");

    if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
        return NULL;
    }

    if (xml_contains_remote_node(xml_rsc)) {
        pcmk_node_t *node;

        crm_debug("Detected orphaned remote node %s", rsc_id);
        node = pe_find_node(scheduler->nodes, rsc_id);
        if (node == NULL) {
            node = pe_create_node(rsc_id, rsc_id, "remote", NULL, scheduler);
        }
        link_rsc2remotenode(scheduler, rsc);

        if (node) {
            crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
            node->details->shutdown = TRUE;
        }
    }

    if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
        /* This orphaned rsc needs to be mapped to a container. */
        crm_trace("Detected orphaned container filler %s", rsc_id);
        pe__set_resource_flags(rsc, pcmk_rsc_removed_filler);
    }
    pe__set_resource_flags(rsc, pcmk_rsc_removed);
    scheduler->resources = g_list_append(scheduler->resources, rsc);
    return rsc;
}

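/* Create a new instance of an anonymous clone to hold orphaned resource
 * history, and return the new instance's member matching rsc_id.
 */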
static pcmk_resource_t *
create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
                        const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
    pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);

    // find_rsc() because we might be a cloned group
    pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
                                                 pcmk_rsc_match_clone_only);

    pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
                 top->id, parent->id, rsc_id, pe__node_name(node));
    return orphan;
}

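/* Map a resource history entry for an anonymous clone to a clone instance:
 * prefer an instance already known to be active or pending on the given
 * node, fall back to an unused inactive instance, and otherwise create an
 * orphan instance.
 */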
1983static pcmk_resource_t *
1984find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1985 pcmk_resource_t *parent, const char *rsc_id)
1986{
1987 GList *rIter = NULL;
1988 pcmk_resource_t *rsc = NULL;
1989 pcmk_resource_t *inactive_instance = NULL;
1990 gboolean skip_inactive = FALSE;
1991
1992 CRM_ASSERT(parent != NULL);
1993 CRM_ASSERT(pe_rsc_is_clone(parent));
1995
1996 // Check for active (or partially active, for cloned groups) instance
1997 pe_rsc_trace(parent, "Looking for %s on %s in %s",
1998 rsc_id, pe__node_name(node), parent->id);
1999 for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2000 GList *locations = NULL;
2001 pcmk_resource_t *child = rIter->data;
2002
2003 /* Check whether this instance is already known to be active or pending
2004 * anywhere, at this stage of unpacking. Because this function is called
2005 * for a resource before the resource's individual operation history
2006 * entries are unpacked, locations will generally not contain the
2007 * desired node.
2008 *
2009 * However, there are three exceptions:
2010 * (1) when child is a cloned group and we have already unpacked the
2011 * history of another member of the group on the same node;
2012 * (2) when we've already unpacked the history of another numbered
2013 * instance on the same node (which can happen if globally-unique
2014 * was flipped from true to false); and
2015 * (3) when we re-run calculations on the same scheduler data as part of
2016 * a simulation.
2017 */
2018 child->fns->location(child, &locations, 2);
2019 if (locations) {
2020 /* We should never associate the same numbered anonymous clone
2021 * instance with multiple nodes, and clone instances can't migrate,
2022 * so there must be only one location, regardless of history.
2023 */
2024 CRM_LOG_ASSERT(locations->next == NULL);
2025
2026 if (((pcmk_node_t *) locations->data)->details == node->details) {
2027 /* This child instance is active on the requested node, so check
2028 * for a corresponding configured resource. We use find_rsc()
2029 * instead of child because child may be a cloned group, and we
2030 * need the particular member corresponding to rsc_id.
2031 *
2032 * If the history entry is orphaned, rsc will be NULL.
2033 */
2034                rsc = parent->fns->find_rsc(child, rsc_id, NULL,
2035                                            pcmk_rsc_match_clone_only);
2036                if (rsc) {
2037 /* If there are multiple instance history entries for an
2038 * anonymous clone in a single node's history (which can
2039 * happen if globally-unique is switched from true to
2040 * false), we want to consider the instances beyond the
2041 * first as orphans, even if there are inactive instance
2042 * numbers available.
2043 */
2044 if (rsc->running_on) {
2045 crm_notice("Active (now-)anonymous clone %s has "
2046 "multiple (orphan) instance histories on %s",
2047 parent->id, pe__node_name(node));
2048 skip_inactive = TRUE;
2049 rsc = NULL;
2050 } else {
2051 pe_rsc_trace(parent, "Resource %s, active", rsc->id);
2052 }
2053 }
2054 }
2055 g_list_free(locations);
2056
2057 } else {
2058 pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
2059 if (!skip_inactive && !inactive_instance
2060 && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2061 // Remember one inactive instance in case we don't find active
2062                inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
2063                                                          pcmk_rsc_match_clone_only);
2064
2065 /* ... but don't use it if it was already associated with a
2066 * pending action on another node
2067 */
2068 if (inactive_instance && inactive_instance->pending_node
2069 && (inactive_instance->pending_node->details != node->details)) {
2070 inactive_instance = NULL;
2071 }
2072 }
2073 }
2074 }
2075
2076 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2077 pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
2078 rsc = inactive_instance;
2079 }
2080
2081 /* If the resource has "requires" set to "quorum" or "nothing", and we don't
2082 * have a clone instance for every node, we don't want to consume a valid
2083 * instance number for unclean nodes. Such instances may appear to be active
2084 * according to the history, but should be considered inactive, so we can
2085 * start an instance elsewhere. Treat such instances as orphans.
2086 *
2087 * An exception is instances running on guest nodes -- since guest node
2088 * "fencing" is actually just a resource stop, requires shouldn't apply.
2089 *
2090 * @TODO Ideally, we'd use an inactive instance number if it is not needed
2091 * for any clean instances. However, we don't know that at this point.
2092 */
2093 if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2094 && (!node->details->online || node->details->unclean)
2095        && !pe__is_guest_node(node)
2096        && !pe__is_universal_clone(parent, scheduler)) {
2097
2098 rsc = NULL;
2099 }
2100
2101 if (rsc == NULL) {
2102 rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2103 pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
2104 }
2105 return rsc;
2106}
2107
2108static pcmk_resource_t *
2109unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2110 const char *rsc_id)
2111{
2112 pcmk_resource_t *rsc = NULL;
2113 pcmk_resource_t *parent = NULL;
2114
2115 crm_trace("looking for %s", rsc_id);
2116 rsc = pe_find_resource(scheduler->resources, rsc_id);
2117
2118 if (rsc == NULL) {
2119 /* If we didn't find the resource by its name in the operation history,
2120 * check it again as a clone instance. Even when clone-max=0, we create
2121 * a single :0 orphan to match against here.
2122 */
2123        char *clone0_id = clone_zero(rsc_id);
2124        pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
2125                                                   clone0_id);
2126
2127 if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2128 rsc = clone0;
2129 parent = uber_parent(clone0);
2130 crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2131 } else {
2132 crm_trace("%s is not known as %s either (orphan)",
2133 rsc_id, clone0_id);
2134 }
2135 free(clone0_id);
2136
2137 } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2138 crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2139 rsc_id);
2140 return NULL;
2141
2142 } else {
2143 parent = uber_parent(rsc);
2144 }
2145
2146 if (pe_rsc_is_anon_clone(parent)) {
2147
2148 if (pe_rsc_is_bundled(parent)) {
2149 rsc = pe__find_bundle_replica(parent->parent, node);
2150 } else {
2151 char *base = clone_strip(rsc_id);
2152
2153 rsc = find_anonymous_clone(scheduler, node, parent, base);
2154 free(base);
2155 CRM_ASSERT(rsc != NULL);
2156 }
2157 }
2158
2159 if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei)
2160 && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) {
2161
2162 pcmk__str_update(&rsc->clone_name, rsc_id);
2163 pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2164 rsc_id, pe__node_name(node), rsc->id,
2165 (pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : ""));
2166 }
2167 return rsc;
2168}
2169
2170static pcmk_resource_t *
2171process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2172                        pcmk_scheduler_t *scheduler)
2173{
2174 pcmk_resource_t *rsc = NULL;
2175 const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2176
2177 crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node));
2178 rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2179 if (rsc == NULL) {
2180 return NULL;
2181 }
2182
2183    if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
2184        pe__clear_resource_flags(rsc, pcmk_rsc_managed);
2185
2186 } else {
2187 CRM_CHECK(rsc != NULL, return NULL);
2188 pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
2189 resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__",
2190 scheduler);
2191 }
2192 return rsc;
2193}
2194
2195static void
2196process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2197 enum action_fail_response on_fail)
2198{
2199 pcmk_node_t *tmpnode = NULL;
2200 char *reason = NULL;
2201 enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
2202
2203 CRM_ASSERT(rsc);
2204 pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2205 rsc->id, role2text(rsc->role), pe__node_name(node),
2206 fail2text(on_fail));
2207
2208 /* process current state */
2209 if (rsc->role != pcmk_role_unknown) {
2210 pcmk_resource_t *iter = rsc;
2211
2212 while (iter) {
2213 if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
2214 pcmk_node_t *n = pe__copy_node(node);
2215
2216 pe_rsc_trace(rsc, "%s%s%s known on %s",
2217 rsc->id,
2218 ((rsc->clone_name == NULL)? "" : " also known as "),
2219 ((rsc->clone_name == NULL)? "" : rsc->clone_name),
2220 pe__node_name(n));
2221 g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
2222 }
2223 if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
2224 break;
2225 }
2226 iter = iter->parent;
2227 }
2228 }
2229
2230 /* If a managed resource is believed to be running, but node is down ... */
2231 if ((rsc->role > pcmk_role_stopped)
2232 && node->details->online == FALSE
2233        && node->details->maintenance == FALSE
2234        && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2235
2236 gboolean should_fence = FALSE;
2237
2238 /* If this is a guest node, fence it (regardless of whether fencing is
2239 * enabled, because guest node fencing is done by recovery of the
2240 * container resource rather than by the fencer). Mark the resource
2241 * we're processing as failed. When the guest comes back up, its
2242 * operation history in the CIB will be cleared, freeing the affected
2243 * resource to run again once we are sure we know its state.
2244 */
2245        if (pe__is_guest_node(node)) {
2246            pe__set_resource_flags(rsc,
2247                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2248            should_fence = TRUE;
2249
2250        } else if (pcmk_is_set(rsc->cluster->flags,
2251                               pcmk_sched_fencing_enabled)) {
2252            if (pe__is_remote_node(node) && node->details->remote_rsc
2253 && !pcmk_is_set(node->details->remote_rsc->flags,
2254 pcmk_rsc_failed)) {
2255
2256 /* Setting unseen means that fencing of the remote node will
2257 * occur only if the connection resource is not going to start
2258 * somewhere. This allows connection resources on a failed
2259 * cluster node to move to another node without requiring the
2260 * remote nodes to be fenced as well.
2261 */
2262 node->details->unseen = TRUE;
2263 reason = crm_strdup_printf("%s is active there (fencing will be"
2264 " revoked if remote connection can "
2265 "be re-established elsewhere)",
2266 rsc->id);
2267 }
2268 should_fence = TRUE;
2269 }
2270
2271 if (should_fence) {
2272 if (reason == NULL) {
2273 reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2274 }
2275 pe_fence_node(rsc->cluster, node, reason, FALSE);
2276 }
2277 free(reason);
2278 }
2279
2280 /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2281 save_on_fail = on_fail;
2282
2283 if (node->details->unclean) {
2284 /* No extra processing needed
2285 * Also allows resources to be started again after a node is shot
2286 */
2287 on_fail = pcmk_on_fail_ignore;
2288 }
2289
2290    switch (on_fail) {
2291        case pcmk_on_fail_ignore:
2292            /* nothing to do */
2293            break;
2294
2295        case pcmk_on_fail_demote:
2296            pe__set_resource_flags(rsc, pcmk_rsc_failed);
2297            demote_action(rsc, node, FALSE);
2298            break;
2299
2300        case pcmk_on_fail_fence_node:
2301            /* treat it as if it is still running
2302 * but also mark the node as unclean
2303 */
2304 reason = crm_strdup_printf("%s failed there", rsc->id);
2305 pe_fence_node(rsc->cluster, node, reason, FALSE);
2306 free(reason);
2307 break;
2308
2309        case pcmk_on_fail_standby_node:
2310            node->details->standby = TRUE;
2311            node->details->standby_onfail = TRUE;
2312            break;
2313
2314        case pcmk_on_fail_block:
2315            /* is_managed == FALSE will prevent any
2316             * actions being sent for the resource
2317             */
2318            pe__clear_resource_flags(rsc, pcmk_rsc_managed);
2319            pe__set_resource_flags(rsc, pcmk_rsc_blocked);
2320            break;
2321
2322 case pcmk_on_fail_ban:
2323 /* make sure it comes up somewhere else
2324 * or not at all
2325 */
2326 resource_location(rsc, node, -INFINITY, "__action_migration_auto__",
2327 rsc->cluster);
2328 break;
2329
2330 case pcmk_on_fail_stop:
2331 pe__set_next_role(rsc, pcmk_role_stopped, "on-fail=stop");
2332 break;
2333
2334        case pcmk_on_fail_restart:
2335            if ((rsc->role != pcmk_role_stopped)
2336                && (rsc->role != pcmk_role_unknown)) {
2337                pe__set_resource_flags(rsc,
2338                                       pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2339                stop_action(rsc, node, FALSE);
2340            }
2341            break;
2342
2343        case pcmk_on_fail_restart_container:
2344            pe__set_resource_flags(rsc,
2345                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2346            if (rsc->container && pe_rsc_is_bundled(rsc)) {
2347 /* A bundle's remote connection can run on a different node than
2348 * the bundle's container. We don't necessarily know where the
2349 * container is running yet, so remember it and add a stop
2350 * action for it later.
2351 */
2352 rsc->cluster->stop_needed =
2353 g_list_prepend(rsc->cluster->stop_needed, rsc->container);
2354 } else if (rsc->container) {
2355 stop_action(rsc->container, node, FALSE);
2356 } else if ((rsc->role != pcmk_role_stopped)
2357 && (rsc->role != pcmk_role_unknown)) {
2358 stop_action(rsc, node, FALSE);
2359 }
2360 break;
2361
2362        case pcmk_on_fail_reset_remote:
2363            pe__set_resource_flags(rsc,
2364                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2365            if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
2366                tmpnode = NULL;
2367 if (rsc->is_remote_node) {
2368 tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
2369 }
2370 if (tmpnode &&
2371 pe__is_remote_node(tmpnode) &&
2372 tmpnode->details->remote_was_fenced == 0) {
2373
2374 /* The remote connection resource failed in a way that
2375 * should result in fencing the remote node.
2376 */
2377 pe_fence_node(rsc->cluster, tmpnode,
2378 "remote connection is unrecoverable", FALSE);
2379 }
2380 }
2381
2382 /* require the stop action regardless if fencing is occurring or not. */
2383 if (rsc->role > pcmk_role_stopped) {
2384 stop_action(rsc, node, FALSE);
2385 }
2386
2387 /* if reconnect delay is in use, prevent the connection from exiting the
2388 * "STOPPED" role until the failure is cleared by the delay timeout. */
2389 if (rsc->remote_reconnect_ms) {
2390 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2391 }
2392 break;
2393 }
2394
2395 /* ensure a remote-node connection failure forces an unclean remote-node
2396 * to be fenced. By setting unseen = FALSE, the remote-node failure will
2397 * result in a fencing operation regardless if we're going to attempt to
2398 * reconnect to the remote-node in this transition or not. */
2399 if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
2400 tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
2401 if (tmpnode && tmpnode->details->unclean) {
2402 tmpnode->details->unseen = FALSE;
2403 }
2404 }
2405
2406 if ((rsc->role != pcmk_role_stopped)
2407 && (rsc->role != pcmk_role_unknown)) {
2408 if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
2409 if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2410 pcmk__config_warn("Detected active orphan %s running on %s",
2411 rsc->id, pe__node_name(node));
2412 } else {
2413 pcmk__config_warn("Resource '%s' must be stopped manually on "
2414 "%s because cluster is configured not to "
2415 "stop active orphans",
2416 rsc->id, pe__node_name(node));
2417 }
2418 }
2419
2420 native_add_running(rsc, node, rsc->cluster,
2421 (save_on_fail != pcmk_on_fail_ignore));
2422        switch (on_fail) {
2423            case pcmk_on_fail_ignore:
2424                break;
2425            case pcmk_on_fail_demote:
2426            case pcmk_on_fail_block:
2427                pe__set_resource_flags(rsc, pcmk_rsc_failed);
2428                break;
2429            default:
2430                pe__set_resource_flags(rsc,
2431                                       pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2432                break;
2433        }
2434
2434
2435 } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2436 /* Only do this for older status sections that included instance numbers
2437 * Otherwise stopped instances will appear as orphans
2438 */
2439 pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2440 free(rsc->clone_name);
2441 rsc->clone_name = NULL;
2442
2443 } else {
2444 GList *possible_matches = pe__resource_actions(rsc, node,
2445 PCMK_ACTION_STOP, FALSE);
2446 GList *gIter = possible_matches;
2447
2448 for (; gIter != NULL; gIter = gIter->next) {
2449 pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2450
2451            pe__set_action_flags(stop, pcmk_action_optional);
2452        }
2453
2454 g_list_free(possible_matches);
2455 }
2456
2457 /* A successful stop after migrate_to on the migration source doesn't make
2458 * the partially migrated resource stopped on the migration target.
2459 */
2460    if ((rsc->role == pcmk_role_stopped)
2461        && rsc->partial_migration_source
2462        && rsc->partial_migration_source->details == node->details
2463        && rsc->partial_migration_target
2464        && rsc->running_on) {
2465
2466 rsc->role = pcmk_role_started;
2467 }
2468}
2469
2470/* create active recurring operations as optional */
2471static void
2472process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2473 int start_index, int stop_index,
2474 GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2475{
2476 int counter = -1;
2477 const char *task = NULL;
2478 const char *status = NULL;
2479 GList *gIter = sorted_op_list;
2480
2481 CRM_ASSERT(rsc);
2482 pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2483
2484 for (; gIter != NULL; gIter = gIter->next) {
2485 xmlNode *rsc_op = (xmlNode *) gIter->data;
2486
2487 guint interval_ms = 0;
2488 char *key = NULL;
2489 const char *id = ID(rsc_op);
2490
2491 counter++;
2492
2493 if (node->details->online == FALSE) {
2494 pe_rsc_trace(rsc, "Skipping %s on %s: node is offline",
2495 rsc->id, pe__node_name(node));
2496 break;
2497
2498 /* Need to check if there's a monitor for role="Stopped" */
2499 } else if (start_index < stop_index && counter <= stop_index) {
2500 pe_rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2501 id, pe__node_name(node));
2502 continue;
2503
2504 } else if (counter < start_index) {
2505 pe_rsc_trace(rsc, "Skipping %s on %s: old %d",
2506 id, pe__node_name(node), counter);
2507 continue;
2508 }
2509
2510 crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2511 if (interval_ms == 0) {
2512 pe_rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2513 id, pe__node_name(node));
2514 continue;
2515 }
2516
2517 status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2518 if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2519 pe_rsc_trace(rsc, "Skipping %s on %s: status",
2520 id, pe__node_name(node));
2521 continue;
2522 }
2523 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2524 /* create the action */
2525 key = pcmk__op_key(rsc->id, task, interval_ms);
2526 pe_rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node));
2527 custom_action(rsc, key, task, node, TRUE, scheduler);
2528 }
2529}
2530
2531void
2532calculate_active_ops(const GList *sorted_op_list, int *start_index,
2533 int *stop_index)
2534{
2535 int counter = -1;
2536 int implied_monitor_start = -1;
2537 int implied_clone_start = -1;
2538 const char *task = NULL;
2539 const char *status = NULL;
2540
2541 *stop_index = -1;
2542 *start_index = -1;
2543
2544 for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2545 const xmlNode *rsc_op = (const xmlNode *) iter->data;
2546
2547 counter++;
2548
2549 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2550 status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2551
2552 if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2553 && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2554 *stop_index = counter;
2555
2556 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2557 PCMK_ACTION_MIGRATE_FROM, NULL)) {
2558 *start_index = counter;
2559
2560 } else if ((implied_monitor_start <= *stop_index)
2561 && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2562 pcmk__str_casei)) {
2563 const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2564
2565 if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2566 implied_monitor_start = counter;
2567            }
2568        } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2569                                        PCMK_ACTION_DEMOTE, NULL)) {
2570 implied_clone_start = counter;
2571 }
2572 }
2573
2574 if (*start_index == -1) {
2575 if (implied_clone_start != -1) {
2576 *start_index = implied_clone_start;
2577 } else if (implied_monitor_start != -1) {
2578 *start_index = implied_monitor_start;
2579 }
2580 }
2581}
2582
2583// If resource history entry has shutdown lock, remember lock node and time
2584static void
2585unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2586                     const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2587{
2588 time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2589
2591 &lock_time) == pcmk_ok) && (lock_time != 0)) {
2592
2593        if ((scheduler->shutdown_lock > 0)
2594            && (get_effective_time(scheduler)
2595                > (lock_time + scheduler->shutdown_lock))) {
2596 pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
2597 rsc->id, pe__node_name(node));
2598 pe__clear_resource_history(rsc, node);
2599 } else {
2600 /* @COMPAT I don't like breaking const signatures, but
2601 * rsc->lock_node should really be const -- we just can't change it
2602 * until the next API compatibility break.
2603 */
2604 rsc->lock_node = (pcmk_node_t *) node;
2605 rsc->lock_time = lock_time;
2606 }
2607 }
2608}
2609
2620static pcmk_resource_t *
2621unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2622                    pcmk_scheduler_t *scheduler)
2623{
2624 GList *gIter = NULL;
2625 int stop_index = -1;
2626 int start_index = -1;
2627 enum rsc_role_e req_role = pcmk_role_unknown;
2628
2629 const char *rsc_id = ID(lrm_resource);
2630
2631 pcmk_resource_t *rsc = NULL;
2632 GList *op_list = NULL;
2633 GList *sorted_op_list = NULL;
2634
2635 xmlNode *rsc_op = NULL;
2636 xmlNode *last_failure = NULL;
2637
2638    enum action_fail_response on_fail = pcmk_on_fail_ignore;
2639    enum rsc_role_e saved_role = pcmk_role_unknown;
2640
2641 if (rsc_id == NULL) {
2642 crm_warn("Ignoring malformed " XML_LRM_TAG_RESOURCE
2643 " entry without id");
2644 return NULL;
2645 }
2646 crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s",
2647 rsc_id, pe__node_name(node));
2648
2649 // Build a list of individual lrm_rsc_op entries, so we can sort them
2650 for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
2651 rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
2652
2653 op_list = g_list_prepend(op_list, rsc_op);
2654 }
2655
2656    if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2657        if (op_list == NULL) {
2658 // If there are no operations, there is nothing to do
2659 return NULL;
2660 }
2661 }
2662
2663 /* find the resource */
2664 rsc = unpack_find_resource(scheduler, node, rsc_id);
2665 if (rsc == NULL) {
2666 if (op_list == NULL) {
2667 // If there are no operations, there is nothing to do
2668 return NULL;
2669 } else {
2670 rsc = process_orphan_resource(lrm_resource, node, scheduler);
2671 }
2672 }
2673 CRM_ASSERT(rsc != NULL);
2674
2675    // Check whether the resource is "shutdown-locked" to this node
2676    if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2677        unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2678 }
2679
2680 /* process operations */
2681 saved_role = rsc->role;
2682 rsc->role = pcmk_role_unknown;
2683 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2684
2685 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2686 xmlNode *rsc_op = (xmlNode *) gIter->data;
2687
2688 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2689 }
2690
2691 /* create active recurring operations as optional */
2692 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2693 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2694 scheduler);
2695
2696 /* no need to free the contents */
2697 g_list_free(sorted_op_list);
2698
2699 process_rsc_state(rsc, node, on_fail);
2700
2701 if (get_target_role(rsc, &req_role)) {
2702 if ((rsc->next_role == pcmk_role_unknown)
2703 || (req_role < rsc->next_role)) {
2704
2705            pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE);
2706
2707 } else if (req_role > rsc->next_role) {
2708 pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2709 " with requested next role %s",
2710 rsc->id, role2text(rsc->next_role), role2text(req_role));
2711 }
2712 }
2713
2714 if (saved_role > rsc->role) {
2715 rsc->role = saved_role;
2716 }
2717
2718 return rsc;
2719}
2720
2721static void
2722handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2723                                  pcmk_scheduler_t *scheduler)
2724{
2725 for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list);
2726 rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2727
2728 pcmk_resource_t *rsc;
2729 pcmk_resource_t *container;
2730 const char *rsc_id;
2731 const char *container_id;
2732
2733 if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) {
2734 continue;
2735 }
2736
2737 container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2738 rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2739 if (container_id == NULL || rsc_id == NULL) {
2740 continue;
2741 }
2742
2743 container = pe_find_resource(scheduler->resources, container_id);
2744 if (container == NULL) {
2745 continue;
2746 }
2747
2748 rsc = pe_find_resource(scheduler->resources, rsc_id);
2749        if ((rsc == NULL) || (rsc->container != NULL)
2750            || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2751            continue;
2752 }
2753
2754 pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2755 rsc->id, container_id);
2756 rsc->container = container;
2757 container->fillers = g_list_append(container->fillers, rsc);
2758 }
2759}
2760
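2761/*!
2762 * \internal
2763 * \brief Unpack one node's lrm status section
2764 *
2765 * \param[in,out] node       Node whose status is being unpacked
2766 * \param[in]     xml        CIB node state XML
2767 * \param[in,out] scheduler  Scheduler data
2768 */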
2769static void
2770unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2771                pcmk_scheduler_t *scheduler)
2772{
2773 bool found_orphaned_container_filler = false;
2774
2775 // Drill down to lrm_resources section
2776 xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE);
2777 if (xml == NULL) {
2778 return;
2779 }
2780 xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE);
2781 if (xml == NULL) {
2782 return;
2783 }
2784
2785 // Unpack each lrm_resource entry
2786 for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE);
2787 rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
2788
2789 pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2790
2791        if ((rsc != NULL)
2792            && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2793            found_orphaned_container_filler = true;
2794 }
2795 }
2796
2797 /* Now that all resource state has been unpacked for this node, map any
2798 * orphaned container fillers to their container resource.
2799 */
2800 if (found_orphaned_container_filler) {
2801 handle_orphaned_container_fillers(xml, scheduler);
2802 }
2803}
2804
2805static void
2806set_active(pcmk_resource_t *rsc)
2807{
2808 const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2809
2810    if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2811        rsc->role = pcmk_role_unpromoted;
2812    } else {
2813 rsc->role = pcmk_role_started;
2814 }
2815}
2816
2817static void
2818set_node_score(gpointer key, gpointer value, gpointer user_data)
2819{
2820 pcmk_node_t *node = value;
2821 int *score = user_data;
2822
2823 node->weight = *score;
2824}
2825
2826#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
2827 "/" XML_CIB_TAG_STATE
2828#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \
2829 "/" XML_LRM_TAG_RESOURCES \
2830 "/" XML_LRM_TAG_RESOURCE
2831#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP
2832
2833static xmlNode *
2834find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2835 int target_rc, pcmk_scheduler_t *scheduler)
2836{
2837 GString *xpath = NULL;
2838 xmlNode *xml = NULL;
2839
2840 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2841 return NULL);
2842
2843 xpath = g_string_sized_new(256);
2844 pcmk__g_strcat(xpath,
2845 XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']"
2846 SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']"
2847 SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'",
2848 NULL);
2849
2850 /* Need to check against transition_magic too? */
2851 if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2852 pcmk__g_strcat(xpath,
2853 " and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']",
2854 NULL);
2855
2856 } else if ((source != NULL)
2857 && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2858 pcmk__g_strcat(xpath,
2859 " and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']",
2860 NULL);
2861 } else {
2862 g_string_append_c(xpath, ']');
2863 }
2864
2865 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2866 LOG_DEBUG);
2867 g_string_free(xpath, TRUE);
2868
2869 if (xml && target_rc >= 0) {
2870 int rc = PCMK_OCF_UNKNOWN_ERROR;
2871 int status = PCMK_EXEC_ERROR;
2872
2873        crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
2874        crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
2875        if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2876 return NULL;
2877 }
2878 }
2879 return xml;
2880}
2881
2882static xmlNode *
2883find_lrm_resource(const char *rsc_id, const char *node_name,
2884                  pcmk_scheduler_t *scheduler)
2885{
2886 GString *xpath = NULL;
2887 xmlNode *xml = NULL;
2888
2889 CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2890
2891 xpath = g_string_sized_new(256);
2892 pcmk__g_strcat(xpath,
2893 XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
2894 SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']",
2895 NULL);
2896
2897 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2898 LOG_DEBUG);
2899
2900 g_string_free(xpath, TRUE);
2901 return xml;
2902}
2903
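2904/*!
2905 * \internal
2906 * \brief Check whether a resource has no completed action history on a node
2907 *
2908 * \param[in,out] rsc        Resource to check
2909 * \param[in]     node_name  Node to check
2910 *
2911 * \return true if \p rsc has no completed action history on \p node_name
2912 */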
2913static bool
2914unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
2915{
2916 bool result = false;
2917 xmlXPathObjectPtr search;
2918 GString *xpath = g_string_sized_new(256);
2919
2920 pcmk__g_strcat(xpath,
2921 XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
2922 SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']"
2923 SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']",
2924 NULL);
2925 search = xpath_search(rsc->cluster->input, (const char *) xpath->str);
2926 result = (numXpathResults(search) == 0);
2927 freeXpathObject(search);
2928 g_string_free(xpath, TRUE);
2929 return result;
2930}
2931
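2932/*!
2933 * \internal
2934 * \brief Check whether a "not running" monitor result occurred on a node
2935 *        after some event
2936 * \param[in]     rsc_id     Resource being checked
2937 * \param[in]     node_name  Node being checked
2938 * \param[in]     xml_op     Event that monitor is being compared to
2939 * \param[in]     same_node  Whether the operations are on the same node
2940 * \param[in,out] scheduler  Scheduler data
2941 *
2942 * \return true if such a monitor happened after the event, false otherwise
2943 */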
2944static bool
2945monitor_not_running_after(const char *rsc_id, const char *node_name,
2946                          const xmlNode *xml_op, bool same_node,
2947                          pcmk_scheduler_t *scheduler)
2948{
2949 /* Any probe/monitor operation on the node indicating it was not running
2950 * there
2951 */
2952    xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
2953                                   NULL, PCMK_OCF_NOT_RUNNING, scheduler);
2954
2955 return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
2956}
2957
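2958/*!
2959 * \internal
2960 * \brief Check whether any non-monitor operation on a node happened after
2961 *        some event
2962 * \param[in]     rsc_id     Resource being checked
2963 * \param[in]     node_name  Node being checked
2964 * \param[in]     xml_op     Event that non-monitor is being compared to
2965 * \param[in]     same_node  Whether the operations are on the same node
2966 * \param[in,out] scheduler  Scheduler data
2967 *
2968 * \return true if such an operation happened after the event, false otherwise
2969 */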
2970static bool
2971non_monitor_after(const char *rsc_id, const char *node_name,
2972                  const xmlNode *xml_op, bool same_node,
2973                  pcmk_scheduler_t *scheduler)
2974{
2975 xmlNode *lrm_resource = NULL;
2976
2977 lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
2978 if (lrm_resource == NULL) {
2979 return false;
2980 }
2981
2982 for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
2983 op != NULL; op = crm_next_same_xml(op)) {
2984 const char * task = NULL;
2985
2986 if (op == xml_op) {
2987 continue;
2988 }
2989
2990        task = crm_element_value(op, XML_LRM_ATTR_TASK);
2991
2992        if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
2993                             PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
2994                             NULL)
2995 && pe__is_newer_op(op, xml_op, same_node) > 0) {
2996 return true;
2997 }
2998 }
2999
3000 return false;
3001}
3002
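3003/*!
3004 * \internal
3005 * \brief Check whether the resource has newer state on a node after a
3006 *        migration attempt
3007 * \param[in]     rsc_id        Resource being checked
3008 * \param[in]     node_name     Node being checked
3009 * \param[in]     migrate_to    Any migrate_to event being compared to
3010 * \param[in]     migrate_from  Any migrate_from event being compared to
3011 * \param[in,out] scheduler     Scheduler data
3012 *
3013 * \return true if such newer state exists, false otherwise
3014 */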
3015static bool
3016newer_state_after_migrate(const char *rsc_id, const char *node_name,
3017 const xmlNode *migrate_to,
3018                          const xmlNode *migrate_from,
3019                          pcmk_scheduler_t *scheduler)
3020{
3021 const xmlNode *xml_op = migrate_to;
3022 const char *source = NULL;
3023 const char *target = NULL;
3024 bool same_node = false;
3025
3026 if (migrate_from) {
3027 xml_op = migrate_from;
3028 }
3029
3030    source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
3031    target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3032
3033 /* It's preferred to compare to the migrate event on the same node if
3034 * existing, since call ids are more reliable.
3035 */
3036 if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3037 if (migrate_from) {
3038 xml_op = migrate_from;
3039 same_node = true;
3040
3041 } else {
3042 xml_op = migrate_to;
3043 }
3044
3045 } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3046 if (migrate_to) {
3047 xml_op = migrate_to;
3048 same_node = true;
3049
3050 } else {
3051 xml_op = migrate_from;
3052 }
3053 }
3054
3055 /* If there's any newer non-monitor operation on the node, or any newer
3056 * probe/monitor operation on the node indicating it was not running there,
3057 * the migration events potentially no longer matter for the node.
3058 */
3059 return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3060 || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3061 scheduler);
3062}
3063
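3064/*!
3065 * \internal
3066 * \brief Parse migration source and target node names from history entry
3067 *
3068 * \param[in]  entry        Resource history entry for a migration action
3069 * \param[in]  source_node  Node where action was expected to run (or NULL)
3070 * \param[in]  target_node  Node where action was expected to run (or NULL)
3071 * \param[out] source_name  Where to store migration source node name
3072 * \param[out] target_name  Where to store migration target node name
3073 *
3074 * \return Standard Pacemaker return code
3075 */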
3076static int
3077get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3078 const pcmk_node_t *target_node,
3079 const char **source_name, const char **target_name)
3080{
3081 *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE);
3082 *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET);
3083 if ((*source_name == NULL) || (*target_name == NULL)) {
3084        crm_err("Ignoring resource history entry %s without "
3085                XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET,
3086                ID(entry));
3087 return pcmk_rc_unpack_error;
3088 }
3089
3090 if ((source_node != NULL)
3091        && !pcmk__str_eq(*source_name, source_node->details->uname,
3092                         pcmk__str_casei|pcmk__str_null_matches)) {
3093        crm_err("Ignoring resource history entry %s because "
3094 XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s",
3095 ID(entry), *source_name, pe__node_name(source_node));
3096 return pcmk_rc_unpack_error;
3097 }
3098
3099 if ((target_node != NULL)
3100        && !pcmk__str_eq(*target_name, target_node->details->uname,
3101                         pcmk__str_casei|pcmk__str_null_matches)) {
3102        crm_err("Ignoring resource history entry %s because "
3103 XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s",
3104 ID(entry), *target_name, pe__node_name(target_node));
3105 return pcmk_rc_unpack_error;
3106 }
3107
3108 return pcmk_rc_ok;
3109}
3110
3111/*!
3112 * \internal
3113 * \brief Add a migration source to a resource's list of dangling migrations
3114 *
3115 * If the migrate_to and migrate_from actions in a live migration both
3116 * succeeded, but there is no stop on the source, the migration is considered
3117 * "dangling." Add the source to the resource's dangling migration list, which
3118 * will be used to schedule a stop on the source without affecting the target.
3119 *
3120 * \param[in,out] rsc Resource involved in migration
3121 * \param[in] node Migration source
3122 */
3123static void
3124add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3125{
3126 pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3127 rsc->id, pe__node_name(node));
3128 rsc->role = pcmk_role_stopped;
3129 rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3130 (gpointer) node);
3131}
3132
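3133/*!
3134 * \internal
3135 * \brief Update resource role etc. after a successful migrate_to action
3136 *
3137 * \param[in,out] history  Parsed action result history
3138 */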
3139static void
3140unpack_migrate_to_success(struct action_history *history)
3141{
3142 /* A complete migration sequence is:
3143 * 1. migrate_to on source node (which succeeded if we get to this function)
3144 * 2. migrate_from on target node
3145 * 3. stop on source node
3146 *
3147 * If no migrate_from has happened, the migration is considered to be
3148 * "partial". If the migrate_from succeeded but no stop has happened, the
3149 * migration is considered to be "dangling".
3150 *
3151 * If a successful migrate_to and stop have happened on the source node, we
3152 * still need to check for a partial migration, due to scenarios (easier to
3153 * produce with batch-limit=1) like:
3154 *
3155 * - A resource is migrating from node1 to node2, and a migrate_to is
3156 * initiated for it on node1.
3157 *
3158 * - node2 goes into standby mode while the migrate_to is pending, which
3159 * aborts the transition.
3160 *
3161 * - Upon completion of the migrate_to, a new transition schedules a stop
3162 * on both nodes and a start on node1.
3163 *
3164 * - If the new transition is aborted for any reason while the resource is
3165 * stopping on node1, the transition after that stop completes will see
3166 * the migrate_to and stop on the source, but it's still a partial
3167 * migration, and the resource must be stopped on node2 because it is
3168 * potentially active there due to the migrate_to.
3169 *
3170 * We also need to take into account that either node's history may be
3171 * cleared at any point in the migration process.
3172 */
3173 int from_rc = PCMK_OCF_OK;
3174 int from_status = PCMK_EXEC_PENDING;
3175 pcmk_node_t *target_node = NULL;
3176 xmlNode *migrate_from = NULL;
3177 const char *source = NULL;
3178 const char *target = NULL;
3179 bool source_newer_op = false;
3180 bool target_newer_state = false;
3181 bool active_on_target = false;
3182
3183 // Get source and target node names from XML
3184 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3185 &target) != pcmk_rc_ok) {
3186 return;
3187 }
3188
3189 // Check for newer state on the source
3190 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3191 true, history->rsc->cluster);
3192
3193 // Check for a migrate_from action from this source on the target
3194 migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3195 target, source, -1, history->rsc->cluster);
3196 if (migrate_from != NULL) {
3197 if (source_newer_op) {
3198 /* There's a newer non-monitor operation on the source and a
3199 * migrate_from on the target, so this migrate_to is irrelevant to
3200 * the resource's state.
3201 */
3202 return;
3203 }
3204        crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
3205        crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS,
3206                              &from_status);
3207 }
3208
3209 /* If the resource has newer state on both the source and target after the
3210 * migration events, this migrate_to is irrelevant to the resource's state.
3211 */
3212 target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3213 history->xml, migrate_from,
3214 history->rsc->cluster);
3215 if (source_newer_op && target_newer_state) {
3216 return;
3217 }
3218
3219 /* Check for dangling migration (migrate_from succeeded but stop not done).
3220 * We know there's no stop because we already returned if the target has a
3221 * migrate_from and the source has any newer non-monitor operation.
3222 */
3223 if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3224 add_dangling_migration(history->rsc, history->node);
3225 return;
3226 }
3227
3228 /* Without newer state, this migrate_to implies the resource is active.
3229 * (Clones are not allowed to migrate, so role can't be promoted.)
3230 */
3231 history->rsc->role = pcmk_role_started;
3232
3233 target_node = pe_find_node(history->rsc->cluster->nodes, target);
3234 active_on_target = !target_newer_state && (target_node != NULL)
3235 && target_node->details->online;
3236
3237 if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3238 if (active_on_target) {
3239 native_add_running(history->rsc, target_node, history->rsc->cluster,
3240 TRUE);
3241 } else {
3242 // Mark resource as failed, require recovery, and prevent migration
3243            pe__set_resource_flags(history->rsc,
3244                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3245            pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
3246        }
3247 return;
3248 }
3249
3250 // The migrate_from is pending, complete but erased, or to be scheduled
3251
3252 /* If there is no history at all for the resource on an online target, then
3253 * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3254 * have the probe result, it will be reflected in target_newer_state.
3255 */
3256 if ((target_node != NULL) && target_node->details->online
3257 && unknown_on_node(history->rsc, target)) {
3258 return;
3259 }
3260
3261 if (active_on_target) {
3262 pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
3263 source);
3264
3265 native_add_running(history->rsc, target_node, history->rsc->cluster,
3266 FALSE);
3267 if ((source_node != NULL) && source_node->details->online) {
3268 /* This is a partial migration: the migrate_to completed
3269 * successfully on the source, but the migrate_from has not
3270 * completed. Remember the source and target; if the newly
3271 * chosen target remains the same when we schedule actions
3272 * later, we may continue with the migration.
3273 */
3274 history->rsc->partial_migration_target = target_node;
3275 history->rsc->partial_migration_source = source_node;
3276 }
3277
3278 } else if (!source_newer_op) {
3279 // Mark resource as failed, require recovery, and prevent migration
3280        pe__set_resource_flags(history->rsc,
3281                               pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3282        pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
3283    }
3284}
3285
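3286/*!
3287 * \internal
3288 * \brief Update resource role etc. after a failed migrate_to action
3289 *
3290 * \param[in,out] history  Parsed action result history
3291 */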
3292static void
3293unpack_migrate_to_failure(struct action_history *history)
3294{
3295 xmlNode *target_migrate_from = NULL;
3296 const char *source = NULL;
3297 const char *target = NULL;
3298
3299 // Get source and target node names from XML
3300 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3301 &target) != pcmk_rc_ok) {
3302 return;
3303 }
3304
3305 /* If a migration failed, we have to assume the resource is active. Clones
3306 * are not allowed to migrate, so role can't be promoted.
3307 */
3308 history->rsc->role = pcmk_role_started;
3309
3310 // Check for migrate_from on the target
3311    target_migrate_from = find_lrm_op(history->rsc->id,
3312                                      PCMK_ACTION_MIGRATE_FROM, target, source,
3313                                      PCMK_OCF_OK, history->rsc->cluster);
3314
3315 if (/* If the resource state is unknown on the target, it will likely be
3316 * probed there.
3317 * Don't just consider it running there. We will get back here anyway in
3318 * case the probe detects it's running there.
3319 */
3320 !unknown_on_node(history->rsc, target)
3321 /* If the resource has newer state on the target after the migration
3322 * events, this migrate_to no longer matters for the target.
3323 */
3324 && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3325 target_migrate_from,
3326 history->rsc->cluster)) {
3327 /* The resource has no newer state on the target, so assume it's still
3328         * active there (if it is up).
3329         */
3331 pcmk_node_t *target_node = pe_find_node(history->rsc->cluster->nodes,
3332 target);
3333
3334 if (target_node && target_node->details->online) {
3335 native_add_running(history->rsc, target_node, history->rsc->cluster,
3336 FALSE);
3337 }
3338
3339 } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3340 history->rsc->cluster)) {
3341 /* We know the resource has newer state on the target, but this
3342 * migrate_to still matters for the source as long as there's no newer
3343 * non-monitor operation there.
3344 */
3345
3346 // Mark node as having dangling migration so we can force a stop later
3347 history->rsc->dangling_migrations =
3348 g_list_prepend(history->rsc->dangling_migrations,
3349 (gpointer) history->node);
3350 }
3351}
3352
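3353/*!
3354 * \internal
3355 * \brief Update resource role etc. after a failed migrate_from action
3356 *
3357 * \param[in,out] history  Parsed action result history
3358 */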
3359static void
3360unpack_migrate_from_failure(struct action_history *history)
3361{
3362 xmlNode *source_migrate_to = NULL;
3363 const char *source = NULL;
3364 const char *target = NULL;
3365
3366 // Get source and target node names from XML
3367 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3368 &target) != pcmk_rc_ok) {
3369 return;
3370 }
3371
3372 /* If a migration failed, we have to assume the resource is active. Clones
3373 * are not allowed to migrate, so role can't be promoted.
3374 */
3375 history->rsc->role = pcmk_role_started;
3376
3377 // Check for a migrate_to on the source
3378 source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3379 source, target, PCMK_OCF_OK,
3380 history->rsc->cluster);
3381
3382 if (/* If the resource state is unknown on the source, it will likely be
3383 * probed there.
3384 * Don't just consider it running there. We will get back here anyway in
3385 * case the probe detects it's running there.
3386 */
3387 !unknown_on_node(history->rsc, source)
3388 /* If the resource has newer state on the source after the migration
3389 * events, this migrate_from no longer matters for the source.
3390 */
3391 && !newer_state_after_migrate(history->rsc->id, source,
3392 source_migrate_to, history->xml,
3393 history->rsc->cluster)) {
3394 /* The resource has no newer state on the source, so assume it's still
3395 * active there (if it is up).
3396 */
3397 pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
3398 source);
3399
3400 if (source_node && source_node->details->online) {
3401 native_add_running(history->rsc, source_node, history->rsc->cluster,
3402 TRUE);
3403 }
3404 }
3405}
3406
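3407/*!
3408 * \internal
3409 * \brief Add an action to cluster's list of failed actions
3410 *
3411 * \param[in,out] history  Parsed action result history
3412 */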
3413static void
3414record_failed_op(struct action_history *history)
3415{
3416 if (!(history->node->details->online)) {
3417 return;
3418 }
3419
3420 for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3421 xIter != NULL; xIter = xIter->next) {
3422
3423 const char *key = pe__xe_history_key(xIter);
3424 const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
3425
3426 if (pcmk__str_eq(history->key, key, pcmk__str_none)
3427 && pcmk__str_eq(uname, history->node->details->uname,
3428 pcmk__str_casei)) {
3429 crm_trace("Skipping duplicate entry %s on %s",
3430 history->key, pe__node_name(history->node));
3431 return;
3432 }
3433 }
3434
3435 crm_trace("Adding entry for %s on %s to failed action list",
3436 history->key, pe__node_name(history->node));
3437 crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
3438 crm_xml_add(history->xml, XML_LRM_ATTR_RSCID, history->rsc->id);
3439 add_node_copy(history->rsc->cluster->failed, history->xml);
3440}
3441
3442static char *
3443last_change_str(const xmlNode *xml_op)
3444{
3445 time_t when;
3446 char *result = NULL;
3447
3448    if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
3449                                &when) == pcmk_ok) {
3450 char *when_s = pcmk__epoch2str(&when, 0);
3451 const char *p = strchr(when_s, ' ');
3452
3453 // Skip day of week to make message shorter
3454 if ((p != NULL) && (*(++p) != '\0')) {
3455 result = strdup(p);
3456 CRM_ASSERT(result != NULL);
3457 }
3458 free(when_s);
3459 }
3460
3461 if (result == NULL) {
3462 result = strdup("unknown time");
3463 CRM_ASSERT(result != NULL);
3464 }
3465
3466 return result;
3467}
3468
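3469/*!
3470 * \internal
3471 * \brief Compare two on-fail values
3472 *
3473 * \param[in] first   One on-fail value to compare
3474 * \param[in] second  The other on-fail value to compare
3475 *
3476 * \return A negative number if second is more severe than first, zero if
3477 *         they are equal, and a positive number if first is more severe
3478 *         than second
3479 */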
3481static int
3482cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3483{
3484    switch (first) {
3485        case pcmk_on_fail_demote:
3486            switch (second) {
3487                case pcmk_on_fail_ignore:
3488                    return 1;
3489                case pcmk_on_fail_demote:
3490                    return 0;
3491                default:
3492                    return -1;
3493            }
3494            break;
3495
3496        case pcmk_on_fail_reset_remote:
3497            switch (second) {
3498                case pcmk_on_fail_ignore:
3499                case pcmk_on_fail_demote:
3500                case pcmk_on_fail_restart:
3501                    return 1;
3502                case pcmk_on_fail_reset_remote:
3503                    return 0;
3504                default:
3505                    return -1;
3506            }
3507            break;
3508
3509        case pcmk_on_fail_restart_container:
3510            switch (second) {
3511                case pcmk_on_fail_ignore:
3512                case pcmk_on_fail_demote:
3513                case pcmk_on_fail_restart:
3514                case pcmk_on_fail_reset_remote:
3515                    return 1;
3516                case pcmk_on_fail_restart_container:
3517                    return 0;
3518                default:
3519                    return -1;
3520            }
3521            break;
3522
3523        default:
3524            break;
3525    }
3526    switch (second) {
3527        case pcmk_on_fail_demote:
3528            return (first == pcmk_on_fail_ignore)? -1 : 1;
3529
3530        case pcmk_on_fail_reset_remote:
3531            switch (first) {
3532                case pcmk_on_fail_ignore:
3533                case pcmk_on_fail_demote:
3534                case pcmk_on_fail_restart:
3535                    return -1;
3536                default:
3537                    return 1;
3538            }
3539            break;
3540
3541        case pcmk_on_fail_restart_container:
3542            switch (first) {
3543                case pcmk_on_fail_ignore:
3544                case pcmk_on_fail_demote:
3545                case pcmk_on_fail_restart:
3546                case pcmk_on_fail_reset_remote:
3547                    return -1;
3548                default:
3549                    return 1;
3550            }
3551            break;
3552
3553        default:
3554            break;
3555    }
3556 return first - second;
3557}
3558
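3559/*!
3560 * \internal
3561 * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3562 *
3563 * \param[in,out] rsc  Resource to ban
3564 */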
3565static void
3566ban_from_all_nodes(pcmk_resource_t *rsc)
3567{
3568 int score = -INFINITY;
3569 pcmk_resource_t *fail_rsc = rsc;
3570
3571 if (fail_rsc->parent != NULL) {
3572 pcmk_resource_t *parent = uber_parent(fail_rsc);
3573
3574 if (pe_rsc_is_anon_clone(parent)) {
3575 /* For anonymous clones, if an operation with on-fail=stop fails for
3576 * any instance, the entire clone must stop.
3577 */
3578 fail_rsc = parent;
3579 }
3580 }
3581
3582 // Ban the resource from all nodes
3583 crm_notice("%s will not be started under current conditions", fail_rsc->id);
3584 if (fail_rsc->allowed_nodes != NULL) {
3585 g_hash_table_destroy(fail_rsc->allowed_nodes);
3586    }
3587    fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
3588    g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3589}
3590
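3591/*!
3592 * \internal
3593 * \brief Get configured failure handling and role after failure for an action
3594 *
3595 * \param[in,out] history    Unpacked action history entry
3596 * \param[out]    on_fail    Where to set configured failure handling
3597 * \param[out]    fail_role  Where to set role after failure
3598 */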
3599static void
3600unpack_failure_handling(struct action_history *history,
3601 enum action_fail_response *on_fail,
3602 enum rsc_role_e *fail_role)
3603{
3604 xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3605 history->interval_ms, true);
3606
3607 GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3608 history->task,
3609 history->interval_ms, config);
3610
3611 const char *on_fail_str = g_hash_table_lookup(meta, XML_OP_ATTR_ON_FAIL);
3612
3613 *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3614 history->interval_ms, on_fail_str);
3615 *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3616 meta);
3617 g_hash_table_destroy(meta);
3618}
3619
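3620/*!
3621 * \internal
3622 * \brief Update resource role, failure handling, etc., after a failed action
3623 *
3624 * \param[in,out] history         Parsed action result history
3625 * \param[in]     config_on_fail  Action failure handling from configuration
3626 * \param[in]     fail_role       Resource's role after failure of this action
3627 * \param[out]    last_failure    This will be set to the history XML
3628 * \param[in,out] on_fail         Actual handling of action result
3629 */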
3630static void
3631unpack_rsc_op_failure(struct action_history *history,
3632 enum action_fail_response config_on_fail,
3633 enum rsc_role_e fail_role, xmlNode **last_failure,
3634 enum action_fail_response *on_fail)
3635{
3636 bool is_probe = false;
3637 char *last_change_s = NULL;
3638
3639 *last_failure = history->xml;
3640
3641 is_probe = pcmk_xe_is_probe(history->xml);
3642 last_change_s = last_change_str(history->xml);
3643
3644 if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3645 && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3646 crm_trace("Unexpected result (%s%s%s) was recorded for "
3647 "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3648 services_ocf_exitcode_str(history->exit_status),
3649 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3650 pcmk__s(history->exit_reason, ""),
3651 (is_probe? "probe" : history->task), history->rsc->id,
3652 pe__node_name(history->node), last_change_s,
3653 history->exit_status, history->id);
3654 } else {
3655 crm_warn("Unexpected result (%s%s%s) was recorded for "
3656 "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3657 services_ocf_exitcode_str(history->exit_status),
3658 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3659 pcmk__s(history->exit_reason, ""),
3660 (is_probe? "probe" : history->task), history->rsc->id,
3661 pe__node_name(history->node), last_change_s,
3662 history->exit_status, history->id);
3663
3664 if (is_probe && (history->exit_status != PCMK_OCF_OK)
3665 && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3666 && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3667
3668 /* A failed (not just unexpected) probe result could mean the user
3669 * didn't know resources will be probed even where they can't run.
3670 */
3671 crm_notice("If it is not possible for %s to run on %s, see "
3672 "the resource-discovery option for location constraints",
3673 history->rsc->id, pe__node_name(history->node));
3674 }
3675
3676 record_failed_op(history);
3677 }
3678
3679 free(last_change_s);
3680
3681 if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3682 pe_rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3683 fail2text(*on_fail), fail2text(config_on_fail),
3684 history->key);
3685 *on_fail = config_on_fail;
3686 }
3687
3688 if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3689 resource_location(history->rsc, history->node, -INFINITY,
3690 "__stop_fail__", history->rsc->cluster);
3691
3692 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3693 unpack_migrate_to_failure(history);
3694
3695 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3696 unpack_migrate_from_failure(history);
3697
3698 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3699 history->rsc->role = pcmk_role_promoted;
3700
3701 } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3702 if (config_on_fail == pcmk_on_fail_block) {
3703            history->rsc->role = pcmk_role_promoted;
3704            pe__set_next_role(history->rsc, pcmk_role_stopped,
3705                              "demote with on-fail=block");
3706
3707 } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3708 history->rsc->role = pcmk_role_stopped;
3709
3710 } else {
3711 /* Staying in the promoted role would put the scheduler and
3712 * controller into a loop. Setting the role to unpromoted is not
3713 * dangerous because the resource will be stopped as part of
3714 * recovery, and any promotion will be ordered after that stop.
3715 */
3716 history->rsc->role = pcmk_role_unpromoted;
3717 }
3718 }
3719
3720 if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3721 /* leave stopped */
3722 pe_rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3723 history->rsc->role = pcmk_role_stopped;
3724
3725 } else if (history->rsc->role < pcmk_role_started) {
3726 pe_rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3727 set_active(history->rsc);
3728 }
3729
3730 pe_rsc_trace(history->rsc,
3731 "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
3732 history->rsc->id, role2text(history->rsc->role),
3733 pcmk__btoa(history->node->details->unclean),
3734 fail2text(config_on_fail), role2text(fail_role));
3735
3736 if ((fail_role != pcmk_role_started)
3737 && (history->rsc->next_role < fail_role)) {
3738 pe__set_next_role(history->rsc, fail_role, "failure");
3739 }
3740
3741 if (fail_role == pcmk_role_stopped) {
3742 ban_from_all_nodes(history->rsc);
3743 }
3744}
3745
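3746/*!
3747 * \internal
3748 * \brief Block a resource with a failed action if it cannot be recovered
3749 *
3750 * If resource action is a failed stop and fencing is not possible, mark the
3751 * resource as unmanaged and blocked, since recovery cannot be done.
3752 *
3753 * \param[in,out] history  Parsed action history entry
3754 */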
3755static void
3756block_if_unrecoverable(struct action_history *history)
3757{
3758 char *last_change_s = NULL;
3759
3760 if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3761 return; // All actions besides stop are always recoverable
3762 }
3763 if (pe_can_fence(history->node->details->data_set, history->node)) {
3764 return; // Failed stops are recoverable via fencing
3765 }
3766
3767 last_change_s = last_change_str(history->xml);
3768 pe_proc_err("No further recovery can be attempted for %s "
3769 "because %s on %s failed (%s%s%s) at %s "
3770 CRM_XS " rc=%d id=%s",
3771 history->rsc->id, history->task, pe__node_name(history->node),
3772 services_ocf_exitcode_str(history->exit_status),
3773 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3774 pcmk__s(history->exit_reason, ""),
3775 last_change_s, history->exit_status, history->id);
3776
3777 free(last_change_s);
3778
3779    pe__clear_resource_flags(history->rsc, pcmk_rsc_managed);
3780    pe__set_resource_flags(history->rsc, pcmk_rsc_blocked);
3781}
3782
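3783/*!
3784 * \internal
3785 * \brief Update an interpreted execution status and the reason for any change
3786 *
3787 * \param[in,out] history  Parsed action history entry
3788 * \param[in,out] why      Where to store reason for any change
3789 * \param[in]     value    New execution status
3790 * \param[in]     reason   Description of why value was changed
3791 */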
3792static inline void
3793remap_because(struct action_history *history, const char **why, int value,
3794 const char *reason)
3795{
3796 if (history->execution_status != value) {
3797 history->execution_status = value;
3798 *why = reason;
3799 }
3800}
3801
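3802/*!
3803 * \internal
3804 * \brief Remap informational monitor results and operation status
3805 *
3806 * For the monitor results, certain OCF codes are for providing extended
3807 * information to the user about services that aren't yet failed but not
3808 * entirely healthy either. These must be treated as the "normal" result by
3809 * Pacemaker.
3810 *
3811 * For operation status, the action result can be used to determine an
3812 * appropriate status for the purposes of responding to the action. The
3813 * status provided by the executor is not directly usable since the executor
3814 * does not know what was expected.
3815 *
3816 * \param[in,out] history  Parsed action history entry
3817 * \param[in,out] on_fail  What should be done about the result
3818 * \param[in]     expired  Whether result is expired
3819 *
3820 * \note If the result is remapped and the node is not shutting down or
3821 *       failed, the operation will be recorded in the scheduler data's list
3822 *       of failed operations to highlight it for the user.
3823 */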
3824static void
3825remap_operation(struct action_history *history,
3826 enum action_fail_response *on_fail, bool expired)
3827{
3828 bool is_probe = false;
3829 int orig_exit_status = history->exit_status;
3830 int orig_exec_status = history->execution_status;
3831 const char *why = NULL;
3832 const char *task = history->task;
3833
3834 // Remap degraded results to their successful counterparts
3835 history->exit_status = pcmk__effective_rc(history->exit_status);
3836 if (history->exit_status != orig_exit_status) {
3837 why = "degraded result";
3838 if (!expired && (!history->node->details->shutdown
3839 || history->node->details->online)) {
3840 record_failed_op(history);
3841 }
3842 }
3843
3844 if (!pe_rsc_is_bundled(history->rsc)
3845 && pcmk_xe_mask_probe_failure(history->xml)
3846 && ((history->execution_status != PCMK_EXEC_DONE)
3847 || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3848 history->execution_status = PCMK_EXEC_DONE;
3849 history->exit_status = PCMK_OCF_NOT_RUNNING;
3850 why = "equivalent probe result";
3851 }
3852
3853 /* If the executor reported an execution status of anything but done or
3854 * error, consider that final. But for done or error, we know better whether
3855 * it should be treated as a failure or not, because we know the expected
3856 * result.
3857 */
3858 switch (history->execution_status) {
3859 case PCMK_EXEC_DONE:
3860 case PCMK_EXEC_ERROR:
3861 break;
3862
3863        // These should be treated as node-fatal
3864        case PCMK_EXEC_NO_FENCE_DEVICE:
3865        case PCMK_EXEC_NO_SECRETS:
3866            remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3867 "node-fatal error");
3868 goto remap_done;
3869
3870 default:
3871 goto remap_done;
3872 }
3873
3874 is_probe = pcmk_xe_is_probe(history->xml);
3875 if (is_probe) {
3876 task = "probe";
3877 }
3878
3879 if (history->expected_exit_status < 0) {
3880 /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3881 * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3882 * expected exit status in the transition key, which (along with the
3883 * similar case of a corrupted transition key in the CIB) will be
3884 * reported to this function as -1. Pacemaker 2.0+ does not support
3885 * rolling upgrades from those versions or processing of saved CIB files
3886 * from those versions, so we do not need to care much about this case.
3887 */
3888 remap_because(history, &why, PCMK_EXEC_ERROR,
3889 "obsolete history format");
3890 crm_warn("Expected result not found for %s on %s "
3891 "(corrupt or obsolete CIB?)",
3892 history->key, pe__node_name(history->node));
3893
3894 } else if (history->exit_status == history->expected_exit_status) {
3895 remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3896
3897 } else {
3898 remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3899 pe_rsc_debug(history->rsc,
3900 "%s on %s: expected %d (%s), got %d (%s%s%s)",
3901 history->key, pe__node_name(history->node),
3902 history->expected_exit_status,
3903 services_ocf_exitcode_str(history->expected_exit_status),
3904 history->exit_status,
3905 services_ocf_exitcode_str(history->exit_status),
3906 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3907 pcmk__s(history->exit_reason, ""));
3908 }
3909
3910 switch (history->exit_status) {
3911 case PCMK_OCF_OK:
3912 if (is_probe
3913 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3914 char *last_change_s = last_change_str(history->xml);
3915
3916 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3917 pe_rsc_info(history->rsc, "Probe found %s active on %s at %s",
3918 history->rsc->id, pe__node_name(history->node),
3919 last_change_s);
3920 free(last_change_s);
3921 }
3922 break;
3923
3924        case PCMK_OCF_NOT_RUNNING:
3925            if (is_probe
3926 || (history->expected_exit_status == history->exit_status)
3927 || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
3928
3929 /* For probes, recurring monitors for the Stopped role, and
3930 * unmanaged resources, "not running" is not considered a
3931 * failure.
3932 */
3933 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
3934 history->rsc->role = pcmk_role_stopped;
3935                *on_fail = pcmk_on_fail_ignore;
3936                pe__set_next_role(history->rsc, pcmk_role_unknown,
3937                                  "not running");
3938 }
3939 break;
3940
3941        case PCMK_OCF_RUNNING_PROMOTED:
3942            if (is_probe
3943 && (history->exit_status != history->expected_exit_status)) {
3944 char *last_change_s = last_change_str(history->xml);
3945
3946 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3947 pe_rsc_info(history->rsc,
3948 "Probe found %s active and promoted on %s at %s",
3949 history->rsc->id, pe__node_name(history->node),
3950 last_change_s);
3951 free(last_change_s);
3952 }
3953 if (!expired
3954 || (history->exit_status == history->expected_exit_status)) {
3955 history->rsc->role = pcmk_role_promoted;
3956 }
3957 break;
3958
3959        case PCMK_OCF_FAILED_PROMOTED:
3960            if (!expired) {
3961 history->rsc->role = pcmk_role_promoted;
3962 }
3963 remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
3964 break;
3965
3966        case PCMK_OCF_NOT_CONFIGURED:
3967            remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
3968 break;
3969
3970        case PCMK_OCF_UNIMPLEMENT_FEATURE:
3971            {
3972                guint interval_ms = 0;
3973                crm_element_value_ms(history->xml, XML_LRM_ATTR_INTERVAL_MS,
3974                                     &interval_ms);
3975
3976 if (interval_ms == 0) {
3977 if (!expired) {
3978 block_if_unrecoverable(history);
3979 }
3980 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3981 "exit status");
3982 } else {
3983 remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
3984 "exit status");
3985 }
3986 }
3987 break;
3988
3989        case PCMK_OCF_NOT_INSTALLED:
3990        case PCMK_OCF_INVALID_PARAM:
3991        case PCMK_OCF_INSUFFICIENT_PRIV:
3992            if (!expired) {
3993 block_if_unrecoverable(history);
3994 }
3995 remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
3996 break;
3997
3998 default:
3999 if (history->execution_status == PCMK_EXEC_DONE) {
4000 char *last_change_s = last_change_str(history->xml);
4001
4002 crm_info("Treating unknown exit status %d from %s of %s "
4003 "on %s at %s as failure",
4004 history->exit_status, task, history->rsc->id,
4005 pe__node_name(history->node), last_change_s);
4006 remap_because(history, &why, PCMK_EXEC_ERROR,
4007 "unknown exit status");
4008 free(last_change_s);
4009 }
4010 break;
4011 }
4012
4013remap_done:
4014 if (why != NULL) {
4015 pe_rsc_trace(history->rsc,
4016 "Remapped %s result from [%s: %s] to [%s: %s] "
4017 "because of %s",
4018 history->key, pcmk_exec_status_str(orig_exec_status),
4019 crm_exit_str(orig_exit_status),
4020 pcmk_exec_status_str(history->execution_status),
4021 crm_exit_str(history->exit_status), why);
4022 }
4023}
4024
4025// return TRUE if start or monitor last failure but parameters changed
4026static bool
4027should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4028 pcmk_resource_t *rsc, pcmk_node_t *node)
4029{
4030    if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4031        if (pe__bundle_needs_remote_name(rsc)) {
4032            /* We haven't allocated resources yet, so we can't reliably
4033 * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4034 * When that's needed, defer the check until later.
4035             */
4036            pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4037                                rsc->cluster);
4038
4039 } else {
4040 op_digest_cache_t *digest_data = NULL;
4041
4042 digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4043 rsc->cluster);
4044 switch (digest_data->rc) {
4045                 case pcmk__digest_unknown:
4046                     crm_trace("Resource %s history entry %s on %s"
4047 " has no digest to compare",
4048 rsc->id, pe__xe_history_key(xml_op),
4049 node->details->id);
4050 break;
4051 case pcmk__digest_match:
4052 break;
4053 default:
4054 return TRUE;
4055 }
4056 }
4057 }
4058 return FALSE;
4059}
4060
4061// Order action after fencing of remote node, given connection rsc
4062static void
4063order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4064                            pcmk_scheduler_t *scheduler)
4065 {
4066 pcmk_node_t *remote_node = pe_find_node(scheduler->nodes, remote_conn->id);
4067
4068 if (remote_node) {
4069 pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4070 FALSE, scheduler);
4071
4072         order_actions(fence, action, pcmk__ar_first_implies_then);
4073     }
4074}
4075
4076static bool
4077should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4078 guint interval_ms, bool is_last_failure)
4079{
4080 /* Clearing failures of recurring monitors has special concerns. The
4081 * executor reports only changes in the monitor result, so if the
4082 * monitor is still active and still getting the same failure result,
4083 * that will go undetected after the failure is cleared.
4084 *
4085 * Also, the operation history will have the time when the recurring
4086 * monitor result changed to the given code, not the time when the
4087 * result last happened.
4088 *
4089 * @TODO We probably should clear such failures only when the failure
4090 * timeout has passed since the last occurrence of the failed result.
4091 * However we don't record that information. We could maybe approximate
4092 * that by clearing only if there is a more recent successful monitor or
4093 * stop result, but we don't even have that information at this point
4094 * since we are still unpacking the resource's operation history.
4095 *
4096 * This is especially important for remote connection resources with a
4097 * reconnect interval, so in that case, we skip clearing failures
4098 * if the remote node hasn't been fenced.
4099 */
4100 if (rsc->remote_reconnect_ms
4101         && rsc->is_remote_node
4102         && (interval_ms != 0)
4103 && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4104
4105 pcmk_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id);
4106
4107 if (remote_node && !remote_node->details->remote_was_fenced) {
4108 if (is_last_failure) {
4109 crm_info("Waiting to clear monitor failure for remote node %s"
4110 " until fencing has occurred", rsc->id);
4111 }
4112 return TRUE;
4113 }
4114 }
4115 return FALSE;
4116}
4117
4136static bool
4137check_operation_expiry(struct action_history *history)
4138{
4139 bool expired = false;
4140 bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4141 time_t last_run = 0;
4142 int unexpired_fail_count = 0;
4143 const char *clear_reason = NULL;
4144
4145 if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4146 pe_rsc_trace(history->rsc,
4147 "Resource history entry %s on %s is not expired: "
4148 "Not Installed does not expire",
4149 history->id, pe__node_name(history->node));
4150 return false; // "Not installed" must always be cleared manually
4151 }
4152
4153 if ((history->rsc->failure_timeout > 0)
4154         && (crm_element_value_epoch(history->xml, XML_RSC_OP_LAST_CHANGE,
4155                                     &last_run) == 0)) {
4156
4157 // Resource has a failure-timeout, and history entry has a timestamp
4158
4159 time_t now = get_effective_time(history->rsc->cluster);
4160 time_t last_failure = 0;
4161
4162 // Is this particular operation history older than the failure timeout?
4163 if ((now >= (last_run + history->rsc->failure_timeout))
4164 && !should_ignore_failure_timeout(history->rsc, history->task,
4165 history->interval_ms,
4166 is_last_failure)) {
4167 expired = true;
4168 }
4169
4170 // Does the resource as a whole have an unexpired fail count?
4171 unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4172 &last_failure,
4173                                             pcmk__fc_effective,
4174                                             history->xml);
4175
4176 // Update scheduler recheck time according to *last* failure
4177 crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4178 " last-failure@%lld",
4179 history->id, (long long) last_run, (expired? "" : "not "),
4180 (long long) now, unexpired_fail_count,
4181 history->rsc->failure_timeout, (long long) last_failure);
4182 last_failure += history->rsc->failure_timeout + 1;
4183 if (unexpired_fail_count && (now < last_failure)) {
4184 pe__update_recheck_time(last_failure, history->rsc->cluster,
4185 "fail count expiration");
4186 }
4187 }
4188
4189 if (expired) {
4190 if (pe_get_failcount(history->node, history->rsc, NULL,
4191 pcmk__fc_default, history->xml)) {
4192 // There is a fail count ignoring timeout
4193
4194 if (unexpired_fail_count == 0) {
4195 // There is no fail count considering timeout
4196 clear_reason = "it expired";
4197
4198 } else {
4199 /* This operation is old, but there is an unexpired fail count.
4200 * In a properly functioning cluster, this should only be
4201 * possible if this operation is not a failure (otherwise the
4202 * fail count should be expired too), so this is really just a
4203 * failsafe.
4204 */
4205 pe_rsc_trace(history->rsc,
4206 "Resource history entry %s on %s is not expired: "
4207 "Unexpired fail count",
4208 history->id, pe__node_name(history->node));
4209 expired = false;
4210 }
4211
4212 } else if (is_last_failure
4213 && (history->rsc->remote_reconnect_ms != 0)) {
4214 /* Clear any expired last failure when reconnect interval is set,
4215 * even if there is no fail count.
4216 */
4217 clear_reason = "reconnect interval is set";
4218 }
4219 }
4220
4221 if (!expired && is_last_failure
4222 && should_clear_for_param_change(history->xml, history->task,
4223 history->rsc, history->node)) {
4224 clear_reason = "resource parameters have changed";
4225 }
4226
4227 if (clear_reason != NULL) {
4228 pcmk_action_t *clear_op = NULL;
4229
4230 // Schedule clearing of the fail count
4231 clear_op = pe__clear_failcount(history->rsc, history->node,
4232 clear_reason, history->rsc->cluster);
4233
4234 if (pcmk_is_set(history->rsc->cluster->flags,
4235                         pcmk_sched_fencing_enabled)
4236             && (history->rsc->remote_reconnect_ms != 0)) {
4237 /* If we're clearing a remote connection due to a reconnect
4238 * interval, we want to wait until any scheduled fencing
4239 * completes.
4240 *
4241 * We could limit this to remote_node->details->unclean, but at
4242 * this point, that's always true (it won't be reliable until
4243 * after unpack_node_history() is done).
4244 */
4245 crm_info("Clearing %s failure will wait until any scheduled "
4246 "fencing of %s completes",
4247 history->task, history->rsc->id);
4248 order_after_remote_fencing(clear_op, history->rsc,
4249 history->rsc->cluster);
4250 }
4251 }
4252
4253 if (expired && (history->interval_ms == 0)
4254 && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4255 switch (history->exit_status) {
4256 case PCMK_OCF_OK:
4257             case PCMK_OCF_NOT_RUNNING:
4258             case PCMK_OCF_RUNNING_PROMOTED:
4259             case PCMK_OCF_DEGRADED:
4260             case PCMK_OCF_DEGRADED_PROMOTED:
4261 // Don't expire probes that return these values
4262 pe_rsc_trace(history->rsc,
4263 "Resource history entry %s on %s is not expired: "
4264 "Probe result",
4265 history->id, pe__node_name(history->node));
4266 expired = false;
4267 break;
4268 }
4269 }
4270
4271 return expired;
4272}
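
The expiry decision above reduces to plain time arithmetic once the fail-count
special cases are set aside. A minimal standalone sketch (illustrative names;
the real function also consults fail counts, remote reconnect intervals, and
probe results):

#include <stdbool.h>
#include <time.h>

// Expired when the entry's timestamp plus failure-timeout is in the past
static bool
sketch_is_expired(time_t last_run, int failure_timeout_s, time_t now)
{
    return (failure_timeout_s > 0) && (now >= last_run + failure_timeout_s);
}

// Recheck just after the newest failure ages out, mirroring the
// "last_failure += failure_timeout + 1" bookkeeping above
static time_t
sketch_recheck_time(time_t last_failure, int failure_timeout_s)
{
    return last_failure + failure_timeout_s + 1;
}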
4273
4274int
4275pe__target_rc_from_xml(const xmlNode *xml_op)
4276{
4277 int target_rc = 0;
4278 const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
4279
4280 if (key == NULL) {
4281 return -1;
4282 }
4283 decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4284 return target_rc;
4285}
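
Transition keys have the form "<action>:<transition>:<target-rc>:<uuid>"
(for example "3:15:0:c56a..."), so extracting the target rc can be sketched
with plain sscanf(); the helper below is hypothetical and stands in for
decode_transition_key():

#include <stdio.h>

static int
sketch_target_rc(const char *key)
{
    int action_id = 0, transition_id = 0, target_rc = 0;

    if ((key == NULL)
        || (sscanf(key, "%d:%d:%d:", &action_id, &transition_id,
                   &target_rc) != 3)) {
        return -1; // same sentinel that pe__target_rc_from_xml() uses
    }
    return target_rc;
}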
4286
4296static void
4297update_resource_state(struct action_history *history, int exit_status,
4298 const xmlNode *last_failure,
4299 enum action_fail_response *on_fail)
4300{
4301 bool clear_past_failure = false;
4302
4303 if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4304 || (!pe_rsc_is_bundled(history->rsc)
4305 && pcmk_xe_mask_probe_failure(history->xml))) {
4306 history->rsc->role = pcmk_role_stopped;
4307
4308 } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4309 clear_past_failure = true;
4310
4311 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4312 pcmk__str_none)) {
4313 if ((last_failure != NULL)
4314 && pcmk__str_eq(history->key, pe__xe_history_key(last_failure),
4315 pcmk__str_none)) {
4316 clear_past_failure = true;
4317 }
4318 if (history->rsc->role < pcmk_role_started) {
4319 set_active(history->rsc);
4320 }
4321
4322 } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4323 history->rsc->role = pcmk_role_started;
4324 clear_past_failure = true;
4325
4326 } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4327 history->rsc->role = pcmk_role_stopped;
4328 clear_past_failure = true;
4329
4330 } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4331 pcmk__str_none)) {
4332 history->rsc->role = pcmk_role_promoted;
4333 clear_past_failure = true;
4334
4335 } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4336 pcmk__str_none)) {
4337 if (*on_fail == pcmk_on_fail_demote) {
4338 // Demote clears an error only if on-fail=demote
4339 clear_past_failure = true;
4340 }
4341 history->rsc->role = pcmk_role_unpromoted;
4342
4343 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4344 pcmk__str_none)) {
4345 history->rsc->role = pcmk_role_started;
4346 clear_past_failure = true;
4347
4348 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4349 pcmk__str_none)) {
4350 unpack_migrate_to_success(history);
4351
4352 } else if (history->rsc->role < pcmk_role_started) {
4353 pe_rsc_trace(history->rsc, "%s active on %s",
4354 history->rsc->id, pe__node_name(history->node));
4355 set_active(history->rsc);
4356 }
4357
4358 if (!clear_past_failure) {
4359 return;
4360 }
4361
4362 switch (*on_fail) {
4363 case pcmk_on_fail_stop:
4364 case pcmk_on_fail_ban:
4365         case pcmk_on_fail_standby_node:
4366         case pcmk_on_fail_fence_node:
4367             pe_rsc_trace(history->rsc,
4368 "%s (%s) is not cleared by a completed %s",
4369 history->rsc->id, fail2text(*on_fail), history->task);
4370 break;
4371
4372 case pcmk_on_fail_block:
4373         case pcmk_on_fail_ignore:
4374         case pcmk_on_fail_demote:
4375         case pcmk_on_fail_restart:
4376         case pcmk_on_fail_restart_container:
4377             *on_fail = pcmk_on_fail_ignore;
4378             pe__set_next_role(history->rsc, pcmk_role_unknown,
4379                               "clear past failures");
4380 break;
4381
4382         case pcmk_on_fail_reset_remote:
4383             if (history->rsc->remote_reconnect_ms == 0) {
4384 /* With no reconnect interval, the connection is allowed to
4385 * start again after the remote node is fenced and
4386 * completely stopped. (With a reconnect interval, we wait
4387 * for the failure to be cleared entirely before attempting
4388 * to reconnect.)
4389 */
4390 *on_fail = pcmk_on_fail_ignore;
4391                 pe__set_next_role(history->rsc, pcmk_role_unknown,
4392                                   "clear past failures and reset remote");
4393 }
4394 break;
4395 }
4396}
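
The chain of string comparisons above is essentially a task-to-role transition
table. Restated as a standalone sketch (simplified role enum and literal task
names; the real code additionally handles probe masking, on-fail=demote, and
migration bookkeeping):

#include <string.h>

enum sketch_role { R_UNKNOWN, R_STOPPED, R_STARTED, R_UNPROMOTED, R_PROMOTED };

static enum sketch_role
sketch_role_after(const char *task, enum sketch_role current)
{
    if ((strcmp(task, "start") == 0) || (strcmp(task, "migrate_from") == 0)) {
        return R_STARTED;
    } else if (strcmp(task, "stop") == 0) {
        return R_STOPPED;
    } else if (strcmp(task, "promote") == 0) {
        return R_PROMOTED;
    } else if (strcmp(task, "demote") == 0) {
        return R_UNPROMOTED;
    }
    return current; // monitors and unknown actions leave the role alone
}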
4397
4406static inline bool
4407can_affect_state(struct action_history *history)
4408{
4409#if 0
4410 /* @COMPAT It might be better to parse only actions we know we're interested
4411 * in, rather than exclude a couple we don't. However that would be a
4412 * behavioral change that should be done at a major or minor series release.
4413 * Currently, unknown operations can affect whether a resource is considered
4414 * active and/or failed.
4415 */
4416 return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4417                             PCMK_ACTION_START, PCMK_ACTION_STOP,
4418                             PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4419                             PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4420                             "asyncmon", NULL);
4421#else
4422 return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4423 PCMK_ACTION_META_DATA, NULL);
4424#endif
4425}
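
pcmk__str_any_of() used above is a NULL-terminated variadic matcher; an
equivalent can be written in a few lines of portable C (an illustrative
re-implementation, not the library's own):

#include <stdarg.h>
#include <stdbool.h>
#include <string.h>

static bool
sketch_str_any_of(const char *s, ...)
{
    va_list ap;
    const char *candidate = NULL;
    bool found = false;

    va_start(ap, s);
    while ((candidate = va_arg(ap, const char *)) != NULL) {
        if (strcmp(s, candidate) == 0) {
            found = true;
            break;
        }
    }
    va_end(ap);
    return found;
}

/* usage: sketch_str_any_of(task, "notify", "meta-data", NULL) */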
4426
4435static int
4436unpack_action_result(struct action_history *history)
4437{
4438     if ((crm_element_value_int(history->xml, XML_LRM_ATTR_OPSTATUS,
4439                                &(history->execution_status)) < 0)
4440 || (history->execution_status < PCMK_EXEC_PENDING)
4441 || (history->execution_status > PCMK_EXEC_MAX)
4442 || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4443 crm_err("Ignoring resource history entry %s for %s on %s "
4444 "with invalid " XML_LRM_ATTR_OPSTATUS " '%s'",
4445 history->id, history->rsc->id, pe__node_name(history->node),
4446 pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_OPSTATUS),
4447 ""));
4448 return pcmk_rc_unpack_error;
4449 }
4450 if ((crm_element_value_int(history->xml, XML_LRM_ATTR_RC,
4451 &(history->exit_status)) < 0)
4452 || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4453#if 0
4454 /* @COMPAT We should ignore malformed entries, but since that would
4455 * change behavior, it should be done at a major or minor series
4456 * release.
4457 */
4458 crm_err("Ignoring resource history entry %s for %s on %s "
4459 "with invalid " XML_LRM_ATTR_RC " '%s'",
4460 history->id, history->rsc->id, pe__node_name(history->node),
4461 pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_RC),
4462 ""));
4463 return pcmk_rc_unpack_error;
4464#else
4465 history->exit_status = CRM_EX_ERROR;
4466#endif
4467 }
4468 history->exit_reason = crm_element_value(history->xml,
4469                                              XML_LRM_ATTR_EXIT_REASON);
4470     return pcmk_rc_ok;
4471}
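
Both sanity checks above are range tests on integers parsed from XML
attributes. A compact sketch of the same policy (bounds passed in rather than
taken from the real enums):

#include <stdbool.h>

// Valid only when within the executor's known status range
static bool
sketch_status_is_valid(int op_status, int status_min, int status_max)
{
    return (op_status >= status_min) && (op_status <= status_max);
}

// Out-of-range exit statuses are coerced, matching the #else branch above
static int
sketch_sanitize_rc(int exit_status, int rc_max, int fallback)
{
    if ((exit_status < 0) || (exit_status > rc_max)) {
        return fallback;
    }
    return exit_status;
}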
4472
4483static int
4484process_expired_result(struct action_history *history, int orig_exit_status)
4485{
4486 if (!pe_rsc_is_bundled(history->rsc)
4487 && pcmk_xe_mask_probe_failure(history->xml)
4488 && (orig_exit_status != history->expected_exit_status)) {
4489
4490 if (history->rsc->role <= pcmk_role_stopped) {
4491 history->rsc->role = pcmk_role_unknown;
4492 }
4493 crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4494 "Masked failure expired",
4495 history->id, history->rsc->id,
4496 pe__node_name(history->node));
4497 return pcmk_rc_ok;
4498 }
4499
4500 if (history->exit_status == history->expected_exit_status) {
4501 return pcmk_rc_undetermined; // Only failures expire
4502 }
4503
4504 if (history->interval_ms == 0) {
4505 crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4506 "Expired failure",
4507 history->id, history->task, history->rsc->id,
4508 pe__node_name(history->node));
4509 return pcmk_rc_ok;
4510 }
4511
4512 if (history->node->details->online && !history->node->details->unclean) {
4513 /* Reschedule the recurring action. schedule_cancel() won't work at
4514 * this stage, so as a hacky workaround, forcibly change the restart
4515 * digest so pcmk__check_action_config() does what we want later.
4516 *
4517 * @TODO We should skip this if there is a newer successful monitor.
4518 * Also, this causes rescheduling only if the history entry
4519 * has an op-digest (which the expire-non-blocked-failure
4520 * scheduler regression test doesn't, but that may not be a
4521 * realistic scenario in production).
4522 */
4523 crm_notice("Rescheduling %s-interval %s of %s on %s "
4524 "after failure expired",
4525 pcmk__readable_interval(history->interval_ms), history->task,
4526 history->rsc->id, pe__node_name(history->node));
4527         crm_xml_add(history->xml, XML_LRM_ATTR_RESTART_DIGEST,
4528                     "calculated-failure-timeout");
4529 return pcmk_rc_ok;
4530 }
4531
4532 return pcmk_rc_undetermined;
4533}
4534
4544static void
4545mask_probe_failure(struct action_history *history, int orig_exit_status,
4546 const xmlNode *last_failure,
4547 enum action_fail_response *on_fail)
4548{
4549 pcmk_resource_t *ban_rsc = history->rsc;
4550
4551 if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4552 ban_rsc = uber_parent(history->rsc);
4553 }
4554
4555 crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4556 services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4557 pe__node_name(history->node));
4558 update_resource_state(history, history->expected_exit_status, last_failure,
4559 on_fail);
4560 crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
4561
4562 record_failed_op(history);
4563 resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure",
4564 history->rsc->cluster);
4565}
4566
4578static bool
4579failure_is_newer(const struct action_history *history,
4580 const xmlNode *last_failure)
4581{
4582 guint failure_interval_ms = 0U;
4583 long long failure_change = 0LL;
4584 long long this_change = 0LL;
4585
4586 if (last_failure == NULL) {
4587 return false; // Resource has no last_failure entry
4588 }
4589
4590 if (!pcmk__str_eq(history->task,
4591 crm_element_value(last_failure, XML_LRM_ATTR_TASK),
4592 pcmk__str_none)) {
4593 return false; // last_failure is for different action
4594 }
4595
4596     if ((crm_element_value_ms(last_failure, XML_LRM_ATTR_INTERVAL_MS,
4597                               &failure_interval_ms) != pcmk_ok)
4598 || (history->interval_ms != failure_interval_ms)) {
4599 return false; // last_failure is for action with different interval
4600 }
4601
4602     if ((pcmk__scan_ll(crm_element_value(history->xml, XML_RSC_OP_LAST_CHANGE),
4603                        &this_change, 0LL) != pcmk_rc_ok)
4604         || (pcmk__scan_ll(crm_element_value(last_failure,
4605                                             XML_RSC_OP_LAST_CHANGE),
4606                           &failure_change, 0LL) != pcmk_rc_ok)
4607 || (failure_change < this_change)) {
4608 return false; // Failure is not known to be newer
4609 }
4610
4611 return true;
4612}
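
The three guards above compare action name, interval, and last-change time. A
condensed sketch with an illustrative struct standing in for the XML history
entries:

#include <stdbool.h>
#include <string.h>

struct sketch_op {
    const char *task;       // action name
    unsigned interval_ms;   // recurring interval
    long long last_change;  // seconds since epoch
};

static bool
sketch_failure_is_newer(const struct sketch_op *this_op,
                        const struct sketch_op *last_failure)
{
    return (last_failure != NULL)
           && (strcmp(this_op->task, last_failure->task) == 0)
           && (this_op->interval_ms == last_failure->interval_ms)
           && (last_failure->last_change >= this_op->last_change);
}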
4613
4621static void
4622process_pending_action(struct action_history *history,
4623 const xmlNode *last_failure)
4624{
4625 /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4626 * and there might be a RSC_monitor_INTERVAL entry with the last successful
4627 * or pending result.
4628 *
4629 * If last_failure contains the failure of the pending recurring monitor
4630 * we're processing here, and is newer, the action is no longer pending.
4631 * (Pending results have call ID -1, which sorts last, so the last failure
4632 * if any should be known.)
4633 */
4634 if (failure_is_newer(history, last_failure)) {
4635 return;
4636 }
4637
4638 if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4639         pe__set_resource_flags(history->rsc, pcmk_rsc_start_pending);
4640         set_active(history->rsc);
4641
4642 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4643 history->rsc->role = pcmk_role_promoted;
4644
4645 } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4646 && history->node->details->unclean) {
4647         /* A migrate_to action is pending on an unclean source, so force a stop
4648 * on the target.
4649 */
4650 const char *migrate_target = NULL;
4651 pcmk_node_t *target = NULL;
4652
4653 migrate_target = crm_element_value(history->xml,
4654                                        XML_LRM_ATTR_MIGRATE_TARGET);
4655         target = pe_find_node(history->rsc->cluster->nodes, migrate_target);
4656 if (target != NULL) {
4657 stop_action(history->rsc, target, FALSE);
4658 }
4659 }
4660
4661 if (history->rsc->pending_task != NULL) {
4662 /* There should never be multiple pending actions, but as a failsafe,
4663 * just remember the first one processed for display purposes.
4664 */
4665 return;
4666 }
4667
4668 if (pcmk_is_probe(history->task, history->interval_ms)) {
4669 /* Pending probes are currently never displayed, even if pending
4670 * operations are requested. If we ever want to change that,
4671 * enable the below and the corresponding part of
4672 * native.c:native_pending_task().
4673 */
4674#if 0
4675 history->rsc->pending_task = strdup("probe");
4676 history->rsc->pending_node = history->node;
4677#endif
4678 } else {
4679 history->rsc->pending_task = strdup(history->task);
4680 history->rsc->pending_node = history->node;
4681 }
4682}
4683
4684static void
4685unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4686 xmlNode **last_failure, enum action_fail_response *on_fail)
4687{
4688 int old_rc = 0;
4689 bool expired = false;
4690 pcmk_resource_t *parent = rsc;
4691 enum rsc_role_e fail_role = pcmk_role_unknown;
4692 enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4693
4694 struct action_history history = {
4695 .rsc = rsc,
4696 .node = node,
4697 .xml = xml_op,
4698 .execution_status = PCMK_EXEC_UNKNOWN,
4699 };
4700
4701 CRM_CHECK(rsc && node && xml_op, return);
4702
4703 history.id = ID(xml_op);
4704 if (history.id == NULL) {
4705 crm_err("Ignoring resource history entry for %s on %s without ID",
4706 rsc->id, pe__node_name(node));
4707 return;
4708 }
4709
4710 // Task and interval
4711 history.task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
4712 if (history.task == NULL) {
4713 crm_err("Ignoring resource history entry %s for %s on %s without "
4714 XML_LRM_ATTR_TASK, history.id, rsc->id, pe__node_name(node));
4715 return;
4716 }
4717     crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS,
4718                          &(history.interval_ms));
4719 if (!can_affect_state(&history)) {
4720 pe_rsc_trace(rsc,
4721 "Ignoring resource history entry %s for %s on %s "
4722 "with irrelevant action '%s'",
4723 history.id, rsc->id, pe__node_name(node), history.task);
4724 return;
4725 }
4726
4727 if (unpack_action_result(&history) != pcmk_rc_ok) {
4728 return; // Error already logged
4729 }
4730
4731 history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4732 history.key = pe__xe_history_key(xml_op);
4733 crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &(history.call_id));
4734
4735 pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4736 history.id, history.task, history.call_id, pe__node_name(node),
4737 pcmk_exec_status_str(history.execution_status),
4738 crm_exit_str(history.exit_status));
4739
4740 if (node->details->unclean) {
4741 pe_rsc_trace(rsc,
4742 "%s is running on %s, which is unclean (further action "
4743 "depends on value of stop's on-fail attribute)",
4744 rsc->id, pe__node_name(node));
4745 }
4746
4747 expired = check_operation_expiry(&history);
4748 old_rc = history.exit_status;
4749
4750 remap_operation(&history, on_fail, expired);
4751
4752 if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4753 goto done;
4754 }
4755
4756 if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4757 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4758 goto done;
4759 }
4760
4761 if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4762 parent = uber_parent(rsc);
4763 }
4764
4765 switch (history.execution_status) {
4766 case PCMK_EXEC_PENDING:
4767 process_pending_action(&history, *last_failure);
4768 goto done;
4769
4770 case PCMK_EXEC_DONE:
4771 update_resource_state(&history, history.exit_status, *last_failure,
4772 on_fail);
4773 goto done;
4774
4774 
4775         case PCMK_EXEC_NOT_INSTALLED:
4776             unpack_failure_handling(&history, &failure_strategy, &fail_role);
4777 if (failure_strategy == pcmk_on_fail_ignore) {
4778 crm_warn("Cannot ignore failed %s of %s on %s: "
4779 "Resource agent doesn't exist "
4780 CRM_XS " status=%d rc=%d id=%s",
4781 history.task, rsc->id, pe__node_name(node),
4782 history.execution_status, history.exit_status,
4783 history.id);
4784 /* Also for printing it as "FAILED" by marking it as
4785 * pcmk_rsc_failed later
4786 */
4787 *on_fail = pcmk_on_fail_ban;
4788 }
4789 resource_location(parent, node, -INFINITY, "hard-error",
4790 rsc->cluster);
4791 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4792 last_failure, on_fail);
4793 goto done;
4794
4795         case PCMK_EXEC_NOT_CONNECTED:
4796             if (pe__is_guest_or_remote_node(node)
4797                 && pcmk_is_set(node->details->remote_rsc->flags,
4798                                pcmk_rsc_managed)) {
4799                 /* We should never get into a situation where a managed remote
4800 * connection resource is considered OK but a resource action
4801 * behind the connection gets a "not connected" status. But as a
4802 * fail-safe in case a bug or unusual circumstances do lead to
4803 * that, ensure the remote connection is considered failed.
4804 */
4805                 pe__set_resource_flags(node->details->remote_rsc,
4806                                        pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
4807             }
4808 break; // Not done, do error handling
4809
4810 case PCMK_EXEC_ERROR:
4811         case PCMK_EXEC_ERROR_HARD:
4812         case PCMK_EXEC_ERROR_FATAL:
4813         case PCMK_EXEC_TIMEOUT:
4814         case PCMK_EXEC_NOT_SUPPORTED:
4815         case PCMK_EXEC_INVALID:
4816 break; // Not done, do error handling
4817
4818 default: // No other value should be possible at this point
4819 break;
4820 }
4821
4822 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4823 if ((failure_strategy == pcmk_on_fail_ignore)
4824 || ((failure_strategy == pcmk_on_fail_restart_container)
4825 && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4826
4827 char *last_change_s = last_change_str(xml_op);
4828
4829 crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4830 CRM_XS " %s",
4831 history.task, services_ocf_exitcode_str(history.exit_status),
4832 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4833 pcmk__s(history.exit_reason, ""), rsc->id, pe__node_name(node),
4834 last_change_s, history.id);
4835 free(last_change_s);
4836
4837 update_resource_state(&history, history.expected_exit_status,
4838 *last_failure, on_fail);
4839 crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
4840         pe__set_resource_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
4841 
4842 record_failed_op(&history);
4843
4844 if ((failure_strategy == pcmk_on_fail_restart_container)
4845 && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4846 *on_fail = failure_strategy;
4847 }
4848
4849 } else {
4850 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4851 last_failure, on_fail);
4852
4853 if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4854 uint8_t log_level = LOG_ERR;
4855
4856 if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4857 log_level = LOG_NOTICE;
4858 }
4859 do_crm_log(log_level,
4860 "Preventing %s from restarting on %s because "
4861 "of hard failure (%s%s%s) " CRM_XS " %s",
4862 parent->id, pe__node_name(node),
4863 services_ocf_exitcode_str(history.exit_status),
4864 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4865 pcmk__s(history.exit_reason, ""), history.id);
4866 resource_location(parent, node, -INFINITY, "hard-error",
4867 rsc->cluster);
4868
4869 } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4870 crm_err("Preventing %s from restarting anywhere because "
4871 "of fatal failure (%s%s%s) " CRM_XS " %s",
4872 parent->id, services_ocf_exitcode_str(history.exit_status),
4873 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4874 pcmk__s(history.exit_reason, ""), history.id);
4875 resource_location(parent, NULL, -INFINITY, "fatal-error",
4876 rsc->cluster);
4877 }
4878 }
4879
4880done:
4881 pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4882 rsc->id, pe__node_name(node), history.id,
4883 role2text(rsc->role), role2text(rsc->next_role));
4884}
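
For orientation, the control flow of unpack_rsc_op() can be restated as a
small standalone sketch (illustrative names and statuses; the real function
interleaves expiry rechecks, probe masking, and per-status failure policy):

#include <stdbool.h>
#include <stdio.h>

enum sketch_status { X_PENDING, X_DONE, X_ERROR };

static const char *
sketch_dispatch(enum sketch_status status, bool expired_consumed,
                bool masked_probe)
{
    if (expired_consumed) {
        return "discard expired failure";
    } else if (masked_probe) {
        return "treat probe failure as not running";
    }
    switch (status) {
        case X_PENDING: return "record pending action";
        case X_DONE:    return "update resource state";
        default:        return "apply on-fail policy";
    }
}

int main(void)
{
    puts(sketch_dispatch(X_ERROR, false, false)); // "apply on-fail policy"
    return 0;
}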
4885
4886static void
4887add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4888                pcmk_scheduler_t *scheduler)
4889 {
4890 const char *cluster_name = NULL;
4891
4892 pe_rule_eval_data_t rule_data = {
4893 .node_hash = NULL,
4894 .role = pcmk_role_unknown,
4895 .now = scheduler->now,
4896 .match_data = NULL,
4897 .rsc_data = NULL,
4898 .op_data = NULL
4899 };
4900
4901 g_hash_table_insert(node->details->attrs,
4902 strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
4903
4904 g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
4905 strdup(node->details->id));
4906 if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
4907 scheduler->dc_node = node;
4908 node->details->is_dc = TRUE;
4909 g_hash_table_insert(node->details->attrs,
4910 strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
4911 } else {
4912 g_hash_table_insert(node->details->attrs,
4913 strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
4914 }
4915
4916 cluster_name = g_hash_table_lookup(scheduler->config_hash, "cluster-name");
4917 if (cluster_name) {
4918 g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
4919 strdup(cluster_name));
4920 }
4921
4922 pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data,
4923 node->details->attrs, NULL, overwrite,
4924 scheduler);
4925
4926     pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data,
4927                                node->details->utilization, NULL,
4928 FALSE, scheduler);
4929
4930 if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
4931 const char *site_name = pe_node_attribute_raw(node, "site-name");
4932
4933 if (site_name) {
4934 g_hash_table_insert(node->details->attrs,
4935 strdup(CRM_ATTR_SITE_NAME),
4936 strdup(site_name));
4937
4938 } else if (cluster_name) {
4939 /* Default to cluster-name if unset */
4940 g_hash_table_insert(node->details->attrs,
4941 strdup(CRM_ATTR_SITE_NAME),
4942 strdup(cluster_name));
4943 }
4944 }
4945}
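
The attribute tables populated above own both their keys and their values,
which is why every insertion duplicates its arguments. A self-contained GLib
illustration of that ownership pattern (attribute names here are examples;
build with pkg-config --cflags --libs glib-2.0):

#include <glib.h>
#include <stdio.h>

int main(void)
{
    // Destroy notifiers free keys and values when entries are replaced
    GHashTable *attrs = g_hash_table_new_full(g_str_hash, g_str_equal,
                                              g_free, g_free);

    g_hash_table_insert(attrs, g_strdup("#uname"), g_strdup("node1"));
    g_hash_table_insert(attrs, g_strdup("site-name"), g_strdup("dc-east"));

    printf("site-name: %s\n",
           (const char *) g_hash_table_lookup(attrs, "site-name"));
    g_hash_table_destroy(attrs);
    return 0;
}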
4946
4947static GList *
4948extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
4949{
4950 int counter = -1;
4951 int stop_index = -1;
4952 int start_index = -1;
4953
4954 xmlNode *rsc_op = NULL;
4955
4956 GList *gIter = NULL;
4957 GList *op_list = NULL;
4958 GList *sorted_op_list = NULL;
4959
4960 /* extract operations */
4961 op_list = NULL;
4962 sorted_op_list = NULL;
4963
4964 for (rsc_op = pcmk__xe_first_child(rsc_entry);
4965 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
4966
4967 if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP,
4968 pcmk__str_none)) {
4969 crm_xml_add(rsc_op, "resource", rsc);
4970 crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
4971 op_list = g_list_prepend(op_list, rsc_op);
4972 }
4973 }
4974
4975 if (op_list == NULL) {
4976 /* if there are no operations, there is nothing to do */
4977 return NULL;
4978 }
4979
4980 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
4981
4982 /* create active recurring operations as optional */
4983 if (active_filter == FALSE) {
4984 return sorted_op_list;
4985 }
4986
4987 op_list = NULL;
4988
4989 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
4990
4991 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
4992 xmlNode *rsc_op = (xmlNode *) gIter->data;
4993
4994 counter++;
4995
4996 if (start_index < stop_index) {
4997 crm_trace("Skipping %s: not active", ID(rsc_entry));
4998 break;
4999
5000 } else if (counter < start_index) {
5001 crm_trace("Skipping %s: old", ID(rsc_op));
5002 continue;
5003 }
5004 op_list = g_list_append(op_list, rsc_op);
5005 }
5006
5007 g_list_free(sorted_op_list);
5008 return op_list;
5009}
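
extract_operations() depends on sorting history entries by call ID before
computing the active window. The underlying GLib pattern, with an illustrative
struct in place of xmlNode:

#include <glib.h>

struct sketch_op {
    int call_id;
};

static gint
sketch_cmp_call_id(gconstpointer a, gconstpointer b)
{
    const struct sketch_op *op_a = a;
    const struct sketch_op *op_b = b;

    // Negative, zero, or positive, as g_list_sort() expects
    return (op_a->call_id > op_b->call_id) - (op_a->call_id < op_b->call_id);
}

/* usage: op_list = g_list_sort(op_list, sketch_cmp_call_id); */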
5010
5011GList *
5012find_operations(const char *rsc, const char *node, gboolean active_filter,
5013                 pcmk_scheduler_t *scheduler)
5014 {
5015 GList *output = NULL;
5016 GList *intermediate = NULL;
5017
5018 xmlNode *tmp = NULL;
5019 xmlNode *status = find_xml_node(scheduler->input, XML_CIB_TAG_STATUS, TRUE);
5020
5021 pcmk_node_t *this_node = NULL;
5022
5023 xmlNode *node_state = NULL;
5024
5025 for (node_state = pcmk__xe_first_child(status); node_state != NULL;
5026 node_state = pcmk__xe_next(node_state)) {
5027
5028 if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
5029 const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
5030
5031 if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5032 continue;
5033 }
5034
5035 this_node = pe_find_node(scheduler->nodes, uname);
5036 if(this_node == NULL) {
5037 CRM_LOG_ASSERT(this_node != NULL);
5038 continue;
5039
5040 } else if (pe__is_guest_or_remote_node(this_node)) {
5041 determine_remote_online_status(scheduler, this_node);
5042
5043 } else {
5044 determine_online_status(node_state, this_node, scheduler);
5045 }
5046
5047 if (this_node->details->online
5048                 || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
5049                 /* offline nodes run no resources...
5050 * unless stonith is enabled in which case we need to
5051 * make sure rsc start events happen after the stonith
5052 */
5053 xmlNode *lrm_rsc = NULL;
5054
5055 tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
5056 tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
5057
5058 for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL;
5059 lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5060
5061 if (pcmk__str_eq((const char *)lrm_rsc->name,
5062                                      XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
5063 
5064 const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
5065
5066 if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5067 continue;
5068 }
5069
5070 intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5071 output = g_list_concat(output, intermediate);
5072 }
5073 }
5074 }
5075 }
5076 }
5077
5078 return output;
5079}
@ pcmk__ar_first_implies_then
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Definition actions.c:518
#define PCMK_ACTION_STOP
Definition actions.h:74
bool pcmk_is_probe(const char *task, guint interval)
Definition actions.c:496
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition actions.c:272
#define PCMK_ACTION_META_DATA
Definition actions.h:56
#define PCMK_ACTION_PROMOTE
Definition actions.h:65
#define PCMK_ACTION_START
Definition actions.h:71
action_fail_response
Possible responses to a resource action failure.
Definition actions.h:149
@ pcmk_on_fail_ban
Ban resource from current node.
Definition actions.h:169
@ pcmk_on_fail_fence_node
Fence resource's node.
Definition actions.h:181
@ pcmk_on_fail_ignore
Act as if failure didn't happen.
Definition actions.h:163
@ pcmk_on_fail_restart_container
Restart resource's container.
Definition actions.h:186
@ pcmk_on_fail_demote
Demote if promotable, else stop.
Definition actions.h:197
@ pcmk_on_fail_standby_node
Put resource's node in standby.
Definition actions.h:178
@ pcmk_on_fail_block
Treat resource as unmanaged.
Definition actions.h:172
@ pcmk_on_fail_reset_remote
Definition actions.h:194
@ pcmk_on_fail_stop
Stop resource and leave stopped.
Definition actions.h:175
@ pcmk_on_fail_restart
Restart resource.
Definition actions.h:166
#define PCMK_ACTION_MIGRATE_FROM
Definition actions.h:57
@ pcmk_action_optional
Whether action should not be executed.
Definition actions.h:244
#define PCMK_ACTION_MIGRATE_TO
Definition actions.h:58
#define PCMK_ACTION_MONITOR
Definition actions.h:59
#define PCMK_ACTION_OFF
Definition actions.h:62
#define PCMK_ACTION_DEMOTE
Definition actions.h:49
#define PCMK_ACTION_NOTIFY
Definition actions.h:61
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Definition actions.c:507
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition actions.c:42
const char * parent
Definition cib.c:27
const char * name
Definition cib.c:26
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
Definition clone.c:245
int pcmk__effective_rc(int rc)
Definition agents.c:71
Utility functions.
#define ONLINESTATUS
Definition util.h:37
char guint crm_parse_interval_spec(const char *input)
Parse milliseconds from a Pacemaker interval specification.
Definition utils.c:271
#define OFFLINESTATUS
Definition util.h:38
int char2score(const char *score)
Get the integer value of a score string.
Definition scores.c:36
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
gboolean crm_is_true(const char *s)
Definition strings.c:416
int crm_str_to_boolean(const char *s, int *ret)
Definition strings.c:424
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition util.h:99
const char * fail2text(enum action_fail_response fail)
Definition common.c:320
const char * role2text(enum rsc_role_e role)
Definition common.c:458
const char * pe_pref(GHashTable *options, const char *name)
Definition common.c:314
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
Definition complex.c:603
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition complex.c:936
pcmk__cpg_host_t host
Definition cpg.c:4
enum crm_ais_msg_types type
Definition cpg.c:3
char uname[MAX_NAME]
Definition cpg.c:5
char data[0]
Definition cpg.c:10
uint32_t id
Definition cpg.c:0
A dumping ground.
#define CRMD_JOINSTATE_NACK
Definition crm.h:161
#define CRM_ATTR_IS_DC
Definition crm.h:117
#define CRM_ATTR_SITE_NAME
Definition crm.h:119
#define CRMD_JOINSTATE_DOWN
Definition crm.h:158
#define CRMD_JOINSTATE_PENDING
Definition crm.h:159
#define INFINITY
Definition crm.h:98
#define CRM_ATTR_KIND
Definition crm.h:115
#define CRM_ATTR_CLUSTER_NAME
Definition crm.h:118
#define CRM_ATTR_UNAME
Definition crm.h:113
#define CRM_ATTR_ID
Definition crm.h:114
#define CRMD_JOINSTATE_MEMBER
Definition crm.h:160
#define PCMK__XA_CRMD
#define PCMK__XA_EXPECTED
#define PCMK__XA_JOIN
#define PCMK__XA_IN_CCM
@ pcmk__digest_match
@ pcmk__digest_unknown
@ pcmk__fc_effective
@ pcmk__fc_default
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
Definition iso8601.c:1858
const char * pcmk__readable_interval(guint interval_ms)
Definition iso8601.c:1926
#define CRM_TRACE_INIT_DATA(name)
Definition logging.h:137
#define crm_info(fmt, args...)
Definition logging.h:382
#define do_crm_log(level, fmt, args...)
Log a message.
Definition logging.h:175
#define crm_warn(fmt, args...)
Definition logging.h:380
#define CRM_XS
Definition logging.h:56
#define crm_log_xml_debug(xml, text)
Definition logging.h:392
#define CRM_LOG_ASSERT(expr)
Definition logging.h:222
#define crm_notice(fmt, args...)
Definition logging.h:381
#define CRM_CHECK(expr, failure_action)
Definition logging.h:238
#define crm_debug(fmt, args...)
Definition logging.h:384
#define crm_err(fmt, args...)
Definition logging.h:379
#define crm_trace(fmt, args...)
Definition logging.h:385
#define pcmk__config_warn(fmt...)
#define pcmk__config_err(fmt...)
#define XML_NODE_ATTR_RSC_DISCOVERY
Definition msg_xml.h:396
#define XML_LRM_TAG_RSC_OP
Definition msg_xml.h:279
#define ID(x)
Definition msg_xml.h:474
#define XML_ATTR_TRANSITION_KEY
Definition msg_xml.h:416
#define XML_BOOLEAN_TRUE
Definition msg_xml.h:167
#define XML_ATTR_HAVE_WATCHDOG
Definition msg_xml.h:146
#define XML_ATTR_UNAME
Definition msg_xml.h:178
#define XML_RSC_ATTR_TARGET_ROLE
Definition msg_xml.h:249
#define XML_CIB_TAG_TICKET_STATE
Definition msg_xml.h:447
#define XML_TAG_TRANSIENT_NODEATTRS
Definition msg_xml.h:420
#define XML_NVPAIR_ATTR_VALUE
Definition msg_xml.h:394
#define XML_CIB_TAG_TAG
Definition msg_xml.h:450
#define XML_RSC_ATTR_REMOTE_NODE
Definition msg_xml.h:258
#define XML_RULE_ATTR_SCORE
Definition msg_xml.h:341
#define XML_CIB_TAG_OBJ_REF
Definition msg_xml.h:451
#define XML_LRM_TAG_RESOURCES
Definition msg_xml.h:277
#define XML_RSC_ATTR_MANAGED
Definition msg_xml.h:248
#define XML_LRM_ATTR_MIGRATE_SOURCE
Definition msg_xml.h:330
#define XML_NODE_IS_FENCED
Definition msg_xml.h:289
#define XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY
Definition msg_xml.h:405
#define XML_RSC_ATTR_CONTAINER
Definition msg_xml.h:255
#define XML_CIB_TAG_STATE
Definition msg_xml.h:222
#define XML_CONFIG_ATTR_SHUTDOWN_LOCK
Definition msg_xml.h:403
#define XML_LRM_ATTR_OPSTATUS
Definition msg_xml.h:316
#define XML_CIB_TAG_TICKETS
Definition msg_xml.h:446
#define XML_TAG_ATTR_SETS
Definition msg_xml.h:227
#define XML_ATTR_ID
Definition msg_xml.h:156
#define XML_LRM_ATTR_RESTART_DIGEST
Definition msg_xml.h:322
#define XML_LRM_ATTR_EXIT_REASON
Definition msg_xml.h:324
#define XML_NODE_IS_MAINTENANCE
Definition msg_xml.h:290
#define XML_TAG_META_SETS
Definition msg_xml.h:228
#define XML_BOOLEAN_FALSE
Definition msg_xml.h:168
#define XML_ATTR_QUORUM_PANIC
Definition msg_xml.h:144
#define XML_CIB_TAG_RSC_TEMPLATE
Definition msg_xml.h:240
#define XML_LRM_ATTR_TASK
Definition msg_xml.h:306
#define XML_CIB_TAG_PROPSET
Definition msg_xml.h:226
#define XML_ATTR_TYPE
Definition msg_xml.h:160
#define XML_LRM_ATTR_MIGRATE_TARGET
Definition msg_xml.h:331
#define XML_NVPAIR_ATTR_NAME
Definition msg_xml.h:393
#define CIB_OPTIONS_FIRST
Definition msg_xml.h:110
#define XML_CIB_TAG_GROUP
Definition msg_xml.h:236
#define XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT
Definition msg_xml.h:406
#define XML_CIB_TAG_STATUS
Definition msg_xml.h:204
#define XML_OP_ATTR_ON_FAIL
Definition msg_xml.h:268
#define XML_RSC_OP_LAST_CHANGE
Definition msg_xml.h:326
#define XML_LRM_ATTR_CALLID
Definition msg_xml.h:318
#define XML_LRM_ATTR_RSCID
Definition msg_xml.h:315
#define XML_CIB_TAG_LRM
Definition msg_xml.h:276
#define XML_LRM_ATTR_RC
Definition msg_xml.h:317
#define XML_CIB_TAG_NODE
Definition msg_xml.h:223
#define XML_TAG_UTILIZATION
Definition msg_xml.h:232
#define XML_CIB_TAG_RESOURCE
Definition msg_xml.h:235
#define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT
Definition msg_xml.h:404
#define XML_LRM_ATTR_INTERVAL_MS
Definition msg_xml.h:304
#define XML_LRM_TAG_RESOURCE
Definition msg_xml.h:278
pcmk_scheduler_t * scheduler
@ node_ping
Definition nodes.h:37
@ pcmk_node_variant_remote
Pacemaker Remote node.
Definition nodes.h:35
@ pcmk_node_variant_cluster
Cluster layer node.
Definition nodes.h:34
#define PCMK_NODE_ATTR_TERMINATE
Definition nodes.h:29
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition nvpair.c:447
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition nvpair.c:483
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition nvpair.c:540
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
Definition nvpair.c:568
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition nvpair.c:302
unsigned int timeout
Definition pcmk_fence.c:32
const char * action
Definition pcmk_fence.c:30
pcmk__action_result_t result
Definition pcmk_fence.c:35
const char * target
Definition pcmk_fence.c:29
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
Definition pe_health.c:23
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
Definition utils.c:296
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
Definition utils.c:36
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
Definition failcounts.c:453
void verify_pe_options(GHashTable *options)
Definition common.c:308
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Definition utils.c:89
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
Definition pe_actions.c:692
#define pe_warn_once(pe_wo_bit, fmt...)
Definition internal.h:142
#define pe__set_working_set_flags(scheduler, flags_to_set)
Definition internal.h:52
GHashTable * pe__node_list2table(const GList *list)
Definition utils.c:116
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
Definition utils.c:411
time_t get_effective_time(pcmk_scheduler_t *scheduler)
Definition utils.c:396
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Definition utils.c:707
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
#define demote_action(rsc, node, optional)
Definition internal.h:395
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
Definition remote.c:225
void destroy_ticket(gpointer data)
Definition utils.c:498
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition complex.c:962
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition utils.c:450
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
Definition clone.c:1240
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Definition utils.c:360
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
Definition bundle.c:1367
#define pe__clear_resource_flags(resource, flags_to_clear)
Definition internal.h:70
#define pe_rsc_debug(rsc, fmt, args...)
Definition internal.h:36
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
Definition failcounts.c:360
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
Definition utils.c:682
op_digest_cache_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Definition pe_digest.c:389
void pe__free_digests(gpointer ptr)
Definition pe_digest.c:34
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
Definition utils.c:146
#define pe_rsc_trace(rsc, fmt, args...)
Definition internal.h:37
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
Definition bundle.c:920
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
#define pe__set_resource_flags(resource, flags_to_set)
Definition internal.h:64
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition complex.c:1184
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
#define stop_action(rsc, node, optional)
Definition internal.h:379
#define pe_rsc_info(rsc, fmt, args...)
Definition internal.h:35
#define pe_proc_err(fmt...)
Definition internal.h:49
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
Definition native.c:90
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
Definition utils.c:510
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
bool pe__shutdown_requested(const pcmk_node_t *node)
Definition utils.c:666
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Definition pe_actions.c:129
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition utils.c:617
const char * pe_node_attribute_raw(const pcmk_node_t *node, const char *name)
Definition common.c:621
#define pe_err(fmt...)
Definition internal.h:39
#define pe__set_action_flags(action, flags_to_set)
Definition internal.h:76
#define pe_proc_warn(fmt...)
Definition internal.h:50
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
Definition pe_actions.c:889
#define pe__clear_working_set_flags(scheduler, flags_to_clear)
Definition internal.h:58
bool xml_contains_remote_node(xmlNode *xml)
Definition remote.c:84
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition remote.c:160
bool pe__is_guest_or_remote_node(const pcmk_node_t *node)
Definition remote.c:41
bool pe__is_remote_node(const pcmk_node_t *node)
Definition remote.c:25
bool pe__is_guest_node(const pcmk_node_t *node)
Definition remote.c:33
@ pcmk_rsc_match_clone_only
Match only clones and their instances, by either clone or instance ID.
Definition resources.h:205
@ pcmk_rsc_variant_primitive
Primitive resource.
Definition resources.h:34
@ pcmk_rsc_promotable
Whether resource can be promoted and demoted.
Definition resources.h:124
@ pcmk_rsc_stop_if_failed
Whether resource must be stopped (instead of demoted) if it is failed.
Definition resources.h:139
@ pcmk_rsc_migratable
Whether resource is allowed to live-migrate.
Definition resources.h:172
@ pcmk_rsc_unique
Whether resource is not an anonymous clone instance.
Definition resources.h:118
@ pcmk_rsc_needs_fencing
Whether resource requires fencing before recovery if on unclean node.
Definition resources.h:190
@ pcmk_rsc_removed
Whether resource has been removed from the configuration.
Definition resources.h:103
@ pcmk_rsc_start_pending
Whether resource has pending start action in history.
Definition resources.h:160
@ pcmk_rsc_blocked
Whether resource is blocked from further action.
Definition resources.h:109
@ pcmk_rsc_removed_filler
Whether resource has been removed but has a container.
Definition resources.h:112
@ pcmk_rsc_has_filler
Definition resources.h:184
@ pcmk_rsc_managed
Whether resource is managed.
Definition resources.h:106
@ pcmk_rsc_ignore_failure
Whether resource has an ignorable failure.
Definition resources.h:175
@ pcmk_rsc_failed
Whether resource is considered failed.
Definition resources.h:151
#define CRM_ASSERT(expr)
Definition results.h:42
@ CRM_EX_ERROR
Unspecified error.
Definition results.h:241
@ CRM_EX_MAX
Ensure crm_exit_t can hold this.
Definition results.h:305
@ PCMK_OCF_INSUFFICIENT_PRIV
Insufficient privileges.
Definition results.h:174
@ PCMK_OCF_FAILED_PROMOTED
Service failed and possibly in promoted role.
Definition results.h:179
@ PCMK_OCF_RUNNING_PROMOTED
Service active and promoted.
Definition results.h:178
@ PCMK_OCF_DEGRADED_PROMOTED
Service promoted but more likely to fail soon.
Definition results.h:181
@ PCMK_OCF_UNIMPLEMENT_FEATURE
Requested action not implemented.
Definition results.h:173
@ PCMK_OCF_NOT_CONFIGURED
Parameter invalid (inherently)
Definition results.h:176
@ PCMK_OCF_DEGRADED
Service active but more likely to fail soon.
Definition results.h:180
@ PCMK_OCF_NOT_INSTALLED
Dependencies not available locally.
Definition results.h:175
@ PCMK_OCF_UNKNOWN_ERROR
Unspecified error.
Definition results.h:171
@ PCMK_OCF_INVALID_PARAM
Parameter invalid (in local context)
Definition results.h:172
@ PCMK_OCF_NOT_RUNNING
Service safely stopped.
Definition results.h:177
@ PCMK_OCF_OK
Success.
Definition results.h:170
@ pcmk_rc_ok
Definition results.h:154
@ pcmk_rc_undetermined
Definition results.h:128
@ pcmk_rc_unpack_error
Definition results.h:118
#define pcmk_ok
Definition results.h:68
const char * crm_exit_str(crm_exit_t exit_code)
Definition results.c:640
@ PCMK_EXEC_CANCELLED
Action was cancelled.
Definition results.h:319
@ PCMK_EXEC_NO_SECRETS
Necessary CIB secrets are unavailable.
Definition results.h:329
@ PCMK_EXEC_ERROR_FATAL
Execution failed, do not retry anywhere.
Definition results.h:324
@ PCMK_EXEC_NOT_INSTALLED
Agent or dependency not available locally.
Definition results.h:325
@ PCMK_EXEC_INVALID
Action cannot be attempted (e.g. shutdown)
Definition results.h:327
@ PCMK_EXEC_DONE
Action completed, result is known.
Definition results.h:318
@ PCMK_EXEC_ERROR
Execution failed, may be retried.
Definition results.h:322
@ PCMK_EXEC_NOT_SUPPORTED
Agent does not implement requested action.
Definition results.h:321
@ PCMK_EXEC_TIMEOUT
Action did not complete in time.
Definition results.h:320
@ PCMK_EXEC_PENDING
Action is in progress.
Definition results.h:317
@ PCMK_EXEC_UNKNOWN
Used only to initialize variables.
Definition results.h:316
@ PCMK_EXEC_ERROR_HARD
Execution failed, do not retry on node.
Definition results.h:323
@ PCMK_EXEC_MAX
Maximum value for this enum.
Definition results.h:332
@ PCMK_EXEC_NO_FENCE_DEVICE
No fence device is configured for target.
Definition results.h:328
@ PCMK_EXEC_NOT_CONNECTED
No connection to executor.
Definition results.h:326
rsc_role_e
Definition roles.h:27
@ pcmk_role_started
Started.
Definition roles.h:30
@ pcmk_role_unknown
Resource role is unknown.
Definition roles.h:28
@ pcmk_role_unpromoted
Unpromoted.
Definition roles.h:31
@ pcmk_role_promoted
Promoted.
Definition roles.h:32
@ pcmk_role_stopped
Stopped.
Definition roles.h:29
@ pcmk_no_quorum_freeze
Definition scheduler.h:39
@ pcmk_no_quorum_stop
Definition scheduler.h:40
@ pcmk_no_quorum_ignore
Definition scheduler.h:41
@ pcmk_no_quorum_demote
Definition scheduler.h:43
@ pcmk_no_quorum_fence
Definition scheduler.h:42
@ pcmk_sched_stop_removed_resources
Definition scheduler.h:95
@ pcmk_sched_in_maintenance
Whether cluster is in maintenance mode (via maintenance-mode property)
Definition scheduler.h:77
@ pcmk_sched_symmetric_cluster
Whether cluster is symmetric (via symmetric-cluster property)
Definition scheduler.h:74
@ pcmk_sched_fencing_enabled
Whether fencing is enabled (via stonith-enabled property)
Definition scheduler.h:80
@ pcmk_sched_probe_resources
Definition scheduler.h:128
@ pcmk_sched_have_remote_nodes
Whether the cluster includes any Pacemaker Remote nodes (via CIB)
Definition scheduler.h:134
@ pcmk_sched_have_fencing
Whether cluster has a fencing resource (via CIB resources)
Definition scheduler.h:83
@ pcmk_sched_shutdown_lock
Definition scheduler.h:122
@ pcmk_sched_location_only
Definition scheduler.h:143
@ pcmk_sched_quorate
Whether partition has quorum (via have-quorum property)
Definition scheduler.h:71
@ pcmk_sched_concurrent_fencing
Whether concurrent fencing is allowed (via concurrent-fencing property)
Definition scheduler.h:89
@ pcmk_sched_start_failure_fatal
Definition scheduler.h:110
@ pcmk_sched_enable_unfencing
Whether any resource provides or requires unfencing (via CIB resources)
Definition scheduler.h:86
@ pcmk_sched_remove_after_stop
Definition scheduler.h:113
@ pcmk_sched_cancel_removed_actions
Definition scheduler.h:101
@ pcmk_sched_stop_all
Whether to stop all resources (via stop-all-resources property)
Definition scheduler.h:104
@ pcmk_sched_startup_fencing
Whether unseen nodes should be fenced (via startup-fencing property)
Definition scheduler.h:116
@ pcmk__check_last_failure
@ pcmk__wo_ping_node
@ pcmk__wo_poweroff
@ pcmk__wo_blind
@ pcmk__wo_remove_after
Services API.
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id_rh)
Definition status.c:391
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
Definition status.c:426
pcmk_node_t * pe_find_node(const GList *node_list, const char *node_name)
Find a node by name in a list of nodes.
Definition status.c:473
int pcmk__scan_min_int(const char *text, int *result, int minimum)
Definition strings.c:127
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition strings.c:608
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition strings.c:97
void pcmk__str_update(char **str, const char *value)
Definition strings.c:1193
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition strings.c:933
@ pcmk__str_none
@ pcmk__str_null_matches
@ pcmk__str_casei
bool pcmk__ends_with(const char *s, const char *match)
Definition strings.c:533
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition strings.c:957
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
Definition strings.c:1217
enum pcmk__digest_result rc
Definition internal.h:455
Implementation of pcmk_action_t.
Definition actions.h:390
Implementation of pcmk_node_t.
Definition nodes.h:130
int weight
Node score for a given resource.
Definition nodes.h:131
gboolean fixed
Definition nodes.h:132
struct pe_node_shared_s * details
Basic node information.
Definition nodes.h:134
Basic node information (all node objects for the same node share this)
Definition nodes.h:66
GHashTable * attrs
Node attributes.
Definition nodes.h:115
gboolean shutdown
Whether shutting down.
Definition nodes.h:78
GHashTable * digest_cache
Cache of calculated resource digests.
Definition nodes.h:117
gboolean expected_up
Whether expected join state is member.
Definition nodes.h:79
const char * id
Node ID at the cluster layer.
Definition nodes.h:67
gboolean online
Whether online.
Definition nodes.h:72
gboolean standby_onfail
Whether in standby mode due to on-fail.
Definition nodes.h:74
const char * uname
Node name in cluster.
Definition nodes.h:68
gboolean standby
Whether in standby mode.
Definition nodes.h:73
GHashTable * utilization
Node utilization attributes.
Definition nodes.h:116
gboolean unpacked
Whether node history has been unpacked.
Definition nodes.h:102
pcmk_scheduler_t * data_set
Cluster that node is part of.
Definition nodes.h:126
gboolean remote_maintenance
Definition nodes.h:100
gboolean is_dc
Whether node is cluster's DC.
Definition nodes.h:80
gboolean unclean
Whether node requires fencing.
Definition nodes.h:76
gboolean remote_requires_reset
Definition nodes.h:88
pcmk_resource_t * remote_rsc
Remote connection resource for node, if it is a Pacemaker Remote node.
Definition nodes.h:111
gboolean maintenance
Whether in maintenance mode.
Definition nodes.h:81
gboolean rsc_discovery_enabled
Whether probes are allowed on node.
Definition nodes.h:82
enum node_type type
Node variant.
Definition nodes.h:69
gboolean pending
Whether controller membership is pending.
Definition nodes.h:75
gboolean remote_was_fenced
Definition nodes.h:94
GList * running_rsc
List of resources active on node.
Definition nodes.h:113
gboolean unseen
Whether node has never joined cluster.
Definition nodes.h:77
Implementation of pcmk_resource_t.
Definition resources.h:399
GList * running_on
Nodes where resource may be active.
Definition resources.h:460
enum pe_obj_types variant
Resource variant.
Definition resources.h:414
pcmk_node_t * partial_migration_target
The destination node, if migrate_to completed but migrate_from has not.
Definition resources.h:454
GHashTable * meta
Resource's meta-attributes.
Definition resources.h:471
GList * children
Resource's child resources, if any.
Definition resources.h:475
pcmk_scheduler_t * cluster
Cluster that resource is part of.
Definition resources.h:412
pcmk_node_t * partial_migration_source
The source node, if migrate_to completed but migrate_from has not.
Definition resources.h:457
pcmk_resource_t * container
Resource containing this one, if any.
Definition resources.h:480
pcmk_rsc_methods_t * fns
Resource object methods.
Definition resources.h:416
GHashTable * known_on
Nodes where resource has been probed (key is node ID, not name)
Definition resources.h:463
char * clone_name
Resource instance ID in history.
Definition resources.h:401
gboolean is_remote_node
Whether this is a remote connection.
Definition resources.h:432
char * id
Resource ID in configuration.
Definition resources.h:400
GHashTable * allowed_nodes
Nodes where resource may run (key is node ID, not name)
Definition resources.h:466
GList * dangling_migrations
Nodes where a dangling migration left the resource needing a stop.
Definition resources.h:478
pcmk_node_t * lock_node
Resource shutdown-locked to this node.
Definition resources.h:485
unsigned long long flags
Group of enum pcmk_rsc_flags.
Definition resources.h:429
pcmk_node_t * pending_node
Node on which pending_task is happening.
Definition resources.h:484
guint remote_reconnect_ms
Retry interval for remote connections.
Definition resources.h:427
GList * fillers
Resources contained by this one, if any.
Definition resources.h:481
enum rsc_role_e next_role
Resource's scheduled next role.
Definition resources.h:469
enum rsc_role_e role
Resource's current role.
Definition resources.h:468
pcmk_resource_t * parent
Resource's parent resource, if any.
Definition resources.h:413
time_t lock_time
When shutdown lock started.
Definition resources.h:487
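A sketch of testing resource flags with pcmk_is_set(), as done throughout unpack.c; it assumes rsc is a pcmk_resource_t *:

    // flags is a group of enum pcmk_rsc_flags bits; pcmk_is_set() tests
    // one flag without disturbing the others.
    if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)
        && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
        crm_trace("%s has failed but is unmanaged", rsc->id);
    }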
GHashTable * node_hash
Definition common.h:80
Configuration tag object.
Definition tags.h:26
char * id
XML ID of tag.
Definition tags.h:27
GList * refs
XML IDs of objects that reference the tag.
Definition tags.h:28
Ticket constraint object.
Definition tickets.h:27
GHashTable * state
XML attributes from ticket state.
Definition tickets.h:32
char * id
XML ID of ticket constraint or state.
Definition tickets.h:28
gboolean standby
Whether ticket is temporarily suspended.
Definition tickets.h:31
gboolean granted
Whether cluster has been granted the ticket.
Definition tickets.h:29
time_t last_granted
When cluster was last granted the ticket.
Definition tickets.h:30
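A sketch of walking ticket state; it assumes scheduler->tickets maps ticket IDs to pcmk_ticket_t * objects:

    // Report tickets that are usable: granted to this cluster and not
    // temporarily suspended via standby.
    GHashTableIter iter;
    gpointer value = NULL;

    g_hash_table_iter_init(&iter, scheduler->tickets);
    while (g_hash_table_iter_next(&iter, NULL, &value)) {
        pcmk_ticket_t *ticket = value;

        if (ticket->granted && !ticket->standby) {
            crm_trace("Ticket %s is active", ticket->id);
        }
    }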
Implementation of pcmk_scheduler_t.
Definition scheduler.h:172
guint node_pending_timeout
Pending join times out after this (ms)
Definition scheduler.h:231
const char * stonith_action
Default fencing action.
Definition scheduler.h:179
char * dc_uuid
Node ID of designated controller.
Definition scheduler.h:177
GHashTable * tags
Configuration tags (ID -> pcmk_tag_t *)
Definition scheduler.h:218
const char * placement_strategy
Value of placement-strategy property.
Definition scheduler.h:180
guint shutdown_lock
How long to lock resources (seconds)
Definition scheduler.h:225
GHashTable * config_hash
Cluster properties.
Definition scheduler.h:187
int stonith_timeout
Value of stonith-timeout property.
Definition scheduler.h:185
GHashTable * template_rsc_sets
Mappings of template ID to resource ID.
Definition scheduler.h:213
xmlNode * input
CIB XML.
Definition scheduler.h:175
GList * resources
Resources in cluster.
Definition scheduler.h:196
unsigned long long flags
Group of enum pcmk_scheduler_flags.
Definition scheduler.h:183
pcmk_node_t * dc_node
Node object for DC.
Definition scheduler.h:178
enum pe_quorum_policy no_quorum_policy
Response to loss of quorum.
Definition scheduler.h:186
GList * stop_needed
Containers that need stop actions.
Definition scheduler.h:222
GHashTable * tickets
Ticket constraints (ID -> pcmk_ticket_t *)
Definition scheduler.h:190
GList * nodes
Nodes in cluster.
Definition scheduler.h:195
int priority_fencing_delay
Priority fencing delay.
Definition scheduler.h:226
crm_time_t * now
Current time for evaluation purposes.
Definition scheduler.h:176
const char * localhost
Definition scheduler.h:216
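A sketch of iterating the unpacked model; it assumes the unpack functions listed below have already populated the scheduler object:

    // resources holds pcmk_resource_t * entries once unpacking is done.
    for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
        pcmk_resource_t *rsc = iter->data;

        crm_trace("Unpacked resource %s", rsc->id);
    }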
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Search for a resource ID in a resource and its children.
Definition resources.h:287
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
List nodes where a resource (or any of its children) is.
Definition resources.h:339
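A sketch of querying a resource's current location through its methods table; passing current=TRUE asks where the resource is active now:

    // fns is the per-variant methods table; location() returns NULL when
    // the resource is not active anywhere.
    pcmk_node_t *node = rsc->fns->location(rsc, NULL, TRUE);

    if (node != NULL) {
        crm_trace("%s is active on %s", rsc->id, node->details->uname);
    }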
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler)
Definition unpack.c:440
#define set_config_flag(scheduler, option, flag)
Definition unpack.c:52
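A sketch of reading a boolean cluster property, both directly and via the macro; the option and flag names mirror how unpack_config()-style code uses them:

    // Direct read: pe_pref() looks the option up in the cluster
    // properties table, and crm_is_true() parses the boolean string.
    const char *value = pe_pref(scheduler->config_hash, "stonith-enabled");
    if (crm_is_true(value)) {
        // fencing is enabled
    }

    // Macro form: sets or clears the scheduler flag to match the option.
    set_config_flag(scheduler, "stonith-enabled", pcmk_sched_fencing_enabled);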
#define XPATH_ENABLE_UNFENCING
Definition unpack.c:191
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
Definition unpack.c:2532
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition unpack.c:110
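A minimal call sketch; the reason string is illustrative, and passing false declines the priority fencing delay:

    // Request that the named node be fenced, recording why.
    pe_fence_node(scheduler, node, "peer is unclean", false);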
const char * pe_base_name_end(const char *id)
Definition unpack.c:1825
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
Definition unpack.c:1319
#define SUB_XPATH_LRM_RESOURCE
Definition unpack.c:2828
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
Definition unpack.c:5012
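A usage sketch; it assumes the returned list holds xmlNode * history entries and that only the list itself, not the XML, is owned by the caller:

    // Collect matching operation history for one resource on one node;
    // active_filter=TRUE limits the results to operations still relevant.
    GList *ops = find_operations("my-rsc", "node1", TRUE, scheduler);

    for (GList *iter = ops; iter != NULL; iter = iter->next) {
        xmlNode *xml_op = iter->data;
        // examine one history entry
    }
    g_list_free(ops);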
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition unpack.c:674
char * clone_zero(const char *last_rsc_id)
Definition unpack.c:1887
#define XPATH_NODE_STATE
Definition unpack.c:2826
#define SUB_XPATH_LRM_RSC_OP
Definition unpack.c:2831
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition unpack.c:816
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
Definition unpack.c:212
char * clone_strip(const char *last_rsc_id)
Definition unpack.c:1865
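A sketch of both clone-name helpers (clone_zero() is listed above at unpack.c:1887); the results are newly allocated and must be freed by the caller:

    // Given a history ID with a clone instance suffix, clone_strip()
    // drops the suffix and clone_zero() maps it to instance 0.
    char *base = clone_strip("myclone:2");  // "myclone"
    char *zero = clone_zero("myclone:2");   // "myclone:0"

    free(base);
    free(zero);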
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
Definition unpack.c:597
int pe__target_rc_from_xml(const xmlNode *xml_op)
Definition unpack.c:4275
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
Definition unpack.c:884
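A sketch of the usual unpack order as driven by cluster_status(); the xmlNode * section variables are assumed to have been extracted from scheduler->input beforehand:

    // Configuration and node/resource definitions are unpacked before the
    // status section, which depends on all of them.
    unpack_config(config, scheduler);
    unpack_nodes(xml_nodes, scheduler);
    unpack_remote_nodes(xml_resources, scheduler);
    unpack_resources(xml_resources, scheduler);
    unpack_tags(xml_tags, scheduler);
    unpack_status(status, scheduler);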
Wrappers for and extensions to libxml2.
xmlNode * first_named_child(const xmlNode *parent, const char *name)
Definition xml.c:2484
void copy_in_properties(xmlNode *target, const xmlNode *src)
Definition xml.c:456
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition xpath.c:211
xmlNode * crm_next_same_xml(const xmlNode *sibling)
Get next instance of same XML tag.
Definition xml.c:2510
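A sketch of the sibling-iteration idiom these two helpers support; the element name is illustrative:

    // Visit every child of parent with the same tag name.
    for (xmlNode *child = first_named_child(parent, "node_state");
         child != NULL; child = crm_next_same_xml(child)) {
        // handle one <node_state> element
    }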
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition xpath.c:39
xmlNode * find_xml_node(const xmlNode *root, const char *search_path, gboolean must_find)
Definition xml.c:384
xmlNode * add_node_copy(xmlNode *new_parent, xmlNode *xml_node)
Definition xml.c:622
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
Definition xpath.c:139
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition xml.c:638
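A sketch of an XPath query against the CIB; it assumes the companion helpers numXpathResults() and getXpathResult() from the same API are available:

    // Run the query, walk each match, then release the result object.
    xmlXPathObjectPtr xpathObj = xpath_search(scheduler->input, "//node_state");

    for (int i = 0; i < numXpathResults(xpathObj); i++) {
        xmlNode *match = getXpathResult(xpathObj, i);
        // process one match
    }
    freeXpathObject(xpathObj);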