pacemaker 2.1.7-2.1.7
Scalable High-Availability cluster resource manager
Loading...
Searching...
No Matches
membership.c
Go to the documentation of this file.
1/*
2 * Copyright 2004-2023 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10#include <crm_internal.h>
11
12#ifndef _GNU_SOURCE
13# define _GNU_SOURCE
14#endif
15
16#include <sys/param.h>
17#include <sys/types.h>
18#include <stdio.h>
19#include <unistd.h>
20#include <string.h>
21#include <glib.h>
22#include <crm/common/ipc.h>
25#include <crm/msg_xml.h>
26#include <crm/stonith-ng.h>
27#include "crmcluster_private.h"
28
/* The peer cache remembers cluster nodes that have been seen.
 * This is managed mostly automatically by libcluster, based on
 * cluster membership events.
 *
 * Because cluster nodes can have conflicting names or UUIDs,
 * the hash table key is a uniquely generated ID.
 */
GHashTable *crm_peer_cache = NULL;

/*
 * The remote peer cache tracks pacemaker_remote nodes. While the
 * value has the same type as the peer cache's, it is tracked separately for
 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
 * so the name (which is also the UUID) is used as the hash table key; there
 * is no equivalent of membership events, so management is not automatic; and
 * most users of the peer cache need to exclude pacemaker_remote nodes.
 *
 * That said, using a single cache would be more logical and less error-prone,
 * so it would be a good idea to merge them one day.
 *
 * libcluster provides two avenues for populating the cache:
 * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it,
 * while crm_remote_peer_cache_refresh() populates it via the CIB.
 */
GHashTable *crm_remote_peer_cache = NULL;

/*
 * The known node cache tracks cluster and remote nodes that have been seen in
 * the CIB. It is useful mainly when a caller needs to know about a node that
 * may no longer be in the membership, but doesn't want to add the node to the
 * main peer cache tables.
 */
static GHashTable *known_node_cache = NULL;

// Cluster membership sequence number (presumably updated on membership events
// outside this view — confirm against callers)
unsigned long long crm_peer_seq = 0;

// Whether the local cluster partition currently has quorum
gboolean crm_have_quorum = FALSE;

// Whether lost cluster nodes should be purged from the cache automatically
// (see crm_set_autoreap())
static gboolean crm_autoreap = TRUE;
66
// Flag setting and clearing for crm_node_t:flags

/* Set the given flag group on a peer cache entry, logging the transition at
 * trace level via pcmk__set_flags_as() (the stringified flag names are passed
 * for the log message).
 */
#define set_peer_flags(peer, flags_to_set) do {                               \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                           "Peer", (peer)->uname,             \
                                           (peer)->flags, (flags_to_set),     \
                                           #flags_to_set);                    \
    } while (0)

/* Clear the given flag group on a peer cache entry, logging the transition at
 * trace level via pcmk__clear_flags_as().
 */
#define clear_peer_flags(peer, flags_to_clear) do {                           \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
                                             LOG_TRACE,                       \
                                             "Peer", (peer)->uname,           \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)

// Forward declaration (defined below; needed by cache management functions)
static void update_peer_uname(crm_node_t *node, const char *uname);
85
86int
88{
89 if (crm_remote_peer_cache == NULL) {
90 return 0;
91 }
92 return g_hash_table_size(crm_remote_peer_cache);
93}
94
107crm_remote_peer_get(const char *node_name)
108{
109 crm_node_t *node;
110
111 if (node_name == NULL) {
112 errno = -EINVAL;
113 return NULL;
114 }
115
116 /* Return existing cache entry if one exists */
117 node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
118 if (node) {
119 return node;
120 }
121
122 /* Allocate a new entry */
123 node = calloc(1, sizeof(crm_node_t));
124 if (node == NULL) {
125 return NULL;
126 }
127
128 /* Populate the essential information */
130 node->uuid = strdup(node_name);
131 if (node->uuid == NULL) {
132 free(node);
133 errno = -ENOMEM;
134 return NULL;
135 }
136
137 /* Add the new entry to the cache */
138 g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
139 crm_trace("added %s to remote cache", node_name);
140
141 /* Update the entry's uname, ensuring peer status callbacks are called */
142 update_peer_uname(node, node_name);
143 return node;
144}
145
146void
147crm_remote_peer_cache_remove(const char *node_name)
148{
149 if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
150 crm_trace("removed %s from remote peer cache", node_name);
151 }
152}
153
165static const char *
166remote_state_from_cib(const xmlNode *node_state)
167{
168 bool status = false;
169
171 &status) == pcmk_rc_ok) && !status) {
172 return CRM_NODE_LOST;
173 } else {
174 return CRM_NODE_MEMBER;
175 }
176}
177
/* user data for looping through remote node xpath searches */
struct refresh_data {
    const char *field;     /* XML attribute to check for node name */
    gboolean has_state;    /* whether to update node state based on XML */
};
183
191static void
192remote_cache_refresh_helper(xmlNode *result, void *user_data)
193{
194 const struct refresh_data *data = user_data;
195 const char *remote = crm_element_value(result, data->field);
196 const char *state = NULL;
197 crm_node_t *node;
198
199 CRM_CHECK(remote != NULL, return);
200
201 /* Determine node's state, if the result has it */
202 if (data->has_state) {
203 state = remote_state_from_cib(result);
204 }
205
206 /* Check whether cache already has entry for node */
207 node = g_hash_table_lookup(crm_remote_peer_cache, remote);
208
209 if (node == NULL) {
210 /* Node is not in cache, so add a new entry for it */
211 node = crm_remote_peer_get(remote);
212 CRM_ASSERT(node);
213 if (state) {
214 pcmk__update_peer_state(__func__, node, state, 0);
215 }
216
217 } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
218 /* Node is in cache and hasn't been updated already, so mark it clean */
220 if (state) {
221 pcmk__update_peer_state(__func__, node, state, 0);
222 }
223 }
224}
225
226static void
227mark_dirty(gpointer key, gpointer value, gpointer user_data)
228{
230}
231
232static gboolean
233is_dirty(gpointer key, gpointer value, gpointer user_data)
234{
235 return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
236}
237
243void
245{
246 struct refresh_data data;
247
249
250 /* First, we mark all existing cache entries as dirty,
251 * so that later we can remove any that weren't in the CIB.
252 * We don't empty the cache, because we need to detect changes in state.
253 */
254 g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
255
256 /* Look for guest nodes and remote nodes in the status section */
257 data.field = "id";
258 data.has_state = TRUE;
260 remote_cache_refresh_helper, &data);
261
262 /* Look for guest nodes and remote nodes in the configuration section,
263 * because they may have just been added and not have a status entry yet.
264 * In that case, the cached node state will be left NULL, so that the
265 * peer status callback isn't called until we're sure the node started
266 * successfully.
267 */
268 data.field = "value";
269 data.has_state = FALSE;
271 remote_cache_refresh_helper, &data);
272 data.field = "id";
273 data.has_state = FALSE;
275 remote_cache_refresh_helper, &data);
276
277 /* Remove all old cache entries that weren't seen in the CIB */
278 g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
279}
280
281gboolean
283{
284 if(node == NULL) {
285 return FALSE;
286 }
287
288 if (pcmk_is_set(node->flags, crm_remote_node)) {
289 /* remote nodes are never considered active members. This
290 * guarantees they will never be considered for DC membership.*/
291 return FALSE;
292 }
293#if SUPPORT_COROSYNC
294 if (is_corosync_cluster()) {
295 return crm_is_corosync_peer_active(node);
296 }
297#endif
298 crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
299 return FALSE;
300}
301
302static gboolean
303crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
304{
305 crm_node_t *node = value;
306 crm_node_t *search = user_data;
307
308 if (search == NULL) {
309 return FALSE;
310
311 } else if (search->id && node->id != search->id) {
312 return FALSE;
313
314 } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
315 return FALSE;
316
317 } else if (crm_is_peer_active(value) == FALSE) {
318 crm_info("Removing node with name %s and id %u from membership cache",
319 (node->uname? node->uname : "unknown"), node->id);
320 return TRUE;
321 }
322 return FALSE;
323}
324
333guint
334reap_crm_member(uint32_t id, const char *name)
335{
336 int matches = 0;
337 crm_node_t search = { 0, };
338
339 if (crm_peer_cache == NULL) {
340 crm_trace("Membership cache not initialized, ignoring purge request");
341 return 0;
342 }
343
344 search.id = id;
345 pcmk__str_update(&search.uname, name);
346 matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
347 if(matches) {
348 crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
349 matches, pcmk__plural_s(matches), search.id,
350 (search.uname? " and/or uname=" : ""),
351 (search.uname? search.uname : ""));
352
353 } else {
354 crm_info("No peers with id=%u%s%s to purge from the membership cache",
355 search.id, (search.uname? " and/or uname=" : ""),
356 (search.uname? search.uname : ""));
357 }
358
359 free(search.uname);
360 return matches;
361}
362
363static void
364count_peer(gpointer key, gpointer value, gpointer user_data)
365{
366 guint *count = user_data;
367 crm_node_t *node = value;
368
369 if (crm_is_peer_active(node)) {
370 *count = *count + 1;
371 }
372}
373
374guint
376{
377 guint count = 0;
378
379 if (crm_peer_cache) {
380 g_hash_table_foreach(crm_peer_cache, count_peer, &count);
381 }
382 return count;
383}
384
/*!
 * \internal
 * \brief Free a node cache entry and all its heap-allocated members
 *        (GDestroyNotify used as the value-destroy function of all caches)
 */
static void
destroy_crm_node(gpointer data)
{
    crm_node_t *node = data;

    crm_trace("Destroying entry for node %u: %s", node->id, node->uname);

    free(node->uname);
    free(node->state);
    free(node->uuid);
    free(node->expected);
    free(node->conn_host);
    free(node);
}
399
400void
402{
403 if (crm_peer_cache == NULL) {
404 crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
405 }
406
407 if (crm_remote_peer_cache == NULL) {
408 crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
409 }
410
411 if (known_node_cache == NULL) {
412 known_node_cache = pcmk__strikey_table(free, destroy_crm_node);
413 }
414}
415
416void
418{
419 if (crm_peer_cache != NULL) {
420 crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
421 g_hash_table_destroy(crm_peer_cache);
422 crm_peer_cache = NULL;
423 }
424
425 if (crm_remote_peer_cache != NULL) {
426 crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
427 g_hash_table_destroy(crm_remote_peer_cache);
429 }
430
431 if (known_node_cache != NULL) {
432 crm_trace("Destroying known node cache with %d members",
433 g_hash_table_size(known_node_cache));
434 g_hash_table_destroy(known_node_cache);
435 known_node_cache = NULL;
436 }
437
438}
439
// Client-registered callback invoked on peer status changes (see
// update_peer_uname(), crm_update_peer_proc(), update_peer_state_iter())
static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
                                    const void *) = NULL;

/*!
 * \brief Set a client function to be called after a peer status change
 *
 * \param[in] dispatch  Function to call (receives the change type, the node,
 *                      and change-specific data; NULL disables callbacks)
 *
 * \note The callback must not modify the peer caches (the code bails out if
 *       the cache disappears, but that is only a safety net).
 */
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
    peer_status_callback = dispatch;
}
458
/*!
 * \brief Enable or disable automatic purging of lost nodes from the
 *        cluster peer cache
 *
 * \param[in] autoreap  TRUE to purge nodes when they leave membership,
 *                      FALSE to leave them cached (caller manages reaping)
 */
void
crm_set_autoreap(gboolean autoreap)
{
    crm_autoreap = autoreap;
}
475
/*!
 * \internal
 * \brief Log every cluster peer cache entry at the given log level
 *        (used for diagnosing name/ID conflicts)
 *
 * \param[in] level   Log level to use
 * \param[in] caller  Name of calling function, for the log messages
 */
static void
dump_peer_hash(int level, const char *caller)
{
    GHashTableIter iter;
    const char *id = NULL;
    crm_node_t *node = NULL;

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
        do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
    }
}
488
489static gboolean
490hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
491{
492 return value == user_data;
493}
494
506pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
507{
508 crm_node_t *node = NULL;
509
510 CRM_ASSERT(id > 0 || uname != NULL);
511
513
514 if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) {
515 node = g_hash_table_lookup(crm_remote_peer_cache, uname);
516 }
517
518 if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
519 node = pcmk__search_cluster_node_cache(id, uname, NULL);
520 }
521 return node;
522}
523
536pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid,
537 int flags)
538{
539 crm_node_t *node = NULL;
540
541 CRM_ASSERT(id > 0 || uname != NULL);
542
544
546 node = g_hash_table_lookup(crm_remote_peer_cache, uname);
547 }
548
549 if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
550 node = pcmk__get_peer(id, uname, uuid);
551 }
552 return node;
553}
554
565crm_get_peer_full(unsigned int id, const char *uname, int flags)
566{
567 return pcmk__get_peer_full(id, uname, NULL, flags);
568}
569
582pcmk__search_cluster_node_cache(unsigned int id, const char *uname,
583 const char *uuid)
584{
585 GHashTableIter iter;
586 crm_node_t *node = NULL;
587 crm_node_t *by_id = NULL;
588 crm_node_t *by_name = NULL;
589
590 CRM_ASSERT(id > 0 || uname != NULL);
591
593
594 if (uname != NULL) {
595 g_hash_table_iter_init(&iter, crm_peer_cache);
596 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
597 if(node->uname && strcasecmp(node->uname, uname) == 0) {
598 crm_trace("Name match: %s = %p", node->uname, node);
599 by_name = node;
600 break;
601 }
602 }
603 }
604
605 if (id > 0) {
606 g_hash_table_iter_init(&iter, crm_peer_cache);
607 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
608 if(node->id == id) {
609 crm_trace("ID match: %u = %p", node->id, node);
610 by_id = node;
611 break;
612 }
613 }
614
615 } else if (uuid != NULL) {
616 g_hash_table_iter_init(&iter, crm_peer_cache);
617 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
618 if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
619 crm_trace("UUID match: %s = %p", node->uuid, node);
620 by_id = node;
621 break;
622 }
623 }
624 }
625
626 node = by_id; /* Good default */
627 if(by_id == by_name) {
628 /* Nothing to do if they match (both NULL counts) */
629 crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
630
631 } else if(by_id == NULL && by_name) {
632 crm_trace("Only one: %p for %u/%s", by_name, id, uname);
633
634 if(id && by_name->id) {
635 dump_peer_hash(LOG_WARNING, __func__);
636 crm_crit("Node %u and %u share the same name '%s'",
637 id, by_name->id, uname);
638 node = NULL; /* Create a new one */
639
640 } else {
641 node = by_name;
642 }
643
644 } else if(by_name == NULL && by_id) {
645 crm_trace("Only one: %p for %u/%s", by_id, id, uname);
646
647 if(uname && by_id->uname) {
648 dump_peer_hash(LOG_WARNING, __func__);
649 crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
650 uname, by_id->uname, id, uname);
651 }
652
653 } else if(uname && by_id->uname) {
654 if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
655 crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
656 g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
657
658 } else {
659 crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
660 dump_peer_hash(LOG_INFO, __func__);
661 crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
662 TRUE);
663 }
664
665 } else if(id && by_name->id) {
666 crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
667
668 } else {
669 /* Simple merge */
670
671 /* Only corosync-based clusters use node IDs. The functions that call
672 * pcmk__update_peer_state() and crm_update_peer_proc() only know
673 * nodeid, so 'by_id' is authoritative when merging.
674 */
675 dump_peer_hash(LOG_DEBUG, __func__);
676
677 crm_info("Merging %p into %p", by_name, by_id);
678 g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
679 }
680
681 return node;
682}
683
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Purge cached offline cluster peers whose name conflicts with a node
 *
 * \param[in] node  Node whose name should be unique in the cache
 *
 * \return Number of conflicting entries removed
 *
 * \note Skipped when corosync provides a nodelist, since the list is then
 *       authoritative and duplicates are expected to be resolved there.
 */
static guint
remove_conflicting_peer(crm_node_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    crm_node_t *existing_node = NULL;

    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

    if (!pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if (existing_node->id > 0
            && existing_node->id != node->id
            && existing_node->uname != NULL
            && strcasecmp(existing_node->uname, node->uname) == 0) {

            if (crm_is_peer_active(existing_node)) {
                continue;
            }

            crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                     existing_node->id, existing_node->uname, node->id);

            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif
722
733/* coverity[-alloc] Memory is referenced in one or both hashtables */
735pcmk__get_peer(unsigned int id, const char *uname, const char *uuid)
736{
737 crm_node_t *node = NULL;
738 char *uname_lookup = NULL;
739
740 CRM_ASSERT(id > 0 || uname != NULL);
741
743
744 node = pcmk__search_cluster_node_cache(id, uname, uuid);
745
746 /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
747 * we need to do a lookup of the node name using the id in the cluster membership. */
748 if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
749 uname_lookup = get_node_name(id);
750 }
751
752 if (uname_lookup) {
753 uname = uname_lookup;
754 crm_trace("Inferred a name of '%s' for node %u", uname, id);
755
756 /* try to turn up the node one more time now that we know the uname. */
757 if (node == NULL) {
758 node = pcmk__search_cluster_node_cache(id, uname, uuid);
759 }
760 }
761
762
763 if (node == NULL) {
764 char *uniqueid = crm_generate_uuid();
765
766 node = calloc(1, sizeof(crm_node_t));
767 CRM_ASSERT(node);
768
769 crm_info("Created entry %s/%p for node %s/%u (%d total)",
770 uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
771 g_hash_table_replace(crm_peer_cache, uniqueid, node);
772 }
773
774 if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
775 crm_info("Node %u is now known as %s", id, uname);
776 }
777
778 if(id > 0 && node->id == 0) {
779 node->id = id;
780 }
781
782 if (uname && (node->uname == NULL)) {
783 update_peer_uname(node, uname);
784 }
785
786 if(node->uuid == NULL) {
787 if (uuid == NULL) {
788 uuid = crm_peer_uuid(node);
789 }
790
791 if (uuid) {
792 crm_info("Node %u has uuid %s", id, uuid);
793
794 } else {
795 crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
796 }
797 }
798
799 free(uname_lookup);
800
801 return node;
802}
803
812/* coverity[-alloc] Memory is referenced in one or both hashtables */
814crm_get_peer(unsigned int id, const char *uname)
815{
816 return pcmk__get_peer(id, uname, NULL);
817}
818
830static void
831update_peer_uname(crm_node_t *node, const char *uname)
832{
833 CRM_CHECK(uname != NULL,
834 crm_err("Bug: can't update node name without name"); return);
835 CRM_CHECK(node != NULL,
836 crm_err("Bug: can't update node name to %s without node", uname);
837 return);
838
839 if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
840 crm_debug("Node uname '%s' did not change", uname);
841 return;
842 }
843
844 for (const char *c = uname; *c; ++c) {
845 if ((*c >= 'A') && (*c <= 'Z')) {
846 crm_warn("Node names with capitals are discouraged, consider changing '%s'",
847 uname);
848 break;
849 }
850 }
851
853
854 if (peer_status_callback != NULL) {
855 peer_status_callback(crm_status_uname, node, NULL);
856 }
857
858#if SUPPORT_COROSYNC
860 remove_conflicting_peer(node);
861 }
862#endif
863}
864
873static inline const char *
874proc2text(enum crm_proc_flag proc)
875{
876 const char *text = "unknown";
877
878 switch (proc) {
879 case crm_proc_none:
880 text = "none";
881 break;
882 case crm_proc_based:
883 text = "pacemaker-based";
884 break;
886 text = "pacemaker-controld";
887 break;
889 text = "pacemaker-schedulerd";
890 break;
891 case crm_proc_execd:
892 text = "pacemaker-execd";
893 break;
894 case crm_proc_attrd:
895 text = "pacemaker-attrd";
896 break;
897 case crm_proc_fenced:
898 text = "pacemaker-fenced";
899 break;
900 case crm_proc_cpg:
901 text = "corosync-cpg";
902 break;
903 }
904 return text;
905}
906
924crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
925{
926 uint32_t last = 0;
927 gboolean changed = FALSE;
928
929 CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
930 source, proc2text(flag), status);
931 return NULL);
932
933 /* Pacemaker doesn't spawn processes on remote nodes */
934 if (pcmk_is_set(node->flags, crm_remote_node)) {
935 return node;
936 }
937
938 last = node->processes;
939 if (status == NULL) {
940 node->processes = flag;
941 if (node->processes != last) {
942 changed = TRUE;
943 }
944
945 } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) {
946 if ((node->processes & flag) != flag) {
947 node->processes = pcmk__set_flags_as(__func__, __LINE__,
948 LOG_TRACE, "Peer process",
949 node->uname, node->processes,
950 flag, "processes");
951 changed = TRUE;
952 }
953
954 } else if (node->processes & flag) {
955 node->processes = pcmk__clear_flags_as(__func__, __LINE__,
956 LOG_TRACE, "Peer process",
957 node->uname, node->processes,
958 flag, "processes");
959 changed = TRUE;
960 }
961
962 if (changed) {
963 if (status == NULL && flag <= crm_proc_none) {
964 crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
965 node->id);
966 } else {
967 crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
968 proc2text(flag), status);
969 }
970
971 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
972 node->when_online = time(NULL);
973
974 } else {
975 node->when_online = 0;
976 }
977
978 /* Call the client callback first, then update the peer state,
979 * in case the node will be reaped
980 */
981 if (peer_status_callback != NULL) {
982 peer_status_callback(crm_status_processes, node, &last);
983 }
984
985 /* The client callback shouldn't touch the peer caches,
986 * but as a safety net, bail if the peer cache was destroyed.
987 */
988 if (crm_peer_cache == NULL) {
989 return NULL;
990 }
991
992 if (crm_autoreap) {
993 const char *peer_state = NULL;
994
995 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
996 peer_state = CRM_NODE_MEMBER;
997 } else {
998 peer_state = CRM_NODE_LOST;
999 }
1000 node = pcmk__update_peer_state(__func__, node, peer_state, 0);
1001 }
1002 } else {
1003 crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
1004 proc2text(flag), status);
1005 }
1006 return node;
1007}
1008
/*!
 * \internal
 * \brief Update a node's expected join state, logging the change
 *
 * \param[in]     source    Caller's function name (for log messages)
 * \param[in,out] node      Node object to update
 * \param[in]     expected  Node's new expected state (no-op if NULL or
 *                          unchanged, ignoring case)
 */
void
pcmk__update_peer_expected(const char *source, crm_node_t *node,
                           const char *expected)
{
    char *last = NULL;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
              return);

    /* Remote nodes don't participate in joins */
    if (pcmk_is_set(node->flags, crm_remote_node)) {
        return;
    }

    last = node->expected;
    if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
        node->expected = strdup(expected);
        changed = TRUE;
    }

    if (changed) {
        crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
                 expected, last);
        // last (old value) is freed only here; when unchanged, node->expected
        // still owns it
        free(last);
    } else {
        crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
                  node->id, expected);
    }
}
1047
/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state (e.g. CRM_NODE_MEMBER)
 * \param[in]     membership  If nonzero and node became a member, new
 *                            membership ID to record as last_seen
 * \param[in,out] iter        If not NULL, a peer cache iterator positioned at
 *                            this node, used to remove it safely mid-iteration
 *
 * \return NULL if the node was reaped (in which case the node object was
 *         freed and must not be used again), otherwise \p node
 */
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
                       uint64_t membership, GHashTableIter *iter)
{
    gboolean is_member;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s"
                      CRM_XS " source=%s", state, source);
              return NULL);

    is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
    if (is_member) {
        node->when_lost = 0;
        if (membership) {
            node->last_seen = membership;
        }
    }

    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
        char *last = node->state;

        if (is_member) {
            node->when_member = time(NULL);

        } else {
            node->when_member = 0;
        }

        node->state = strdup(state);
        crm_notice("Node %s state is now %s " CRM_XS
                   " nodeid=%u previous=%s source=%s", node->uname, state,
                   node->id, (last? last : "unknown"), source);
        // Invoke the callback before any potential reap, so it sees the node
        if (peer_status_callback != NULL) {
            peer_status_callback(crm_status_nstate, node, last);
        }
        free(last);

        if (crm_autoreap && !is_member
            && !pcmk_is_set(node->flags, crm_remote_node)) {
            /* We only autoreap from the peer cache, not the remote peer cache,
             * because the latter should be managed only by
             * crm_remote_peer_cache_refresh().
             */
            if(iter) {
                crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
                g_hash_table_iter_remove(iter);

            } else {
                reap_crm_member(node->id, node->uname);
            }
            node = NULL;
        }

    } else {
        crm_trace("Node %s state is unchanged (%s) " CRM_XS
                  " nodeid=%u source=%s", node->uname, state, node->id, source);
    }
    return node;
}
1124
/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  If nonzero and node became a member, new
 *                            membership ID to record
 *
 * \return NULL if any node was reaped, value of node (same as argument)
 *         otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This wrapper must not be called
 *       within a peer cache iteration (use update_peer_state_iter() there).
 */
crm_node_t *
pcmk__update_peer_state(const char *source, crm_node_t *node,
                        const char *state, uint64_t membership)
{
    return update_peer_state_iter(source, node, state, membership, NULL);
}
1146
/*!
 * \internal
 * \brief Mark as lost (and possibly reap) any cluster peer whose last_seen
 *        does not match the given membership ID
 *
 * \param[in] membership  Membership ID of the current cluster event
 */
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;

    crm_trace("Reaping unseen nodes...");
    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
        if (node->last_seen != membership) {
            if (node->state) {
                /*
                 * Calling update_peer_state_iter() allows us to
                 * remove the node from crm_peer_cache without
                 * invalidating our iterator
                 */
                update_peer_state_iter(__func__, node, CRM_NODE_LOST,
                                       membership, &iter);

            } else {
                // State never established; leave the entry for later events
                crm_info("State of node %s[%u] is still unknown",
                         node->uname, node->id);
            }
        }
    }
}
1179
/*!
 * \internal
 * \brief Search the known node cache by CIB node ID and/or name
 *
 * \param[in] id     If not NULL, CIB node ID (UUID) to match
 * \param[in] uname  If not NULL, node name to match (case-insensitive)
 *
 * \return Matching cache entry, or NULL when nothing matches or when ID and
 *         name point to irreconcilably different entries
 */
static crm_node_t *
find_known_node(const char *id, const char *uname)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    if (uname) {
        g_hash_table_iter_init(&iter, known_node_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    if (id) {
        g_hash_table_iter_init(&iter, known_node_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            // NOTE(review): assumes node->uuid is never NULL in this cache
            // (known_node_cache_refresh_helper() always sets it) — confirm
            if(strcasecmp(node->uuid, id) == 0) {
                crm_trace("ID match: %s= %p", id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %s/%s", by_id, id, uname);

    } else if (by_id == NULL && by_name) {
        crm_trace("Only one: %p for %s/%s", by_name, id, uname);

        if (id) {
            // Caller gave an ID that doesn't match the named entry's ID
            node = NULL;

        } else {
            node = by_name;
        }

    } else if (by_name == NULL && by_id) {
        crm_trace("Only one: %p for %s/%s", by_id, id, uname);

        if (uname) {
            // Caller gave a name that doesn't match the ID entry's name
            node = NULL;
        }

    } else if (uname && by_id->uname
               && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
        /* Multiple nodes have the same uname in the CIB.
         * Return by_id. */

    } else if (id && by_name->uuid
               && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
        /* Multiple nodes have the same id in the CIB.
         * Return by_name. */
        node = by_name;

    } else {
        node = NULL;
    }

    if (node == NULL) {
        crm_debug("Couldn't find node%s%s%s%s",
                  id? " " : "",
                  id? id : "",
                  uname? " with name " : "",
                  uname? uname : "");
    }

    return node;
}
1257
1258static void
1259known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1260{
1261 const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1262 const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1263 crm_node_t * node = NULL;
1264
1265 CRM_CHECK(id != NULL && uname !=NULL, return);
1266 node = find_known_node(id, uname);
1267
1268 if (node == NULL) {
1269 char *uniqueid = crm_generate_uuid();
1270
1271 node = calloc(1, sizeof(crm_node_t));
1272 CRM_ASSERT(node != NULL);
1273
1274 node->uname = strdup(uname);
1275 CRM_ASSERT(node->uname != NULL);
1276
1277 node->uuid = strdup(id);
1278 CRM_ASSERT(node->uuid != NULL);
1279
1280 g_hash_table_replace(known_node_cache, uniqueid, node);
1281
1282 } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1283 pcmk__str_update(&node->uname, uname);
1284
1285 /* Node is in cache and hasn't been updated already, so mark it clean */
1287 }
1288
1289}
1290
1291static void
1292refresh_known_node_cache(xmlNode *cib)
1293{
1294 crm_peer_init();
1295
1296 g_hash_table_foreach(known_node_cache, mark_dirty, NULL);
1297
1299 known_node_cache_refresh_helper, NULL);
1300
1301 /* Remove all old cache entries that weren't seen in the CIB */
1302 g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL);
1303}
1304
1305void
1307{
1309 refresh_known_node_cache(cib);
1310}
1311
1322crm_node_t *
1323pcmk__search_known_node_cache(unsigned int id, const char *uname,
1324 uint32_t flags)
1325{
1326 crm_node_t *node = NULL;
1327 char *id_str = NULL;
1328
1329 CRM_ASSERT(id > 0 || uname != NULL);
1330
1332
1333 if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1334 return node;
1335 }
1336
1337 if (id > 0) {
1338 id_str = crm_strdup_printf("%u", id);
1339 }
1340
1341 node = find_known_node(id_str, uname);
1342
1343 free(id_str);
1344 return node;
1345}
1346
1347
1348// Deprecated functions kept only for backward API compatibility
1349// LCOV_EXCL_START
1350
1351#include <crm/cluster/compat.h>
1352
// Deprecated: fence a node by kicking it via the fencer (120s timeout);
// kept only for backward API compatibility
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}

// Deprecated: identical to crm_terminate_member(); the 'connection'
// parameter is ignored
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}
1364
1365// LCOV_EXCL_STOP
1366// End deprecated API
const char * name
Definition cib.c:26
crm_proc_flag
Definition internal.h:17
@ crm_proc_attrd
Definition internal.h:27
@ crm_proc_schedulerd
Definition internal.h:28
@ crm_proc_controld
Definition internal.h:26
@ crm_proc_cpg
Definition internal.h:21
@ crm_proc_based
Definition internal.h:25
@ crm_proc_execd
Definition internal.h:24
@ crm_proc_none
Definition internal.h:18
@ crm_proc_fenced
Definition internal.h:29
gboolean is_corosync_cluster(void)
Check whether the local cluster is a Corosync cluster.
Definition cluster.c:389
#define CRM_NODE_LOST
Definition cluster.h:32
const char * crm_peer_uuid(crm_node_t *node)
Get (and set if needed) a node's UUID.
Definition cluster.c:38
enum cluster_type_e get_cluster_type(void)
Get (and validate) the local cluster type.
Definition cluster.c:325
#define CRM_NODE_MEMBER
Definition cluster.h:33
char * get_node_name(uint32_t nodeid)
Get the node name corresponding to a cluster node ID.
Definition cluster.c:204
const char * name_for_cluster_type(enum cluster_type_e type)
Get a log-friendly string equivalent of a cluster type.
Definition cluster.c:304
@ CRM_GET_PEER_CLUSTER
Definition cluster.h:133
@ CRM_GET_PEER_REMOTE
Definition cluster.h:134
@ crm_remote_node
Definition cluster.h:52
@ crm_node_dirty
Definition cluster.h:55
crm_status_type
Definition cluster.h:181
@ crm_status_processes
Definition cluster.h:184
@ crm_status_nstate
Definition cluster.h:183
@ crm_status_uname
Definition cluster.h:182
int pcmk__xe_get_bool_attr(const xmlNode *node, const char *name, bool *value)
Definition nvpair.c:878
uint64_t flags
Definition remote.c:3
#define ONLINESTATUS
Definition util.h:37
char * crm_generate_uuid(void)
Definition utils.c:509
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition utils.c:397
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition util.h:99
Deprecated Pacemaker cluster API.
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Check whether a Corosync cluster peer is active.
Definition corosync.c:531
bool pcmk__corosync_has_nodelist(void)
Definition corosync.c:730
char uname[MAX_NAME]
Definition cpg.c:5
char data[0]
Definition cpg.c:10
uint32_t id
Definition cpg.c:0
#define PCMK__XA_IN_CCM
IPC interface to Pacemaker daemons.
#define crm_info(fmt, args...)
Definition logging.h:382
#define do_crm_log(level, fmt, args...)
Log a message.
Definition logging.h:175
#define crm_warn(fmt, args...)
Definition logging.h:380
#define CRM_XS
Definition logging.h:56
#define crm_crit(fmt, args...)
Definition logging.h:378
#define crm_notice(fmt, args...)
Definition logging.h:381
#define CRM_CHECK(expr, failure_action)
Definition logging.h:238
#define crm_debug(fmt, args...)
Definition logging.h:384
#define crm_err(fmt, args...)
Definition logging.h:379
#define crm_trace(fmt, args...)
Definition logging.h:385
#define LOG_TRACE
Definition logging.h:38
gboolean crm_have_quorum
Definition membership.c:64
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition membership.c:924
void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected)
crm_node_t * pcmk__search_known_node_cache(unsigned int id, const char *uname, uint32_t flags)
int crm_remote_peer_cache_size(void)
Definition membership.c:87
GHashTable * crm_peer_cache
Definition membership.c:36
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition membership.c:244
crm_node_t * pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, int flags)
Get a node cache entry (cluster or Pacemaker Remote)
Definition membership.c:536
crm_node_t * pcmk__get_peer(unsigned int id, const char *uname, const char *uuid)
Get a cluster node cache entry.
Definition membership.c:735
#define clear_peer_flags(peer, flags_to_clear)
Definition membership.c:76
crm_node_t * pcmk__search_cluster_node_cache(unsigned int id, const char *uname, const char *uuid)
Definition membership.c:582
void pcmk__reap_unseen_nodes(uint64_t membership)
int crm_terminate_member(int nodeid, const char *uname, void *unused)
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition membership.c:334
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition membership.c:471
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition membership.c:454
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Get a cluster node cache entry.
Definition membership.c:814
crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
Definition membership.c:506
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Get a node cache entry (cluster or Pacemaker Remote)
Definition membership.c:565
void pcmk__refresh_node_caches_from_cib(xmlNode *cib)
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
GHashTable * crm_remote_peer_cache
Definition membership.c:53
void crm_remote_peer_cache_remove(const char *node_name)
Definition membership.c:147
#define set_peer_flags(peer, flags_to_set)
Definition membership.c:69
crm_node_t * pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership)
Update a node's state and membership information.
unsigned long long crm_peer_seq
Definition membership.c:63
gboolean crm_is_peer_active(const crm_node_t *node)
Definition membership.c:282
void crm_peer_init(void)
Definition membership.c:401
void crm_peer_destroy(void)
Definition membership.c:417
guint crm_active_peers(void)
Definition membership.c:375
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition membership.c:107
#define XML_ATTR_UNAME
Definition msg_xml.h:178
#define XML_ATTR_ID
Definition msg_xml.h:156
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition nvpair.c:447
pcmk__action_result_t result
Definition pcmk_fence.c:35
#define CRM_ASSERT(expr)
Definition results.h:42
@ pcmk_rc_ok
Definition results.h:154
Fencing aka. STONITH.
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition st_client.c:1971
#define pcmk__plural_s(i)
void pcmk__str_update(char **str, const char *value)
Definition strings.c:1193
@ pcmk__str_casei
GHashTable * pcmk__strikey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition strings.c:646
uint32_t processes
Definition cluster.h:64
char * uname
Definition cluster.h:59
char * expected
Definition cluster.h:77
time_t when_member
Definition cluster.h:82
time_t when_online
Definition cluster.h:83
char * conn_host
Definition cluster.h:80
uint64_t last_seen
Definition cluster.h:63
uint32_t id
Definition cluster.h:72
char * state
Definition cluster.h:61
char * uuid
Definition cluster.h:60
time_t when_lost
Definition cluster.h:73
uint64_t flags
Definition cluster.h:62
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition xpath.c:170
#define PCMK__XP_REMOTE_NODE_CONFIG
#define PCMK__XP_REMOTE_NODE_STATUS
#define PCMK__XP_MEMBER_NODE_CONFIG
#define PCMK__XP_GUEST_NODE_CONFIG