76 #include <sys/types.h>
81 #include <sys/socket.h>
84 #include <sys/resource.h>
86 #include <netinet/in.h>
87 #include <arpa/inet.h>
96 #include <semaphore.h>
99 #include <qb/qbdefs.h>
100 #include <qb/qblog.h>
101 #include <qb/qbloop.h>
102 #include <qb/qbutil.h>
103 #include <qb/qbipcs.h>
113 #ifdef HAVE_LIBCGROUP
114 #include <libcgroup.h>
/*
 * IPC_LOGSYS_SIZE is defined twice: the first value directly follows the
 * HAVE_SMALL_MEMORY_FOOTPRINT test, the second is presumably the fallback
 * branch (the #else/#endif lines are not part of this excerpt).
 *
 * NOTE(review): both expansions are bare products without surrounding
 * parentheses, so an expression such as `n / IPC_LOGSYS_SIZE` would not
 * group as intended. Consider `(1024*64)` and `(8192*128)`.
 */
129 #ifdef HAVE_SMALL_MEMORY_FOOTPRINT
130 #define IPC_LOGSYS_SIZE 1024*64
132 #define IPC_LOGSYS_SIZE 8192*128
/* NOTE(review): name suggests a listen() backlog; no use is visible in this excerpt. */
147 #define SERVER_BACKLOG 5
/* Filled in by corosync_set_rr_scheduler() from sched_get_priority_max(SCHED_RR). */
149 static int sched_priority = 0;
/* Upper bound of the per-service loop in confchg_fn(). */
151 static unsigned int service_count = 32;
/* Tested against 1 at the start of confchg_fn(). */
157 static int sync_in_process = 1;
/* Main libqb loop: created in main() with qb_loop_create() and run with qb_loop_run(). */
159 static qb_loop_t *corosync_poll_handle;
/* Lock/pid file: handed to corosync_flock() in main() and unlinked before main() returns. */
165 static const char *corosync_lock_file =
LOCALSTATEDIR"/run/corosync.pid";
/* Address family; initialised to AF_INET. */
167 static int ip_version = AF_INET;
171 return (corosync_poll_handle);
179 int (*dispatch_fn) (
int fd,
183 return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data,
189 return qb_loop_poll_del(handle, fd);
/*
 * Write the libqb blackbox to a file and point a symlink at it.
 *
 * The target name is built from the pattern "%s/fdata-%s-%lld"; the last
 * field is the process id. The first two arguments are not part of this
 * excerpt (presumably the run directory and the timestamp string built
 * just above - TODO confirm). Afterwards "<get_run_dir()>/fdata" is
 * symlinked to the freshly written file.
 */
203 static void corosync_blackbox_write_to_file (
void)
205 char fname[PATH_MAX];
206 char fdata_fname[PATH_MAX];
207 char time_str[PATH_MAX];
208 struct tm cur_time_tm;
/* Format the current local time as an ISO-8601-like string. */
212 cur_time_t = time(NULL);
213 localtime_r(&cur_time_t, &cur_time_tm);
215 strftime(time_str, PATH_MAX,
"%Y-%m-%dT%H:%M:%S", &cur_time_tm);
216 snprintf(fname, PATH_MAX,
"%s/fdata-%s-%lld",
219 (
long long int)getpid());
/* A negative return from libqb is treated as failure. */
221 if ((res = qb_log_blackbox_write_to_file(fname)) < 0) {
225 snprintf(fdata_fname,
sizeof(fdata_fname),
"%s/fdata",
get_run_dir());
/* NOTE(review): symlink() fails with EEXIST if the link already exists; check whether it is removed first. */
227 if (symlink(fname, fdata_fname) == -1) {
/*
 * Stops the main loop (corosync_poll_handle) via qb_loop_stop(), which
 * lets qb_loop_run() in main() return. Other statements of the body are
 * not part of this excerpt.
 */
233 static void unlink_all_completed (
void)
236 qb_loop_stop (corosync_poll_handle);
/*
 * Loop signal callback; main() registers it for SIGUSR2 at QB_LOOP_LOW
 * priority through qb_loop_signal_add(). The body is not part of this
 * excerpt.
 */
245 static int32_t sig_diag_handler (
int num,
void *data)
/*
 * Loop signal callback; main() registers it for SIGINT, SIGQUIT and
 * SIGTERM at QB_LOOP_HIGH priority through qb_loop_signal_add(). The body
 * is not part of this excerpt.
 */
251 static int32_t sig_exit_handler (
int num,
void *data)
/*
 * Plain signal() handler installed by main() for SIGSEGV and SIGABRT.
 *
 * NOTE(review): corosync_blackbox_write_to_file() calls localtime_r(),
 * strftime() and snprintf(), none of which are async-signal-safe; this is
 * a best-effort dump from a crashing process.
 */
258 static void sigsegv_handler (
int num)
/* Restore the default disposition for this signal before dumping. */
260 (void)signal (num, SIG_DFL);
261 corosync_blackbox_write_to_file ();
/* Expands to a call, so inet_addr("127.0.0.1") is evaluated at every use. */
266 #define LOCALHOST_IP inet_addr("127.0.0.1")
/* Group handle; its address and value are passed to calls made from main(). */
268 static void *corosync_group_handle;
275 static void serialize_lock (
void)
279 static void serialize_unlock (
void)
/*
 * Completion callback handed over in main_service_ready() together with
 * corosync_sync_callbacks_retrieve. The visible part emits the "ready to
 * provide service" message; the rest of the body is not shown.
 */
283 static void corosync_sync_completed (
void)
286 "Completed service synchronization, ready to provide service.");
/*
 * Callback lookup handed over in main_service_ready(). Parameters and most
 * of the body are not part of this excerpt; the visible fragment checks
 * `callbacks` for NULL.
 */
297 static int corosync_sync_callbacks_retrieve (
305 if (callbacks == NULL) {
/*
 * Called from confchg_fn() for every entry of the joined list.
 *
 * Builds three key names for the node from its id:
 *   runtime.totem.pg.mrp.srp.members.<nodeid>.ip
 *   runtime.totem.pg.mrp.srp.members.<nodeid>.join_count
 *   runtime.totem.pg.mrp.srp.members.<nodeid>.status
 * What is stored under each key is not visible here.
 */
320 static void member_object_joined (
unsigned int nodeid)
327 "runtime.totem.pg.mrp.srp.members.%u.ip", nodeid);
329 "runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid);
331 "runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
/*
 * Called from confchg_fn() for every entry of the left list. Builds the
 * key name runtime.totem.pg.mrp.srp.members.<nodeid>.status; the value
 * written is not visible here.
 */
346 static void member_object_left (
unsigned int nodeid)
351 "runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
/*
 * Membership configuration-change callback.
 *
 * Visible steps, in order:
 *   1. test sync_in_process (the guarded action is not shown),
 *   2. copy *ring_id into the file-level corosync_ring_id,
 *   3. call member_object_left()/member_object_joined() for every id in
 *      left_list/joined_list,
 *   4. iterate over service_count slots, forwarding the three lists and
 *      ring_id (the callee is not shown),
 *   5. act on abort_activate if it was set,
 *   6. call sync_start() with the new member list and ring id.
 *
 * member_list/left_list/joined_list are arrays of node ids with their
 * lengths given by the matching *_entries argument.
 */
358 static void confchg_fn (
360 const unsigned int *member_list,
size_t member_list_entries,
361 const unsigned int *left_list,
size_t left_list_entries,
362 const unsigned int *joined_list,
size_t joined_list_entries,
366 int abort_activate = 0;
368 if (sync_in_process == 1) {
/* Remember the ring id of the new configuration. */
373 memcpy (&corosync_ring_id, ring_id,
sizeof (
struct memb_ring_id));
375 for (i = 0; i < left_list_entries; i++) {
376 member_object_left (left_list[i]);
378 for (i = 0; i < joined_list_entries; i++) {
379 member_object_joined (joined_list[i]);
384 for (i = 0; i < service_count; i++) {
387 member_list, member_list_entries,
388 left_list, left_list_entries,
389 joined_list, joined_list_entries, ring_id);
393 if (abort_activate) {
400 sync_start (member_list, member_list_entries, ring_id);
404 static void priv_drop (
void)
/*
 * Detach from the controlling terminal; called from main().
 *
 * The visible part opens /dev/null read-write and duplicates it onto file
 * descriptors 0, 1 and 2, treating a failure of any dup2() as an error.
 * Earlier steps of the body are not part of this excerpt.
 */
409 static void corosync_tty_detach (
void)
437 devnull = open(
"/dev/null", O_RDWR);
442 if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0
443 || dup2(devnull, 2) < 0) {
/*
 * Raise the locked-memory limit to unlimited and lock all current and
 * future pages of the process; called from main().
 */
450 static void corosync_mlockall (
void)
453 struct rlimit rlimit;
455 rlimit.rlim_cur = RLIM_INFINITY;
456 rlimit.rlim_max = RLIM_INFINITY;
/* Fall back to RLIMIT_VMEM where RLIMIT_MEMLOCK is not defined. */
458 #ifndef RLIMIT_MEMLOCK
459 #define RLIMIT_MEMLOCK RLIMIT_VMEM
/* NOTE(review): the setrlimit() result is not checked. */
462 setrlimit (RLIMIT_MEMLOCK, &rlimit);
464 res = mlockall (MCL_CURRENT | MCL_FUTURE);
467 "Could not lock memory of service to avoid page faults");
/*
 * Timer callback that refreshes runtime.totem.pg.mrp.srp.* statistics.
 *
 * Visible behaviour:
 *   - sets runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure to 1
 *     and emits the "unable to form a cluster" message with a reason
 *     string, or sets the key to 0 (the deciding conditions are not shown);
 *   - accumulates three totals and publishes each divided by token_count
 *     as mtt_rx_token, avg_token_workload and avg_backlog_calc;
 *   - passes itself and &corosync_stats_timer_handle to a final call,
 *     presumably re-arming the timer (callee not shown - TODO confirm).
 *
 * NOTE(review): the divisions by token_count need a non-zero guard; none
 * is visible in this excerpt.
 */
472 static void corosync_totem_stats_updater (
void *data)
475 uint32_t total_mtt_rx_token;
476 uint32_t total_backlog_calc;
477 uint32_t total_token_holdtime;
514 icmap_set_uint8(
"runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
522 cstr =
"number of multicast sendmsg failures is above threshold";
526 cstr =
"totem is continuously in gather state";
530 "Totem is unable to form a cluster because of an "
531 "operating system or network fault (reason: %s). The most common "
532 "cause of this message is that the local firewall is "
533 "configured improperly.", cstr);
534 icmap_set_uint8(
"runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
536 icmap_set_uint8(
"runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
/* Reset the accumulators before summing. */
543 total_mtt_rx_token = 0;
544 total_token_holdtime = 0;
545 total_backlog_calc = 0;
566 icmap_set_uint32(
"runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count));
567 icmap_set_uint32(
"runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count));
568 icmap_set_uint32(
"runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count));
574 corosync_totem_stats_updater,
575 &corosync_stats_timer_handle);
/*
 * Called from main_service_ready(). The visible fragment hands
 * corosync_totem_stats_updater and &corosync_stats_timer_handle to a call
 * whose name is not shown (presumably the first scheduling of the stats
 * timer - TODO confirm).
 */
578 static void corosync_totem_stats_init (
void)
586 corosync_totem_stats_updater,
587 &corosync_stats_timer_handle);
/*
 * Message delivery callback.
 *
 * msg_len is the length of the delivered message and
 * endian_conversion_required is non-zero when the message needs byte-order
 * conversion. The flag is tested twice in the visible fragment; the second
 * time the code asserts that the target service's exec_engine entry
 * provides an exec_endian_convert_fn. How `service` and `fn_id` are
 * derived is not part of this excerpt.
 */
590 static void deliver_fn (
593 unsigned int msg_len,
594 int endian_conversion_required)
596 const struct qb_ipc_request_header *
header;
602 if (endian_conversion_required) {
625 if (endian_conversion_required) {
626 assert(
corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
636 const struct iovec *iovec,
637 unsigned int iov_len,
640 const struct qb_ipc_request_header *req = iovec->iov_base;
644 service = req->id >> 16;
645 fn_id = req->id & 0xffff;
/*
 * Load the ring sequence number from a "<dir>/ringid_<suffix>" file, or
 * create that file when it cannot be read.
 *
 * The file is opened read-only and sizeof(uint64_t) bytes are read into
 * memb_ring_id->seq. If the open failed or the read was short, seq is
 * reset to 0 and the file is (re)created and written. Failures to create
 * or write are reported with the messages below. Finally the function
 * asserts that memb_ring_id->rep is not the zero address.
 *
 * NOTE(review): the sequence is stored as raw bytes in host byte order.
 */
654 static void corosync_ring_id_create_or_load (
660 char filename[PATH_MAX];
662 snprintf (filename,
sizeof(filename),
"%s/ringid_%s",
/* The mode argument is ignored by open() when O_CREAT is absent. */
664 fd = open (filename, O_RDONLY, 0700);
669 res = read (fd, &memb_ring_id->
seq, sizeof (uint64_t));
675 if ((fd == -1) || (res !=
sizeof (uint64_t))) {
676 memb_ring_id->
seq = 0;
678 fd = open (filename, O_CREAT|O_RDWR, 0700);
680 res = write (fd, &memb_ring_id->
seq, sizeof (uint64_t));
684 "Couldn't write ringid file '%s'", filename);
690 "Couldn't create ringid file '%s'", filename);
697 assert (!totemip_zero_check(&memb_ring_id->
rep));
/*
 * Persist memb_ring_id->seq to the "<dir>/ringid_<suffix>" file.
 *
 * The file is opened write-only; a second open with O_CREAT|O_RDWR is
 * visible and is presumably the fallback when the first one fails (the
 * condition is not shown). The sequence is written as raw bytes and a
 * short write is reported as a storage failure.
 */
700 static void corosync_ring_id_store (
701 const struct memb_ring_id *memb_ring_id,
704 char filename[PATH_MAX];
708 snprintf (filename,
sizeof(filename),
"%s/ringid_%s",
711 fd = open (filename, O_WRONLY, 0700);
713 fd = open (filename, O_CREAT|O_RDWR, 0700);
717 "Couldn't store new ring id %llx to stable storage",
723 "Storing new sequence id for ring %llx", memb_ring_id->
seq);
724 res = write (fd, &memb_ring_id->
seq,
sizeof(memb_ring_id->
seq));
/* Anything other than a full-size write counts as failure. */
726 if (res !=
sizeof(memb_ring_id->
seq)) {
728 "Couldn't store new ring id %llx to stable storage",
735 static qb_loop_timer_handle recheck_the_q_level_timer;
751 unsigned int service,
754 void *sending_allowed_private_data)
758 struct iovec reserve_iovec;
759 struct qb_ipc_request_header *header = (
struct qb_ipc_request_header *)msg;
762 reserve_iovec.iov_base = (
char *)header;
763 reserve_iovec.iov_len = header->size;
766 corosync_group_handle,
772 sending_allowed = QB_FALSE;
778 sending_allowed = QB_TRUE;
780 sending_allowed = QB_TRUE;
787 return -EHOSTUNREACH;
790 return (sending_allowed);
808 assert (source != NULL);
819 assert ((source != NULL) && (conn != NULL));
/*
 * Self re-arming timer callback that measures how long the process went
 * without being run.
 *
 * `data` carries the timeout bookkeeping (tv_prev, max_tv_diff). On the
 * first call tv_prev is 0 (main() zeroes the structure before calling this
 * function directly) and is only initialised. On later calls the elapsed
 * nanoseconds since the previous call are computed; the message below
 * reports that value and the threshold in milliseconds (the comparison
 * that triggers the message is not shown). The function then schedules
 * itself again on corosync_poll_handle.
 */
832 static void timer_function_scheduler_timeout (
void *data)
835 unsigned long long tv_current;
836 unsigned long long tv_diff;
838 tv_current = qb_util_nano_current_get ();
/* First invocation: no previous timestamp to compare with. */
840 if (timeout_data->
tv_prev == 0) {
844 timeout_data->
tv_prev = tv_current;
848 tv_diff = tv_current - timeout_data->
tv_prev;
849 timeout_data->
tv_prev = tv_current;
853 "(threshold is %0.4f ms). Consider token timeout increase.",
854 (
float)tv_diff / QB_TIME_NS_IN_MSEC, (
float)timeout_data->
max_tv_diff / QB_TIME_NS_IN_MSEC);
861 qb_loop_timer_add (corosync_poll_handle,
865 timer_function_scheduler_timeout,
/*
 * Try to switch the process to SCHED_RR at the maximum priority.
 *
 * Only compiled in full when the platform provides
 * pthread_setschedparam, sched_get_priority_max and sched_setscheduler;
 * otherwise a "missing process priority setting features" message is
 * emitted. Where libqb supports it, the logging thread priority is set as
 * well: to SCHED_RR/sched_priority in one path, and to SCHED_OTHER/0 in
 * the path that follows the "Could not set SCHED_RR" message. main()
 * treats a non-zero return as failure.
 */
870 static int corosync_set_rr_scheduler (
void)
874 #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
877 sched_priority = sched_get_priority_max (SCHED_RR);
/* sched_get_priority_max() returns -1 on error. */
878 if (sched_priority != -1) {
883 "Could not set SCHED_RR at priority %d",
887 #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
888 qb_log_thread_priority_set (SCHED_OTHER, 0);
896 #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
897 res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
903 "Could not set logsys thread priority."
904 " Can't continue because of priority inversions.");
910 "Could not get maximum scheduler priority");
916 "The Platform is missing process priority setting features. Leaving at default.");
/*
 * Helper used by _logsys_log_printf() on its file_name argument. The
 * visible part locates the last '/' in file_name with strrchr(); how the
 * result is turned into the return value is not part of this excerpt.
 */
926 static const char *corosync_basename(
const char *file_name)
929 base = strrchr (file_name,
'/');
/*
 * Prototype followed by the definition of the printf-style logging entry
 * point. The definition starts a va_list on `format` and forwards the
 * message to qb_log_from_external_source_va(), passing file_name through
 * corosync_basename() first.
 *
 * NOTE(review): identifiers starting with an underscore at file scope are
 * reserved for the implementation in C; the name is part of the external
 * interface here and therefore left alone.
 */
938 _logsys_log_printf(
int level,
int subsys,
939 const char *function_name,
940 const char *file_name,
946 _logsys_log_printf(
int level,
int subsys,
947 const
char *function_name,
948 const
char *file_name,
950 const
char *format, ...)
954 va_start(ap, format);
955 qb_log_from_external_source_va(function_name, corosync_basename(file_name),
956 format, level, file_line,
/*
 * Key-change notification callback registered twice by
 * corosync_fplay_control_init().
 *
 * A change of "runtime.blackbox.dump_flight_data" writes the blackbox to
 * disk; a change of "runtime.blackbox.dump_state" is handled by code that
 * is not part of this excerpt.
 *
 * NOTE(review): both fprintf(stderr, ...) calls look like leftover debug
 * output, and "statefump" is probably a typo for "statedump" - confirm
 * before touching the strings.
 */
961 static void fplay_key_change_notify_fn (
963 const char *key_name,
968 if (strcmp(key_name,
"runtime.blackbox.dump_flight_data") == 0) {
969 fprintf(stderr,
"Writetofile\n");
970 corosync_blackbox_write_to_file ();
972 if (strcmp(key_name,
"runtime.blackbox.dump_state") == 0) {
973 fprintf(stderr,
"statefump\n");
/*
 * Called from main_service_ready(). Passes fplay_key_change_notify_fn to
 * two calls whose names and remaining arguments are not shown; presumably
 * one registration per runtime.blackbox.* key the callback checks - TODO
 * confirm.
 */
978 static void corosync_fplay_control_init (
void)
987 fplay_key_change_notify_fn,
991 fplay_key_change_notify_fn,
1002 static void set_icmap_ro_keys_flag (
void)
/*
 * Callback handed over in main(). The visible part initialises the totem
 * statistics and the blackbox control keys, then passes the sync callbacks
 * (corosync_sync_callbacks_retrieve, corosync_sync_completed) to a call
 * whose name is not part of this excerpt.
 */
1035 static void main_service_ready (
void)
1048 corosync_totem_stats_init ();
1049 corosync_fplay_control_init ();
1051 corosync_sync_callbacks_retrieve,
1052 corosync_sync_completed);
/*
 * Take an exclusive lock on `lockfile` and record `pid` in it.
 *
 * Visible steps: open (creating if needed, mode 0640), request a
 * non-blocking write lock with fcntl(F_SETLK), truncate the file, write
 * the pid followed by a newline, and set FD_CLOEXEC on the descriptor.
 * Failures after the lock is held jump to the error_close_unlink label.
 * main() compares the result against COROSYNC_DONE_EXIT.
 *
 * NOTE(review): pid is a pid_t but is formatted with "%u"; a cast or a
 * signed conversion would match the type exactly.
 */
1055 static enum e_corosync_done corosync_flock (
const char *lockfile, pid_t pid)
1065 lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
1072 lock.l_type = F_WRLCK;
1074 lock.l_whence = SEEK_SET;
/* F_SETLK does not block: it fails if the lock is already held. */
1076 if (fcntl (lf, F_SETLK, &
lock) == -1) {
1096 if (ftruncate (lf, 0) == -1) {
1100 goto error_close_unlink;
/* Zero-fill first, then format into all but the last byte, so pid_s stays NUL-terminated. */
1103 memset (pid_s, 0,
sizeof (pid_s));
1104 snprintf (pid_s,
sizeof (pid_s) - 1,
"%u\n", pid);
1107 if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
1108 if (errno == EINTR) {
1112 "Error was %s", strerror (errno));
1114 goto error_close_unlink;
/* Mark the lock descriptor close-on-exec. */
1118 if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
1120 "Error was %s", strerror (errno));
1122 goto error_close_unlink;
1124 fd_flag |= FD_CLOEXEC;
1125 if (fcntl (lf, F_SETFD, fd_flag) == -1) {
1127 "Error was %s", strerror (errno));
1129 goto error_close_unlink;
/*
 * Move the process into the root ("/") cgroup of the "cpu" controller.
 *
 * Only active when built with libcgroup. Visible steps: initialise
 * libcgroup, query the current "cpu" controller path of this process and
 * compare it with "/", build a "/" cgroup object with the "cpu"
 * controller, attach the current task to it, query the path again and
 * compare it with "/" once more, and finally free the cgroup object if one
 * was created. libcgroup failures are reported through cgroup_strerror().
 * main() ignores the return value.
 *
 * The third argument of cgroup_get_current_controller_path() is the
 * address of current_cgroup_path; the listing had it mangled into
 * "¤t_cgroup_path" (an "&curren" entity decoding artefact).
 */
1142 static int corosync_move_to_root_cgroup(
void) {
1144 #ifdef HAVE_LIBCGROUP
1146 struct cgroup *root_cgroup = NULL;
1147 struct cgroup_controller *root_cpu_cgroup_controller = NULL;
1148 char *current_cgroup_path = NULL;
1150 cg_ret = cgroup_init();
1153 cgroup_strerror(cg_ret));
1158 cg_ret = cgroup_get_current_controller_path(getpid(),
"cpu", &current_cgroup_path);
1161 cgroup_strerror(cg_ret));
/* Compare the current path with the root cgroup. */
1166 if (strcmp(current_cgroup_path,
"/") == 0) {
1173 root_cgroup = cgroup_new_cgroup(
"/");
1174 if (root_cgroup == NULL) {
1180 root_cpu_cgroup_controller = cgroup_add_controller(root_cgroup,
"cpu");
1181 if (root_cpu_cgroup_controller == NULL) {
1187 cg_ret = cgroup_attach_task(root_cgroup);
1190 cgroup_strerror(cg_ret));
/* Query the path again after attaching. */
1195 cg_ret = cgroup_get_current_controller_path(getpid(),
"cpu", &current_cgroup_path);
1198 cgroup_strerror(cg_ret));
1203 if (strcmp(current_cgroup_path,
"/") == 0) {
1211 if (root_cgroup != NULL) {
1212 cgroup_free(&root_cgroup);
/*
 * Daemon entry point.
 *
 * Visible flow: parse the command line (option string "fP:pRrtv"),
 * install crash and SIGPIPE handling, mark read-only icmap keys, validate
 * the run directory, optionally move to the root cpu cgroup, set up
 * scheduling (SCHED_RR or an explicit priority), detach from the tty, lock
 * memory, create the libqb main loop, start the scheduler-pause timer,
 * register loop signal handlers, take the pid-file lock, run the loop, and
 * on shutdown destroy the loop and remove the pid file.
 *
 * Returns EXIT_SUCCESS on normal shutdown and after -v; EXIT_FAILURE for
 * an invalid -P value and after printing the usage text.
 */
1225 int main (
int argc,
char **argv,
char **envp)
1227 const char *error_string;
1230 int background, sched_rr, prio, testonly, move_to_root_cgroup;
1231 struct stat stat_out;
1233 uint64_t totem_config_warnings;
/* Moving to the root cgroup is on by default; -R turns it off. */
1244 move_to_root_cgroup = 1;
1246 while ((ch = getopt (argc, argv,
"fP:pRrtv")) != EOF) {
/* -P accepts the keywords "max" and "min" or a decimal number. */
1256 if (strcmp(optarg,
"max") == 0) {
1258 }
else if (strcmp(optarg,
"min") == 0) {
/* NOTE(review): the errno test below is only meaningful if errno was cleared before strtol(); that line is not visible here. */
1263 tmpli = strtol(optarg, &ep, 10);
1264 if (errno != 0 || *ep !=
'\0' || tmpli > INT_MAX || tmpli < INT_MIN) {
1265 fprintf(stderr,
"Priority value %s is invalid", optarg);
1267 return EXIT_FAILURE;
1274 move_to_root_cgroup = 0;
1283 printf (
"Corosync Cluster Engine, version '%s'\n",
VERSION);
1284 printf (
"Copyright (c) 2006-2009 Red Hat, Inc.\n");
1286 return EXIT_SUCCESS;
1292 " -f : Start application in foreground.\n"\
1293 " -p : Do not set realtime scheduling.\n"\
1294 " -r : Set round robin realtime scheduling (default).\n"\
1295 " -R : Do not try move corosync to root cpu cgroup (valid when built with libcgroup)\n" \
1296 " -P num : Set priority of process (no effect when -r is used)\n"\
1297 " -t : Test configuration and exit.\n"\
1298 " -v : Display version and SVN revision of Corosync and exit.\n");
1300 return EXIT_FAILURE;
/* The same handler serves SIGSEGV and SIGABRT; it dumps the blackbox. */
1308 (void)signal (SIGSEGV, sigsegv_handler);
1309 (void)signal (SIGABRT, sigsegv_handler);
/* SIGPIPE is ignored only where MSG_NOSIGNAL is non-zero. */
1310 #if MSG_NOSIGNAL != 0
1311 (void)signal (SIGPIPE, SIG_IGN);
1318 set_icmap_ro_keys_flag();
1331 fprintf (stderr,
"%s\n", error_string);
1346 fprintf(stderr,
"%s", error_string);
/* Fails when the stat result is -1, or when it is 0 but the path is not a directory. */
1360 if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
1368 "Please make sure it has correct context and rights.",
get_run_dir());
1392 "Nodelist one is going to be used.");
1395 if (totem_config_warnings != 0) {
1420 if (move_to_root_cgroup) {
/* Best effort: the result is deliberately discarded. */
1421 (void)corosync_move_to_root_cgroup();
1428 if (corosync_set_rr_scheduler () != 0) {
/* who == 0 with PRIO_PGRP addresses the caller's process group. */
1436 if (setpriority(PRIO_PGRP, 0, prio) != 0) {
1438 "Could not set priority %d", prio);
1449 "totemmrp.c,totemrrp.c,totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
1450 "totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c");
1467 corosync_tty_detach ();
1478 corosync_mlockall ();
1480 corosync_poll_handle = qb_loop_create ();
/* Zeroed state makes the first timer call initialise tv_prev only. */
1482 memset(&scheduler_pause_timeout_data, 0,
sizeof(scheduler_pause_timeout_data));
1484 timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
/* Diagnostics on SIGUSR2 at low priority; termination signals at high priority. */
1486 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
1487 SIGUSR2, NULL, sig_diag_handler, NULL);
1488 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1489 SIGINT, NULL, sig_exit_handler, NULL);
1490 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1491 SIGQUIT, NULL, sig_exit_handler, NULL);
1492 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1493 SIGTERM, NULL, sig_exit_handler, NULL);
/* Any result other than COROSYNC_DONE_EXIT takes the branch below. */
1500 if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) !=
COROSYNC_DONE_EXIT) {
1515 corosync_poll_handle,
1516 &totem_config) != 0) {
1523 main_service_ready);
1526 &corosync_group_handle,
1531 corosync_group_handle,
/* Runs until something calls qb_loop_stop() (see unlink_all_completed). */
1552 qb_loop_run (corosync_poll_handle);
1562 qb_loop_destroy (corosync_poll_handle);
1571 unlink (corosync_lock_file);
1575 return EXIT_SUCCESS;