svn commit: r212038 - head/sbin/hastd

Pawel Jakub Dawidek pjd at FreeBSD.org
Mon Aug 30 23:26:11 UTC 2010


Author: pjd
Date: Mon Aug 30 23:26:10 2010
New Revision: 212038
URL: http://svn.freebsd.org/changeset/base/212038

Log:
  Because it is very hard to make fork(2) from threaded process safe (we are
  limited to async-signal safe functions in the child process), move all hooks
  execution to the main (non-threaded) process.
  
  Do it by maintaining connection (socketpair) between child and parent
  and sending events from the child to parent, so it can execute the hook.
  
  This is step in right direction for others reasons too. For example there is
  one less problem to drop privs in worker processes.
  
  MFC after:	2 weeks
  Obtained from:	Wheel Systems Sp. z o.o. http://www.wheelsystems.com

Modified:
  head/sbin/hastd/Makefile
  head/sbin/hastd/hast.h
  head/sbin/hastd/hastd.c
  head/sbin/hastd/primary.c
  head/sbin/hastd/secondary.c

Modified: head/sbin/hastd/Makefile
==============================================================================
--- head/sbin/hastd/Makefile	Mon Aug 30 23:19:21 2010	(r212037)
+++ head/sbin/hastd/Makefile	Mon Aug 30 23:26:10 2010	(r212038)
@@ -5,7 +5,7 @@
 PROG=	hastd
 SRCS=	activemap.c
 SRCS+=	control.c
-SRCS+=	ebuf.c
+SRCS+=	ebuf.c event.c
 SRCS+=	hast_proto.c hastd.c hooks.c
 SRCS+=	metadata.c
 SRCS+=	nv.c

Modified: head/sbin/hastd/hast.h
==============================================================================
--- head/sbin/hastd/hast.h	Mon Aug 30 23:19:21 2010	(r212037)
+++ head/sbin/hastd/hast.h	Mon Aug 30 23:26:10 2010	(r212038)
@@ -181,6 +181,8 @@ struct hast_resource {
 	pid_t	hr_workerpid;
 	/* Control connection between parent and child. */
 	struct proto_conn *hr_ctrl;
+	/* Events from child to parent. */
+	struct proto_conn *hr_event;
 
 	/* Activemap structure. */
 	struct activemap *hr_amp;

Modified: head/sbin/hastd/hastd.c
==============================================================================
--- head/sbin/hastd/hastd.c	Mon Aug 30 23:19:21 2010	(r212037)
+++ head/sbin/hastd/hastd.c	Mon Aug 30 23:26:10 2010	(r212038)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <pjdlog.h>
 
 #include "control.h"
+#include "event.h"
 #include "hast.h"
 #include "hast_proto.h"
 #include "hastd.h"
@@ -158,6 +159,11 @@ child_exit(void)
 		    role2str(res->hr_role));
 		child_exit_log(pid, status);
 		proto_close(res->hr_ctrl);
+		res->hr_ctrl = NULL;
+		if (res->hr_event != NULL) {
+			proto_close(res->hr_event);
+			res->hr_event = NULL;
+		}
 		res->hr_workerpid = 0;
 		if (res->hr_role == HAST_ROLE_PRIMARY) {
 			/*
@@ -624,9 +630,10 @@ close:
 static void
 main_loop(void)
 {
-	fd_set rfds;
-	int cfd, lfd, maxfd, ret;
+	struct hast_resource *res;
 	struct timeval timeout;
+	int fd, maxfd, ret;
+	fd_set rfds;
 
 	timeout.tv_sec = REPORT_INTERVAL;
 	timeout.tv_usec = 0;
@@ -646,14 +653,20 @@ main_loop(void)
 			hastd_reload();
 		}
 
-		cfd = proto_descriptor(cfg->hc_controlconn);
-		lfd = proto_descriptor(cfg->hc_listenconn);
-		maxfd = cfd > lfd ? cfd : lfd;
-
 		/* Setup descriptors for select(2). */
 		FD_ZERO(&rfds);
-		FD_SET(cfd, &rfds);
-		FD_SET(lfd, &rfds);
+		maxfd = fd = proto_descriptor(cfg->hc_controlconn);
+		FD_SET(fd, &rfds);
+		fd = proto_descriptor(cfg->hc_listenconn);
+		FD_SET(fd, &rfds);
+		maxfd = fd > maxfd ? fd : maxfd;
+		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
+			if (res->hr_event == NULL)
+				continue;
+			fd = proto_descriptor(res->hr_event);
+			FD_SET(fd, &rfds);
+			maxfd = fd > maxfd ? fd : maxfd;
+		}
 
 		ret = select(maxfd + 1, &rfds, NULL, NULL, &timeout);
 		if (ret == 0)
@@ -665,10 +678,21 @@ main_loop(void)
 			pjdlog_exit(EX_OSERR, "select() failed");
 		}
 
-		if (FD_ISSET(cfd, &rfds))
+		if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
 			control_handle(cfg);
-		if (FD_ISSET(lfd, &rfds))
+		if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
 			listen_accept();
+		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
+			if (res->hr_event == NULL)
+				continue;
+			if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
+				if (event_recv(res) == 0)
+					continue;
+				/* The worker process exited? */
+				proto_close(res->hr_event);
+				res->hr_event = NULL;
+			}
+		}
 	}
 }
 

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c	Mon Aug 30 23:19:21 2010	(r212037)
+++ head/sbin/hastd/primary.c	Mon Aug 30 23:26:10 2010	(r212038)
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
 #include <rangelock.h>
 
 #include "control.h"
+#include "event.h"
 #include "hast.h"
 #include "hast_proto.h"
 #include "hastd.h"
@@ -226,12 +227,6 @@ static void *sync_thread(void *arg);
 static void *guard_thread(void *arg);
 
 static void
-dummy_sighandler(int sig __unused)
-{
-	/* Nothing to do. */
-}
-
-static void
 cleanup(struct hast_resource *res)
 {
 	int rerrno;
@@ -429,8 +424,6 @@ init_environment(struct hast_resource *r
 	/*
 	 * Turn on signals handling.
 	 */
-	/* Because SIGCHLD is ignored by default, setup dummy handler for it. */
-	PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR);
 	PJDLOG_VERIFY(sigfillset(&mask) == 0);
 	PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
 }
@@ -672,11 +665,11 @@ init_remote(struct hast_resource *res, s
 		res->hr_remotein = in;
 		res->hr_remoteout = out;
 	}
-	hook_exec(res->hr_exec, "connect", res->hr_name, NULL);
+	event_send(res, EVENT_CONNECT);
 	return (true);
 close:
 	if (errmsg != NULL && strcmp(errmsg, "Split-brain condition!") == 0)
-		hook_exec(res->hr_exec, "split-brain", res->hr_name, NULL);
+		event_send(res, EVENT_SPLITBRAIN);
 	proto_close(out);
 	if (in != NULL)
 		proto_close(in);
@@ -774,6 +767,14 @@ hastd_primary(struct hast_resource *res)
 		pjdlog_exit(EX_OSERR,
 		    "Unable to create control sockets between parent and child");
 	}
+	/*
+	 * Create communication channel between child and parent.
+	 */
+	if (proto_client("socketpair://", &res->hr_event) < 0) {
+		KEEP_ERRNO((void)pidfile_remove(pfh));
+		pjdlog_exit(EX_OSERR,
+		    "Unable to create event sockets between child and parent");
+	}
 
 	pid = fork();
 	if (pid < 0) {
@@ -783,6 +784,8 @@ hastd_primary(struct hast_resource *res)
 
 	if (pid > 0) {
 		/* This is parent. */
+		/* Declare that we are receiver. */
+		proto_recv(res->hr_event, NULL, 0);
 		res->hr_workerpid = pid;
 		return;
 	}
@@ -797,7 +800,9 @@ hastd_primary(struct hast_resource *res)
 	signal(SIGHUP, SIG_DFL);
 	signal(SIGCHLD, SIG_DFL);
 
-	hook_init();
+	/* Declare that we are sender. */
+	proto_send(res->hr_event, NULL, 0);
+
 	init_local(res);
 	if (real_remote(res) && init_remote(res, NULL, NULL))
 		sync_start();
@@ -896,7 +901,7 @@ remote_close(struct hast_resource *res, 
 	 */
 	sync_stop();
 
-	hook_exec(res->hr_exec, "disconnect", res->hr_name, NULL);
+	event_send(res, EVENT_DISCONNECT);
 }
 
 /*
@@ -1512,7 +1517,7 @@ sync_thread(void *arg __unused)
 			pjdlog_info("Synchronization interrupted. "
 			    "%jd bytes synchronized so far.",
 			    (intmax_t)synced);
-			hook_exec(res->hr_exec, "syncintr", res->hr_name, NULL);
+			event_send(res, EVENT_SYNCINTR);
 		}
 		while (!sync_inprogress) {
 			dorewind = true;
@@ -1545,8 +1550,7 @@ sync_thread(void *arg __unused)
 				pjdlog_info("Synchronization started. %ju bytes to go.",
 				    (uintmax_t)(res->hr_extentsize *
 				    activemap_ndirty(res->hr_amp)));
-				hook_exec(res->hr_exec, "syncstart",
-				    res->hr_name, NULL);
+				event_send(res, EVENT_SYNCSTART);
 			}
 		}
 		if (offset < 0) {
@@ -1563,8 +1567,7 @@ sync_thread(void *arg __unused)
 					pjdlog_info("Synchronization complete. "
 					    "%jd bytes synchronized.",
 					    (intmax_t)synced);
-					hook_exec(res->hr_exec, "syncdone",
-					    res->hr_name, NULL);
+					event_send(res, EVENT_SYNCDONE);
 				}
 				mtx_lock(&metadata_lock);
 				res->hr_syncsrc = HAST_SYNCSRC_UNDEF;
@@ -1950,7 +1953,6 @@ guard_thread(void *arg)
 	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
 	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
 	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
-	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
 
 	timeout.tv_nsec = 0;
 	signo = -1;
@@ -1969,7 +1971,6 @@ guard_thread(void *arg)
 		default:
 			break;
 		}
-		hook_check(signo == SIGCHLD);
 
 		pjdlog_debug(2, "remote_guard: Checking connections.");
 		now = time(NULL);

Modified: head/sbin/hastd/secondary.c
==============================================================================
--- head/sbin/hastd/secondary.c	Mon Aug 30 23:19:21 2010	(r212037)
+++ head/sbin/hastd/secondary.c	Mon Aug 30 23:26:10 2010	(r212038)
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
 #include <pjdlog.h>
 
 #include "control.h"
+#include "event.h"
 #include "hast.h"
 #include "hast_proto.h"
 #include "hastd.h"
@@ -325,7 +326,7 @@ init_remote(struct hast_resource *res, s
 	if (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
 	     res->hr_primary_localcnt > res->hr_secondary_remotecnt) {
 		/* Exit on split-brain. */
-		hook_exec(res->hr_exec, "split-brain", res->hr_name, NULL);
+		event_send(res, EVENT_SPLITBRAIN);
 		exit(EX_CONFIG);
 	}
 }
@@ -345,6 +346,14 @@ hastd_secondary(struct hast_resource *re
 		pjdlog_exit(EX_OSERR,
 		    "Unable to create control sockets between parent and child");
 	}
+	/*
+	 * Create communication channel between child and parent.
+	 */
+	if (proto_client("socketpair://", &res->hr_event) < 0) {
+		KEEP_ERRNO((void)pidfile_remove(pfh));
+		pjdlog_exit(EX_OSERR,
+		    "Unable to create event sockets between child and parent");
+	}
 
 	pid = fork();
 	if (pid < 0) {
@@ -358,6 +367,8 @@ hastd_secondary(struct hast_resource *re
 		res->hr_remotein = NULL;
 		proto_close(res->hr_remoteout);
 		res->hr_remoteout = NULL;
+		/* Declare that we are receiver. */
+		proto_recv(res->hr_event, NULL, 0);
 		res->hr_workerpid = pid;
 		return;
 	}
@@ -372,17 +383,19 @@ hastd_secondary(struct hast_resource *re
 	signal(SIGHUP, SIG_DFL);
 	signal(SIGCHLD, SIG_DFL);
 
+	/* Declare that we are sender. */
+	proto_send(res->hr_event, NULL, 0);
+
 	/* Error in setting timeout is not critical, but why should it fail? */
 	if (proto_timeout(res->hr_remotein, 0) < 0)
 		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
 	if (proto_timeout(res->hr_remoteout, res->hr_timeout) < 0)
 		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
 
-	hook_init();
 	init_local(res);
 	init_remote(res, nvin);
 	init_environment();
-	hook_exec(res->hr_exec, "connect", res->hr_name, NULL);
+	event_send(res, EVENT_CONNECT);
 
 	error = pthread_create(&td, NULL, recv_thread, res);
 	assert(error == 0);
@@ -515,7 +528,7 @@ secondary_exit(int exitcode, const char 
 	va_start(ap, fmt);
 	pjdlogv_errno(LOG_ERR, fmt, ap);
 	va_end(ap);
-	hook_exec(gres->hr_exec, "disconnect", gres->hr_name, NULL);
+	event_send(gres, EVENT_DISCONNECT);
 	exit(exitcode);
 }
 


More information about the svn-src-all mailing list