svn commit: r213007 - head/sbin/hastd

Pawel Jakub Dawidek pjd at FreeBSD.org
Wed Sep 22 19:03:12 UTC 2010


Author: pjd
Date: Wed Sep 22 19:03:11 2010
New Revision: 213007
URL: http://svn.freebsd.org/changeset/base/213007

Log:
  Fix a possible deadlock where the worker process sends an event to the main
  process while the main process sends a control message to the worker process,
  but the worker process hasn't started its control thread yet, because it is
  waiting for a reply from the main process.
  
  The fix is to start the control thread before sending any events.
  
  Reported and fix suggested by:	Mikolaj Golub <to.my.trociny at gmail.com>
  MFC after:	3 days

Modified:
  head/sbin/hastd/primary.c
  head/sbin/hastd/secondary.c

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c	Wed Sep 22 18:57:06 2010	(r213006)
+++ head/sbin/hastd/primary.c	Wed Sep 22 19:03:11 2010	(r213007)
@@ -807,10 +807,20 @@ hastd_primary(struct hast_resource *res)
 	proto_send(res->hr_event, NULL, 0);
 
 	init_local(res);
-	if (real_remote(res) && init_remote(res, NULL, NULL))
-		sync_start();
 	init_ggate(res);
 	init_environment(res);
+	/*
+	 * Create the control thread before sending any event to the parent,
+	 * as we can deadlock when parent sends control request to worker,
+	 * but worker has no control thread started yet, so parent waits.
+	 * In the meantime worker sends an event to the parent, but parent
+	 * is unable to handle the event, because it waits for control
+	 * request response.
+	 */
+	error = pthread_create(&td, NULL, ctrl_thread, res);
+	assert(error == 0);
+	if (real_remote(res) && init_remote(res, NULL, NULL))
+		sync_start();
 	error = pthread_create(&td, NULL, ggate_recv_thread, res);
 	assert(error == 0);
 	error = pthread_create(&td, NULL, local_send_thread, res);
@@ -823,8 +833,6 @@ hastd_primary(struct hast_resource *res)
 	assert(error == 0);
 	error = pthread_create(&td, NULL, sync_thread, res);
 	assert(error == 0);
-	error = pthread_create(&td, NULL, ctrl_thread, res);
-	assert(error == 0);
 	(void)guard_thread(res);
 }
 

Modified: head/sbin/hastd/secondary.c
==============================================================================
--- head/sbin/hastd/secondary.c	Wed Sep 22 18:57:06 2010	(r213006)
+++ head/sbin/hastd/secondary.c	Wed Sep 22 19:03:11 2010	(r213007)
@@ -393,17 +393,27 @@ hastd_secondary(struct hast_resource *re
 		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
 
 	init_local(res);
-	init_remote(res, nvin);
 	init_environment();
+
+	/*
+	 * Create the control thread before sending any event to the parent,
+	 * as we can deadlock when parent sends control request to worker,
+	 * but worker has no control thread started yet, so parent waits.
+	 * In the meantime worker sends an event to the parent, but parent
+	 * is unable to handle the event, because it waits for control
+	 * request response.
+	 */
+	error = pthread_create(&td, NULL, ctrl_thread, res);
+	assert(error == 0);
+
+	init_remote(res, nvin);
 	event_send(res, EVENT_CONNECT);
 
 	error = pthread_create(&td, NULL, recv_thread, res);
 	assert(error == 0);
 	error = pthread_create(&td, NULL, disk_thread, res);
 	assert(error == 0);
-	error = pthread_create(&td, NULL, send_thread, res);
-	assert(error == 0);
-	(void)ctrl_thread(res);
+	(void)send_thread(res);
 }
 
 static void


More information about the svn-src-head mailing list