svn commit: r214692 - head/sbin/hastd

Pawel Jakub Dawidek pjd at FreeBSD.org
Tue Nov 2 22:13:08 UTC 2010


Author: pjd
Date: Tue Nov  2 22:13:08 2010
New Revision: 214692
URL: http://svn.freebsd.org/changeset/base/214692

Log:
  Send packets to remote node only via the send thread to avoid possible
  races - in this case a keepalive packet was send from wrong thread which
  lead to connection dropping, because of corrupted packet.
  
  Fix it by sending keepalive packets directly from the send thread.
  As a bonus we now send keepalive packets only when connection is idle.
  
  Submitted by:	Mikolaj Golub <to.my.trociny at gmail.com>
  MFC after:	3 days

Modified:
  head/sbin/hastd/primary.c

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c	Tue Nov  2 22:12:12 2010	(r214691)
+++ head/sbin/hastd/primary.c	Tue Nov  2 22:13:08 2010	(r214692)
@@ -180,14 +180,21 @@ static pthread_mutex_t metadata_lock;
 	if (_wakeup)							\
 		cv_signal(&hio_##name##_list_cond);			\
 } while (0)
-#define	QUEUE_TAKE1(hio, name, ncomp)	do {				\
+#define	QUEUE_TAKE1(hio, name, ncomp, timeout)	do {			\
+	bool _last;							\
+									\
 	mtx_lock(&hio_##name##_list_lock[(ncomp)]);			\
-	while (((hio) = TAILQ_FIRST(&hio_##name##_list[(ncomp)])) == NULL) { \
-		cv_wait(&hio_##name##_list_cond[(ncomp)],		\
-		    &hio_##name##_list_lock[(ncomp)]);			\
+	_last = false;							\
+	while (((hio) = TAILQ_FIRST(&hio_##name##_list[(ncomp)])) == NULL && !_last) { \
+		cv_timedwait(&hio_##name##_list_cond[(ncomp)],		\
+		    &hio_##name##_list_lock[(ncomp)], (timeout));	\
+		if ((timeout) != 0) 					\
+			_last = true;					\
+	}								\
+	if (hio != NULL) {						\
+		TAILQ_REMOVE(&hio_##name##_list[(ncomp)], (hio),	\
+		    hio_next[(ncomp)]);					\
 	}								\
-	TAILQ_REMOVE(&hio_##name##_list[(ncomp)], (hio),		\
-	    hio_next[(ncomp)]);						\
 	mtx_unlock(&hio_##name##_list_lock[(ncomp)]);			\
 } while (0)
 #define	QUEUE_TAKE2(hio, name)	do {					\
@@ -1112,7 +1119,7 @@ local_send_thread(void *arg)
 
 	for (;;) {
 		pjdlog_debug(2, "local_send: Taking request.");
-		QUEUE_TAKE1(hio, send, ncomp);
+		QUEUE_TAKE1(hio, send, ncomp, 0);
 		pjdlog_debug(2, "local_send: (%p) Got request.", hio);
 		ggio = &hio->hio_ggio;
 		switch (ggio->gctl_cmd) {
@@ -1176,6 +1183,38 @@ local_send_thread(void *arg)
 	return (NULL);
 }
 
+static void
+keepalive_send(struct hast_resource *res, unsigned int ncomp)
+{
+	struct nv *nv;
+
+	if (!ISCONNECTED(res, ncomp))
+		return;
+	
+	assert(res->hr_remotein != NULL);
+	assert(res->hr_remoteout != NULL);
+
+	nv = nv_alloc();
+	nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
+	if (nv_error(nv) != 0) {
+		nv_free(nv);
+		pjdlog_debug(1,
+		    "keepalive_send: Unable to prepare header to send.");
+		return;
+	}
+	if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
+		pjdlog_common(LOG_DEBUG, 1, errno,
+		    "keepalive_send: Unable to send request");
+		nv_free(nv);
+		rw_unlock(&hio_remote_lock[ncomp]);
+		remote_close(res, ncomp);
+		rw_rlock(&hio_remote_lock[ncomp]);
+		return;
+	}
+	nv_free(nv);
+	pjdlog_debug(2, "keepalive_send: Request sent.");
+}
+
 /*
  * Thread sends request to secondary node.
  */
@@ -1184,6 +1223,7 @@ remote_send_thread(void *arg)
 {
 	struct hast_resource *res = arg;
 	struct g_gate_ctl_io *ggio;
+	time_t lastcheck, now;
 	struct hio *hio;
 	struct nv *nv;
 	unsigned int ncomp;
@@ -1194,10 +1234,19 @@ remote_send_thread(void *arg)
 
 	/* Remote component is 1 for now. */
 	ncomp = 1;
+	lastcheck = time(NULL);	
 
 	for (;;) {
 		pjdlog_debug(2, "remote_send: Taking request.");
-		QUEUE_TAKE1(hio, send, ncomp);
+		QUEUE_TAKE1(hio, send, ncomp, RETRY_SLEEP);
+		if (hio == NULL) {
+			now = time(NULL);
+			if (lastcheck + RETRY_SLEEP <= now) {
+				keepalive_send(res, ncomp);
+				lastcheck = now;
+			}
+			continue;
+		}
 		pjdlog_debug(2, "remote_send: (%p) Got request.", hio);
 		ggio = &hio->hio_ggio;
 		switch (ggio->gctl_cmd) {
@@ -1883,32 +1932,6 @@ failed:
 }
 
 static void
-keepalive_send(struct hast_resource *res, unsigned int ncomp)
-{
-	struct nv *nv;
-
-	nv = nv_alloc();
-	nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
-	if (nv_error(nv) != 0) {
-		nv_free(nv);
-		pjdlog_debug(1,
-		    "keepalive_send: Unable to prepare header to send.");
-		return;
-	}
-	if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
-		pjdlog_common(LOG_DEBUG, 1, errno,
-		    "keepalive_send: Unable to send request");
-		nv_free(nv);
-		rw_unlock(&hio_remote_lock[ncomp]);
-		remote_close(res, ncomp);
-		rw_rlock(&hio_remote_lock[ncomp]);
-		return;
-	}
-	nv_free(nv);
-	pjdlog_debug(2, "keepalive_send: Request sent.");
-}
-
-static void
 guard_one(struct hast_resource *res, unsigned int ncomp)
 {
 	struct proto_conn *in, *out;
@@ -1926,12 +1949,6 @@ guard_one(struct hast_resource *res, uns
 	if (ISCONNECTED(res, ncomp)) {
 		assert(res->hr_remotein != NULL);
 		assert(res->hr_remoteout != NULL);
-		keepalive_send(res, ncomp);
-	}
-
-	if (ISCONNECTED(res, ncomp)) {
-		assert(res->hr_remotein != NULL);
-		assert(res->hr_remoteout != NULL);
 		rw_unlock(&hio_remote_lock[ncomp]);
 		pjdlog_debug(2, "remote_guard: Connection to %s is ok.",
 		    res->hr_remoteaddr);


More information about the svn-src-head mailing list