kern/154504: PF_LOCAL stream connection is stuck in sbwait when recv(MSG_WAITALL) is used

Andrey Simonenko simon at comsys.ntu-kpi.kiev.ua
Fri Feb 4 10:00:25 UTC 2011


>Number:         154504
>Category:       kern
>Synopsis:       PF_LOCAL stream connection is stuck in sbwait when recv(MSG_WAITALL) is used
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Fri Feb 04 10:00:23 UTC 2011
>Closed-Date:
>Last-Modified:
>Originator:     Andrey Simonenko
>Release:        FreeBSD 9.0-CURRENT and 8-STABLE
>Organization:
>Environment:

FreeBSD 9.0-CURRENT and 8-STABLE.

>Description:

Suppose there are a client and a server that use stream sockets in the
PF_LOCAL communication domain.  The client sends data to the server; having
received that data, the server sends data back to the client, and the client
receives it.  Both of them use recv(2) with the MSG_WAITALL flag.

At some point during this exchange the client, and sometimes the server as
well, gets stuck in sbwait.  I could not reproduce this situation on FreeBSD
7.1-STABLE, but I can reproduce it on 8.2-PRERELEASE (real amd64 hardware)
and on a just-updated 9.0-CURRENT (qemu for amd64).

>How-To-Repeat:

I wrote a test program.  Run "msg_waitall -s" in one terminal and run
"msg_waitall_client.sh" in another terminal.  At some point the client will
get stuck in sbwait; sometimes the server is also blocked in sbwait:

 2297 sbwait ./msg_waitall -s
 2298 wait   /bin/sh ./msg_waitall_client.sh
 3189 sbwait ./msg_waitall -c

All tests were made with the default net.local.stream.recvspace and
net.local.stream.sendspace values.  If these variables are set to smaller
values, the blocking should occur sooner.

The blocking also occurs on 9.0-CURRENT when TCP_SORECEIVE_STREAM is
defined.

diff -ruNp msg_waitall.orig/Makefile msg_waitall/Makefile
--- msg_waitall.orig/Makefile	1970-01-01 03:00:00.000000000 +0300
+++ msg_waitall/Makefile	2011-02-02 13:30:25.000000000 +0200
@@ -0,0 +1,7 @@
+PROG=msg_waitall
+
+NO_MAN=
+
+WARNS=6
+
+.include <bsd.prog.mk>
diff -ruNp msg_waitall.orig/msg_waitall.c msg_waitall/msg_waitall.c
--- msg_waitall.orig/msg_waitall.c	1970-01-01 03:00:00.000000000 +0300
+++ msg_waitall/msg_waitall.c	2011-02-03 17:39:50.000000000 +0200
@@ -0,0 +1,186 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define SOCKET_PATH	"./socket"
+#define BUFFER_SIZE	(30 * 1024)
+
+static void
+client(int use_read, int use_write)
+{
+	char buf[BUFFER_SIZE];
+	struct sockaddr_un un;
+	ssize_t nread, nsent;
+	int fd;
+
+	printf("CLIENT: using %s() and %s()\n",
+	    use_read ? "read" : "recv", use_write ? "write" : "send");
+
+	if (sizeof(SOCKET_PATH) > sizeof(un.sun_path))
+		errx(EXIT_FAILURE, "path %s is too long", SOCKET_PATH);
+
+	memset(&un, 0, sizeof(un));
+	un.sun_family = PF_LOCAL;
+	strncpy(un.sun_path, SOCKET_PATH, sizeof(un.sun_path) - 1);
+
+	fd = socket(PF_LOCAL, SOCK_STREAM, 0);
+	if (fd < 0)
+		err(EXIT_FAILURE, "socket");
+
+	if (connect(fd, (struct sockaddr *)&un, SUN_LEN(&un)) < 0)
+		err(EXIT_FAILURE, "connect");
+
+	if (use_write) {
+		nsent = write(fd, buf, sizeof(buf));
+		if (nsent < 0)
+			err(EXIT_FAILURE, "write");
+	} else {
+		nsent = send(fd, buf, sizeof(buf), 0);
+		if (nsent < 0)
+			err(EXIT_FAILURE, "send");
+	}
+			
+	if (nsent != -1 && nsent != sizeof(buf))
+		errx(EXIT_FAILURE, "short send: %zd of %zu",
+		    nsent, sizeof(buf));
+
+	fprintf(stderr, "sent");
+
+	if (use_read) {
+		nread = read(fd, buf, sizeof(buf));
+		if (nread < 0)
+			err(EXIT_FAILURE, "read");
+	} else {
+		nread = recv(fd, buf, sizeof(buf), MSG_WAITALL);
+		if (nread < 0)
+			err(EXIT_FAILURE, "recv");
+	}
+
+	if (nread != -1 && nread != sizeof(buf))
+		errx(EXIT_FAILURE, "short read: %zd of %zu",
+		    nread, sizeof(buf));
+
+	fprintf(stderr, ", received\n");
+
+	if (close(fd) < 0)
+		err(EXIT_FAILURE, "close");
+}
+
+static void
+server(int use_read, int use_write)
+{
+	char buf[BUFFER_SIZE];
+	struct sockaddr_un un;
+	ssize_t nread, nsent;
+	int fdl, fd;
+
+	printf("SERVER: using %s() and %s()\n",
+	    use_read ? "read" : "recv", use_write ? "write" : "send");
+
+	signal(SIGPIPE, SIG_IGN);
+
+	if (sizeof(SOCKET_PATH) > sizeof(un.sun_path))
+		errx(EXIT_FAILURE, "path %s is too long", SOCKET_PATH);
+
+	fdl = socket(PF_LOCAL, SOCK_STREAM, 0);
+	if (fdl < 0)
+		err(EXIT_FAILURE, "socket");
+
+	if (unlink(SOCKET_PATH) < 0)
+		if (errno != ENOENT)
+			err(EXIT_FAILURE, "unlink");
+
+	memset(&un, 0, sizeof(un));
+	un.sun_family = PF_LOCAL;
+	strncpy(un.sun_path, SOCKET_PATH, sizeof(un.sun_path) - 1);
+	if (bind(fdl, (struct sockaddr *)&un, SUN_LEN(&un)) < 0)
+		err(EXIT_FAILURE, "bind");
+
+	if (listen(fdl, 10) < 0)
+		err(EXIT_FAILURE, "listen");
+
+	for (;;) {
+		fd = accept(fdl, (struct sockaddr *)NULL, (socklen_t *)NULL);
+		if (fd < 0)
+			err(EXIT_FAILURE, "accept");
+
+		if (use_read) {
+			nread = read(fd, buf, sizeof(buf));
+			if (nread < 0)
+				warn("read");
+		} else {
+			nread = recv(fd, buf, sizeof(buf), MSG_WAITALL);
+			if (nread < 0)
+				warn("recv");
+		}
+
+		if (nread != -1 && nread != sizeof(buf))
+			warnx("short read: %zd of %zu",
+			    nread, sizeof(buf));
+
+		fprintf(stderr, "received");
+
+		if (use_write) {
+			nsent = write(fd, buf, sizeof(buf));
+			if (nsent < 0)
+				warn("write");
+		} else {
+			nsent = send(fd, buf, sizeof(buf), 0);
+			if (nsent < 0)
+				warn("send");
+		}
+
+		if (nsent != -1 && nsent != sizeof(buf))
+			warnx("short send: %zd of %zu",
+			    nsent, sizeof(buf));
+
+		fprintf(stderr, ", sent\n");
+
+		if (close(fd) < 0)
+			warn("close");
+	}
+}
+
+int
+main(int argc, char *argv[])
+{
+	int opt, opt_s, opt_c, opt_r, opt_w;
+
+	opt_s = opt_c = opt_r = opt_w = 0;
+	while ((opt = getopt(argc, argv, "scrw")) != -1)
+		switch (opt) {
+		case 's':
+			opt_s = 1;
+			break;
+		case 'c':
+			opt_c = 1;
+			break;
+		case 'r':
+			opt_r = 1;
+			break;
+		case 'w':
+			opt_w = 1;
+			break;
+		default:
+			errx(EXIT_FAILURE, "Usage: %s: [-rw] -s|c",
+			    getprogname());
+		}
+
+	if (opt_s == 0 && opt_c == 0)
+		errx(EXIT_FAILURE, "specify -s or -c");
+
+	if (opt_c)
+		client(opt_r, opt_w);
+	else
+		server(opt_r, opt_w);
+
+	return (EXIT_SUCCESS);
+}
diff -ruNp msg_waitall.orig/msg_waitall_client.sh msg_waitall/msg_waitall_client.sh
--- msg_waitall.orig/msg_waitall_client.sh	1970-01-01 03:00:00.000000000 +0300
+++ msg_waitall/msg_waitall_client.sh	2011-02-03 13:20:03.000000000 +0200
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+while [ true ]; do
+	./msg_waitall -c || exit 1
+done
+


>Fix:


>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list