misc/146845: close() return error 54 (connection reset by peer) wrongly

Anton Lavrentiev lavr at ncbi.nlm.nih.gov
Sat May 22 22:10:02 UTC 2010


>Number:         146845
>Category:       misc
>Synopsis:       close() return error 54 (connection reset by peer) wrongly
>Confidential:   no
>Severity:       serious
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sat May 22 22:10:01 UTC 2010
>Closed-Date:
>Last-Modified:
>Originator:     Anton Lavrentiev
>Release:        8.0
>Organization:
NCBI/NLM/NIH
>Environment:
FreeBSD beastie3.ncbi.nlm.nih.gov 8.0-RELEASE-p2 FreeBSD 8.0-RELEASE-p2 #0: Tue Jan  5 16:02:27 UTC 2010     root at i386-builder.daemonology.net:/usr/obj/usr/src/sys/GENERIC  i386
>Description:
When an INET socket is being closed from two ends almost simultaneously,
with both sides using a series of SO_LINGER, shutdown and close, and one
side being slightly behind the other, the side closing last gets -1
from close() on such a socket, and errno 54, Connection reset by peer,
even though there was no data in the socket (so it more likely seems that
the FIN that the late shutdown() was assumed to send was counted towards
the actual data, but it should not have).  No other UNIX flavor (and we
have many here at NCBI) exhibits this kind of problem.  Also, the problem
is not 100% reproducible on FreeBSD (but is pretty frequent, nonetheless).
We have seen this very same problem on both version 6 and 8 of the OS
(reporting here for 8).
>How-To-Repeat:
In the attached, there is a server-client C program that reproduces the
problem (although, the program itself has to be started a few times before
the actual error occurs).  At the bottom of the attached C program,
a ktrace of the communicating processes, and errno 54 coming out of
close().  select(0,...) is used to slowdown one process to imitate some
of its midwork before it is ready to proceed with actual socket disposal.
If either shutdown() or setsockopt(SO_LINGER) are excluded from the code,
the problem in close() seems to be gone.

while ktrace -i ./a.out; do date; done
kdump

>Fix:
There are workarounds (as to not to use either setsockopt(SO_LINGER) or
shutdown(SHUT_WR), or both) but they do not fix the problem in general.


Patch attached with submission follows:

#include <errno.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

#define BUG

int main(void)
{
    char buf[40];
    int a, s, len;
    struct sockaddr_in sin;
    struct linger lg;
    pid_t pid;

    if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        perror("socket");
        return 1;
    }
    memset(&sin, 0, sizeof(sin));
    sin.sin_len = sizeof(sin);
    sin.sin_family = AF_INET;
    sin.sin_port = htons(7890);
    if (bind(s, (struct sockaddr*) &sin, sizeof(sin)) < 0) {
        perror("bind");
        return 1;
    }
    if (listen(s, 100) < 0) {
        perror("listen");
        return 1;
    }
    if (pid = fork()) {
        len = sizeof(sin);
        memset(&sin, 0, sizeof(sin));
        if ((a = accept(s, (struct sockaddr*) &sin, &len)) < 0) {
            perror("accept");
            return 1;
        }
        close(s);
        s = a;
    } else {
        close(s);
        if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
            perror("socket");
            return 1;
        } 
        if (connect(s, (struct sockaddr*) &sin, sizeof(sin)) < 0) {
            perror("connect");
            return 1;
        }
    }
    if (write(s, "Hello\n", 6) < 6) {
        perror("write");
        return 1;
    }
    if ((len = read(s, buf, sizeof(buf))) < 0) {
        perror("read");
        return 1;
    }
    lg.l_linger = 5;
    lg.l_onoff  = 1;
    if (setsockopt(s, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg)) < 0) {
        perror("setsockopt");
        return 1;
    }
    if (pid) {
        struct timeval tv = {0, 350000} ;
        select(0, 0, 0, 0, &tv);
    }
#ifdef BUG
    if (shutdown(s, SHUT_WR) < 0) {
        perror("shutdown");
        return 1;
    }
#endif
    errno = 0;
    a = close(s);
    if (a < 0)
        perror("close");
    printf("%.*s %d %d\n", len, buf, a, errno);
    return a; 
}

#if 0

 68009 a.out    CALL  socket(PF_INET,SOCK_STREAM,IPPROTO_IP)
 68009 a.out    RET   socket 3
 68009 a.out    CALL  bind(0x3,0xbfbfe9dc,0x10)
 68009 a.out    STRU  struct sockaddr { AF_INET, 0.0.0.0:7890 }
 68009 a.out    RET   bind 0
 68009 a.out    CALL  listen(0x3,0x64)
 68009 a.out    RET   listen 0
 68009 a.out    CALL  fork
 68009 a.out    RET   fork 68010/0x109aa
 68010 a.out    RET   fork 0
 68009 a.out    CALL  accept(0x3,0xbfbfe9dc,0xbfbfe9ec)
 68010 a.out    CALL  close(0x3)
 68010 a.out    RET   close 0
 68010 a.out    CALL  socket(PF_INET,SOCK_STREAM,IPPROTO_IP)
 68010 a.out    RET   socket 3
 68010 a.out    CALL  connect(0x3,0xbfbfe9dc,0x10)
 68010 a.out    STRU  struct sockaddr { AF_INET, 0.0.0.0:7890 }
 68010 a.out    RET   connect 0
 68010 a.out    CALL  write(0x3,0x8048bc3,0x6)
 68010 a.out    GIO   fd 3 wrote 6 bytes
       "Hello
       "
 68009 a.out    STRU  struct sockaddr { AF_INET, 127.0.0.1:12597 }
 68010 a.out    RET   write 6
 68009 a.out    RET   accept 4
 68010 a.out    CALL  read(0x3,0xbfbfe9f0,0x28)
 68009 a.out    CALL  close(0x3)
 68009 a.out    RET   close 0
 68009 a.out    CALL  write(0x4,0x8048bc3,0x6)
 68009 a.out    GIO   fd 4 wrote 6 bytes
       "Hello
       "
 68009 a.out    RET   write 6
 68009 a.out    CALL  read(0x4,0xbfbfe9f0,0x28)
 68010 a.out    GIO   fd 3 read 6 bytes
       "Hello
       "
 68010 a.out    RET   read 6
 68009 a.out    GIO   fd 4 read 6 bytes
       "Hello
       "
 68009 a.out    RET   read 6
 68010 a.out    CALL  setsockopt(0x3,SOL_SOCKET,SO_LINGER,0xbfbfe9d4,0x8)
 68010 a.out    RET   setsockopt 0
 68009 a.out    CALL  setsockopt(0x4,SOL_SOCKET,SO_LINGER,0xbfbfe9d4,0x8)
 68009 a.out    RET   setsockopt 0
 68010 a.out    CALL  shutdown(0x3,<invalid=1>)
 68009 a.out    CALL  select(0,0,0,0,0xbfbfe9cc)
 68010 a.out    RET   shutdown 0
 68010 a.out    CALL  close(0x3)
 68010 a.out    RET   close 0
 68010 a.out    CALL  write(0x1,0x28201000,0x6)
 68010 a.out    GIO   fd 1 wrote 6 bytes
       "Hello
       "
 68010 a.out    RET   write 6
 68010 a.out    CALL  write(0x1,0x28201000,0x5)
 68010 a.out    GIO   fd 1 wrote 5 bytes
       " 0 0
       "
 68010 a.out    RET   write 5
 68010 a.out    CALL  exit(0)
 68009 a.out    RET   select 0
 68009 a.out    CALL  shutdown(0x4,<invalid=1>)
 68009 a.out    RET   shutdown 0
 68009 a.out    CALL  close(0x4)
 68009 a.out    RET   close -1 errno 54 Connection reset by peer           <<<=== BUG!!!
 68009 a.out    CALL  writev(0x2,0xbfbfe168,0x4)
 68009 a.out    GIO   fd 2 wrote 32 bytes
       "close: Connection reset by peer
       "
 68009 a.out    RET   writev 32/0x20
 68009 a.out    CALL  write(0x1,0x28201000,0x6)
 68009 a.out    GIO   fd 1 wrote 6 bytes
       "Hello
       "
 68009 a.out    RET   write 6
 68009 a.out    CALL  write(0x1,0x28201000,0x7)
 68009 a.out    GIO   fd 1 wrote 7 bytes
       " -1 54
       "
 68009 a.out    RET   write 7
 68009 a.out    CALL  exit(0xffffffff)

#endif


>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list