tun(4) in -CURRENT: No buffer space available - race condition
patch
John Baldwin
jhb at freebsd.org
Wed Sep 15 21:49:50 UTC 2010
On Monday, September 13, 2010 9:10:01 pm Marcin Cieslak wrote:
> Output queue of tun(4) gets full after some time when sending lots of data.
> I have been observing this on -CURRENT at least since March this year.
>
> Looks like it's a race condition (same in tun(4) and tap(4)),
> the following patch seems to address the issue:
This is a good find. I actually went through these drivers a bit further and
have a bit of a larger patch to extend the locking some. Would you care to
test it?
Index: if_tap.c
===================================================================
--- if_tap.c (revision 212557)
+++ if_tap.c (working copy)
@@ -209,7 +209,6 @@
tap_destroy(struct tap_softc *tp)
{
struct ifnet *ifp = tp->tap_ifp;
- int s;
/* Unlocked read. */
KASSERT(!(tp->tap_flags & TAP_OPEN),
@@ -217,10 +216,8 @@
knlist_destroy(&tp->tap_rsel.si_note);
destroy_dev(tp->tap_dev);
- s = splimp();
ether_ifdetach(ifp);
if_free_type(ifp, IFT_ETHER);
- splx(s);
mtx_destroy(&tp->tap_mtx);
free(tp, M_TAP);
@@ -398,7 +395,7 @@
struct tap_softc *tp = NULL;
unsigned short macaddr_hi;
uint32_t macaddr_mid;
- int unit, s;
+ int unit;
char *name = NULL;
u_char eaddr[6];
@@ -449,15 +446,13 @@
dev->si_drv1 = tp;
tp->tap_dev = dev;
- s = splimp();
ether_ifattach(ifp, eaddr);
- splx(s);
mtx_lock(&tp->tap_mtx);
tp->tap_flags |= TAP_INITED;
mtx_unlock(&tp->tap_mtx);
- knlist_init_mtx(&tp->tap_rsel.si_note, NULL);
+ knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx);
TAPDEBUG("interface %s is created. minor = %#x\n",
ifp->if_xname, dev2unit(dev));
@@ -474,7 +469,7 @@
{
struct tap_softc *tp = NULL;
struct ifnet *ifp = NULL;
- int error, s;
+ int error;
if (tapuopen == 0) {
error = priv_check(td, PRIV_NET_TAP);
@@ -497,15 +492,13 @@
tp->tap_pid = td->td_proc->p_pid;
tp->tap_flags |= TAP_OPEN;
ifp = tp->tap_ifp;
- mtx_unlock(&tp->tap_mtx);
- s = splimp();
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
if (tapuponopen)
ifp->if_flags |= IFF_UP;
+ mtx_unlock(&tp->tap_mtx);
if_link_state_change(ifp, LINK_STATE_UP);
- splx(s);
TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev));
@@ -524,7 +517,6 @@
struct ifaddr *ifa;
struct tap_softc *tp = dev->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
- int s;
/* junk all pending output */
IF_DRAIN(&ifp->if_snd);
@@ -537,25 +529,24 @@
mtx_lock(&tp->tap_mtx);
if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
mtx_unlock(&tp->tap_mtx);
- s = splimp();
if_down(ifp);
+ mtx_lock(&tp->tap_mtx);
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ mtx_unlock(&tp->tap_mtx);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
rtinit(ifa, (int)RTM_DELETE, 0);
}
if_purgeaddrs(ifp);
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ mtx_lock(&tp->tap_mtx);
}
- splx(s);
- } else
- mtx_unlock(&tp->tap_mtx);
+ }
if_link_state_change(ifp, LINK_STATE_DOWN);
funsetown(&tp->tap_sigio);
selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
+ KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
- mtx_lock(&tp->tap_mtx);
tp->tap_flags &= ~TAP_OPEN;
tp->tap_pid = 0;
mtx_unlock(&tp->tap_mtx);
@@ -580,8 +571,10 @@
TAPDEBUG("initializing %s\n", ifp->if_xname);
+ mtx_lock(&tp->tap_mtx);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ mtx_unlock(&tp->tap_mtx);
/* attempt to start output */
tapifstart(ifp);
@@ -599,7 +592,7 @@
struct tap_softc *tp = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data;
struct ifstat *ifs = NULL;
- int s, dummy;
+ int dummy;
switch (cmd) {
case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
@@ -612,7 +605,6 @@
break;
case SIOCGIFSTATUS:
- s = splimp();
ifs = (struct ifstat *)data;
dummy = strlen(ifs->ascii);
mtx_lock(&tp->tap_mtx);
@@ -621,14 +613,10 @@
sizeof(ifs->ascii) - dummy,
"\tOpened by PID %d\n", tp->tap_pid);
mtx_unlock(&tp->tap_mtx);
- splx(s);
break;
default:
- s = splimp();
- dummy = ether_ioctl(ifp, cmd, data);
- splx(s);
- return (dummy);
+ return (ether_ioctl(ifp, cmd, data));
/* NOT REACHED */
}
@@ -645,7 +633,6 @@
tapifstart(struct ifnet *ifp)
{
struct tap_softc *tp = ifp->if_softc;
- int s;
TAPDEBUG("%s starting\n", ifp->if_xname);
@@ -665,24 +652,19 @@
TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
tp->tap_flags);
- s = splimp();
do {
IF_DEQUEUE(&ifp->if_snd, m);
if (m != NULL)
m_freem(m);
ifp->if_oerrors ++;
} while (m != NULL);
- splx(s);
return;
}
- mtx_unlock(&tp->tap_mtx);
- s = splimp();
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- if (ifp->if_snd.ifq_len != 0) {
- mtx_lock(&tp->tap_mtx);
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
if (tp->tap_flags & TAP_RWAIT) {
tp->tap_flags &= ~TAP_RWAIT;
wakeup(tp);
@@ -691,16 +673,16 @@
if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
mtx_unlock(&tp->tap_mtx);
pgsigio(&tp->tap_sigio, SIGIO, 0);
- } else
- mtx_unlock(&tp->tap_mtx);
+ mtx_lock(&tp->tap_mtx);
+ }
selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
+ KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
ifp->if_opackets ++; /* obytes are counted in ether_output */
}
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- splx(s);
+ mtx_unlock(&tp->tap_mtx);
} /* tapifstart */
@@ -715,7 +697,6 @@
struct tap_softc *tp = dev->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
struct tapinfo *tapp = NULL;
- int s;
int f;
#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD4)
@@ -724,12 +705,10 @@
switch (cmd) {
case TAPSIFINFO:
- s = splimp();
tapp = (struct tapinfo *)data;
ifp->if_mtu = tapp->mtu;
ifp->if_type = tapp->type;
ifp->if_baudrate = tapp->baudrate;
- splx(s);
break;
case TAPGIFINFO:
@@ -757,26 +736,26 @@
break;
case FIOASYNC:
- s = splimp();
mtx_lock(&tp->tap_mtx);
if (*(int *)data)
tp->tap_flags |= TAP_ASYNC;
else
tp->tap_flags &= ~TAP_ASYNC;
mtx_unlock(&tp->tap_mtx);
- splx(s);
break;
case FIONREAD:
- s = splimp();
- if (ifp->if_snd.ifq_head) {
- struct mbuf *mb = ifp->if_snd.ifq_head;
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ struct mbuf *mb;
- for(*(int *)data = 0;mb != NULL;mb = mb->m_next)
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, mb);
+ for (*(int *)data = 0; mb != NULL;
+ mb = mb->m_next)
*(int *)data += mb->m_len;
+ IFQ_UNLOCK(&ifp->if_snd);
} else
*(int *)data = 0;
- splx(s);
break;
case FIOSETOWN:
@@ -797,10 +776,6 @@
/* VMware/VMnet port ioctl's */
- case SIOCGIFFLAGS: /* get ifnet flags */
- bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
- break;
-
#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD4)
case _IO('V', 0):
@@ -814,9 +789,9 @@
f &= ~IFF_CANTCHANGE;
f |= IFF_UP;
- s = splimp();
+ mtx_lock(&tp->tap_mtx);
ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
- splx(s);
+ mtx_unlock(&tp->tap_mtx);
break;
case OSIOCGIFADDR: /* get MAC address of the remote side */
@@ -851,7 +826,7 @@
struct tap_softc *tp = dev->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
struct mbuf *m = NULL;
- int error = 0, len, s;
+ int error = 0, len;
TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev));
@@ -867,26 +842,27 @@
}
tp->tap_flags &= ~TAP_RWAIT;
- mtx_unlock(&tp->tap_mtx);
/* sleep until we get a packet */
do {
- s = splimp();
IF_DEQUEUE(&ifp->if_snd, m);
- splx(s);
if (m == NULL) {
- if (flag & O_NONBLOCK)
+ if (flag & O_NONBLOCK) {
+ mtx_unlock(&tp->tap_mtx);
return (EWOULDBLOCK);
+ }
- mtx_lock(&tp->tap_mtx);
tp->tap_flags |= TAP_RWAIT;
- mtx_unlock(&tp->tap_mtx);
- error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
- if (error)
+ error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1),
+ "taprd", 0);
+ if (error) {
+ mtx_unlock(&tp->tap_mtx);
return (error);
+ }
}
} while (m == NULL);
+ mtx_unlock(&tp->tap_mtx);
/* feed packet to bpf */
BPF_MTAP(ifp, m);
@@ -982,14 +958,14 @@
{
struct tap_softc *tp = dev->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
- int s, revents = 0;
+ int revents = 0;
TAPDEBUG("%s polling, minor = %#x\n",
ifp->if_xname, dev2unit(dev));
- s = splimp();
if (events & (POLLIN | POLLRDNORM)) {
- if (ifp->if_snd.ifq_len > 0) {
+ IFQ_LOCK(&ifp->if_snd);
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
TAPDEBUG("%s have data in queue. len = %d, " \
"minor = %#x\n", ifp->if_xname,
ifp->if_snd.ifq_len, dev2unit(dev));
@@ -1001,12 +977,12 @@
selrecord(td, &tp->tap_rsel);
}
+ IFQ_UNLOCK(&ifp->if_snd);
}
if (events & (POLLOUT | POLLWRNORM))
revents |= (events & (POLLOUT | POLLWRNORM));
- splx(s);
return (revents);
} /* tappoll */
@@ -1019,11 +995,9 @@
static int
tapkqfilter(struct cdev *dev, struct knote *kn)
{
- int s;
struct tap_softc *tp = dev->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
- s = splimp();
switch (kn->kn_filter) {
case EVFILT_READ:
TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
@@ -1040,11 +1014,9 @@
default:
TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
ifp->if_xname, dev2unit(dev));
- splx(s);
return (EINVAL);
/* NOT REACHED */
}
- splx(s);
kn->kn_hook = (caddr_t) dev;
knlist_add(&tp->tap_rsel.si_note, kn, 0);
@@ -1061,12 +1033,11 @@
static int
tapkqread(struct knote *kn, long hint)
{
- int ret, s;
+ int ret;
struct cdev *dev = (struct cdev *)(kn->kn_hook);
struct tap_softc *tp = dev->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
- s = splimp();
if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
@@ -1076,7 +1047,6 @@
ifp->if_xname, dev2unit(dev));
ret = 0;
}
- splx(s);
return (ret);
} /* tapkqread */
@@ -1090,13 +1060,10 @@
static int
tapkqwrite(struct knote *kn, long hint)
{
- int s;
struct tap_softc *tp = ((struct cdev *) kn->kn_hook)->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
- s = splimp();
kn->kn_data = ifp->if_mtu;
- splx(s);
return (1);
} /* tapkqwrite */
Index: if_tun.c
===================================================================
--- if_tun.c (revision 212557)
+++ if_tun.c (working copy)
@@ -344,13 +344,13 @@
tp->tun_flags &= ~TUN_RWAIT;
wakeup(tp);
}
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
mtx_unlock(&tp->tun_mtx);
pgsigio(&tp->tun_sigio, SIGIO, 0);
} else
mtx_unlock(&tp->tun_mtx);
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_UNLOCKED(&tp->tun_rsel.si_note, 0);
}
/* XXX: should return an error code so it can fail. */
@@ -385,7 +385,7 @@
IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
ifp->if_snd.ifq_drv_maxlen = 0;
IFQ_SET_READY(&ifp->if_snd);
- knlist_init_mtx(&sc->tun_rsel.si_note, NULL);
+ knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
ifp->if_capabilities |= IFCAP_LINKSTATE;
ifp->if_capenable |= IFCAP_LINKSTATE;
@@ -443,7 +443,6 @@
{
struct tun_softc *tp;
struct ifnet *ifp;
- int s;
tp = dev->si_drv1;
ifp = TUN2IFP(tp);
@@ -451,27 +450,25 @@
mtx_lock(&tp->tun_mtx);
tp->tun_flags &= ~TUN_OPEN;
tp->tun_pid = 0;
- mtx_unlock(&tp->tun_mtx);
/*
* junk all pending output
*/
CURVNET_SET(ifp->if_vnet);
- s = splimp();
IFQ_PURGE(&ifp->if_snd);
- splx(s);
if (ifp->if_flags & IFF_UP) {
- s = splimp();
+ mtx_unlock(&tp->tun_mtx);
if_down(ifp);
- splx(s);
+ mtx_lock(&tp->tun_mtx);
}
/* Delete all addresses and routes which reference this interface. */
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
struct ifaddr *ifa;
- s = splimp();
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ mtx_unlock(&tp->tun_mtx);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
/* deal w/IPv4 PtP destination; unlocked read */
if (ifa->ifa_addr->sa_family == AF_INET) {
@@ -482,16 +479,14 @@
}
}
if_purgeaddrs(ifp);
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- splx(s);
+ mtx_lock(&tp->tun_mtx);
}
if_link_state_change(ifp, LINK_STATE_DOWN);
CURVNET_RESTORE();
- mtx_lock(&tp->tun_mtx);
funsetown(&tp->tun_sigio);
selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_UNLOCKED(&tp->tun_rsel.si_note, 0);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
TUNDEBUG (ifp, "closed\n");
cv_broadcast(&tp->tun_cv);
@@ -510,9 +505,11 @@
TUNDEBUG(ifp, "tuninit\n");
+ mtx_lock(&tp->tun_mtx);
ifp->if_flags |= IFF_UP;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
getmicrotime(&ifp->if_lastchange);
+ mtx_unlock(&tp->tun_mtx);
#ifdef INET
if_addr_rlock(ifp);
@@ -545,9 +542,8 @@
struct ifreq *ifr = (struct ifreq *)data;
struct tun_softc *tp = ifp->if_softc;
struct ifstat *ifs;
- int error = 0, s;
+ int error = 0;
- s = splimp();
switch(cmd) {
case SIOCGIFSTATUS:
ifs = (struct ifstat *)data;
@@ -576,7 +572,6 @@
default:
error = EINVAL;
}
- splx(s);
return (error);
}
@@ -682,7 +677,6 @@
static int
tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread
*td)
{
- int s;
int error;
struct tun_softc *tp = dev->si_drv1;
struct tuninfo *tunp;
@@ -745,9 +739,11 @@
switch (*(int *)data & ~IFF_MULTICAST) {
case IFF_POINTOPOINT:
case IFF_BROADCAST:
+ mtx_lock(&tp->tun_mtx);
TUN2IFP(tp)->if_flags &=
~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
TUN2IFP(tp)->if_flags |= *(int *)data;
+ mtx_unlock(&tp->tun_mtx);
break;
default:
return(EINVAL);
@@ -769,17 +765,15 @@
mtx_unlock(&tp->tun_mtx);
break;
case FIONREAD:
- s = splimp();
if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
struct mbuf *mb;
IFQ_LOCK(&TUN2IFP(tp)->if_snd);
IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
- for( *(int *)data = 0; mb != 0; mb = mb->m_next)
+ for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
*(int *)data += mb->m_len;
IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
} else
*(int *)data = 0;
- splx(s);
break;
case FIOSETOWN:
return (fsetown(*(int *)data, &tp->tun_sigio));
@@ -813,7 +807,7 @@
struct tun_softc *tp = dev->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
struct mbuf *m;
- int error=0, len, s;
+ int error=0, len;
TUNDEBUG (ifp, "read\n");
mtx_lock(&tp->tun_mtx);
@@ -824,27 +818,24 @@
}
tp->tun_flags &= ~TUN_RWAIT;
- mtx_unlock(&tp->tun_mtx);
- s = splimp();
do {
IFQ_DEQUEUE(&ifp->if_snd, m);
if (m == NULL) {
if (flag & O_NONBLOCK) {
- splx(s);
+ mtx_unlock(&tp->tun_mtx);
return (EWOULDBLOCK);
}
- mtx_lock(&tp->tun_mtx);
tp->tun_flags |= TUN_RWAIT;
- mtx_unlock(&tp->tun_mtx);
- if ((error = tsleep(tp, PCATCH | (PZERO + 1),
- "tunread", 0)) != 0) {
- splx(s);
+ error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
+ "tunread", 0);
+ if (error != 0) {
+ mtx_unlock(&tp->tun_mtx);
return (error);
}
}
} while (m == NULL);
- splx(s);
+ mtx_unlock(&tp->tun_mtx);
while (m && uio->uio_resid > 0 && error == 0) {
len = min(uio->uio_resid, m->m_len);
@@ -957,13 +948,11 @@
static int
tunpoll(struct cdev *dev, int events, struct thread *td)
{
- int s;
struct tun_softc *tp = dev->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
int revents = 0;
struct mbuf *m;
- s = splimp();
TUNDEBUG(ifp, "tunpoll\n");
if (events & (POLLIN | POLLRDNORM)) {
@@ -981,7 +970,6 @@
if (events & (POLLOUT | POLLWRNORM))
revents |= events & (POLLOUT | POLLWRNORM);
- splx(s);
return (revents);
}
@@ -991,11 +979,9 @@
static int
tunkqfilter(struct cdev *dev, struct knote *kn)
{
- int s;
struct tun_softc *tp = dev->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
- s = splimp();
switch(kn->kn_filter) {
case EVFILT_READ:
TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
@@ -1012,10 +998,8 @@
default:
TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
ifp->if_xname, dev2unit(dev));
- splx(s);
return(EINVAL);
}
- splx(s);
kn->kn_hook = (caddr_t) dev;
knlist_add(&tp->tun_rsel.si_note, kn, 0);
@@ -1029,12 +1013,11 @@
static int
tunkqread(struct knote *kn, long hint)
{
- int ret, s;
+ int ret;
struct cdev *dev = (struct cdev *)(kn->kn_hook);
struct tun_softc *tp = dev->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
- s = splimp();
if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
TUNDEBUG(ifp,
"%s have data in the queue. Len = %d, minor = %#x\n",
@@ -1046,7 +1029,6 @@
dev2unit(dev));
ret = 0;
}
- splx(s);
return (ret);
}
@@ -1057,13 +1039,10 @@
static int
tunkqwrite(struct knote *kn, long hint)
{
- int s;
struct tun_softc *tp = ((struct cdev *)kn->kn_hook)->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
- s = splimp();
kn->kn_data = ifp->if_mtu;
- splx(s);
return (1);
}
--
John Baldwin
More information about the freebsd-current
mailing list