kern/153938: [run] [panic] [patch] Workaround for use-after-free panic

Juergen Lock nox at jelal.kn-bremen.de
Wed Jan 12 20:20:05 UTC 2011


>Number:         153938
>Category:       kern
>Synopsis:       [run] [panic] [patch] Workaround for use-after-free panic
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Wed Jan 12 20:20:04 UTC 2011
>Closed-Date:
>Last-Modified:
>Originator:     Juergen Lock
>Release:        FreeBSD 8.1-RC2 amd64
>Organization:
me?  organized??
>Environment:
System: FreeBSD triton8.kn-bremen.de 8.1-RC2 FreeBSD 8.1-RC2 #9: Wed Sep 1 21:53:36 CEST 2010 nox at triton8.kn-bremen.de:/usr/obj/data2v/home/nox/src-r81/src/sys/TRITON8U amd64

	Note: this is an older stable/8 checkout, but if_run(4) is
	checked out from head.

>Description:
	Running the NIC in hostap mode with WPA2, I got the following
	crash about once every few weeks:

#0  doadump () at pcpu.h:223
223	pcpu.h: No such file or directory.
	in pcpu.h
(kgdb) bt
#0  doadump () at pcpu.h:223
#1  0xffffffff805f0719 in boot (howto=260)
    at /data2v/home/nox/src-r81/src/sys/kern/kern_shutdown.c:416
#2  0xffffffff805f0b6c in panic (fmt=Variable "fmt" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/kern/kern_shutdown.c:590
#3  0xffffffff808e4e0d in trap_fatal (frame=0xc, eva=Variable "eva" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/amd64/amd64/trap.c:777
#4  0xffffffff808e51f4 in trap_pfault (frame=0xffffff80ec121aa0, usermode=0)
    at /data2v/home/nox/src-r81/src/sys/amd64/amd64/trap.c:693
#5  0xffffffff808e5a7e in trap (frame=0xffffff80ec121aa0)
    at /data2v/home/nox/src-r81/src/sys/amd64/amd64/trap.c:451
#6  0xffffffff808ca953 in calltrap ()
    at /data2v/home/nox/src-r81/src/sys/amd64/amd64/exception.S:223
#7  0xffffffff81072ac6 in run_drain_fifo (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2245
#8  0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
#9  0xffffffff8062e543 in taskqueue_run (queue=0xffffff0005f42380)
    at /data2v/home/nox/src-r81/src/sys/kern/subr_taskqueue.c:239
#10 0xffffffff8062e7c6 in taskqueue_thread_loop (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/kern/subr_taskqueue.c:360
---Type <return> to continue, or q <return> to quit---
#11 0xffffffff805c64a8 in fork_exit (
    callout=0xffffffff8062e780 <taskqueue_thread_loop>, 
    arg=0xffffff8000b130b8, frame=0xffffff80ec121c80)
    at /data2v/home/nox/src-r81/src/sys/kern/kern_fork.c:844
#12 0xffffffff808cae2e in fork_trampoline ()
    at /data2v/home/nox/src-r81/src/sys/amd64/amd64/exception.S:562
#13 0x0000000000000000 in ?? ()
#14 0x0000000000000000 in ?? ()
#15 0x0000000000000000 in ?? ()
#16 0x0000000000000000 in ?? ()
#17 0x0000000000000000 in ?? ()
#18 0x0000000000000000 in ?? ()
#19 0x0000000000000000 in ?? ()
#20 0x0000000000000000 in ?? ()
#21 0x0000000000000000 in ?? ()
#22 0x0000000000000000 in ?? ()
#23 0x0000000000000000 in ?? ()
#24 0x0000000000000000 in ?? ()
#25 0x0000000000000000 in ?? ()
#26 0x0000000000000000 in ?? ()
#27 0x0000000000000000 in ?? ()
#28 0x0000000000000000 in ?? ()
#29 0x0000000000000000 in ?? ()
---Type <return> to continue, or q <return> to quit---
#30 0x0000000000000000 in ?? ()
#31 0x0000000000000000 in ?? ()
#32 0x0000000000000000 in ?? ()
#33 0x0000000000000000 in ?? ()
#34 0x0000000000000000 in ?? ()
#35 0x0000000000000000 in ?? ()
#36 0x0000000000000000 in ?? ()
#37 0x0000000000f37000 in ?? ()
#38 0x0000000000000000 in ?? ()
#39 0xffffff00078c47c0 in ?? ()
#40 0xffffffff80cac9c0 in affinity ()
#41 0xffffff00018837c0 in ?? ()
#42 0xffffff80ec121710 in ?? ()
#43 0xffffff80ec1216c8 in ?? ()
#44 0xffffff00078c47c0 in ?? ()
#45 0xffffffff8061471a in sched_switch (td=0xffffff8000b130b8, 
    newtd=0xffffffff8062e780, flags=Variable "flags" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/kern/sched_ule.c:1844
Previous frame inner to this frame (corrupt stack?)
(kgdb) fr 7
#7  0xffffffff81072ac6 in run_drain_fifo (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2245
2245			ni = sc->sc_ni[wcid];
(kgdb) p wcid
$1 = 1 '\001'
(kgdb) p sc->sc_ni
$2 = {0x0, 0xffffff8001676000, 0x0 <repeats 63 times>}
(kgdb) p sc->sc_ni[1]
$3 = (struct ieee80211_node *) 0xffffff8001676000
(kgdb) p *sc->sc_ni[1]
Cannot access memory at address 0xffffff8001676000
(kgdb) up
#8  0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
2210			run_drain_fifo(sc);
(kgdb) p sc->sc_ni
$4 = {0x0, 0xffffff8001676000, 0x0 <repeats 63 times>}
(kgdb) l run_drain_fifo
2216			usb_callout_reset(&sc->ratectl_ch, hz, run_ratectl_to, sc);
2217	}
2218	
2219	static void
2220	run_drain_fifo(void *arg)
2221	{
2222		struct run_softc *sc = arg;
2223		struct ifnet *ifp = sc->sc_ifp;
2224		struct ieee80211_node *ni = sc->sc_ni[0];	/* make compiler happy */
2225		uint32_t stat;
(kgdb) l
2226		int retrycnt = 0;
2227		uint8_t wcid, mcs, pid;
2228	
2229		RUN_LOCK_ASSERT(sc, MA_OWNED);
2230	
2231		for (;;) {
2232			/* drain Tx status FIFO (maxsize = 16) */
2233			run_read(sc, RT2860_TX_STAT_FIFO, &stat);
2234			DPRINTFN(4, "tx stat 0x%08x\n", stat);
2235			if (!(stat & RT2860_TXQ_VLD))
(kgdb) 
2236				break;
2237	
2238			wcid = (stat >> RT2860_TXQ_WCID_SHIFT) & 0xff;
2239	
2240			/* if no ACK was requested, no feedback is available */
2241			if (!(stat & RT2860_TXQ_ACKREQ) || wcid > RT2870_WCID_MAX ||
2242			    wcid == 0)
2243				continue;
2244	
2245			ni = sc->sc_ni[wcid];
(kgdb) 
2246			if (ni->ni_rctls == NULL)
2247				continue;
2248	
2249			/* update per-STA AMRR stats */
2250			if (stat & RT2860_TXQ_OK) {
2251				/*
2252				 * Check if there were retries, ie if the Tx
2253				 * success rate is different from the requested
2254				 * rate. Note that it works only because we do
2255				 * not allow rate fallback from OFDM to CCK.
(kgdb) 
2256				 */
2257				mcs = (stat >> RT2860_TXQ_MCS_SHIFT) & 0x7f;
2258				pid = (stat >> RT2860_TXQ_PID_SHIFT) & 0xf;
2259				if (mcs + 1 != pid)
2260					retrycnt = 1;
2261				ieee80211_ratectl_tx_complete(ni->ni_vap, ni,
2262				    IEEE80211_RATECTL_TX_SUCCESS,
2263				    &retrycnt, NULL);
2264			} else {
2265				retrycnt = 1;
(kgdb) 
2266				ieee80211_ratectl_tx_complete(ni->ni_vap, ni,
2267				    IEEE80211_RATECTL_TX_FAILURE,
2268				    &retrycnt, NULL);
2269				ifp->if_oerrors++;
2270			}
2271		}
2272		DPRINTFN(3, "count=%d\n", sc->fifo_cnt);
2273	
2274		sc->fifo_cnt = 0;
2275	}
(kgdb) up
#9  0xffffffff8062e543 in taskqueue_run (queue=0xffffff0005f42380)
    at /data2v/home/nox/src-r81/src/sys/kern/subr_taskqueue.c:239
239			task->ta_func(task->ta_context, pending);
(kgdb) p task
$5 = (struct task *) 0xffffff8000a8be38
(kgdb) p *task
$6 = {ta_link = {stqe_next = 0x0}, ta_pending = 0, ta_priority = 0, 
  ta_func = 0xffffffff81072b60 <run_ratectl_cb>, 
  ta_context = 0xffffff8000a89000}
(kgdb) l run_ratectl_cb
2184	}
2185	
2186	/* ARGSUSED */
2187	static void
2188	run_ratectl_cb(void *arg, int pending)
2189	{
2190		struct run_softc *sc = arg;
2191		struct ieee80211com *ic = sc->sc_ifp->if_l2com;
2192		struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
2193	
(kgdb) down
#8  0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
2210			run_drain_fifo(sc);
(kgdb) l run_ratectl_cb
2184	}
2185	
2186	/* ARGSUSED */
2187	static void
2188	run_ratectl_cb(void *arg, int pending)
2189	{
2190		struct run_softc *sc = arg;
2191		struct ieee80211com *ic = sc->sc_ifp->if_l2com;
2192		struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
2193	
(kgdb) l
2194		if (vap == NULL)
2195			return;
2196	
2197		if (sc->rvp_cnt <= 1 && vap->iv_opmode == IEEE80211_M_STA)
2198			run_iter_func(sc, vap->iv_bss);
2199		else {
2200			/*
2201			 * run_reset_livelock() doesn't do anything with AMRR,
2202			 * but Ralink wants us to call it every 1 sec. So, we
2203			 * piggyback here rather than creating another callout.
(kgdb) p sc->rvp_cnt
$7 = 1 '\001'
(kgdb) l
2204			 * Livelock may occur only in HOSTAP or IBSS mode
2205			 * (when h/w is sending beacons).
2206			 */
2207			RUN_LOCK(sc);
2208			run_reset_livelock(sc);
2209			/* just in case, there are some stats to drain */
2210			run_drain_fifo(sc);
2211			RUN_UNLOCK(sc);
2212			ieee80211_iterate_nodes(&ic->ic_sta, run_iter_func, sc);
2213		}
(kgdb) down
#7  0xffffffff81072ac6 in run_drain_fifo (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2245
2245			ni = sc->sc_ni[wcid];
(kgdb) up
#8  0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
    at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
2210			run_drain_fifo(sc);
(kgdb) l
2205			 * (when h/w is sending beacons).
2206			 */
2207			RUN_LOCK(sc);
2208			run_reset_livelock(sc);
2209			/* just in case, there are some stats to drain */
2210			run_drain_fifo(sc);
2211			RUN_UNLOCK(sc);
2212			ieee80211_iterate_nodes(&ic->ic_sta, run_iter_func, sc);
2213		}
2214	
(kgdb) l
2215		if(sc->ratectl_run != RUN_RATECTL_OFF)
2216			usb_callout_reset(&sc->ratectl_ch, hz, run_ratectl_to, sc);
2217	}
2218	
2219	static void
2220	run_drain_fifo(void *arg)
2221	{
2222		struct run_softc *sc = arg;
2223		struct ifnet *ifp = sc->sc_ifp;
2224		struct ieee80211_node *ni = sc->sc_ni[0];	/* make compiler happy */
(kgdb) 
2225		uint32_t stat;
2226		int retrycnt = 0;
2227		uint8_t wcid, mcs, pid;
2228	
2229		RUN_LOCK_ASSERT(sc, MA_OWNED);
2230	
2231		for (;;) {
2232			/* drain Tx status FIFO (maxsize = 16) */
2233			run_read(sc, RT2860_TX_STAT_FIFO, &stat);
2234			DPRINTFN(4, "tx stat 0x%08x\n", stat);
(kgdb) p sc->fifo_cnt
$8 = 1 '\001'
(kgdb) l
2235			if (!(stat & RT2860_TXQ_VLD))
2236				break;
2237	
2238			wcid = (stat >> RT2860_TXQ_WCID_SHIFT) & 0xff;
2239	
2240			/* if no ACK was requested, no feedback is available */
2241			if (!(stat & RT2860_TXQ_ACKREQ) || wcid > RT2870_WCID_MAX ||
2242			    wcid == 0)
2243				continue;
2244	
(kgdb) l
2245			ni = sc->sc_ni[wcid];
2246			if (ni->ni_rctls == NULL)
2247				continue;
2248	
2249			/* update per-STA AMRR stats */
2250			if (stat & RT2860_TXQ_OK) {
2251				/*
2252				 * Check if there were retries, ie if the Tx
2253				 * success rate is different from the requested
2254				 * rate. Note that it works only because we do
(kgdb) p vap->iv_opmode
Variable "vap" is not available.
(kgdb) p ic->ic_vaps
$9 = {tqh_first = 0xffffff0007800000, tqh_last = 0xffffff0007800048}
(kgdb) p ic->ic_vaps->tqh_first
$10 = (struct ieee80211vap *) 0xffffff0007800000
(kgdb) p ic->ic_vaps->tqh_first->iv_opmode
$11 = IEEE80211_M_HOSTAP
(kgdb) p ic->ic_vaps->tqh_last->iv_opmode
Cannot access memory at address 0x2f0
(kgdb) p ic->ic_vaps->tqh_last
$12 = (struct ieee80211vap **) 0xffffff0007800048
(kgdb) p *ic->ic_vaps->tqh_last
$13 = (struct ieee80211vap *) 0x0
(kgdb) q

Script done on Tue Jan  4 09:23:48 2011
>How-To-Repeat:
	Set up if_run(4) in hostap mode and wait a few weeks...
	(I only have one smartphone using the wifi; maybe it will
	happen more often on a bigger network?)

>Fix:

	I don't really know the wifi code, so the following patch
	is likely not the `proper' fix (and it still contains
	diagnostic code that shouldn't be committed as is), but at
	least it fixed the panic for me: I finally got the

		run0: drain_fifo ni=NULL wcid=1

	message I added for the condition that previously caused
	the panic, and the NIC kept working.  (The panic happened
	when run_drain_fifo() accessed sc->sc_ni[wcid] after the
	node had been freed, so I hooked into ic->ic_node_cleanup
	to set the entry to NULL before the node gets freed, and
	added a NULL check with the above message to run_drain_fifo().)

Index: src/sys/dev/usb/wlan/if_run.c
===================================================================
RCS file: /home/scvs/src/sys/dev/usb/wlan/if_run.c,v
retrieving revision 1.17
diff -u -p -r1.17 if_run.c
--- src/sys/dev/usb/wlan/if_run.c	6 Nov 2010 18:17:20 -0000	1.17
+++ src/sys/dev/usb/wlan/if_run.c	7 Jan 2011 00:58:35 -0000
@@ -341,6 +341,7 @@ static const char *run_get_rf(int);
 static int	run_read_eeprom(struct run_softc *);
 static struct ieee80211_node *run_node_alloc(struct ieee80211vap *,
 			    const uint8_t mac[IEEE80211_ADDR_LEN]);
+static void	run_node_cleanup(struct ieee80211_node *ni);
 static int	run_media_change(struct ifnet *);
 static int	run_newstate(struct ieee80211vap *, enum ieee80211_state, int);
 static int	run_wme_update(struct ieee80211com *);
@@ -673,6 +674,8 @@ run_attach(device_t self)
 	ic->ic_scan_end = run_scan_end;
 	ic->ic_set_channel = run_set_channel;
 	ic->ic_node_alloc = run_node_alloc;
+	sc->sc_node_cleanup = ic->ic_node_cleanup;
+	ic->ic_node_cleanup = run_node_cleanup;
 	ic->ic_newassoc = run_newassoc;
 	//ic->ic_updateslot = run_updateslot;
 	ic->ic_update_mcast = run_update_mcast;
@@ -2243,7 +2246,14 @@ run_drain_fifo(void *arg)
 			continue;
 
 		ni = sc->sc_ni[wcid];
-		if (ni->ni_rctls == NULL)
+#if 1
+		static struct ieee80211_node *lastni;
+		if (ni == NULL && lastni)
+			device_printf(sc->sc_dev, "drain_fifo ni=NULL wcid=%d\n",
+				wcid);
+		lastni = ni;
+#endif
+		if (ni == NULL || ni->ni_rctls == NULL)
 			continue;
 
 		/* update per-STA AMRR stats */
@@ -2373,10 +2383,12 @@ run_newassoc(struct ieee80211_node *ni, 
 		ieee80211_runtask(ic, &sc->cmdq_task);
 	}
 
-	DPRINTF("new assoc isnew=%d associd=%x addr=%s\n",
-	    isnew, ni->ni_associd, ether_sprintf(ni->ni_macaddr));
+	//DPRINTF("new assoc isnew=%d associd=%x addr=%s\n",
+	device_printf(sc->sc_dev, "new assoc isnew=%d associd=%x addr=%s ni=%p\n",
+	    isnew, ni->ni_associd, ether_sprintf(ni->ni_macaddr), ni);
 
 	ieee80211_ratectl_node_init(ni);
+	rn->wcid = wcid;
 	sc->sc_ni[wcid] = ni;
 
 	for (i = 0; i < rs->rs_nrates; i++) {
@@ -2412,6 +2424,39 @@ run_newassoc(struct ieee80211_node *ni, 
 	usb_callout_reset(&sc->ratectl_ch, hz, run_ratectl_to, sc);
 }
 
+static void
+run_node_cleanup(struct ieee80211_node *ni)
+{
+	struct run_node *rn = (void *)ni;
+	struct ieee80211vap *vap = ni->ni_vap;
+	struct ieee80211com *ic = vap->iv_ic;
+	struct run_softc *sc = ic->ic_ifp->if_softc;
+	uint8_t wcid = RUN_AID2WCID(ni->ni_associd);
+
+	if (wcid == 0)
+		wcid = rn->wcid;
+	if (wcid > RT2870_WCID_MAX) {
+		device_printf(sc->sc_dev, "wcid=%d out of range\n", wcid);
+		sc->sc_node_cleanup(ni);
+		return;
+	}
+
+	//DPRINTF("node_cleanup wcid=%d addr=%s\n",
+	device_printf(sc->sc_dev, "node_cleanup wcid=%d addr=%s ni=%p\n",
+	    wcid, ether_sprintf(vap->iv_opmode == IEEE80211_M_STA ?
+		    vap->iv_myaddr : ni->ni_macaddr), ni);
+
+	if (wcid > 0 && sc->sc_ni[wcid]) {
+		if (sc->sc_ni[wcid] != ni) {
+			device_printf(sc->sc_dev, "node_cleanup sc->sc_ni[wcid] %p != ni\n",
+				sc->sc_ni[wcid]);
+		} else {
+			sc->sc_ni[wcid] = NULL;
+		}
+	}
+	sc->sc_node_cleanup(ni);
+}
+
 /*
  * Return the Rx chain with the highest RSSI for a given frame.
  */
Index: src/sys/dev/usb/wlan/if_runvar.h
===================================================================
RCS file: /home/scvs/src/sys/dev/usb/wlan/if_runvar.h,v
retrieving revision 1.6
diff -u -p -r1.6 if_runvar.h
--- src/sys/dev/usb/wlan/if_runvar.h	14 Jun 2010 00:40:23 -0000	1.6
+++ src/sys/dev/usb/wlan/if_runvar.h	4 Jan 2011 08:48:13 -0000
@@ -106,6 +106,7 @@ struct run_node {
 	uint8_t			amrr_ridx;
 	uint8_t			mgt_ridx;
 	uint8_t			fix_ridx;
+	uint8_t			wcid;
 };
 
 struct run_cmdq {
@@ -164,6 +165,8 @@ struct run_softc {
 	int				(*sc_srom_read)(struct run_softc *,
 					    uint16_t, uint16_t *);
 
+	void 				(*sc_node_cleanup)(struct ieee80211_node *);
+
 	uint16_t			mac_ver;
 	uint16_t			mac_rev;
 	uint8_t				rf_rev;
>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list