svn commit: r184664 - user/kmacy/HEAD_multi_tx/sys/netinet

Kip Macy kmacy at FreeBSD.org
Tue Nov 4 15:06:36 PST 2008


Author: kmacy
Date: Tue Nov  4 23:06:35 2008
New Revision: 184664
URL: http://svn.freebsd.org/changeset/base/184664

Log:
  Commit rwatson's tcp_input locking changes
  
  http://www.watson.org/~robert/freebsd/netperf/20081104-tcp_input-rlock.diff
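
At its core, the patch replaces the unconditional INP_INFO_WLOCK on tcbinfo in
tcp_input() with an optimistic read lock: take only a read lock when the header
flags suggest no global state change, upgrade (or drop the lock, re-acquire it
as a writer, and loop back to the lookup) when that guess turns out wrong, and
downgrade when a retry has left a write lock that is no longer needed.  Below
is a minimal standalone sketch of that pattern against a generic rwlock(9)
lock; pcb_lock, the flag arguments, and input_sketch() are illustrative
placeholders, not interfaces from the patch.

/*
 * Sketch of the optimistic read-locking pattern used in the diff, against a
 * generic rwlock(9) lock.  Assume pcb_lock has been initialized elsewhere
 * with rw_init(9).
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

#define	TI_UNLOCKED	1
#define	TI_RLOCKED	2
#define	TI_WLOCKED	3

static struct rwlock pcb_lock;

static void
input_sketch(int hdr_suggests_change, int conn_needs_wlock)
{
	int ti_locked;

	/*
	 * Guess from the header alone: take a write lock only if a global
	 * state change looks likely, otherwise the cheaper read lock.
	 */
	if (hdr_suggests_change) {
		rw_wlock(&pcb_lock);
		ti_locked = TI_WLOCKED;
	} else {
		rw_rlock(&pcb_lock);
		ti_locked = TI_RLOCKED;
	}

lookup:
	/*
	 * ... look up the connection under the lock; only now do we learn
	 * whether the write lock is really required (conn_needs_wlock) ...
	 */
	if (conn_needs_wlock && ti_locked == TI_RLOCKED) {
		/*
		 * Guessed wrong: try to upgrade in place.  If another reader
		 * prevents the upgrade, drop the lock, re-acquire it as a
		 * writer and redo the lookup, since the connection may have
		 * changed while the lock was dropped.
		 */
		if (rw_try_upgrade(&pcb_lock) == 0) {
			rw_runlock(&pcb_lock);
			rw_wlock(&pcb_lock);
			ti_locked = TI_WLOCKED;
			goto lookup;
		}
		ti_locked = TI_WLOCKED;
	} else if (!conn_needs_wlock && ti_locked == TI_WLOCKED) {
		/*
		 * Holding a write lock we no longer need, e.g. after a
		 * looped retry: downgrade back to a read lock.
		 */
		rw_downgrade(&pcb_lock);
		ti_locked = TI_RLOCKED;
	}

	/* ... process the segment under the chosen lock ... */

	/*
	 * Release whichever lock is held, as the dropunlock/dropwithreset
	 * epilogues in the patch do by switching on ti_locked.
	 */
	switch (ti_locked) {
	case TI_RLOCKED:
		rw_runlock(&pcb_lock);
		break;
	case TI_WLOCKED:
		rw_wunlock(&pcb_lock);
		break;
	default:
		panic("input_sketch: ti_locked %d", ti_locked);
	}
}

The whole strategy is gated by the new net.inet.tcp.read_locking sysctl, so it
can be switched off at runtime while its trade-offs are evaluated.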

Modified:
  user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c

Modified: user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c
==============================================================================
--- user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c	Tue Nov  4 23:03:36 2008	(r184663)
+++ user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c	Tue Nov  4 23:06:35 2008	(r184664)
@@ -158,6 +158,31 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet
     CTLFLAG_RW, tcp_autorcvbuf_max, 0,
     "Max size of automatic receive buffer");
 
+int	tcp_read_locking = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW,
+    &tcp_read_locking, 0, "Enable read locking strategy");
+
+int	tcp_rlock_atfirst;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rlock_atfirst, CTLFLAG_RD,
+    &tcp_rlock_atfirst, 0, "");
+
+int	tcp_wlock_atfirst;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_wlock_atfirst, CTLFLAG_RD,
+    &tcp_wlock_atfirst, 0, "");
+
+int	tcp_rlock_downgraded;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rlock_downgraded, CTLFLAG_RD,
+    &tcp_rlock_downgraded, 0, "");
+
+int	tcp_wlock_upgraded;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, wlock_upgraded, CTLFLAG_RD,
+    &tcp_wlock_upgraded, 0, "");
+
+int	tcp_wlock_looped;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, wlock_looped, CTLFLAG_RD,
+    &tcp_wlock_looped, 0, "");
+
+
 struct inpcbhead tcb;
 #define	tcb6	tcb  /* for KAME src sync over BSD*'s */
 struct inpcbinfo tcbinfo;
@@ -288,6 +313,10 @@ tcp_input(struct mbuf *m, int off0)
 #endif
 	struct tcpopt to;		/* options in this segment */
 	char *s = NULL;			/* address and port logging */
+	int ti_locked;
+#define	TI_UNLOCKED	1
+#define	TI_RLOCKED	2
+#define	TI_WLOCKED	3
 
 #ifdef TCPDEBUG
 	/*
@@ -440,11 +469,40 @@ tcp_input(struct mbuf *m, int off0)
 	drop_hdrlen = off0 + off;
 
 	/*
-	 * Locate pcb for segment.
-	 */
-	INP_INFO_WLOCK(&V_tcbinfo);
+	 * Locate pcb for segment, which requires a lock on tcbinfo.
+	 *
+	 * If any TCP header flags are present that suggest a global state
+	 * change may arise when processing the packet, we preemptively
+	 * acquire a write lock, and otherwise a read lock.  However, we may
+	 * be wrong and also find that we can't upgrade from a read lock to
+	 * a write lock.  When this happens, we drop and re-acquire the lock
+	 * as a write lock and loop back to findpcb.
+	 */
+	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+	    tcp_read_locking == 0) {
+		INP_INFO_WLOCK(&V_tcbinfo);
+		ti_locked = TI_WLOCKED;
+		tcp_wlock_atfirst++;
+	} else {
+		INP_INFO_RLOCK(&V_tcbinfo);
+		ti_locked = TI_RLOCKED;
+		tcp_rlock_atfirst++;
+	}
+
 findpcb:
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	switch (ti_locked) {
+	case TI_RLOCKED:
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		break;
+
+	case TI_WLOCKED:
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		break;
+	default:
+		panic("tcp_input: findpcb ti_locked %d\n", ti_locked);
+		break;
+	}
+
 #ifdef IPFIREWALL_FORWARD
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
@@ -555,10 +613,40 @@ findpcb:
 	 * stray or duplicate segments arriving late.  If this segment
 	 * was a legitimate new connection attempt the old INPCB gets
 	 * removed and we can try again to find a listening socket.
+	 *
+	 * Timewait processing currently always requires a write lock, so if
+	 * we don't have one, acquire one.
+	 *
+	 * XXXRW: This duplicates logic below, and perhaps shouldn't.
 	 */
 	if (inp->inp_vflag & INP_TIMEWAIT) {
+		switch (ti_locked) {
+		case TI_RLOCKED:
+			if (rw_try_upgrade(&tcbinfo.ipi_lock) == 0) {
+				INP_WUNLOCK(inp);
+				INP_INFO_RUNLOCK(&tcbinfo);
+				INP_INFO_WLOCK(&tcbinfo);
+				ti_locked = TI_WLOCKED;
+				tcp_wlock_looped++;
+				goto findpcb;
+			}
+			ti_locked = TI_WLOCKED;
+			tcp_wlock_upgraded++;
+			break;
+
+		case TI_WLOCKED:
+			break;
+
+		default:
+			panic("tcp_input: INP_TIMEWAIT ti_locked %d",
+			    ti_locked);
+		}
+
+		INP_INFO_WLOCK_ASSERT(&tcbinfo);
+
 		if (thflags & TH_SYN)
 			tcp_dooptions(&to, optp, optlen, TO_SYN);
+
 		/*
 		 * NB: tcp_twcheck unlocks the INP and frees the mbuf.
 		 */
@@ -567,6 +655,7 @@ findpcb:
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		return;
 	}
+
 	/*
 	 * The TCPCB may no longer exist if the connection is winding
 	 * down or it is in the CLOSED state.  Either way we drop the
@@ -578,6 +667,67 @@ findpcb:
 		goto dropwithreset;
 	}
 
+	/*
+	 * Now that we've found an inpcb, we can find out whether we really
+	 * needed a write lock.  The rule once we have a connection: a read
+	 * lock suffices only if the packet has no SYN/FIN/RST flag set and
+	 * the connection is in the established state.  This is somewhat
+	 * conservative, but by the time we know for sure, we can no longer
+	 * restart processing.
+	 *
+	 * The inpcb lock is required in order to check the connection state,
+	 * but since it follows the pcbinfo lock, we have to try the upgrade
+	 * and, if that fails, handle failure.  Currently we do this by
+	 * dropping all locks, re-acquiring the pcbinfo lock with a write
+	 * lock, and looking up the inpcb again.  This is very expensive and
+	 * the trade-off here might not be right.
+	 */
+	if (tp->t_state != TCPS_ESTABLISHED ||
+	    (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+	    tcp_read_locking == 0) {
+		switch (ti_locked) {
+		case TI_RLOCKED:
+			if (rw_try_upgrade(&tcbinfo.ipi_lock) == 0) {
+				INP_WUNLOCK(inp);
+				INP_INFO_RUNLOCK(&tcbinfo);
+				INP_INFO_WLOCK(&tcbinfo);
+				ti_locked = TI_WLOCKED;
+				tcp_wlock_looped++;
+				goto findpcb;
+			}
+			ti_locked = TI_WLOCKED;
+			tcp_wlock_upgraded++;
+			break;
+
+		case TI_WLOCKED:
+			break;
+
+		default:
+			panic("tcp_input: upgrade check ti_locked %d",
+			    ti_locked);
+		}
+		INP_INFO_WLOCK_ASSERT(&tcbinfo);
+	} else {
+		/*
+		 * It's possible that because of looping, we may have
+		 * conservatively acquired a write lock.  If so, downgrade.
+		 */
+		switch (ti_locked) {
+		case TI_RLOCKED:
+			break;
+
+		case TI_WLOCKED:
+			tcp_rlock_downgraded++;
+			rw_downgrade(&tcbinfo.ipi_lock);
+			ti_locked = TI_RLOCKED;
+			break;
+
+		default:
+			panic("tcp_input: downgrade check ti_locked %d",
+			    ti_locked);
+		}
+	}
+
 #ifdef MAC
 	INP_WLOCK_ASSERT(inp);
 	if (mac_inpcb_check_deliver(inp, m))
@@ -895,8 +1045,20 @@ findpcb:
 	return;
 
 dropwithreset:
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	switch (ti_locked) {
+	case TI_RLOCKED:
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		break;
+
+	case TI_WLOCKED:
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		break;
+
+	default:
+		panic("tcp_input: dropwithreset ti_locked %d", ti_locked);
+	}
 
 	if (inp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
@@ -907,10 +1069,22 @@ dropwithreset:
 	goto drop;
 
 dropunlock:
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	switch (ti_locked) {
+	case TI_RLOCKED:
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		break;
+
+	case TI_WLOCKED:
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		break;
+
+	default:
+		panic("tcp_input: dropunlock ti_locked %d", ti_locked);
+	}
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
 
 drop:
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@@ -926,10 +1100,10 @@ tcp_do_segment(struct mbuf *m, struct tc
 {
 	INIT_VNET_INET(tp->t_vnet);
 	int thflags, acked, ourfinisacked, needoutput = 0;
-	int headlocked = 1;
 	int rstreason, todrop, win;
 	u_long tiwin;
 	struct tcpopt to;
+	int ti_locked;
 
 #ifdef TCPDEBUG
 	/*
@@ -942,7 +1116,35 @@ tcp_do_segment(struct mbuf *m, struct tc
 #endif
 	thflags = th->th_flags;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	/*
+	 * If this is either a state-changing packet or current state isn't
+	 * established, we require a write lock on tcbinfo.  Otherwise, we
+	 * allow either a read lock or a write lock, as we may have acquired
+	 * a write lock due to a race.
+	 *
+	 * XXXRW: if tcp_read_locking changes during processing, then we may
+	 * reach here with a read lock even though we now want a write one,
+	 * or a write lock where we might want a read one.  We tolerate this
+	 * by not re-checking tcp_read_locking here and simply accepting a
+	 * write lock where a read lock would do.  We still need to set
+	 * ti_locked, so do that using rw_wowned().  This will go away once we
+	 * remove the tcp_read_locking frob.
+	 */
+	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+	    tp->t_state != TCPS_ESTABLISHED) {
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		ti_locked = TI_WLOCKED;
+	} else {
+#if 0
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		ti_locked = TI_RLOCKED;
+#endif
+		if (rw_wowned(&V_tcbinfo.ipi_lock))
+			ti_locked = TI_WLOCKED;
+		else
+			ti_locked = TI_RLOCKED;
+
+	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 	    __func__));
@@ -1073,6 +1275,20 @@ tcp_do_segment(struct mbuf *m, struct tc
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
 
 		/*
+		 * By definition, the header-predicted case won't transition
+		 * states, so we shouldn't hold a global write lock here.
+		 *
+		 * XXXRW: Due to tcp_read_locking, temporarily disable
+		 * assertion.
+		 */
+#if 0
+		INP_INFO_RLOCK_ASSERT(&tcbinfo);
+		KASSERT(ti_locked == TI_RLOCKED,
+		    ("tcp_do_segment: header prediction ti_locked %d",
+		    ti_locked));
+#endif
+
+		/*
 		 * If last ACK falls within this segment's sequence numbers,
 		 * record the timestamp.
 		 * NOTE that the test is modified according to the latest
@@ -1096,14 +1312,19 @@ tcp_do_segment(struct mbuf *m, struct tc
 			      !IN_FASTRECOVERY(tp) &&
 			      (to.to_flags & TOF_SACK) == 0 &&
 			      TAILQ_EMPTY(&tp->snd_holes)))) {
-				KASSERT(headlocked,
-				    ("%s: headlocked", __func__));
-				INP_INFO_WUNLOCK(&V_tcbinfo);
-				headlocked = 0;
 				/*
 				 * This is a pure ack for outstanding data.
+				 *
+				 * XXXRW: temporarily tolerate a write lock
+				 * here.
 				 */
+				if (ti_locked == TI_RLOCKED)
+					INP_INFO_RUNLOCK(&V_tcbinfo);
+				else
+					INP_INFO_WUNLOCK(&V_tcbinfo);
+				ti_locked = TI_UNLOCKED;
 				++V_tcpstat.tcps_predack;
+
 				/*
 				 * "bad retransmit" recovery.
 				 */
@@ -1190,14 +1411,19 @@ tcp_do_segment(struct mbuf *m, struct tc
 		    tlen <= sbspace(&so->so_rcv)) {
 			int newsize = 0;	/* automatic sockbuf scaling */
 
-			KASSERT(headlocked, ("%s: headlocked", __func__));
-			INP_INFO_WUNLOCK(&V_tcbinfo);
-			headlocked = 0;
 			/*
 			 * This is a pure, in-sequence data packet
 			 * with nothing on the reassembly queue and
 			 * we have enough buffer space to take it.
+			 *
+			 * XXXRW: Temporarily tolerate write lock here.
 			 */
+			if (ti_locked == TI_RLOCKED)
+				INP_INFO_RUNLOCK(&tcbinfo);
+			else
+				INP_INFO_WUNLOCK(&tcbinfo);
+			ti_locked = TI_UNLOCKED;
+
 			/* Clean receiver SACK report if present */
 			if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
 				tcp_clean_sackreport(tp);
@@ -1424,8 +1650,9 @@ tcp_do_segment(struct mbuf *m, struct tc
 			tp->t_state = TCPS_SYN_RECEIVED;
 		}
 
-		KASSERT(headlocked, ("%s: trimthenstep6: head not locked",
-		    __func__));
+		INP_INFO_WLOCK_ASSERT(&tcbinfo);
+		KASSERT(ti_locked == TI_WLOCKED,
+		    ("trimthenstep6: ti_locked %d", ti_locked));
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
@@ -1553,17 +1780,23 @@ tcp_do_segment(struct mbuf *m, struct tc
 			case TCPS_CLOSE_WAIT:
 				so->so_error = ECONNRESET;
 			close:
+				KASSERT(ti_locked == TI_WLOCKED,
+				    ("tcp_do_segment: TH_RST 1 ti_locked %d",
+				    ti_locked));
+				INP_INFO_WLOCK_ASSERT(&tcbinfo);
+
 				tp->t_state = TCPS_CLOSED;
 				V_tcpstat.tcps_drops++;
-				KASSERT(headlocked, ("%s: trimthenstep6: "
-				    "tcp_close: head not locked", __func__));
 				tp = tcp_close(tp);
 				break;
 
 			case TCPS_CLOSING:
 			case TCPS_LAST_ACK:
-				KASSERT(headlocked, ("%s: trimthenstep6: "
-				    "tcp_close.2: head not locked", __func__));
+				KASSERT(ti_locked == TI_WLOCKED,
+				    ("tcp_do_segment: TH_RST 2 ti_locked %d",
+				    ti_locked));
+				INP_INFO_WLOCK_ASSERT(&tcbinfo);
+
 				tp = tcp_close(tp);
 				break;
 			}
@@ -1668,8 +1901,11 @@ tcp_do_segment(struct mbuf *m, struct tc
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		char *s;
 
-		KASSERT(headlocked, ("%s: trimthenstep6: tcp_close.3: head "
-		    "not locked", __func__));
+		KASSERT(ti_locked == TI_WLOCKED, ("tcp_do_segment: "
+		    "SS_NOFDEREF && CLOSE_WAIT && tlen ti_locked %d",
+		    ti_locked));
+		INP_INFO_WLOCK_ASSERT(&tcbinfo);
+
 		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
 			    "was closed, sending RST and removing tcpcb\n",
@@ -1741,8 +1977,10 @@ tcp_do_segment(struct mbuf *m, struct tc
 	 * error and we send an RST and drop the connection.
 	 */
 	if (thflags & TH_SYN) {
-		KASSERT(headlocked, ("%s: tcp_drop: trimthenstep6: "
-		    "head not locked", __func__));
+		KASSERT(ti_locked == TI_WLOCKED,
+		    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
+		INP_INFO_WLOCK_ASSERT(&tcbinfo);
+
 		tp = tcp_drop(tp, ECONNRESET);
 		rstreason = BANDLIM_UNLIMITED;
 		goto drop;
@@ -2029,8 +2267,9 @@ tcp_do_segment(struct mbuf *m, struct tc
 		}
 
 process_ACK:
-		KASSERT(headlocked, ("%s: process_ACK: head not locked",
-		    __func__));
+		INP_INFO_LOCK_ASSERT(&tcbinfo);
+		KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+		    ("tcp_input: process_ACK ti_locked %d", ti_locked));
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		acked = th->th_ack - tp->snd_una;
@@ -2187,11 +2426,9 @@ process_ACK:
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
-				KASSERT(headlocked, ("%s: process_ACK: "
-				    "head not locked", __func__));
+				INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 				tcp_twstart(tp);
 				INP_INFO_WUNLOCK(&V_tcbinfo);
-				headlocked = 0;
 				m_freem(m);
 				return;
 			}
@@ -2205,8 +2442,7 @@ process_ACK:
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
-				KASSERT(headlocked, ("%s: process_ACK: "
-				    "tcp_close: head not locked", __func__));
+				INP_INFO_WLOCK_ASSERT(&tcbinfo);
 				tp = tcp_close(tp);
 				goto drop;
 			}
@@ -2215,7 +2451,9 @@ process_ACK:
 	}
 
 step6:
-	KASSERT(headlocked, ("%s: step6: head not locked", __func__));
+	INP_INFO_LOCK_ASSERT(&tcbinfo);
+	KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+	    ("tcp_do_segment: step6 ti_locked %d", ti_locked));
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
@@ -2301,7 +2539,9 @@ step6:
 			tp->rcv_up = tp->rcv_nxt;
 	}
 dodata:							/* XXX */
-	KASSERT(headlocked, ("%s: dodata: head not locked", __func__));
+	INP_INFO_LOCK_ASSERT(&tcbinfo);
+	KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+	    ("tcp_do_segment: dodata ti_locked %d", ti_locked));
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
@@ -2420,15 +2660,32 @@ dodata:							/* XXX */
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
-			KASSERT(headlocked == 1, ("%s: dodata: "
-			    "TCP_FIN_WAIT_2: head not locked", __func__));
+			INP_INFO_WLOCK_ASSERT(&tcbinfo);
+			KASSERT(ti_locked == TI_WLOCKED, ("tcp_do_segment: "
+			    "dodata TCP_FIN_WAIT_2 ti_locked: %d",
+			    ti_locked));
+
 			tcp_twstart(tp);
 			INP_INFO_WUNLOCK(&V_tcbinfo);
 			return;
 		}
 	}
-	INP_INFO_WUNLOCK(&V_tcbinfo);
-	headlocked = 0;
+	switch (ti_locked) {
+	case TI_RLOCKED:
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		break;
+
+	case TI_WLOCKED:
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		break;
+
+	default:
+		panic("tcp_do_segment: dodata epilogue ti_locked %d",
+		    ti_locked);
+	}
+	ti_locked = TI_UNLOCKED;
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
@@ -2442,10 +2699,11 @@ dodata:							/* XXX */
 		(void) tcp_output(tp);
 
 check_delack:
-	KASSERT(headlocked == 0, ("%s: check_delack: head locked",
-	    __func__));
+	KASSERT(ti_locked == TI_UNLOCKED, ("tcp_do_segment: check_delack "
+	    "ti_locked %d", ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
+
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
@@ -2454,7 +2712,9 @@ check_delack:
 	return;
 
 dropafterack:
-	KASSERT(headlocked, ("%s: dropafterack: head not locked", __func__));
+	KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
+	    ("tcp_do_segment: dropafterack ti_locked %d", ti_locked));
+
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies
 	 * sequence space, where the ACK reflects our state.
@@ -2481,8 +2741,22 @@ dropafterack:
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
-	KASSERT(headlocked, ("%s: headlocked should be 1", __func__));
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	switch (ti_locked) {
+	case TI_RLOCKED:
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		break;
+
+	case TI_WLOCKED:
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		break;
+
+	default:
+		panic("tcp_do_segment: dropafterack epilogue ti_locked %d",
+		    ti_locked);
+	}
+	ti_locked = TI_UNLOCKED;
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
 	INP_WUNLOCK(tp->t_inpcb);
@@ -2490,8 +2764,21 @@ dropafterack:
 	return;
 
 dropwithreset:
-	KASSERT(headlocked, ("%s: dropwithreset: head not locked", __func__));
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	switch (ti_locked) {
+	case TI_RLOCKED:
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		break;
+
+	case TI_WLOCKED:
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		break;
+
+	default:
+		panic("tcp_do_segment: dropwithreset ti_locked %d",
+		    ti_locked);
+	}
 
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
@@ -2501,6 +2788,22 @@ dropwithreset:
 	return;
 
 drop:
+	switch (ti_locked) {
+	case TI_RLOCKED:
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		break;
+
+	case TI_WLOCKED:
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		break;
+
+	case TI_UNLOCKED:
+		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+		break;
+	}
+
 	/*
 	 * Drop space held by incoming segment and return.
 	 */
@@ -2511,8 +2814,6 @@ drop:
 #endif
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
-	if (headlocked)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
 	m_freem(m);
 }
 
@@ -2683,7 +2984,7 @@ tcp_pulloutofband(struct socket *so, str
 			char *cp = mtod(m, caddr_t) + cnt;
 			struct tcpcb *tp = sototcpcb(so);
 
-			INP_WLOCK_ASSERT(tp->t_inpcb);
+			INP_LOCK_ASSERT(tp->t_inpcb);
 
 			tp->t_iobc = *cp;
 			tp->t_oobflags |= TCPOOB_HAVEDATA;
@@ -2830,7 +3131,7 @@ tcp_mss_update(struct tcpcb *tp, int off
 	const size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
 
-	INP_WLOCK_ASSERT(tp->t_inpcb);
+	INP_LOCK_ASSERT(tp->t_inpcb);
 
 	/* Initialize. */
 #ifdef INET6
@@ -3144,7 +3445,7 @@ tcp_newreno_partial_ack(struct tcpcb *tp
 	tcp_seq onxt = tp->snd_nxt;
 	u_long  ocwnd = tp->snd_cwnd;
 
-	INP_WLOCK_ASSERT(tp->t_inpcb);
+	INP_LOCK_ASSERT(tp->t_inpcb);
 
 	tcp_timer_activate(tp, TT_REXMT, 0);
 	tp->t_rtttime = 0;
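
The read/write lock counters added at the top of the diff record how often
each path is taken.  A hypothetical userland reader (not part of this commit)
that dumps them with sysctlbyname(3), using the leaf names exactly as
registered above, might look like:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
	/*
	 * Leaf names as registered in the patch; note that the
	 * wlock_atfirst counter is registered as "tcp_wlock_atfirst".
	 */
	static const char *oids[] = {
		"net.inet.tcp.read_locking",
		"net.inet.tcp.rlock_atfirst",
		"net.inet.tcp.tcp_wlock_atfirst",
		"net.inet.tcp.rlock_downgraded",
		"net.inet.tcp.wlock_upgraded",
		"net.inet.tcp.wlock_looped",
	};
	size_t len;
	u_int i;
	int val;

	for (i = 0; i < sizeof(oids) / sizeof(oids[0]); i++) {
		len = sizeof(val);
		if (sysctlbyname(oids[i], &val, &len, NULL, 0) == 0)
			printf("%s: %d\n", oids[i], val);
		else
			printf("%s: unavailable (kernel lacks this patch?)\n",
			    oids[i]);
	}
	return (0);
}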

