PERFORCE change 128974 for review

Kip Macy kmacy at FreeBSD.org
Sun Nov 11 22:29:34 PST 2007


http://perforce.freebsd.org/chv.cgi?CH=128974

Change 128974 by kmacy at kmacy:storage:toestack on 2007/11/12 06:28:34

	- fix passive establish handling by initializing tp_ulp
	- fix race condition in t3_push_frames by adding locking
	- lock calls to tcp_close
	- change remaining cases where ctx was still being treated as a socket
	- update notes

Affected files ...

.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#21 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_listen.c#5 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/notes#4 edit

Differences ...

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#21 (text+ko) ====

@@ -139,12 +139,25 @@
 
 static void t3_send_reset(struct socket *so);
 
-static inline
-int is_t3a(const struct toedev *dev)
+static inline int
+is_t3a(const struct toedev *dev)
 {
 	return (dev->ttid == TOE_ID_CHELSIO_T3);
 }
 
+static void
+dump_toepcb(struct toepcb *toep)
+{
+	printf("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
+	    toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
+	    toep->tp_mtu_idx, toep->tp_tid);
+
+	printf("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
+	    toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 
+	    toep->tp_mss_clamp, toep->tp_flags);
+}
+
+
 /*
  * Determine whether to send a CPL message now or defer it.  A message is
  * deferred if the connection is in SYN_SENT since we don't know the TID yet.
@@ -234,27 +247,45 @@
 	bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
 	segp = segs;
 
-	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED)
+	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
+		printf("tcp state=%d\n", tp->t_state);	
 		return (0);
+	}	
 
-	if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
+	if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
+		printf("disconnecting\n");
+		
 		return (0);
+	}
 
+	SOCKBUF_LOCK(&so->so_snd);
+	
 	d = TOM_DATA(TOE_DEV(so));
 	cdev = d->cdev;
 	last = tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb;
 	total_bytes = 0;
+	printf("tail=%p snd.cc=%d tp_last=%4\n", tail, so->so_snd.sb_cc,
+		toep->tp_m_last);
+
 	if (last && toep->tp_m_last == last) {
 		KASSERT(tail, ("sbdrop error"));
 		last = tail = tail->m_next;
 	}
 
+	if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {		
+		SOCKBUF_UNLOCK(&so->so_snd);
+		return (0);		
+	}
+			
 	while (toep->tp_wr_avail && (tail != NULL)) {
 
+		
 		count = bytes = 0;
-		if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
+		if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
+			SOCKBUF_UNLOCK(&so->so_snd);
 			return (0);
-
+		}
+		
 		while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) && (tail != NULL) && (count < TX_MAX_SEGS)) {
 			bytes += tail->m_len;
 			count++;
@@ -269,6 +300,8 @@
 			segp++;
 			tail = tail->m_next;
 		}
+		printf("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
+		    toep->tp_wr_avail, count, mbuf_wrs[count], tail);	
 		if (tail) {
 			so->so_snd.sb_sndptr = tail;
 			toep->tp_m_last = NULL;
@@ -279,6 +312,8 @@
 		total_bytes += bytes;
 		toep->tp_write_seq += bytes;
 
+
+		SOCKBUF_UNLOCK(&so->so_snd);
 		
 		/*
 		 * XXX can drop socket buffer lock here
@@ -310,9 +345,11 @@
 		    bytes, count);
 		
 		l2t_send(cdev, m0, toep->tp_l2t);
+		if (toep->tp_wr_avail && (tail != NULL)) 
+			SOCKBUF_LOCK(&so->so_snd);
 	}
-	
-	
+
+	SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
 	return (total_bytes);
 }
 
@@ -334,7 +371,7 @@
 	struct toepcb *toep = tp->t_toe;
 	unsigned int tid = toep->tp_tid;
 	
-	d = TOM_DATA(TOE_DEV(so));
+	d = TOM_DATA(toep->tp_toedev);
 
 	if (tp->t_state != TCPS_SYN_SENT)
 		t3_push_frames(so, 1);
@@ -494,9 +531,10 @@
 cxgb_toe_send(struct tcpcb *tp)
 {
 	struct socket *so;
+	
+	printf("cxgb_toe_send\n");
+	dump_toepcb(tp->t_toe);
 
-	printf("cxgb_toe_send\n");
-	
 	so = tp->t_inpcb->inp_socket;
 	t3_push_frames(so, 1);
 	return (0);
@@ -944,9 +982,13 @@
 calc_opt0l(struct socket *so, int ulp_mode)
 {
 	struct tcpcb *tp = sototcpcb(so);
+	unsigned int val;
 	
-	return V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
+	val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
 	       V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
+
+	printf("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val);
+	return (val);
 }
 
 static inline unsigned int
@@ -984,7 +1026,7 @@
 	req->peer_port = inp->inp_fport;
 	memcpy(&req->local_ip, &inp->inp_laddr, 4);
 	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
-
+	printf("connect smt_idx=%d\n", e->smt_idx);
 	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
 			   V_TX_CHANNEL(e->smt_idx));
 	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
@@ -1070,7 +1112,9 @@
 static int
 do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
-	struct socket *so = (struct socket *)ctx;
+	struct toepcb *toep = (struct toepcb *)ctx;
+	struct socket *so = toeptoso(toep);
+	
 #ifdef notyet	
 	struct cpl_act_open_rpl *rpl = cplhdr(m);
 
@@ -1468,9 +1512,13 @@
 		 * Otherwise we enter TIME_WAIT.
 		 */
 		t3_release_offload_resources(so);
-		if (toep->tp_flags & TP_ABORT_RPL_PENDING)
+		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+			INP_INFO_WLOCK(&tcbinfo);
+			INP_LOCK(tp->t_inpcb);
 			tcp_close(tp);
-		else
+			INP_INFO_WUNLOCK(&tcbinfo);
+			INP_UNLOCK(tp->t_inpcb);					
+		} else
 			enter_timewait(so);
 		break;
 	default:
@@ -1502,7 +1550,8 @@
 static int
 do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
-	struct socket *so = (struct socket *)ctx;
+	struct toepcb *toep = (struct toepcb *)ctx;
+	struct socket *so = toeptoso(toep);
 
 	VALIDATE_SOCK(so);
 
@@ -1525,9 +1574,13 @@
 	switch (tp->t_state) {
 	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
 		t3_release_offload_resources(so);
-		if (toep->tp_flags & TP_ABORT_RPL_PENDING)
+		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+			INP_INFO_WLOCK(&tcbinfo);
+			INP_LOCK(tp->t_inpcb);
 			tcp_close(tp);
-		else
+			INP_INFO_WUNLOCK(&tcbinfo);
+			INP_UNLOCK(tp->t_inpcb);		
+		} else
 			enter_timewait(so);
 		break;
 	case TCPS_LAST_ACK:
@@ -1537,7 +1590,12 @@
 		 * late, this close_con_rpl is the actual last message.
 		 */
 		t3_release_offload_resources(so);
+		INP_INFO_WLOCK(&tcbinfo);
+		INP_LOCK(tp->t_inpcb);
 		tcp_close(tp);
+		INP_INFO_WUNLOCK(&tcbinfo);
+		INP_UNLOCK(tp->t_inpcb);
+
 		break;
 	case TCPS_FIN_WAIT_1:
 #ifdef notyet
@@ -1576,7 +1634,8 @@
 do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
 			    void *ctx)
 {
-	struct socket *so = (struct socket *)ctx;
+	struct toepcb *toep = (struct toepcb *)ctx;
+	struct socket *so = toeptoso(toep);
 
 	VALIDATE_SOCK(so);
 
@@ -1613,7 +1672,11 @@
 				if (toep->tp_flags & TP_ABORT_REQ_RCVD)
 					panic("TP_ABORT_REQ_RCVD set");
 				t3_release_offload_resources(so);
+				INP_INFO_WLOCK(&tcbinfo);
+				INP_LOCK(tp->t_inpcb);
 				tcp_close(tp);
+				INP_INFO_WUNLOCK(&tcbinfo);
+				INP_UNLOCK(tp->t_inpcb);
 			}
 		}
 	}
@@ -1642,9 +1705,10 @@
 		return (0);
 	}
 
-	so = (struct socket *)ctx;
-
-	/*
+	toep = (struct toepcb *)ctx;
+	so = toeptoso(toep);
+	
+        /*
 	 * Sometimes we've already closed the socket, e.g., a post-close
 	 * abort races with ABORT_REQ_RSS, the latter frees the socket
 	 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
@@ -1654,7 +1718,6 @@
 	if (!so)
 		goto discard;
 	
-	toep = sototcpcb(so)->t_toe;
 	toepcb_hold(toep);
 	process_abort_rpl(so, m);
 	toepcb_release(toep);
@@ -1775,6 +1838,7 @@
 {
 	struct tcpcb *parenttp = sototcpcb(parent);
 	struct tcpcb *childtp = sototcpcb(child);
+	struct inpcb *inp = sotoinpcb(child);
 	
 	/*
 	 * If the server is still open we clean up the child connection,
@@ -1784,7 +1848,11 @@
 	if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
 		cleanup_syn_rcv_conn(child, parent);
 		t3_release_offload_resources(child);
+		INP_INFO_WLOCK(&tcbinfo);
+		INP_LOCK(inp);
 		tcp_close(childtp);
+		INP_INFO_WUNLOCK(&tcbinfo);
+		INP_UNLOCK(inp);
 	}
 }
 
@@ -1865,7 +1933,11 @@
 			return;
 
 		t3_release_offload_resources(so);
+		INP_INFO_WLOCK(&tcbinfo);
+		INP_LOCK(tp->t_inpcb);
 		tcp_close(tp);
+		INP_INFO_WUNLOCK(&tcbinfo);
+		INP_UNLOCK(tp->t_inpcb);		
 	}
 	
 	send_abort_rpl(m, tdev, rst_status);
@@ -2140,6 +2212,8 @@
 	toep->tp_iss = th.th_seq = req->rcv_isn;
 	th.th_flags = TH_SYN;
 
+	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = ntohl(req->rcv_isn);
+	
 	inc.inc_isipv6 = 0;
 	inc.inc_len = 0;
 	inc.inc_faddr.s_addr = req->peer_ip;
@@ -2293,17 +2367,35 @@
 	rpl = cplhdr(reply_mbuf);
 	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
 	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	rpl->wr.wr_lo = 0;
 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
+	rpl->opt2 = htonl(calc_opt2(so, tdev));
+	rpl->rsvd = rpl->opt2;                /* workaround for HW bug */
 	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten
+
+	printf("accept smt_idx=%d\n", e->smt_idx);
 	
-	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | V_L2T_IDX(e->idx) |
-			   V_TX_CHANNEL(e->smt_idx));
+	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
+	    V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
 	rpl->opt0l_status = htonl(calc_opt0l(so, lctx->ulp_mode) |
 				  CPL_PASS_OPEN_ACCEPT);
-	rpl->opt2 = htonl(calc_opt2(so, tdev));
-	rpl->rsvd = rpl->opt2;                /* workaround for HW bug */
+
+	printf("opt0l_status=%08x\n", rpl->opt0l_status);
+	
 	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, so));
 	
+	{
+		int i;
+
+		printf("rpl:\n");
+		uint32_t *rplbuf = mtod(reply_mbuf, uint32_t *);
+		
+		for (i = 0; i < sizeof(*rpl)/sizeof(uint32_t); i++)
+			printf("[%d] %08x\n", i, rplbuf[i]);
+	}
+	
+
+		
 	l2t_send(cdev, reply_mbuf, e);
 	m_free(m);
 #ifdef notyet
@@ -2429,6 +2521,8 @@
 	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
 		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
 
+	dump_toepcb(toep);
+
 #ifdef notyet
 /*
  * no clean interface for marking ARP up to date
@@ -2539,9 +2633,14 @@
 	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
 	toep->tp_qset_idx = 0;
 	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
-
+	
+	/*
+	 * XXX Cancel any keep alive timer
+	 */
+	     
 	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
 	INP_INFO_WUNLOCK(&tcbinfo);
+	
 	soisconnected(so);
 	
 #ifdef notyet
@@ -2587,6 +2686,8 @@
 	struct toepcb *toep = tp->t_toe;
 	unsigned int tid = toep->tp_tid;
 
+	printf("fixup_and_send_ofo\n");
+	
 	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
 		/*
 		 * A variety of messages can be waiting but the fields we'll
@@ -2799,6 +2900,7 @@
 	struct toepcb *toep = (struct toepcb *)ctx;
 
 	printf("do_wr_ack\n");
+	dump_toepcb(toep);
 	
 	VALIDATE_SOCK(so);
 

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_listen.c#5 (text+ko) ====

@@ -253,7 +253,8 @@
 
 	ctx->tom_data = d;
 	ctx->lso = so;
-
+	ctx->ulp_mode = 0; /* DDP is the default */
+	
 	stid = cxgb_alloc_stid(d->cdev, d->client, ctx);
 	if (stid < 0)
 		goto free_ctx;

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/notes#4 (text+ko) ====

@@ -1,9 +1,10 @@
-Currently untested:
+Somewhat untested:
  - abort
 
 Currently unimplemented:
- - complete listen handling
+ - DDP
 
+ - module unload 
  - close for a subset of states
  - correct ARP failure handling
  - urgent data
@@ -11,7 +12,7 @@
  - connection retry
  - fragment assembly and re-tunneling is not implemented, but may work just 
    using the native stack - not clear how credit accounting will sync up
- - DDP
+
 
 open questions:
 What attributes are inherited from the listen socket? Should we be inheriting more?


More information about the p4-projects mailing list