PERFORCE change 130582 for review

Kip Macy kmacy at FreeBSD.org
Sun Dec 9 19:50:43 PST 2007


http://perforce.freebsd.org/chv.cgi?CH=130582

Change 130582 by kmacy at kmacy:storage:toestack on 2007/12/10 03:50:20

	- remove mk_pass_sock
	- change "release" functions to use the toepcb to better handle the case of
	  the socket going away
	- add locking around detach to fix races

Affected files ...

.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#31 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#4 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h#3 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#14 edit

Differences ...

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#31 (text+ko) ====

@@ -142,7 +142,7 @@
 
 static void t3_send_reset(struct socket *so);
 static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
-
+static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
 
 static inline int
 is_t3a(const struct toedev *dev)
@@ -356,7 +356,7 @@
 		}
 
 		m0->m_type = MT_DONTFREE;
-		enqueue_wr(tp, m0);
+		enqueue_wr(toep, m0);
 		DPRINTF("sending offload tx with %d bytes in %d segments\n",
 		    bytes, count);
 		
@@ -590,14 +590,20 @@
 cxgb_toe_detach(struct tcpcb *tp)
 {
 	struct toepcb *toep;
+	/*
+	 * XXX how do we handle teardown in the SYN_SENT state?
+	 *
+	 */
+	INP_INFO_WLOCK(&tcbinfo);
+	toep = tp->t_toe;
+	toep->tp_tp = NULL;
 
 	/*
 	 * unhook from socket
 	 */
 	tp->t_flags &= ~TF_TOE;
-	toep = tp->t_toe;
-	toep->tp_tp = NULL;
 	tp->t_toe = NULL;
+	INP_INFO_WUNLOCK(&tcbinfo);
 }
 	
 
@@ -844,7 +850,7 @@
 }
 
 void
-t3_release_ddp_resources(struct socket *so)
+t3_release_ddp_resources(struct toepcb *toep)
 {
 	/*
 	 * This is a no-op until we have DDP support
@@ -864,24 +870,22 @@
  * Release resources held by an offload connection (TID, L2T entry, etc.)
  */
 static void
-t3_release_offload_resources(struct socket *so)
+t3_release_offload_resources(struct toepcb *toep)
 {
-	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
-	struct toedev *tdev = TOE_DEV(so);
+	struct tcpcb *tp = toep->tp_tp;
+	struct toedev *tdev = toep->tp_toedev;
 	struct t3cdev *cdev;
 	unsigned int tid = toep->tp_tid;
 
 	if (!tdev)
 		return;
 
-	cdev = T3C_DEV(so);
+	cdev = TOEP_T3C_DEV(toep);
 	if (!cdev)
 		return;
 
-	INP_LOCK_ASSERT(tp->t_inpcb);
 	toep->tp_qset = 0;
-	t3_release_ddp_resources(so);
+	t3_release_ddp_resources(toep);
 
 #ifdef CTRL_SKB_CACHE
 	kfree_skb(CTRL_SKB_CACHE(tp));
@@ -889,8 +893,8 @@
 #endif
 
 	if (toep->tp_wr_avail != toep->tp_wr_max) {
-		purge_wr_queue(tp);
-		reset_wr_list(tp);
+		purge_wr_queue(toep);
+		reset_wr_list(toep);
 	}
 
 	if (toep->tp_l2t) {
@@ -900,10 +904,13 @@
 	printf("setting toep->tp_tp to NULL\n");
 	
 	toep->tp_tp = NULL;
-	tp->t_toe = NULL;
-	tp->t_flags &= ~TF_TOE;
+	if (tp) {
+		INP_LOCK_ASSERT(tp->t_inpcb);
+		tp->t_toe = NULL;
+		tp->t_flags &= ~TF_TOE;
+	}
 	
-	if (tp->t_state == TCPS_SYN_SENT) {
+	if (toep->tp_state == TCPS_SYN_SENT) {
 		free_atid(cdev, tid);
 #ifdef notyet		
 		__skb_queue_purge(&tp->out_of_order_queue);
@@ -1014,7 +1021,7 @@
 		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
 	toep->tp_qset_idx = 0;
 	
-	reset_wr_list(tp);
+	reset_wr_list(toep);
 	DPRINTF("initialization done\n");
 }
 
@@ -1113,13 +1120,15 @@
 }
 
 static void
-fail_act_open(struct socket *so, int errno)
+fail_act_open(struct toepcb *toep, int errno)
 {
-	struct tcpcb *tp = sototcpcb(so);
+	struct tcpcb *tp = toep->tp_tp;
 
-	INP_LOCK_ASSERT(tp->t_inpcb);
-	t3_release_offload_resources(so);
-	tcp_drop(tp, errno);
+	t3_release_offload_resources(toep);
+	if (tp) {
+		INP_LOCK_ASSERT(tp->t_inpcb);
+		tcp_drop(tp, errno);
+	}
 	
 #ifdef notyet
 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
@@ -1130,13 +1139,18 @@
  * Handle active open failures.
  */
 static void
-active_open_failed(struct socket *so, struct mbuf *m)
+active_open_failed(struct toepcb *toep, struct mbuf *m)
 {
 	struct cpl_act_open_rpl *rpl = cplhdr(m);
-	struct inpcb *inp = sotoinpcb(so);
-	
+	struct inpcb *inp;
+
 	INP_INFO_WLOCK(&tcbinfo);
+	if (toep->tp_tp == NULL)
+		goto done;
+
+	inp = toep->tp_tp->t_inpcb;
 	INP_LOCK(inp);
+
 /*
  * Don't handle connection retry for now
  */
@@ -1150,9 +1164,11 @@
 			       jiffies + HZ / 2);
 	} else
 #endif		
-		fail_act_open(so, act_open_rpl_status_to_errno(rpl->status));
+		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
 	INP_UNLOCK(inp);
+done:
 	INP_INFO_WUNLOCK(&tcbinfo);
+
 	m_free(m);
 }
 
@@ -1173,15 +1189,12 @@
 do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
 	struct toepcb *toep = (struct toepcb *)ctx;
-	struct socket *so = NULL;
 	struct cpl_act_open_rpl *rpl = cplhdr(m);
 	
 	if (cdev->type != T3A && act_open_has_tid(rpl->status))
 		cxgb_queue_tid_release(cdev, GET_TID(rpl));
-	if (toep->tp_tp != NULL)
-		so = toeptoso(toep);
 	
-	active_open_failed(so, m);
+	active_open_failed(toep, m);
 	return (0);
 }
 
@@ -1236,7 +1249,7 @@
 	if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
 		goto out_err;
 	
-	e = t3_l2t_get(d->cdev, dst, egress_ifp);
+	e = t3_l2t_get(d->cdev, dst, egress_ifp, &inp->inp_route.ro_dst);
 	if (!e)
 		goto free_tid;
 
@@ -1262,6 +1275,7 @@
 	
 	printf("sending off request\n");
 	
+	toep->tp_state = TCPS_SYN_SENT;
 	l2t_send(d->cdev, (struct mbuf *)m, e);
 
 	if (toep->tp_ulp_mode)
@@ -1814,7 +1828,7 @@
 		 * time of generating the RST but we must wait for HW).
 		 * Otherwise we enter TIME_WAIT.
 		 */
-		t3_release_offload_resources(so);
+		t3_release_offload_resources(toep);
 		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
 			tp = tcp_close(tp);
 		} else
@@ -1884,7 +1898,7 @@
 	INP_LOCK(tp->t_inpcb);
 	switch (tp->t_state) {
 	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
-		t3_release_offload_resources(so);
+		t3_release_offload_resources(toep);
 		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
 			tp = tcp_close(tp);
 
@@ -1897,7 +1911,7 @@
 		 * If we've sent abort_req it was post-close and was sent too
 		 * late, this close_con_rpl is the actual last message.
 		 */
-		t3_release_offload_resources(so);
+		t3_release_offload_resources(toep);
 		tp = tcp_close(tp);
 		break;
 	case TCPS_FIN_WAIT_1:
@@ -1982,7 +1996,7 @@
 					panic("TP_ABORT_REQ_RCVD set");
 				INP_INFO_WLOCK(&tcbinfo);
 				INP_LOCK(tp->t_inpcb);
-				t3_release_offload_resources(so);
+				t3_release_offload_resources(toep);
 				tp = tcp_close(tp);
 				INP_INFO_WUNLOCK(&tcbinfo);
 			}
@@ -2176,7 +2190,7 @@
 		cleanup_syn_rcv_conn(child, parent);
 		INP_INFO_WLOCK(&tcbinfo);
 		INP_LOCK(childtp->t_inpcb);
-		t3_release_offload_resources(child);
+		t3_release_offload_resources(childtp->t_toe);
 		childtp = tcp_close(childtp);
 		INP_INFO_WUNLOCK(&tcbinfo);
 		if (childtp)
@@ -2260,7 +2274,7 @@
 		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
 			goto skip;
 
-		t3_release_offload_resources(so);
+		t3_release_offload_resources(toep);
 		tp = tcp_close(tp);
 	}
 	if (tp)
@@ -2403,95 +2417,6 @@
 }
 
 /*
- * Create a new socket as a child of the listening socket 'lsk' and initialize
- * with the information in the supplied PASS_ACCEPT_REQ message.
- */
-#ifdef notyet
-static struct socket *
-mk_pass_sock(struct socket *lso, struct toedev *dev, int tid,
-				 struct cpl_pass_accept_req *req)
-{
-	UNIMPLEMENTED();
-	
-#ifdef notyet	
-	struct socket *newso;
-	struct l2t_entry *e;
-	struct rtentry *dst;
-	struct tcpcb *newtp;
-	struct ifp *egress;
-	struct socket *oreq = reqsk_alloc(&t3_rsk_ops);
-
-	if (!oreq)
-		goto out_err;
-
-	tcp_rsk(oreq)->rcv_isn = ntohl(req->rcv_isn);
-	inet_rsk(oreq)->rmt_port = req->peer_port;
-	t3_set_req_addr(oreq, req->local_ip, req->peer_ip);
-	t3_set_req_opt(oreq, NULL);
-	if (sysctl_tcp_window_scaling) {
-		inet_rsk(oreq)->wscale_ok = 1;
-		inet_rsk(oreq)->snd_wscale = req->tcp_options.wsf;
-	}
-
-	dst = route_req(lsk, oreq);
-	if (!dst)
-		goto free_or;
-
-	newsk = tcp_create_openreq_child(lsk, oreq, tcphdr_skb);
-	if (!newsk)
-		goto free_dst;
-
-	egress = offload_get_phys_egress(dst->neighbour->dev, newsk, TOE_OPEN);
-	if (!egress || TOEDEV(egress) != dev)
-		goto free_dst;
-
-	e = t3_l2t_get(TOM_DATA(dev)->cdev, dst->neighbour, egress);
-	if (!e)
-		goto free_sk;
-
-
-	if (sock_flag(newsk, SOCK_KEEPOPEN))
-		inet_csk_delete_keepalive_timer(newsk);
-	oreq->ts_recent = G_PASS_OPEN_TID(ntohl(req->tos_tid));
-	newsk->sk_user_data = oreq;
-	sk_setup_caps(newsk, dst);
-
-	newtp = tcp_sk(newsk);
-	init_offload_sk(newsk, dev, tid, e, dst);
-	DELACK_SEQ(newtp) = newtp->rcv_nxt;
-	RCV_WSCALE(newtp) = select_rcv_wscale(tcp_full_space(newsk),
-					      WSCALE_OK(newtp),
-					      newtp->window_clamp);
-
-#ifdef	LINUX_2_4
-	newsk->daddr = req->peer_ip;
-	newsk->rcv_saddr = req->local_ip;
-	newsk->saddr = req->local_ip;
-#else
-	inet_sk(newsk)->daddr = req->peer_ip;
-	inet_sk(newsk)->rcv_saddr = req->local_ip;
-	inet_sk(newsk)->saddr = req->local_ip;
-#endif	/* LINUX_2_4 */
-
-	lsk->sk_prot->hash(newsk);
-	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
-	install_offload_ops(newsk);
-	bh_unlock_sock(newsk);     // counters tcp_create_openreq_child()
-	return newsk;
-
-free_sk:
-	sk_free(newsk);
-free_dst:
-	dst_release(dst);
-free_or:
-	__reqsk_free(oreq);
-out_err:
-#endif	
-	return NULL;
-}
-#endif
-
-/*
  * Populate a reject CPL_PASS_ACCEPT_RPL WR.
  */
 static void
@@ -2686,7 +2611,7 @@
 	
 	nam.sin_len = sizeof(struct sockaddr_in);
 	nam.sin_family = AF_INET;
-	memcpy(&nam.sin_addr, &req->peer_ip, 4);
+	nam.sin_addr.s_addr =req->peer_ip;
 	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
 
 	if (dst == NULL) {
@@ -2694,7 +2619,8 @@
 		DPRINTF("failed to find route\n");
 		
 	}
-	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev);
+	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
+	    (struct sockaddr *)&nam);
 	if (e == NULL) {
 
 		DPRINTF("failed to get l2t\n");
@@ -3000,7 +2926,7 @@
 	toep->tp_tp = tp;
 	toep->tp_flags = 0;
 	tp->t_toe = toep;
-	reset_wr_list(tp);
+	reset_wr_list(toep);
 	tp->rcv_wnd = select_rcv_wnd(so);
 	DPRINTF("rcv_wnd=%ld\n", tp->rcv_wnd);
 	install_offload_ops(so);
@@ -3141,7 +3067,7 @@
 #endif
 
 	soisconnected(so);
-	tp->t_state = TCPS_ESTABLISHED;
+	toep->tp_state = tp->t_state = TCPS_ESTABLISHED;
 	tcpstat.tcps_connects++;
 				
 }
@@ -3157,10 +3083,19 @@
 	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
 	struct toepcb *toep = (struct toepcb *)ctx;
 	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so = toeptoso(toep);
-	struct toedev *tdev = TOE_DEV(so); /* blow up here if link was down */
-	struct tom_data *d = TOM_DATA(tdev);
-
+	struct socket *so; 
+	struct toedev *tdev;
+	struct tom_data *d;
+	
+	if (tp == NULL) {
+		free_atid(cdev, atid);
+		return (0);
+	}
+	
+	so = toeptoso(toep);
+	tdev = TOE_DEV(so); /* blow up here if link was down */
+	d = TOM_DATA(tdev);
+	
 	INP_LOCK(tp->t_inpcb);
 	
 	/*
@@ -3175,7 +3110,7 @@
 
 	socket_act_establish(so, m);
 	INP_UNLOCK(tp->t_inpcb);
-	return 0;
+	return (0);
 }
 
 /*
@@ -3201,7 +3136,7 @@
 		toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
 
 	while (credits) {
-		struct mbuf *p = peek_wr(tp);
+		struct mbuf *p = peek_wr(toep);
 		DPRINTF("p->credits=%d p->bytes=%d\n", p->m_pkthdr.csum_data, p->m_pkthdr.len) ;
 		
 		if (__predict_false(!p)) {
@@ -3227,7 +3162,7 @@
 			p->m_pkthdr.csum_data -= credits;
 			break;
 		} else {
-			dequeue_wr(tp);
+			dequeue_wr(toep);
 			credits -= p->m_pkthdr.csum_data;
 			bytes += p->m_pkthdr.len;
 			DPRINTF("done with wr of %d bytes\n", p->m_pkthdr.len);
@@ -3323,8 +3258,12 @@
 	TAILQ_FOREACH(so, &listen_so->so_comp, so_list) {
 		tp = sototcpcb(so);
 		
-		if (tp->t_flags & TF_TOE)
+		if (tp->t_flags & TF_TOE) {
+			INP_LOCK(tp->t_inpcb);
 			t3_reset_listen_child(so);
+			INP_UNLOCK(tp->t_inpcb);
+		}
+		
 	}
 }
 

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#4 (text+ko) ====

@@ -173,7 +173,7 @@
 		    int rcv_flags, int modulate, int post_kbuf);
 int t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall);
 void t3_cleanup_ddp(struct socket *so);
-void t3_release_ddp_resources(struct socket *so);
+void t3_release_ddp_resources(struct toepcb *toep);
 void t3_cancel_ddpbuf(struct socket *so, unsigned int bufidx);
 void t3_overlay_ddpbuf(struct socket *so, unsigned int bufidx, unsigned int tag0,
 		       unsigned int tag1, unsigned int len);

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h#3 (text+ko) ====

@@ -20,7 +20,8 @@
 	int tp_flags;
 	int tp_enqueued_bytes;
 	int tp_page_count;
-	
+	int tp_state;
+
 	tcp_seq tp_iss;
 	tcp_seq tp_delack_seq;
 	tcp_seq tp_rcv_wup;
@@ -40,41 +41,32 @@
 	struct ddp_state tp_ddp_state;
 };
 
-static inline void reset_wr_list(struct tcpcb *tp)
+static inline void reset_wr_list(struct toepcb *toep)
 {
-	struct toepcb *toep = tp->t_toe;
-
 	mbufq_init(&toep->wr_list);
 }
 
-static inline void purge_wr_queue(struct tcpcb *tp)
+static inline void purge_wr_queue(struct toepcb *toep)
 {
-	struct toepcb *toep = tp->t_toe;
 	struct mbuf *m;
 	
 	while ((m = mbufq_dequeue(&toep->wr_list)) != NULL) 
 		m_freem(m);
 }
 
-static inline void enqueue_wr(struct tcpcb *tp, struct mbuf *m)
+static inline void enqueue_wr(struct toepcb *toep, struct mbuf *m)
 {
-	struct toepcb *toep = tp->t_toe;
-
 	mbufq_tail(&toep->wr_list, m);
 }
 
 
-static inline struct mbuf *peek_wr(struct tcpcb *tp)
+static inline struct mbuf *peek_wr(struct toepcb *toep)
 {
-	struct toepcb *toep = tp->t_toe;
-
 	return mbufq_peek(&toep->wr_list);
 }
 
-static inline struct mbuf *dequeue_wr(struct tcpcb *tp)
+static inline struct mbuf *dequeue_wr(struct toepcb *toep)
 {
-	struct toepcb *toep = tp->t_toe;
-
 	return mbufq_dequeue(&toep->wr_list);
 }
 

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#14 (text+ko) ====

@@ -122,6 +122,7 @@
 
 #define TOM_DATA(dev) (*(struct tom_data **)&(dev)->l4opt)
 #define T3C_DEV(sk) ((TOM_DATA(TOE_DEV(sk)))->cdev)
+#define TOEP_T3C_DEV(toep) (TOM_DATA(toep->tp_toedev)->cdev)
 #define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param)
 
 #define TP_DATASENT         	(1 << 0)


More information about the p4-projects mailing list