PERFORCE change 128854 for review

Kip Macy kmacy at FreeBSD.org
Thu Nov 8 21:39:15 PST 2007


http://perforce.freebsd.org/chv.cgi?CH=128854

Change 128854 by kmacy at kmacy:storage:toestack on 2007/11/09 05:38:46

	Add infrastructure to send a SYN-ACK to establish a connection,
	and tie the connection setup into the syncache.

Affected files ...

.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#19 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_defs.h#10 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.c#10 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#10 edit

Differences ...

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#19 (text+ko) ====

@@ -39,6 +39,7 @@
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/socket.h>
+#include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
@@ -57,11 +58,11 @@
 #include <dev/cxgb/sys/mbufq.h>
 
 #include <netinet/ip.h>
-#include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_ofld.h>
 #include <netinet/tcp_seq.h>
+#include <netinet/tcp_syncache.h>
 #include <net/route.h>
 
 
@@ -703,7 +704,8 @@
 {
 	struct toepcb *toep = sototoep(so);
 	toepcb_hold(toep);
-	cxgb_insert_tid(d->cdev, d->client, so, tid);
+	
+	cxgb_insert_tid(d->cdev, d->client, toep, tid);
 }
 
 /**
@@ -934,26 +936,30 @@
 	struct tcpcb *tp = sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 	int wscale = select_rcv_wscale(tp->rcv_wnd);
+	int qset_idx;
+
+	if (toep)
+		qset_idx = toep->tp_qset_idx;
+	else
+		qset_idx = 0;
 	
 	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
 	    V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
-	    V_WND_SCALE(wscale) | V_MSS_IDX(toep->tp_qset_idx);
+	    V_WND_SCALE(wscale) | V_MSS_IDX(qset_idx);
 }
 
 static inline unsigned int
-calc_opt0l(struct socket *so)
+calc_opt0l(struct socket *so, int ulp_mode)
 {
 	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
 	
-	return V_TOS(SO_TOS(so)) | V_ULP_MODE(toep->tp_ulp_mode) |
+	return V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
 	       V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
 }
 
 static inline unsigned int
-calc_opt2(const struct socket *so)
+calc_opt2(const struct socket *so, struct toedev *dev)
 {
-	struct toedev *dev = TOE_DEV(so);
 	int flv_valid;
 
 	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
@@ -971,6 +977,9 @@
 {
 	struct cpl_act_open_req *req;
 	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	struct toepcb *toep = tp->t_toe;
+	struct toedev *tdev = TOE_DEV(so);
 	
 	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, so));
 	
@@ -986,9 +995,9 @@
 
 	req->opt0h = htonl(calc_opt0h(so) | V_L2T_IDX(e->idx) |
 			   V_TX_CHANNEL(e->smt_idx));
-	req->opt0l = htonl(calc_opt0l(so));
+	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
 	req->params = 0;
-	req->opt2 = htonl(calc_opt2(so));
+	req->opt2 = htonl(calc_opt2(so, tdev));
 }
 
 
@@ -1319,11 +1328,11 @@
  * Process new data received for a connection.
  */
 static void
-new_rx_data(struct socket *so, struct mbuf *m)
+new_rx_data(struct toepcb *toep, struct mbuf *m)
 {
 	struct cpl_rx_data *hdr = cplhdr(m);
-	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
+	struct tcpcb *tp = toep->tp_tp;
+	struct socket *so = toeptoso(toep);
 	int len = be16toh(hdr->len);
 
 #ifdef notyet	
@@ -1393,11 +1402,9 @@
 static int
 do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
-	struct socket *so = (struct socket *)ctx;
+	struct toepcb *toep = (struct toepcb *)ctx;
 
-	VALIDATE_SOCK(so);
-
-	new_rx_data(so, m);
+	new_rx_data(toep, m);
 
 	return (0);
 }
@@ -1885,15 +1892,21 @@
 do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
 	const struct cpl_abort_req_rss *req = cplhdr(m);
-	struct socket *so = (struct socket *)ctx;
-	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
+	struct toepcb *toep = (struct toepcb *)ctx;
+	struct socket *so;
 	
 	if (is_neg_adv_abort(req->status)) {
 		m_free(m);
 		return (0);
 	}
-
+	printf("aborting tid=%d\n", toep->tp_tid);
+	
+	if (toep->tp_flags & TP_SYN_RCVD) {
+		printf("abort for unestablished connection :-(\n");
+		return (0);
+	}
+	
+	so = toeptoso(toep);
 	VALIDATE_SOCK(so);
 	toepcb_hold(toep);
 	process_abort_req(so, m, TOE_DEV(so));
@@ -1975,6 +1988,7 @@
  * Create a new socket as a child of the listening socket 'lsk' and initialize
  * with the information in the supplied PASS_ACCEPT_REQ message.
  */
+#ifdef notyet
 static struct socket *
 mk_pass_sock(struct socket *lso, struct toedev *dev, int tid,
 				 struct cpl_pass_accept_req *req)
@@ -2057,6 +2071,7 @@
 #endif	
 	return NULL;
 }
+#endif
 
 /*
  * Populate a reject CPL_PASS_ACCEPT_RPL WR.
@@ -2092,6 +2107,74 @@
 	m_free(m);
 }
 
+static void
+handle_syncache_event(int event, void *arg)
+{
+	struct toepcb *toep = arg;
+
+	switch (event) {
+	case SC_ENTRY_PRESENT:
+		/*
+		 * entry already exists - free toepcb
+		 * and l2t
+		 */
+		toepcb_release(toep);
+		break;
+	case SC_DROP:
+		/*
+		 * The syncache has given up on this entry
+		 * either it timed out, or it was evicted
+		 * we need to explicitly release the tid
+		 */
+		toepcb_release(toep);		
+		break;
+	default:
+		log(LOG_ERR, "unknown syncache event %d\n", event);
+		break;
+	}
+}
+
+static void
+syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
+{
+	struct in_conninfo inc;
+	struct tcpopt to;
+	struct tcphdr th;
+	struct inpcb *inp;
+	int mss, wsf, sack, ts;
+
+	bzero(&to, sizeof(struct tcpopt));
+	inp = sotoinpcb(lso);
+	
+	/*
+	 * Fill out information for entering us into the syncache
+	 */
+	th.th_sport = req->peer_port;
+	th.th_dport = req->local_port;
+	th.th_seq = req->rcv_isn;
+	th.th_flags = TH_SYN;
+
+	inc.inc_isipv6 = 0;
+	inc.inc_len = 0;
+	memcpy(&inc.inc_faddr, &req->peer_ip, 4);
+	memcpy(&inc.inc_laddr, &req->local_ip, 4);
+	inc.inc_ext = toep;
+	inc.inc_eh = handle_syncache_event;
+	
+	mss = req->tcp_options.mss;
+	wsf = req->tcp_options.wsf;
+	ts = req->tcp_options.tstamp;
+	sack = req->tcp_options.sack;
+	to.to_mss = mss;
+	to.to_wscale = wsf;
+	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
+
+	INP_INFO_WLOCK(&tcbinfo);
+	INP_LOCK(inp);
+	syncache_add(&inc, &to, &th, inp, &lso, NULL);
+}
+
+
 /*
  * Process a CPL_PASS_ACCEPT_REQ message.  Does the part that needs the socket
  * lock held.  Note that the sock here is a listening socket that is not owned
@@ -2102,7 +2185,6 @@
     struct listen_ctx *lctx)
 {
 	int rt_flags;
-	struct socket *newso;
 	struct l2t_entry *e;
 	struct iff_mac tim;
 	struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
@@ -2111,26 +2193,37 @@
 	unsigned int tid = GET_TID(req);
 	struct tom_data *d = TOM_DATA(tdev);
 	struct t3cdev *cdev = d->cdev;
-	struct tcpcb *newtp, *tp = sototcpcb(so);
-	struct toepcb *toep, *newtoep;
-	
+	struct tcpcb *tp = sototcpcb(so);
+	struct toepcb *newtoep;
+	struct rtentry *dst;
+	struct sockaddr_in nam;
+
 	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
-	if (__predict_false(!reply_mbuf)) {
+	if (__predict_false(reply_mbuf == NULL)) {
 		if (tdev->ttid == TOE_ID_CHELSIO_T3)
 			t3_defer_reply(m, tdev, reject_pass_request);
 		else {
 			cxgb_queue_tid_release(cdev, tid);
 			m_free(m);
 		}
+		printf("failed to get reply_mbuf\n");
+		
 		goto out;
 	}
 
-	if (tp->t_state != TCPS_LISTEN)
+	if (tp->t_state != TCPS_LISTEN) {
+		printf("socket not in listen state\n");
+		
 		goto reject;
+	}
+	
 	tim.mac_addr = req->dst_mac;
 	tim.vlan_tag = ntohs(req->vlan_tag);
-	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev)
+	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
+		printf("rejecting from failed GET_IFF_FROM_MAC\n");
 		goto reject;
+	}
+	
 #ifdef notyet
 	/*
 	 * XXX do route lookup to confirm that we're still listening on this
@@ -2154,52 +2247,89 @@
 	if ((rt_flags & RTF_LOCAL) == 0)
 		goto reject;
 	
+	/*
+	 * Calculate values and add to syncache
+	 */
+
+	newtoep = toepcb_alloc();
+	if (newtoep == NULL)
+		goto reject;
+
+	bzero(&nam, sizeof(struct sockaddr_in));
 	
-	newso = sonewconn(so, SS_ISCONNECTED);
-	newtp = sototcpcb(so);
+	nam.sin_len = sizeof(struct sockaddr_in);
+	nam.sin_family = AF_INET;
+	memcpy(&nam.sin_addr, &req->peer_ip, 4);
+	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
+
+	if (dst == NULL) {
+		
+		printf("failed to find route\n");
+		
+	}
+	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev);
+	if (e == NULL) {
+
+		printf("failed to get l2t\n");
+		
+	}
 	/*
-	 * XXX need to inherit ULP mode
+	 * Point to our listen socket until accept
 	 */
-	newtoep = toepcb_alloc(newtp);
+	newtoep->tp_tp = tp;
+	newtoep->tp_flags = TP_SYN_RCVD;
+	newtoep->tp_tid = tid;
 	
-	/* Don't get a reference, newsk starts out with ref count 2 */
-	cxgb_insert_tid(cdev, d->client, newso, tid);
+	printf("inserting tid=%d\n", tid);
+	cxgb_insert_tid(cdev, d->client, newtoep, tid);
 
-	if (newtoep->tp_ulp_mode) {
+	if (lctx->ulp_mode) {
 		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
 		
 		if (!ddp_mbuf)
 			newtoep->tp_ulp_mode = 0;
+		else
+			newtoep->tp_ulp_mode = lctx->ulp_mode;
 	}
 
-	m_set_socket(reply_mbuf, newso);
 	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
-	e = newtoep->tp_l2t;
+
+	printf("adding request to syn cache\n");
+	
+	syncache_add_accept_req(req, so, newtoep);
 
 	rpl = cplhdr(reply_mbuf);
+	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
 	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
 	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten
-	rpl->opt0h = htonl(calc_opt0h(newso) | V_L2T_IDX(e->idx) |
+	printf("e=%p idxs:\n", e);
+	printf("e->idx=%d e->smt_idx=%d\n", e->idx, e->smt_idx);
+	
+	rpl->opt0h = htonl(calc_opt0h(so) | V_L2T_IDX(e->idx) |
 			   V_TX_CHANNEL(e->smt_idx));
-	rpl->opt0l_status = htonl(calc_opt0l(newso) |
+	rpl->opt0l_status = htonl(calc_opt0l(so, lctx->ulp_mode) |
 				  CPL_PASS_OPEN_ACCEPT);
-	rpl->opt2 = htonl(calc_opt2(newso));
-
+	rpl->opt2 = htonl(calc_opt2(so, tdev));
 	rpl->rsvd = rpl->opt2;                /* workaround for HW bug */
-	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newso));
+	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, so));
+	printf("sending off reply\n");
+	
 	l2t_send(cdev, reply_mbuf, e);
 	m_free(m);
-	if (toep->tp_ulp_mode) {
+#ifdef notyet
+	/*
+	 * XXX this call path has to be converted to not depend on sockets
+	 */
+	if (newtoep->tp_ulp_mode) 
 		__set_tcb_field(newso, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
 				V_TF_DDP_OFF(1) |
 				TP_DDP_TIMER_WORKAROUND_MASK,
 				V_TF_DDP_OFF(1) |
 				TP_DDP_TIMER_WORKAROUND_VAL, 1);
 
-		return;
-	}
-	
+#endif	
+	return;
 reject:
 	if (tdev->ttid == TOE_ID_CHELSIO_T3)
 		mk_pass_accept_rpl(reply_mbuf, m);
@@ -2366,19 +2496,20 @@
 do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
 	struct cpl_pass_establish *req = cplhdr(m);
-	struct socket *lso, *so = (struct socket *)ctx;
-	struct toedev *tdev = TOE_DEV(so);
+	struct toepcb *toep = (struct toepcb *)ctx;
+	struct socket *so, *lso;
 	// Complete socket initialization now that we have the SND_ISN
-	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
+	
+	struct toedev *tdev;
 	struct toe_tid_entry *t3c_stid;
 	struct tid_info *t;
 	unsigned int stid;
+
+	lso = toeptoso(toep);
+	tdev = TOE_DEV(lso);
 	
-	VALIDATE_SOCK(so);
+	SOCK_LOCK(lso);
 
-	SOCK_LOCK(so);
-
 	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
 	toep->tp_wr_unacked = 0;
 	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
@@ -2421,7 +2552,7 @@
 #if 0
 unlock:
 #endif
-	SOCK_UNLOCK(so);
+	SOCK_UNLOCK(lso);
 		
 	return 0;
 }
@@ -2537,6 +2668,7 @@
 	 * backlogged its last CPL message(s).  Just take it away.
 	 */
 	toep->tp_tid = tid;
+	toep->tp_tp = tp;
 	so_insert_tid(d, so, tid);
 	free_atid(cdev, atid);
 	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
@@ -2550,11 +2682,11 @@
  * next batch of work requests from the write queue.
  */
 static void
-wr_ack(struct socket *so, struct mbuf *m)
+wr_ack(struct toepcb *toep, struct mbuf *m)
 {
-	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
+	struct tcpcb *tp = toep->tp_tp;
 	struct cpl_wr_ack *hdr = cplhdr(m);
+	struct socket *so = toeptoso(toep);
 	unsigned int credits = ntohs(hdr->credits);
 	u32 snd_una = ntohl(hdr->snd_una);
 	int bytes = 0;
@@ -2647,13 +2779,13 @@
 static int
 do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
 {
-	struct socket *so = (struct socket *)ctx;
+	struct toepcb *toep = (struct toepcb *)ctx;
 
 	printf("do_wr_ack\n");
 	
 	VALIDATE_SOCK(so);
 
-	wr_ack(so, m);
+	wr_ack(toep, m);
 	return 0;
 }
 

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_defs.h#10 (text+ko) ====

@@ -58,7 +58,7 @@
 void t3_reset_synq(struct socket *listen_so);
 void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
 
-struct toepcb *toepcb_alloc(struct tcpcb *);
+struct toepcb *toepcb_alloc(void);
 void toepcb_hold(struct toepcb *);
 void toepcb_release(struct toepcb *);
 void toepcb_init(struct toepcb *);

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 (text+ko) ====

@@ -30,6 +30,7 @@
 $FreeBSD$
 
 ***************************************************************************/
+
 #ifndef T3_DDP_H
 #define T3_DDP_H
 

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.c#10 (text+ko) ====

@@ -133,7 +133,7 @@
 }
 
 struct toepcb *
-toepcb_alloc(struct tcpcb *tp)
+toepcb_alloc(void)
 {
 	struct toepcb *toep;
 	
@@ -142,11 +142,10 @@
 	if (toep == NULL)
 		return (NULL);
 
-	toep->tp_tp = tp;
-	tp->t_toe = toep;
-	
 	toepcb_init(toep);
 	toepcb_hold(toep);
+
+	return (toep);
 }
 
 void
@@ -168,6 +167,7 @@
 		/*
 		 * XXX clear our reference on the inpcb
 		 */
+		cxgb_remove_tid(TOM_DATA(toep->tp_toedev)->cdev, NULL, toep->tp_tid);
 		free(toep, M_DEVBUF);
 		return;
 	}

==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#10 (text+ko) ====

@@ -120,6 +120,7 @@
 struct listen_ctx {
 	struct socket *lso;
 	struct tom_data *tom_data;
+	int ulp_mode;
 };
 
 #define TOM_DATA(dev) (*(struct tom_data **)&(dev)->l4opt)
@@ -134,7 +135,7 @@
 #define TP_ABORT_RPL_RCVD    	(1 << 5)
 #define TP_ABORT_REQ_RCVD    	(1 << 6)
 #define TP_CLOSE_CON_REQUESTED	(1 << 7)
-
+#define TP_SYN_RCVD		(1 << 8)
 
 struct toepcb {
 	struct toedev *tp_toedev;


More information about the p4-projects mailing list