svn commit: r252555 - in stable/9: sbin/ifconfig sys/amd64/conf sys/conf sys/contrib/rdma sys/contrib/rdma/krping sys/dev/cxgb/ulp/tom sys/i386/conf sys/modules sys/modules/rdma/krping sys/modules/...

Navdeep Parhar np at FreeBSD.org
Wed Jul 3 09:25:34 UTC 2013


Author: np
Date: Wed Jul  3 09:25:29 2013
New Revision: 252555
URL: http://svnweb.freebsd.org/changeset/base/252555

Log:
  MFC/backport core kernel and userspace parts of r237263 (TCP_OFFLOAD
  rework).  MFC r237563, r239511, r243603, r245915, r245916, r245919,
  r245921, r245922, r245924, r245925, r245932, r245934 too.
  
  Build tested with make universe.
  
  r237263:
  - Updated TOE support in the kernel.
  ...
  
  r237563:
  Fix clang warning when compiling iw_cxgb.
  
  r239511:
  Correctly handle the case where an inp has already been dropped by the time
  the TOE driver reports that an active open failed.  toe_connect_failed is
  supposed to handle this but it should be provided the inpcb instead of the
  tcpcb which may no longer be around.
  
  r243603:
  Make sure that tcp_timer_activate() correctly sees TCP_OFFLOAD (or not).
  
  r245915:
  Heed SO_NO_OFFLOAD.
  
  r245916:
  Teach toe_4tuple_check() to deal with IPv6 4-tuples too.
  
  r245919:
  Add TCP_OFFLOAD hook in syncache_respond for IPv6 too, just like the one
  that exists for IPv4.
  
  r245921:
  There is no need to call into the TOE driver twice in pru_rcvd (tod_rcvd
  and then tod_output right after that).
  
  r245922:
  Avoid NULL dereference in nd6_storelladdr when no mbuf is provided.  It
  is called this way from a couple of places in the OFED code.  (toecore
  calls it too but that's going to change shortly).
  
  r245924:
  Move lle_event to if_llatbl.h
  
  lle_event replaced arp_update_event after the ARP rewrite and ended up
  in if_ether.h simply because arp_update_event used to be there too.
  IPv6 neighbor discovery is going to grow lle_event support and this is a
  good time to move it to if_llatbl.h.
  
  The two in-tree consumers of this event - OFED and toecore - are not
  affected.
  
  r245925:
  Generate lle_event in the IPv6 neighbor discovery code too.
  
  r245932:
  Teach toe_l2_resolve to resolve IPv6 destinations too.
  
  r245934:
  Add checks for SO_NO_OFFLOAD in a couple of places that I missed earlier
  in r245915.

Added:
  stable/9/sys/modules/toecore/
     - copied from r237263, head/sys/modules/toecore/
  stable/9/sys/netinet/toecore.c
     - copied, changed from r237263, head/sys/netinet/toecore.c
  stable/9/sys/netinet/toecore.h
     - copied, changed from r237263, head/sys/netinet/toecore.h
Deleted:
  stable/9/sys/netinet/toedev.h
Modified:
  stable/9/sbin/ifconfig/ifconfig.c
  stable/9/sys/amd64/conf/GENERIC
  stable/9/sys/conf/NOTES
  stable/9/sys/conf/files
  stable/9/sys/conf/options
  stable/9/sys/contrib/rdma/krping/krping.c
  stable/9/sys/contrib/rdma/krping/krping.h
  stable/9/sys/contrib/rdma/krping/krping_dev.c
  stable/9/sys/contrib/rdma/rdma_addr.c
  stable/9/sys/contrib/rdma/rdma_cache.c
  stable/9/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
  stable/9/sys/i386/conf/GENERIC
  stable/9/sys/i386/conf/XEN
  stable/9/sys/modules/Makefile
  stable/9/sys/modules/rdma/krping/Makefile
  stable/9/sys/net/if_llatbl.h
  stable/9/sys/net/if_var.h
  stable/9/sys/net/if_vlan.c
  stable/9/sys/netinet/if_ether.c
  stable/9/sys/netinet/if_ether.h
  stable/9/sys/netinet/in.c
  stable/9/sys/netinet/tcp_input.c
  stable/9/sys/netinet/tcp_offload.c
  stable/9/sys/netinet/tcp_offload.h
  stable/9/sys/netinet/tcp_output.c
  stable/9/sys/netinet/tcp_subr.c
  stable/9/sys/netinet/tcp_syncache.c
  stable/9/sys/netinet/tcp_syncache.h
  stable/9/sys/netinet/tcp_timer.c
  stable/9/sys/netinet/tcp_usrreq.c
  stable/9/sys/netinet/tcp_var.h
  stable/9/sys/netinet6/nd6.c
  stable/9/sys/netinet6/nd6_nbr.c
  stable/9/sys/ofed/drivers/infiniband/core/cma.c
  stable/9/sys/ofed/drivers/infiniband/core/iwcm.c
  stable/9/sys/ofed/include/linux/net.h
  stable/9/sys/ofed/include/linux/workqueue.h
  stable/9/sys/ofed/include/net/netevent.h
  stable/9/sys/ofed/include/rdma/iw_cm.h
  stable/9/usr.bin/netstat/inet.c
  stable/9/usr.bin/sockstat/sockstat.c
Directory Properties:
  stable/9/sbin/ifconfig/   (props changed)
  stable/9/sys/   (props changed)
  stable/9/sys/dev/   (props changed)
  stable/9/sys/net/   (props changed)
  stable/9/usr.bin/netstat/   (props changed)
  stable/9/usr.bin/sockstat/   (props changed)

Modified: stable/9/sbin/ifconfig/ifconfig.c
==============================================================================
--- stable/9/sbin/ifconfig/ifconfig.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sbin/ifconfig/ifconfig.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -910,7 +910,7 @@ unsetifdescr(const char *val, int value,
 #define	IFCAPBITS \
 "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \
 "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
-"\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
+"\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
 "\26RXCSUM_IPV6\27TXCSUM_IPV6"
 
 /*
@@ -1189,6 +1189,8 @@ static struct cmd basic_cmds[] = {
 	DEF_CMD("-tso4",	-IFCAP_TSO4,	setifcap),
 	DEF_CMD("tso",		IFCAP_TSO,	setifcap),
 	DEF_CMD("-tso",		-IFCAP_TSO,	setifcap),
+	DEF_CMD("toe",		IFCAP_TOE,	setifcap),
+	DEF_CMD("-toe",		-IFCAP_TOE,	setifcap),
 	DEF_CMD("lro",		IFCAP_LRO,	setifcap),
 	DEF_CMD("-lro",		-IFCAP_LRO,	setifcap),
 	DEF_CMD("wol",		IFCAP_WOL,	setifcap),

Modified: stable/9/sys/amd64/conf/GENERIC
==============================================================================
--- stable/9/sys/amd64/conf/GENERIC	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/amd64/conf/GENERIC	Wed Jul  3 09:25:29 2013	(r252555)
@@ -28,6 +28,7 @@ options 	SCHED_ULE		# ULE scheduler
 options 	PREEMPTION		# Enable kernel thread preemption
 options 	INET			# InterNETworking
 options 	INET6			# IPv6 communications protocols
+options 	TCP_OFFLOAD		# TCP offload
 options 	SCTP			# Stream Control Transmission Protocol
 options 	FFS			# Berkeley Fast Filesystem
 options 	SOFTUPDATES		# Enable FFS soft updates support

Modified: stable/9/sys/conf/NOTES
==============================================================================
--- stable/9/sys/conf/NOTES	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/conf/NOTES	Wed Jul  3 09:25:29 2013	(r252555)
@@ -545,6 +545,8 @@ options 	INET6			#IPv6 communications pr
 
 options 	ROUTETABLES=2		# max 16. 1 is back compatible.
 
+options 	TCP_OFFLOAD		# TCP offload support.
+
 # In order to enable IPSEC you MUST also add device crypto to 
 # your kernel configuration
 options 	IPSEC			#IP security (requires device crypto)

Modified: stable/9/sys/conf/files
==============================================================================
--- stable/9/sys/conf/files	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/conf/files	Wed Jul  3 09:25:29 2013	(r252555)
@@ -2886,7 +2886,7 @@ netinet/tcp_hostcache.c		optional inet |
 netinet/tcp_input.c		optional inet | inet6
 netinet/tcp_lro.c		optional inet | inet6
 netinet/tcp_output.c		optional inet | inet6
-netinet/tcp_offload.c		optional inet | inet6
+netinet/tcp_offload.c		optional tcp_offload inet | tcp_offload inet6
 netinet/tcp_reass.c		optional inet | inet6
 netinet/tcp_sack.c		optional inet | inet6
 netinet/tcp_subr.c		optional inet | inet6

Modified: stable/9/sys/conf/options
==============================================================================
--- stable/9/sys/conf/options	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/conf/options	Wed Jul  3 09:25:29 2013	(r252555)
@@ -431,7 +431,7 @@ RADIX_MPATH		opt_mpath.h
 ROUTETABLES		opt_route.h
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
-TCP_OFFLOAD_DISABLE	opt_inet.h #Disable code to dispatch tcp offloading
+TCP_OFFLOAD		opt_inet.h # Enable code to dispatch TCP offloading
 TCP_SIGNATURE		opt_inet.h
 VLAN_ARRAY		opt_vlan.h
 XBONEHACK

Modified: stable/9/sys/contrib/rdma/krping/krping.c
==============================================================================
--- stable/9/sys/contrib/rdma/krping/krping.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/contrib/rdma/krping/krping.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -41,7 +41,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
-#include <sys/module.h>
 #include <sys/endian.h>
 #include <sys/limits.h>
 #include <sys/proc.h>
@@ -53,11 +52,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/queue.h>
 #include <sys/taskqueue.h>
 #include <sys/syslog.h>
+#include <netinet/in.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
-#include <contrib/rdma/rdma_cm.h>
+#include <linux/types.h>
+#include <rdma/rdma_cm.h>
 
 #include "getopt.h"
 #include "krping.h"
@@ -83,6 +84,7 @@ static const struct krping_option krping
 	{"bw", OPT_NOPARAM, 'B'},
 	{"tx-depth", OPT_INT, 't'},
   	{"poll", OPT_NOPARAM, 'P'},
+  	{"memlimit", OPT_INT, 'm'},
 	{NULL, 0, 0}
 };
 
@@ -254,10 +256,14 @@ static void krping_cq_event_handler(stru
 		ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
 	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
 		if (wc.status) {
-			if (wc.status != IB_WC_WR_FLUSH_ERR)
-				log(LOG_ERR, "cq completion failed status %d\n",
+			if (wc.status == IB_WC_WR_FLUSH_ERR) {
+				DEBUG_LOG("cq flushed\n");
+				continue;
+			} else {
+				log(LOG_CRIT, "cq completion failed status %d\n",
 					wc.status);
-			goto error;
+				goto error;
+			}
 		}
 
 		switch (wc.opcode) {
@@ -432,8 +438,17 @@ static int krping_setup_buffers(struct k
 		}
 	}
 
-	cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
-		PAGE_SIZE, 0);
+	/* RNIC adapters have a limit up to which they can register physical
+	 * memory.  If DMA-MR memory mode is set, the driver normally registers
+	 * the maximum supported memory.  If contigmalloc then allocates memory
+	 * beyond the specified RNIC limit, krping may not work.
+	 */
+	if (cb->use_dmamr && cb->memlimit)
+		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit,
+					    PAGE_SIZE, 0);
+	else 
+		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
+					    PAGE_SIZE, 0);
 
 	if (!cb->rdma_buf) {
 		log(LOG_ERR, "rdma_buf malloc failed\n");
@@ -458,8 +473,12 @@ static int krping_setup_buffers(struct k
 	}
 
 	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
-		cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
-			0, -1UL, PAGE_SIZE, 0);
+		if (cb->use_dmamr && cb->memlimit)
+			cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
+						     0, cb->memlimit, PAGE_SIZE, 0);
+		else
+			cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
+						     0, -1UL, PAGE_SIZE, 0);
 		if (!cb->start_buf) {
 			log(LOG_ERR, "start_buf malloc failed\n");
 			ret = ENOMEM;
@@ -1636,6 +1655,8 @@ int krping_doit(char *cmd)
 	cb->state = IDLE;
 	cb->size = 64;
 	cb->txdepth = RPING_SQ_DEPTH;
+	cb->use_dmamr = 1;
+	cb->memlimit = 0;
 	mtx_init(&cb->lock, "krping mtx", NULL, MTX_DUPOK|MTX_DEF);
 
 	while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg,
@@ -1713,6 +1734,15 @@ int krping_doit(char *cmd)
 		case 'd':
 			debug++;
 			break;
+		case 'm':
+                        cb->memlimit = optint;
+                        if (cb->memlimit < 1) {
+                                log(LOG_ERR, "Invalid memory limit %ju\n",
+				    cb->memlimit);
+                                ret = EINVAL;
+                        } else
+                                DEBUG_LOG(PFX "memory limit %d\n", (int)optint);
+                        break;
 		default:
 			log(LOG_ERR, "unknown opt %s\n", optarg);
 			ret = EINVAL;

Modified: stable/9/sys/contrib/rdma/krping/krping.h
==============================================================================
--- stable/9/sys/contrib/rdma/krping/krping.h	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/contrib/rdma/krping/krping.h	Wed Jul  3 09:25:29 2013	(r252555)
@@ -1,7 +1,7 @@
 /*
  * $FreeBSD$
  */
-#include <contrib/rdma/ib_verbs.h>
+#include <rdma/ib_verbs.h>
 #include <netinet/in.h>
 
 /*
@@ -92,6 +92,8 @@ struct krping_cb {
 	int count;			/* ping count */
 	int size;			/* ping data size */
 	int validate;			/* validate ping data */
+	uint64_t memlimit;		/* limit of the physical memory that
+					   can be registered with dma_mr mode */
 
 	/* CM stuff */
 	struct rdma_cm_id *cm_id;	/* connection on client side,*/

Modified: stable/9/sys/contrib/rdma/krping/krping_dev.c
==============================================================================
--- stable/9/sys/contrib/rdma/krping/krping_dev.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/contrib/rdma/krping/krping_dev.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -14,7 +14,6 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
-#include <sys/module.h>
 #include <sys/systm.h>  /* uprintf */
 #include <sys/errno.h>
 #include <sys/param.h>  /* defines used in kernel.h */
@@ -51,6 +50,9 @@ typedef struct s_krping {
 /* vars */
 static struct cdev *krping_dev;
 
+#undef MODULE_VERSION
+#include <sys/module.h>
+
 static int
 krping_loader(struct module *m, int what, void *arg)
 {
@@ -175,6 +177,4 @@ krping_write(struct cdev *dev, struct ui
 	return(err);
 }
 
-MODULE_DEPEND(krping, rdma_core, 1, 1, 1);
-MODULE_DEPEND(krping, rdma_cma, 1, 1, 1);
 DEV_MODULE(krping,krping_loader,NULL);

Modified: stable/9/sys/contrib/rdma/rdma_addr.c
==============================================================================
--- stable/9/sys/contrib/rdma/rdma_addr.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/contrib/rdma/rdma_addr.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -117,7 +117,8 @@ int rdma_copy_addr(struct rdma_dev_addr 
 		     const unsigned char *dst_dev_addr)
 {
 	dev_addr->dev_type = RDMA_NODE_RNIC;
-	memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), MAX_ADDR_LEN);
+	memset(dev_addr->src_dev_addr, 0, MAX_ADDR_LEN);
+	memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
 	memcpy(dev_addr->broadcast, dev->if_broadcastaddr, MAX_ADDR_LEN);
 	if (dst_dev_addr)
 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
@@ -207,7 +208,7 @@ static int addr_resolve_remote(struct so
 		goto put;
 	}
  	ret = arpresolve(iproute.ro_rt->rt_ifp, iproute.ro_rt, NULL, 
-		rt_key(iproute.ro_rt), dmac, &lle);
+		(struct sockaddr *)dst_in, dmac, &lle);
 	if (ret) {
 		goto put;
 	}

Modified: stable/9/sys/contrib/rdma/rdma_cache.c
==============================================================================
--- stable/9/sys/contrib/rdma/rdma_cache.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/contrib/rdma/rdma_cache.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -132,7 +132,7 @@ int ib_find_cached_gid(struct ib_device 
 	for (p = 0; p <= end_port(device) - start_port(device); ++p) {
 		cache = device->cache.gid_cache[p];
 		for (i = 0; i < cache->table_len; ++i) {
-			if (!memcmp(gid, &cache->table[i], 6)) { /* XXX */
+			if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
 				*port_num = p + start_port(device);
 				if (index)
 					*index = i;

Modified: stable/9/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
==============================================================================
--- stable/9/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -880,10 +880,10 @@ act_open_rpl_status_to_errno(int status)
 	case CPL_ERR_CONN_TIMEDOUT:
 		return (ETIMEDOUT);
 	case CPL_ERR_TCAM_FULL:
-		return (ENOMEM);
+		return (EAGAIN);
 	case CPL_ERR_CONN_EXIST:
 		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
-		return (EADDRINUSE);
+		return (EAGAIN);
 	default:
 		return (EIO);
 	}
@@ -912,8 +912,7 @@ do_act_open_rpl(struct sge_qset *qs, str
 	unsigned int atid = G_TID(ntohl(rpl->atid));
 	struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
 	struct inpcb *inp = toep->tp_inp;
-	struct tcpcb *tp = intotcpcb(inp);
-	int s = rpl->status;
+	int s = rpl->status, rc;
 
 	CTR3(KTR_CXGB, "%s: atid %u, status %u ", __func__, atid, s);
 
@@ -923,17 +922,14 @@ do_act_open_rpl(struct sge_qset *qs, str
 	if (act_open_has_tid(s))
 		queue_tid_release(tod, GET_TID(rpl));
 
-	if (s == CPL_ERR_TCAM_FULL || s == CPL_ERR_CONN_EXIST) {
-		INP_WLOCK(inp);
-		toe_connect_failed(tod, tp, EAGAIN);
-		toepcb_release(toep);	/* unlocks inp */
-	} else {
+	rc = act_open_rpl_status_to_errno(s);
+	if (rc != EAGAIN)
 		INP_INFO_WLOCK(&V_tcbinfo);
-		INP_WLOCK(inp);
-		toe_connect_failed(tod, tp, act_open_rpl_status_to_errno(s));
-		toepcb_release(toep);	/* unlocks inp */
+	INP_WLOCK(inp);
+	toe_connect_failed(tod, inp, rc);
+	toepcb_release(toep);	/* unlocks inp */
+	if (rc != EAGAIN)
 		INP_INFO_WUNLOCK(&V_tcbinfo);
-	}
 
 	m_freem(m);
 	return (0);

Modified: stable/9/sys/i386/conf/GENERIC
==============================================================================
--- stable/9/sys/i386/conf/GENERIC	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/i386/conf/GENERIC	Wed Jul  3 09:25:29 2013	(r252555)
@@ -30,6 +30,7 @@ options 	SCHED_ULE		# ULE scheduler
 options 	PREEMPTION		# Enable kernel thread preemption
 options 	INET			# InterNETworking
 options 	INET6			# IPv6 communications protocols
+options 	TCP_OFFLOAD		# TCP offload
 options 	SCTP			# Stream Control Transmission Protocol
 options 	FFS			# Berkeley Fast Filesystem
 options 	SOFTUPDATES		# Enable FFS soft updates support

Modified: stable/9/sys/i386/conf/XEN
==============================================================================
--- stable/9/sys/i386/conf/XEN	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/i386/conf/XEN	Wed Jul  3 09:25:29 2013	(r252555)
@@ -7,7 +7,7 @@ cpu		I686_CPU
 ident		XEN
 
 makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
-makeoptions	WITHOUT_MODULES="aha ahb amd cxgb dpt drm drm2 hptmv ida malo mps mwl nve sound sym trm xfs"
+makeoptions	WITHOUT_MODULES="aha ahb amd cxgb dpt drm drm2 hptmv ida malo mps mwl nve rdma sound sym trm xfs"
 
 options 	SCHED_ULE		# ULE scheduler
 options 	PREEMPTION		# Enable kernel thread preemption

Modified: stable/9/sys/modules/Makefile
==============================================================================
--- stable/9/sys/modules/Makefile	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/modules/Makefile	Wed Jul  3 09:25:29 2013	(r252555)
@@ -313,6 +313,7 @@ SUBDIR=	\
 	${_ti} \
 	tl \
 	tmpfs \
+	${_toecore} \
 	${_tpm} \
 	trm \
 	${_twa} \
@@ -395,6 +396,7 @@ _if_gre=	if_gre
 .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \
 	defined(ALL_MODULES)
 _if_carp=	if_carp
+_toecore=	toecore
 .endif
 
 .if ${MK_IPFILTER} != "no" || defined(ALL_MODULES)

Modified: stable/9/sys/modules/rdma/krping/Makefile
==============================================================================
--- stable/9/sys/modules/rdma/krping/Makefile	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/modules/rdma/krping/Makefile	Wed Jul  3 09:25:29 2013	(r252555)
@@ -6,5 +6,7 @@ RDMA= ${.CURDIR}/../../../contrib/rdma/k
 KMOD= krping
 SRCS= krping.c krping_dev.c getopt.c
 SRCS+=  bus_if.h device_if.h opt_sched.h pci_if.h pcib_if.h
+SRCS+=  vnode_if.h
+CFLAGS+= -I${.CURDIR}/../../../ofed/include 
 
 .include <bsd.kmod.mk>

Modified: stable/9/sys/net/if_llatbl.h
==============================================================================
--- stable/9/sys/net/if_llatbl.h	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/net/if_llatbl.h	Wed Jul  3 09:25:29 2013	(r252555)
@@ -205,4 +205,14 @@ lla_lookup(struct lltable *llt, u_int fl
 }
 
 int		lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
+
+#include <sys/eventhandler.h>
+enum {
+	LLENTRY_RESOLVED,
+	LLENTRY_TIMEDOUT,
+	LLENTRY_DELETED,
+	LLENTRY_EXPIRED,
+};
+typedef void (*lle_event_fn)(void *, struct llentry *, int);
+EVENTHANDLER_DECLARE(lle_event, lle_event_fn);
 #endif  /* _NET_IF_LLATBL_H_ */

Modified: stable/9/sys/net/if_var.h
==============================================================================
--- stable/9/sys/net/if_var.h	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/net/if_var.h	Wed Jul  3 09:25:29 2013	(r252555)
@@ -755,6 +755,8 @@ drbr_inuse(struct ifnet *ifp, struct buf
 #define	IF_MINMTU	72
 #define	IF_MAXMTU	65535
 
+#define	TOEDEV(ifp)	((ifp)->if_llsoftc)
+
 #endif /* _KERNEL */
 
 /*

Modified: stable/9/sys/net/if_vlan.c
==============================================================================
--- stable/9/sys/net/if_vlan.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/net/if_vlan.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -748,8 +748,8 @@ vlan_modevent(module_t mod, int type, vo
 		vlan_trunk_cap_p = NULL;
 		vlan_trunkdev_p = NULL;
 		vlan_tag_p = NULL;
-		vlan_cookie_p = vlan_cookie;
-		vlan_setcookie_p = vlan_setcookie;
+		vlan_cookie_p = NULL;
+		vlan_setcookie_p = NULL;
 		vlan_devat_p = NULL;
 		VLAN_LOCK_DESTROY();
 		if (bootverbose)
@@ -1510,6 +1510,22 @@ vlan_capabilities(struct ifvlan *ifv)
 		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
 		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
 	}
+
+	/*
+	 * If the parent interface can offload TCP connections over VLANs then
+	 * propagate its TOE capability to the VLAN interface.
+	 *
+	 * All TOE drivers in the tree today can deal with VLANs.  If this
+	 * changes then IFCAP_VLAN_TOE should be promoted to a full capability
+	 * with its own bit.
+	 */
+#define	IFCAP_VLAN_TOE IFCAP_TOE
+	if (p->if_capabilities & IFCAP_VLAN_TOE)
+		ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
+	if (p->if_capenable & IFCAP_VLAN_TOE) {
+		TOEDEV(ifp) = TOEDEV(p);
+		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
+	}
 }
 
 static void

Modified: stable/9/sys/netinet/if_ether.c
==============================================================================
--- stable/9/sys/netinet/if_ether.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/netinet/if_ether.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -178,6 +178,16 @@ arptimer(void *arg)
 	ifp = lle->lle_tbl->llt_ifp;
 	CURVNET_SET(ifp->if_vnet);
 
+	if ((lle->la_flags & LLE_DELETED) == 0) {
+		int evt;
+
+		if (lle->la_flags & LLE_VALID)
+			evt = LLENTRY_EXPIRED;
+		else
+			evt = LLENTRY_TIMEDOUT;
+		EVENTHANDLER_INVOKE(lle_event, lle, evt);
+	}
+
 	callout_stop(&lle->la_timer);
 
 	/* XXX: LOR avoidance. We still have ref on lle. */
@@ -730,7 +740,7 @@ match:
 		(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
 		la->la_flags |= LLE_VALID;
 
-		EVENTHANDLER_INVOKE(arp_update_event, la);
+		EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
 
 		if (!(la->la_flags & LLE_STATIC)) {
 			int canceled;

Modified: stable/9/sys/netinet/if_ether.h
==============================================================================
--- stable/9/sys/netinet/if_ether.h	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/netinet/if_ether.h	Wed Jul  3 09:25:29 2013	(r252555)
@@ -117,11 +117,6 @@ int	arpresolve(struct ifnet *ifp, struct
 		    struct llentry **lle);
 void	arp_ifinit(struct ifnet *, struct ifaddr *);
 void	arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
-
-#include <sys/eventhandler.h>
-typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
-EVENTHANDLER_DECLARE(arp_update_event, llevent_arp_update_fn);
-
 #endif
 
 #endif

Modified: stable/9/sys/netinet/in.c
==============================================================================
--- stable/9/sys/netinet/in.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/netinet/in.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -1527,7 +1527,7 @@ in_lltable_lookup(struct lltable *llt, u
 		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
 			LLE_WLOCK(lle);
 			lle->la_flags |= LLE_DELETED;
-			EVENTHANDLER_INVOKE(arp_update_event, lle);
+			EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 			log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif

Modified: stable/9/sys/netinet/tcp_input.c
==============================================================================
--- stable/9/sys/netinet/tcp_input.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/netinet/tcp_input.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -105,6 +105,9 @@ __FBSDID("$FreeBSD$");
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
@@ -995,6 +998,14 @@ relocked:
 		goto dropwithreset;
 	}
 
+#ifdef TCP_OFFLOAD
+	if (tp->t_flags & TF_TOE) {
+		tcp_offload_input(tp, m);
+		m = NULL;	/* consumed by the TOE driver */
+		goto dropunlock;
+	}
+#endif
+
 	/*
 	 * We've identified a valid inpcb, but it could be that we need an
 	 * inpcbinfo write lock but don't hold it.  In this case, attempt to

Modified: stable/9/sys/netinet/tcp_offload.c
==============================================================================
--- stable/9/sys/netinet/tcp_offload.c	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/netinet/tcp_offload.c	Wed Jul  3 09:25:29 2013	(r252555)
@@ -1,145 +1,176 @@
 /*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
  *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_inet.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
-
+#include <sys/sockopt.h>
 #include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
 #include <net/route.h>
-#include <net/vnet.h>
-
 #include <netinet/in.h>
-#include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_offload.h>
-#include <netinet/toedev.h>
+#define	TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
 
-uint32_t toedev_registration_count;
+int registered_toedevs;
 
+/*
+ * Provide an opportunity for a TOE driver to offload.
+ */
 int
 tcp_offload_connect(struct socket *so, struct sockaddr *nam)
 {
 	struct ifnet *ifp;
-	struct toedev *tdev;
+	struct toedev *tod;
 	struct rtentry *rt;
-	int error;
+	int error = EOPNOTSUPP;
+
+	INP_WLOCK_ASSERT(sotoinpcb(so));
+	KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+	    ("%s: called with sa_family %d", __func__, nam->sa_family));
 
-	if (toedev_registration_count == 0)
-		return (EINVAL);
-	
-	/*
-	 * Look up the route used for the connection to 
-	 * determine if it uses an interface capable of
-	 * offloading the connection.
-	 */
-	rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
-	if (rt) 
+	if (registered_toedevs == 0)
+		return (error);
+
+	rt = rtalloc1(nam, 0, 0);
+	if (rt)
 		RT_UNLOCK(rt);
-	else 
+	else
 		return (EHOSTUNREACH);
 
 	ifp = rt->rt_ifp;
-	if ((ifp->if_capenable & IFCAP_TOE) == 0) {
-		error = EINVAL;
-		goto fail;
-	}
-	
-	tdev = TOEDEV(ifp);
-	if (tdev == NULL) {
-		error = EPERM;
-		goto fail;
-	}
-	
-	if (tdev->tod_can_offload(tdev, so) == 0) {
-		error = EPERM;
-		goto fail;
-	}
-	
-	return (tdev->tod_connect(tdev, so, rt, nam));
-fail:
+
+	if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
+		goto done;
+	if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
+		goto done;
+
+	tod = TOEDEV(ifp);
+	if (tod != NULL)
+		error = tod->tod_connect(tod, so, rt, nam);
+done:
 	RTFREE(rt);
 	return (error);
 }
 
+void
+tcp_offload_listen_start(struct tcpcb *tp)
+{
 
-/*
- * This file contains code as a short-term staging area before it is moved in 
- * to sys/netinet/tcp_offload.c
- */
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+}
 
 void
-tcp_offload_twstart(struct tcpcb *tp)
+tcp_offload_listen_stop(struct tcpcb *tp)
 {
 
-	INP_INFO_WLOCK(&V_tcbinfo);
-	INP_WLOCK(tp->t_inpcb);
-	tcp_twstart(tp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
 }
 
-struct tcpcb *
-tcp_offload_close(struct tcpcb *tp)
+void
+tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
 {
+	struct toedev *tod = tp->tod;
 
-	INP_INFO_WLOCK(&V_tcbinfo);
-	INP_WLOCK(tp->t_inpcb);
-	tp = tcp_close(tp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
-	if (tp)
-		INP_WUNLOCK(tp->t_inpcb);
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
 
-	return (tp);
+	tod->tod_input(tod, tp, m);
 }
 
-struct tcpcb *
-tcp_offload_drop(struct tcpcb *tp, int error)
+int
+tcp_offload_output(struct tcpcb *tp)
 {
+	struct toedev *tod = tp->tod;
+	int error, flags;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
 
-	INP_INFO_WLOCK(&V_tcbinfo);
-	INP_WLOCK(tp->t_inpcb);
-	tp = tcp_drop(tp, error);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
-	if (tp)
-		INP_WUNLOCK(tp->t_inpcb);
+	flags = tcp_outflags[tp->t_state];
 
-	return (tp);
+	if (flags & TH_RST) {
+		/* XXX: avoid repeated calls like we do for FIN */
+		error = tod->tod_send_rst(tod, tp);
+	} else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
+	    (tp->t_flags & TF_SENTFIN) == 0) {
+		error = tod->tod_send_fin(tod, tp);
+		if (error == 0)
+			tp->t_flags |= TF_SENTFIN;
+	} else
+		error = tod->tod_output(tod, tp);
+
+	return (error);
+}
+
+void
+tcp_offload_rcvd(struct tcpcb *tp)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_rcvd(tod, tp);
 }
 
+void
+tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
+}
+
+void
+tcp_offload_detach(struct tcpcb *tp)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_pcb_detach(tod, tp);
+}

Modified: stable/9/sys/netinet/tcp_offload.h
==============================================================================
--- stable/9/sys/netinet/tcp_offload.h	Wed Jul  3 08:39:18 2013	(r252554)
+++ stable/9/sys/netinet/tcp_offload.h	Wed Jul  3 09:25:29 2013	(r252555)
@@ -1,30 +1,30 @@
 /*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
  *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  *
  * $FreeBSD$
+ *
  */
 
 #ifndef _NETINET_TCP_OFFLOAD_H_
@@ -34,321 +34,15 @@
 #error "no user-serviceable parts inside"
 #endif
 
-/*
- * A driver publishes that it provides offload services
- * by setting IFCAP_TOE in the ifnet. The offload connect
- * will bypass any further work if the interface that a
- * connection would use does not support TCP offload.
- *
- * The TOE API assumes that the tcp offload engine can offload the 
- * the entire connection from set up to teardown, with some provision 
- * being made to allowing the software stack to handle time wait. If
- * the device does not meet these criteria, it is the driver's responsibility
- * to overload the functions that it needs to in tcp_usrreqs and make
- * its own calls to tcp_output if it needs to do so.
- *
- * There is currently no provision for the device advertising the congestion
- * control algorithms it supports as there is currently no API for querying 
- * an operating system for the protocols that it has loaded. This is a desirable
- * future extension.
- *
- *
- *
- * It is assumed that individuals deploying TOE will want connections
- * to be offloaded without software changes so all connections on an
- * interface providing TOE are offloaded unless the SO_NO_OFFLOAD 
- * flag is set on the socket.
- *
- *
- * The toe_usrreqs structure constitutes the TOE driver's 
- * interface to the TCP stack for functionality that doesn't
- * interact directly with userspace. If one wants to provide
- * (optional) functionality to do zero-copy to/from
- * userspace one still needs to override soreceive/sosend 
- * with functions that fault in and pin the user buffers.
- *
- * + tu_send
- *   - tells the driver that new data may have been added to the 
- *     socket's send buffer - the driver should not fail if the
- *     buffer is in fact unchanged
- *   - the driver is responsible for providing credits (bytes in the send window)
- *     back to the socket by calling sbdrop() as segments are acknowledged.
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- * + tu_rcvd
- *   - returns credits to the driver and triggers window updates
- *     to the peer (a credit as used here is a byte in the peer's receive window)
- *   - the driver is expected to determine how many bytes have been 
- *     consumed and credit that back to the card so that it can grow
- *     the window again by maintaining its own state between invocations.
- *   - In principle this could be used to shrink the window as well as
- *     grow the window, although it is not used for that now.
- *   - this function needs to correctly handle being called any number of
- *     times without any bytes being consumed from the receive buffer.
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- * + tu_disconnect
- *   - tells the driver to send FIN to peer
- *   - driver is expected to send the remaining data and then do a clean half close
- *   - disconnect implies at least half-close so only send, reset, and detach
- *     are legal
- *   - the driver is expected to handle transition through the shutdown
- *     state machine and allow the stack to support SO_LINGER.
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- * + tu_reset
- *   - closes the connection and sends a RST to peer
- *   - driver is expectd to trigger an RST and detach the toepcb
- *   - no further calls are legal after reset
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- *   The following fields in the tcpcb are expected to be referenced by the driver:
- *	+ iss
- *	+ rcv_nxt
- *	+ rcv_wnd
- *	+ snd_isn
- *	+ snd_max
- *	+ snd_nxt
- *	+ snd_una
- *	+ t_flags
- *	+ t_inpcb
- *	+ t_maxseg
- *	+ t_toe
- *
- *   The following fields in the inpcb are expected to be referenced by the driver:
- *	+ inp_lport
- *	+ inp_fport
- *	+ inp_laddr
- *	+ inp_fport
- *	+ inp_socket
- *	+ inp_ip_tos
- *
- *   The following fields in the socket are expected to be referenced by the
- *   driver:
- *	+ so_comp
- *	+ so_error
- *	+ so_linger
- *	+ so_options
- *	+ so_rcv
- *	+ so_snd
- *	+ so_state
- *	+ so_timeo
- *
- *   These functions all return 0 on success and can return the following errors
- *   as appropriate:
- *	+ EPERM:
- *	+ ENOBUFS: memory allocation failed
- *	+ EMSGSIZE: MTU changed during the call
- *	+ EHOSTDOWN:
- *	+ EHOSTUNREACH:
- *	+ ENETDOWN:
- *	* ENETUNREACH: the peer is no longer reachable
- *
- * + tu_detach
- *   - tells driver that the socket is going away so disconnect
- *     the toepcb and free appropriate resources
- *   - allows the driver to cleanly handle the case of connection state
- *     outliving the socket
- *   - no further calls are legal after detach
- *   - the driver is expected to provide its own synchronization between
- *     detach and receiving new data.
- * 
- * + tu_syncache_event
- *   - even if it is not actually needed, the driver is expected to
- *     call syncache_add for the initial SYN and then syncache_expand
- *     for the SYN,ACK
- *   - tells driver that a connection either has not been added or has 
- *     been dropped from the syncache
- *   - the driver is expected to maintain state that lives outside the 
- *     software stack so the syncache needs to be able to notify the
- *     toe driver that the software stack is not going to create a connection
- *     for a received SYN
- *   - The driver is responsible for any synchronization required between
- *     the syncache dropping an entry and the driver processing the SYN,ACK.
- * 
- */
-struct toe_usrreqs {
-	int (*tu_send)(struct tcpcb *tp);
-	int (*tu_rcvd)(struct tcpcb *tp);
-	int (*tu_disconnect)(struct tcpcb *tp);
-	int (*tu_reset)(struct tcpcb *tp);
-	void (*tu_detach)(struct tcpcb *tp);
-	void (*tu_syncache_event)(int event, void *toep);
-};
-
-/*
- * Proxy for struct tcpopt between TOE drivers and TCP functions.
- */
-struct toeopt {
-	u_int64_t	to_flags;	/* see tcpopt in tcp_var.h */
-	u_int16_t	to_mss;		/* maximum segment size */
-	u_int8_t	to_wscale;	/* window scaling */
-
-	u_int8_t	_pad1;		/* explicit pad for 64bit alignment */
-	u_int32_t	_pad2;		/* explicit pad for 64bit alignment */
-	u_int64_t	_pad3[4];	/* TBD */
-};
-
-#define	TOE_SC_ENTRY_PRESENT		1	/* 4-tuple already present */
-#define	TOE_SC_DROP			2	/* connection was timed out */
-
-/*
- * Because listen is a one-to-many relationship (a socket can be listening 
- * on all interfaces on a machine some of which may be using different TCP
- * offload devices), listen uses a publish/subscribe mechanism. The TCP
- * offload driver registers a listen notification function with the stack.
- * When a listen socket is created all TCP offload devices are notified
- * so that they can do the appropriate set up to offload connections on the
- * port to which the socket is bound. When the listen socket is closed,
- * the offload devices are notified so that they will stop listening on that
- * port and free any associated resources as well as sending RSTs on any
- * connections in the SYN_RCVD state.
- *
- */
-
-typedef	void	(*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
-typedef	void	(*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list