svn commit: r256890 - user/andre/mbuf_staging/netinet

Andre Oppermann andre at FreeBSD.org
Tue Oct 22 13:45:04 UTC 2013


Author: andre
Date: Tue Oct 22 13:45:03 2013
New Revision: 256890
URL: http://svnweb.freebsd.org/changeset/base/256890

Log:
  Add gross proof of concept hack to bypass the entire lower stack
  for outbound tcp sends for certain very high performance uses.
  
  Bypassing means packets go directly to the wire, skipping all
  firewalls and other processing done in ip_output() and
  ether_output().
  
  This very first version only does one interface and MAC address
  lookup at the beginning of a connection.  It doesn't detect any
  changes afterwards and will grind into a void if it has to.  On
  certain errors it also leaks a bit of memory.
  
  The point of this patch is a) to allow performance analysis of
  the normal vs. bypass path; and b) to profile and optimize the
  normal path to bring it as close as possible to the bypass path.
  
  This patch is not intended to be merged to HEAD in its current
  form (or at all).

Modified:
  user/andre/mbuf_staging/netinet/tcp_output.c

Modified: user/andre/mbuf_staging/netinet/tcp_output.c
==============================================================================
--- user/andre/mbuf_staging/netinet/tcp_output.c	Tue Oct 22 13:31:36 2013	(r256889)
+++ user/andre/mbuf_staging/netinet/tcp_output.c	Tue Oct 22 13:45:03 2013	(r256890)
@@ -129,6 +129,14 @@ static void inline	hhook_run_tcp_est_out
 			    long len, int tso);
 static void inline	cc_after_idle(struct tcpcb *tp);
 
+#ifdef TCP_IFTRANSMIT
+static int	tcp_l2hdr(struct tcpcb *tp, struct in_conninfo *inc);
+static int	tcp_ifsend(struct tcpcb *tp, struct mbuf *m);
+
+#define	t_ifp	t_pspare2[0]	/* outbound ifnet cached by tcp_l2hdr() */
+#define	t_l2h	t_pspare2[1]	/* malloc'd prebuilt Ethernet header */
+#endif
+
 /*
  * Wrapper for the TCP established output helper hook.
  */
@@ -1228,6 +1236,13 @@ send:
 
 	TCP_PROBE5(send, NULL, tp, ip, tp, th);
 
+#ifdef TCP_IFTRANSMIT
+	if (tp->t_l2h == NULL)
+		(void)tcp_l2hdr(tp, &tp->t_inpcb->inp_inc);
+	if (tp->t_ifp != NULL) {
+		tcp_ifsend(tp, m);
+	} else
+#endif
 	error = ip_output(m, tp->t_inpcb->inp_options, &ro,
 	    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
 	    tp->t_inpcb);
@@ -1424,6 +1439,71 @@ tcp_setpersist(struct tcpcb *tp)
 		tp->t_rxtshift++;
 }
 
+#ifdef TCP_IFTRANSMIT
+#include <net/if_arp.h>
+#include <net/if_llc.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/ethernet.h>
+#include <net/if_llatbl.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
+
+/*
+ * Cache the outbound ifnet (tp->t_ifp) and a prebuilt Ethernet header for
+ * the peer (tp->t_l2h).  The header is assembled on the stack so no failure
+ * path leaks memory.  Returns 0, or ENOBUFS with nothing cached in tp.
+ */
+static int
+tcp_l2hdr(struct tcpcb *tp, struct in_conninfo *inc)
+{
+	struct route sro;
+	struct sockaddr_in *dst;
+	struct ifnet *ifp;
+	struct llentry *lle = NULL;
+	struct ether_header eh;
+
+	bzero(&sro, sizeof(sro));	/* Look up destination interface. */
+	if (inc->inc_faddr.s_addr == INADDR_ANY)
+		return (ENOBUFS);
+	dst = (struct sockaddr_in *)&sro.ro_dst;
+	dst->sin_family = AF_INET;
+	dst->sin_len = sizeof(*dst);
+	dst->sin_addr = inc->inc_faddr;
+	in_rtalloc_ign(&sro, 0, inc->inc_fibnum);
+	if (sro.ro_rt == NULL)
+		return (ENOBUFS);
+	ifp = sro.ro_rt->rt_ifp;
+	RO_RTFREE(&sro);
+	if (arpresolve(ifp, NULL, NULL, &sro.ro_dst, eh.ether_dhost, &lle) != 0)
+		return (ENOBUFS);	/* Nothing allocated yet, nothing to leak. */
+	(void)memcpy(eh.ether_shost, IF_LLADDR(ifp), sizeof(eh.ether_shost));
+	eh.ether_type = htons(ETHERTYPE_IP);
+	if ((tp->t_l2h = malloc(ETHER_HDR_LEN, M_TEMP, M_NOWAIT)) == NULL)
+		return (ENOBUFS);
+	(void)memcpy(tp->t_l2h, &eh, ETHER_HDR_LEN);
+	tp->t_ifp = ifp;		/* Set last: enables the send bypass. */
+	return (0);
+}
+
+/*
+ * Prepend the cached Ethernet header (tp->t_l2h) to m and pass the frame
+ * straight to the if_transmit method of tp->t_ifp, returning its result.
+ */
+static int
+tcp_ifsend(struct tcpcb *tp, struct mbuf *m)
+{
+	struct ifnet *outif = tp->t_ifp;
+
+	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+	if (m == NULL)		/* prepend failed under M_NOWAIT */
+		return (ENOBUFS);
+	/* Copy the prebuilt header into the space just prepended. */
+	(void)memcpy(mtod(m, void *), tp->t_l2h, ETHER_HDR_LEN);
+	return ((outif->if_transmit)(outif, m));
+}
+#endif /* TCP_IFTRANSMIT */
+
 /*
  * Insert TCP options according to the supplied parameters to the place
  * optp in a consistent way.  Can handle unaligned destinations.


More information about the svn-src-user mailing list