svn commit: r256694 - in head/sys: dev/cxgbe/iw_cxgbe modules/cxgbe modules/cxgbe/iw_cxgbe

Navdeep Parhar np at FreeBSD.org
Thu Oct 17 18:37:25 UTC 2013


Author: np
Date: Thu Oct 17 18:37:25 2013
New Revision: 256694
URL: http://svnweb.freebsd.org/changeset/base/256694

Log:
  iw_cxgbe: iWARP driver for Chelsio T4/T5 chips.  This is a straight port
  of the iw_cxgb4 driver found in OFED distributions.
  
  Obtained from:	Chelsio

Added:
  head/sys/dev/cxgbe/iw_cxgbe/
  head/sys/dev/cxgbe/iw_cxgbe/cm.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/cq.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/device.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/ev.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/id_table.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/mem.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/provider.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/qp.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/resource.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/t4.h   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/user.h   (contents, props changed)
  head/sys/modules/cxgbe/iw_cxgbe/
  head/sys/modules/cxgbe/iw_cxgbe/Makefile   (contents, props changed)
Modified:
  head/sys/modules/cxgbe/Makefile

Added: head/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/cxgbe/iw_cxgbe/cm.c	Thu Oct 17 18:37:25 2013	(r256694)
@@ -0,0 +1,2458 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/taskqueue.h>
+#include <netinet/in.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
+
+#include <netinet/toecore.h>
+
+struct sge_iq;
+struct rss_header;
+#include <linux/types.h>
+#include "offload.h"
+#include "tom/t4_tom.h"
+
+#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
+
+#include "iw_cxgbe.h"
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/if_vlan.h>
+#include <net/netevent.h>
+
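+/*
+ * Deferred event processing: socket upcalls must not block, so
+ * c4iw_so_upcall() only puts the endpoint on req_list (protected by
+ * req_lock) and kicks c4iw_taskq.  process_req() then handles the event
+ * from the workqueue, where it is safe to sleep and to call into the
+ * RDMA CM.
+ */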
+static spinlock_t req_lock;
+static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
+static struct work_struct c4iw_task;
+static struct workqueue_struct *c4iw_taskq;
+static LIST_HEAD(timeout_list);
+static spinlock_t timeout_lock;
+
+static void process_req(struct work_struct *ctx);
+static void start_ep_timer(struct c4iw_ep *ep);
+static void stop_ep_timer(struct c4iw_ep *ep);
+static int set_tcpinfo(struct c4iw_ep *ep);
+static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
+static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
+static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
+static void *alloc_ep(int size, gfp_t flags);
+void __free_ep(struct c4iw_ep_common *epc);
+static struct rtentry *find_route(__be32 local_ip, __be32 peer_ip,
+		__be16 local_port, __be16 peer_port, u8 tos);
+static int close_socket(struct c4iw_ep_common *epc, int close);
+static int shutdown_socket(struct c4iw_ep_common *epc);
+static void abort_socket(struct c4iw_ep *ep);
+static void send_mpa_req(struct c4iw_ep *ep);
+static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
+static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
+static void close_complete_upcall(struct c4iw_ep *ep);
+static int abort_connection(struct c4iw_ep *ep);
+static void peer_close_upcall(struct c4iw_ep *ep);
+static void peer_abort_upcall(struct c4iw_ep *ep);
+static void connect_reply_upcall(struct c4iw_ep *ep, int status);
+static void connect_request_upcall(struct c4iw_ep *ep);
+static void established_upcall(struct c4iw_ep *ep);
+static void process_mpa_reply(struct c4iw_ep *ep);
+static void process_mpa_request(struct c4iw_ep *ep);
+static void process_peer_close(struct c4iw_ep *ep);
+static void process_conn_error(struct c4iw_ep *ep);
+static void process_close_complete(struct c4iw_ep *ep);
+static void ep_timeout(unsigned long arg);
+static void init_sock(struct c4iw_ep_common *epc);
+static void process_data(struct c4iw_ep *ep);
+static void process_connected(struct c4iw_ep *ep);
+static struct socket *dequeue_socket(struct socket *head,
+		struct sockaddr_in **remote, struct c4iw_ep *child_ep);
+static void process_newconn(struct c4iw_ep *parent_ep);
+static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
+static void process_socket_event(struct c4iw_ep *ep);
+static void release_ep_resources(struct c4iw_ep *ep);
+
+#define START_EP_TIMER(ep) \
+    do { \
+	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
+		__func__, __LINE__, (ep)); \
+	    start_ep_timer(ep); \
+    } while (0)
+
+#define STOP_EP_TIMER(ep) \
+    do { \
+	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
+		__func__, __LINE__, (ep)); \
+	    stop_ep_timer(ep); \
+    } while (0)
+
+#ifdef KTR
+static char *states[] = {
+	"idle",
+	"listen",
+	"connecting",
+	"mpa_wait_req",
+	"mpa_req_sent",
+	"mpa_req_rcvd",
+	"mpa_rep_sent",
+	"fpdu_mode",
+	"aborting",
+	"closing",
+	"moribund",
+	"dead",
+	NULL,
+};
+#endif
+
+static void
+process_req(struct work_struct *ctx)
+{
+	struct c4iw_ep_common *epc;
+
+	spin_lock(&req_lock);
+	while (!TAILQ_EMPTY(&req_list)) {
+		epc = TAILQ_FIRST(&req_list);
+		TAILQ_REMOVE(&req_list, epc, entry);
+		epc->entry.tqe_prev = NULL;
+		spin_unlock(&req_lock);
+		if (epc->so)
+			process_socket_event((struct c4iw_ep *)epc);
+		c4iw_put_ep(epc);
+		spin_lock(&req_lock);
+	}
+	spin_unlock(&req_lock);
+}
+
+/*
+ * XXX: doesn't belong here in the iWARP driver.
+ * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
+ *      set.  Is this a valid assumption for active open?
+ */
+static int
+set_tcpinfo(struct c4iw_ep *ep)
+{
+	struct socket *so = ep->com.so;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct toepcb *toep;
+	int rc = 0;
+
+	INP_WLOCK(inp);
+	tp = intotcpcb(inp);
+	if ((tp->t_flags & TF_TOE) == 0) {
+		rc = EINVAL;
+		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
+		    __func__, so, ep);
+		goto done;
+	}
+	toep = TOEPCB(so);
+
+	ep->hwtid = toep->tid;
+	ep->snd_seq = tp->snd_nxt;
+	ep->rcv_seq = tp->rcv_nxt;
+	ep->emss = max(tp->t_maxseg, 128);
+done:
+	INP_WUNLOCK(inp);
+	return (rc);
+}
+
+static struct rtentry *
+find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
+		__be16 peer_port, u8 tos)
+{
+	struct route iproute;
+	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;
+
+	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
+	    peer_ip, ntohs(local_port), ntohs(peer_port));
+	bzero(&iproute, sizeof iproute);
+	dst->sin_family = AF_INET;
+	dst->sin_len = sizeof *dst;
+	dst->sin_addr.s_addr = peer_ip;
+
+	rtalloc(&iproute);
+	CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, iproute.ro_rt);
+	return (iproute.ro_rt);
+}
+
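+/*
+ * Detach the receive upcall and release the socket: soclose() it if
+ * 'close' is set, otherwise soshutdown() it in both directions.
+ */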
+static int
+close_socket(struct c4iw_ep_common *epc, int close)
+{
+	struct socket *so = epc->so;
+	int rc;
+
+	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, so, epc,
+	    states[epc->state]);
+
+	SOCK_LOCK(so);
+	soupcall_clear(so, SO_RCV);
+	SOCK_UNLOCK(so);
+
+	if (close)
+		rc = soclose(so);
+	else
+		rc = soshutdown(so, SHUT_WR | SHUT_RD);
+	epc->so = NULL;
+
+	return (rc);
+}
+
+static int
+shutdown_socket(struct c4iw_ep_common *epc)
+{
+
+	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc,
+	    states[epc->state]);
+
+	return (soshutdown(epc->so, SHUT_WR));
+}
+
+static void
+abort_socket(struct c4iw_ep *ep)
+{
+	struct sockopt sopt;
+	int rc;
+	struct linger l;
+
+	CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so,
+	    states[ep->com.state]);
+
+	l.l_onoff = 1;
+	l.l_linger = 0;
+
+	/* linger_time of 0 forces RST to be sent */
+	sopt.sopt_dir = SOPT_SET;
+	sopt.sopt_level = SOL_SOCKET;
+	sopt.sopt_name = SO_LINGER;
+	sopt.sopt_val = (caddr_t)&l;
+	sopt.sopt_valsize = sizeof l;
+	sopt.sopt_td = NULL;
+	rc = sosetopt(ep->com.so, &sopt);
+	if (rc) {
+		log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
+		    __func__, rc);
+	}
+}
+
+static void
+process_peer_close(struct c4iw_ep *ep)
+{
+	struct c4iw_qp_attributes attrs;
+	int disconnect = 1;
+	int release = 0;
+
+	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
+	    ep->com.so, states[ep->com.state]);
+
+	mutex_lock(&ep->com.mutex);
+	switch (ep->com.state) {
+
+		case MPA_REQ_WAIT:
+			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
+			    __func__, ep);
+			__state_set(&ep->com, CLOSING);
+			break;
+
+		case MPA_REQ_SENT:
+			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
+			    __func__, ep);
+			__state_set(&ep->com, DEAD);
+			connect_reply_upcall(ep, -ECONNABORTED);
+
+			disconnect = 0;
+			STOP_EP_TIMER(ep);
+			close_socket(&ep->com, 0);
+			ep->com.cm_id->rem_ref(ep->com.cm_id);
+			ep->com.cm_id = NULL;
+			ep->com.qp = NULL;
+			release = 1;
+			break;
+
+		case MPA_REQ_RCVD:
+
+			/*
+			 * We're gonna mark this puppy DEAD, but keep
+			 * the reference on it until the ULP accepts or
+			 * rejects the CR.
+			 */
+			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
+			    __func__, ep);
+			__state_set(&ep->com, CLOSING);
+			c4iw_get_ep(&ep->com);
+			break;
+
+		case MPA_REP_SENT:
+			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
+			    __func__, ep);
+			__state_set(&ep->com, CLOSING);
+			break;
+
+		case FPDU_MODE:
+			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
+			    __func__, ep);
+			START_EP_TIMER(ep);
+			__state_set(&ep->com, CLOSING);
+			attrs.next_state = C4IW_QP_STATE_CLOSING;
+			c4iw_modify_qp(ep->com.dev, ep->com.qp,
+					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+			peer_close_upcall(ep);
+			break;
+
+		case ABORTING:
+			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
+			    __func__, ep);
+			disconnect = 0;
+			break;
+
+		case CLOSING:
+			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
+			    __func__, ep);
+			__state_set(&ep->com, MORIBUND);
+			disconnect = 0;
+			break;
+
+		case MORIBUND:
+			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
+			    ep);
+			STOP_EP_TIMER(ep);
+			if (ep->com.cm_id && ep->com.qp) {
+				attrs.next_state = C4IW_QP_STATE_IDLE;
+				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+			}
+			close_socket(&ep->com, 0);
+			close_complete_upcall(ep);
+			__state_set(&ep->com, DEAD);
+			release = 1;
+			disconnect = 0;
+			break;
+
+		case DEAD:
+			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
+			    __func__, ep);
+			disconnect = 0;
+			break;
+
+		default:
+			panic("%s: ep %p state %d", __func__, ep,
+			    ep->com.state);
+			break;
+	}
+
+	mutex_unlock(&ep->com.mutex);
+
+	if (disconnect) {
+
+		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
+		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
+	}
+	if (release) {
+
+		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
+		c4iw_put_ep(&ep->com);
+	}
+	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
+	return;
+}
+
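+/*
+ * so_error is set on the socket: the connection was aborted or reset.
+ * Depending on the state, deliver the connect-reply or peer-abort upcall
+ * to the ULP, move the QP to ERROR, and release the endpoint unless an
+ * abort is already in progress.
+ */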
+static void
+process_conn_error(struct c4iw_ep *ep)
+{
+	struct c4iw_qp_attributes attrs;
+	int ret;
+	int state;
+
+	state = state_read(&ep->com);
+	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
+	    __func__, ep, ep->com.so, ep->com.so->so_error,
+	    states[ep->com.state]);
+
+	switch (state) {
+
+		case MPA_REQ_WAIT:
+			STOP_EP_TIMER(ep);
+			break;
+
+		case MPA_REQ_SENT:
+			STOP_EP_TIMER(ep);
+			connect_reply_upcall(ep, -ECONNRESET);
+			break;
+
+		case MPA_REP_SENT:
+			ep->com.rpl_err = ECONNRESET;
+			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
+			break;
+
+		case MPA_REQ_RCVD:
+
+			/*
+			 * We're gonna mark this puppy DEAD, but keep
+			 * the reference on it until the ULP accepts or
+			 * rejects the CR.
+			 */
+			c4iw_get_ep(&ep->com);
+			break;
+
+		case MORIBUND:
+		case CLOSING:
+			STOP_EP_TIMER(ep);
+			/*FALLTHROUGH*/
+		case FPDU_MODE:
+
+			if (ep->com.cm_id && ep->com.qp) {
+
+				attrs.next_state = C4IW_QP_STATE_ERROR;
+				ret = c4iw_modify_qp(ep->com.qp->rhp,
+					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
+					&attrs, 1);
+				if (ret)
+					log(LOG_ERR,
+					    "%s - qp <- error failed!\n",
+					    __func__);
+			}
+			peer_abort_upcall(ep);
+			break;
+
+		case ABORTING:
+			break;
+
+		case DEAD:
+			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
+			    __func__, ep->com.so->so_error);
+			return;
+
+		default:
+			panic("%s: ep %p state %d", __func__, ep, state);
+			break;
+	}
+
+	if (state != ABORTING) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep);
+		close_socket(&ep->com, 0);
+		state_set(&ep->com, DEAD);
+		c4iw_put_ep(&ep->com);
+	}
+	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
+	return;
+}
+
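+/*
+ * The socket is fully disconnected (SS_ISDISCONNECTED).  Finish the
+ * orderly close: once the state machine reaches MORIBUND, idle the QP,
+ * release the socket, and deliver the close-complete upcall.
+ */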
+static void
+process_close_complete(struct c4iw_ep *ep)
+{
+	struct c4iw_qp_attributes attrs;
+	int release = 0;
+
+	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
+	    ep->com.so, states[ep->com.state]);
+
+	/* The cm_id may be null if we failed to connect */
+	mutex_lock(&ep->com.mutex);
+
+	switch (ep->com.state) {
+
+		case CLOSING:
+			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
+			    __func__, ep);
+			__state_set(&ep->com, MORIBUND);
+			break;
+
+		case MORIBUND:
+			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
+			    ep);
+			STOP_EP_TIMER(ep);
+
+			if ((ep->com.cm_id) && (ep->com.qp)) {
+
+				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
+				    __func__, ep);
+				attrs.next_state = C4IW_QP_STATE_IDLE;
+				c4iw_modify_qp(ep->com.dev,
+						ep->com.qp,
+						C4IW_QP_ATTR_NEXT_STATE,
+						&attrs, 1);
+			}
+
+			if (ep->parent_ep) {
+
+				CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep);
+				close_socket(&ep->com, 1);
+			}
+			else {
+
+				CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep);
+				close_socket(&ep->com, 0);
+			}
+			close_complete_upcall(ep);
+			__state_set(&ep->com, DEAD);
+			release = 1;
+			break;
+
+		case ABORTING:
+			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
+			break;
+
+		case DEAD:
+		default:
+			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
+			panic("%s:pcc6 %p DEAD", __func__, ep);
+			break;
+	}
+	mutex_unlock(&ep->com.mutex);
+
+	if (release) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep);
+		c4iw_put_ep(&ep->com);
+	}
+	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
+	return;
+}
+
+static void
+init_sock(struct c4iw_ep_common *epc)
+{
+	int rc;
+	struct sockopt sopt;
+	struct socket *so = epc->so;
+	int on = 1;
+
+	SOCK_LOCK(so);
+	soupcall_set(so, SO_RCV, c4iw_so_upcall, epc);
+	so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(so);
+	sopt.sopt_dir = SOPT_SET;
+	sopt.sopt_level = IPPROTO_TCP;
+	sopt.sopt_name = TCP_NODELAY;
+	sopt.sopt_val = (caddr_t)&on;
+	sopt.sopt_valsize = sizeof on;
+	sopt.sopt_td = NULL;
+	rc = sosetopt(so, &sopt);
+	if (rc) {
+		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
+		    __func__, so, rc);
+	}
+}
+
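+/*
+ * Stream data arrived on the socket.  In MPA_REQ_SENT it is the peer's
+ * MPA reply; in MPA_REQ_WAIT it is the peer's MPA request (and the first
+ * chance to record the endpoint's local and remote addresses).  Data in
+ * any other state is unexpected and only logged.
+ */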
+static void
+process_data(struct c4iw_ep *ep)
+{
+	struct sockaddr_in *local, *remote;
+
+	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sb_cc %d", __func__,
+	    ep->com.so, ep, states[ep->com.state], ep->com.so->so_rcv.sb_cc);
+
+	switch (state_read(&ep->com)) {
+	case MPA_REQ_SENT:
+		process_mpa_reply(ep);
+		break;
+	case MPA_REQ_WAIT:
+		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
+		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
+		ep->com.local_addr = *local;
+		ep->com.remote_addr = *remote;
+		free(local, M_SONAME);
+		free(remote, M_SONAME);
+		process_mpa_request(ep);
+		break;
+	default:
+		if (ep->com.so->so_rcv.sb_cc)
+			log(LOG_ERR, "%s: Unexpected streaming data.  "
+			    "ep %p, state %d, so %p, so_state 0x%x, sb_cc %u\n",
+			    __func__, ep, state_read(&ep->com), ep->com.so,
+			    ep->com.so->so_state, ep->com.so->so_rcv.sb_cc);
+		break;
+	}
+}
+
+static void
+process_connected(struct c4iw_ep *ep)
+{
+
+	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error)
+		send_mpa_req(ep);
+	else {
+		connect_reply_upcall(ep, -ep->com.so->so_error);
+		close_socket(&ep->com, 0);
+		state_set(&ep->com, DEAD);
+		c4iw_put_ep(&ep->com);
+	}
+}
+
+static struct socket *
+dequeue_socket(struct socket *head, struct sockaddr_in **remote,
+    struct c4iw_ep *child_ep)
+{
+	struct socket *so;
+
+	ACCEPT_LOCK();
+	so = TAILQ_FIRST(&head->so_comp);
+	if (!so) {
+		ACCEPT_UNLOCK();
+		return (NULL);
+	}
+	TAILQ_REMOVE(&head->so_comp, so, so_list);
+	head->so_qlen--;
+	SOCK_LOCK(so);
+	so->so_qstate &= ~SQ_COMP;
+	so->so_head = NULL;
+	soref(so);
+	soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep);
+	so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(so);
+	ACCEPT_UNLOCK();
+	soaccept(so, (struct sockaddr **)remote);
+
+	return (so);
+}
+
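+/*
+ * A completed connection is waiting on the listening socket.  Accept it
+ * into a fresh child endpoint in MPA_REQ_WAIT, arm the MPA timer, and
+ * check whether the MPA request is already queued in the child's
+ * receive buffer.
+ */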
+static void
+process_newconn(struct c4iw_ep *parent_ep)
+{
+	struct socket *child_so;
+	struct c4iw_ep *child_ep;
+	struct sockaddr_in *remote;
+
+	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
+	if (!child_ep) {
+		CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
+		    __func__, parent_ep->com.so, parent_ep);
+		log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
+		return;
+	}
+
+	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
+	if (!child_so) {
+		CTR4(KTR_IW_CXGBE,
+		    "%s: parent so %p, parent ep %p, child ep %p, dequeue err",
+		    __func__, parent_ep->com.so, parent_ep, child_ep);
+		log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__);
+		__free_ep(&child_ep->com);
+		return;
+
+	}
+
+	CTR5(KTR_IW_CXGBE,
+	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
+	     __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
+
+	child_ep->com.local_addr = parent_ep->com.local_addr;
+	child_ep->com.remote_addr = *remote;
+	child_ep->com.dev = parent_ep->com.dev;
+	child_ep->com.so = child_so;
+	child_ep->com.cm_id = NULL;
+	child_ep->com.thread = parent_ep->com.thread;
+	child_ep->parent_ep = parent_ep;
+
+	free(remote, M_SONAME);
+	c4iw_get_ep(&parent_ep->com);
+	init_timer(&child_ep->timer);
+	state_set(&child_ep->com, MPA_REQ_WAIT);
+	START_EP_TIMER(child_ep);
+
+	/* maybe the request has already been queued up on the socket... */
+	process_mpa_request(child_ep);
+}
+
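+/*
+ * Receive upcall, invoked by the socket layer on socket state changes.
+ * Defer all real work: take a reference and queue the endpoint for
+ * process_req(), unless it is already queued (tqe_prev != NULL means it
+ * is on req_list).
+ */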
+static int
+c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
+{
+	struct c4iw_ep *ep = arg;
+
+	spin_lock(&req_lock);
+
+	CTR6(KTR_IW_CXGBE,
+	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
+	    __func__, so, so->so_state, ep, states[ep->com.state],
+	    ep->com.entry.tqe_prev);
+
+	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
+		KASSERT(ep->com.so == so, ("%s: XXX review.", __func__));
+		c4iw_get_ep(&ep->com);
+		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
+		queue_work(c4iw_taskq, &c4iw_task);
+	}
+
+	spin_unlock(&req_lock);
+	return (SU_OK);
+}
+
+static void
+process_socket_event(struct c4iw_ep *ep)
+{
+	int state = state_read(&ep->com);
+	struct socket *so = ep->com.so;
+
+	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
+	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
+	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
+
+	if (state == CONNECTING) {
+		process_connected(ep);
+		return;
+	}
+
+	if (state == LISTEN) {
+		process_newconn(ep);
+		return;
+	}
+
+	/* connection error */
+	if (so->so_error) {
+		process_conn_error(ep);
+		return;
+	}
+
+	/* peer close */
+	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
+		process_peer_close(ep);
+		return;
+	}
+
+	/* close complete */
+	if (so->so_state & SS_ISDISCONNECTED) {
+		process_close_complete(ep);
+		return;
+	}
+
+	/* rx data */
+	process_data(ep);
+}
+
+SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
+
+int db_delay_usecs = 1;
+TUNABLE_INT("hw.iw_cxgbe.db_delay_usecs", &db_delay_usecs);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RW, &db_delay_usecs, 0,
+		"Microseconds to delay while waiting for the doorbell FIFO to drain");
+
+static int dack_mode = 1;
+TUNABLE_INT("hw.iw_cxgbe.dack_mode", &dack_mode);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RW, &dack_mode, 0,
+		"Delayed ack mode (default = 1)");
+
+int c4iw_max_read_depth = 8;
+TUNABLE_INT("hw.iw_cxgbe.c4iw_max_read_depth", &c4iw_max_read_depth);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RW, &c4iw_max_read_depth, 0,
+		"Per-connection max ORD/IRD (default = 8)");
+
+static int enable_tcp_timestamps;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_timestamps", &enable_tcp_timestamps);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RW, &enable_tcp_timestamps, 0,
+		"Enable tcp timestamps (default = 0)");
+
+static int enable_tcp_sack;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_sack", &enable_tcp_sack);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RW, &enable_tcp_sack, 0,
+		"Enable tcp SACK (default = 0)");
+
+static int enable_tcp_window_scaling = 1;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_window_scaling", &enable_tcp_window_scaling);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RW, &enable_tcp_window_scaling, 0,
+		"Enable tcp window scaling (default = 1)");
+
+int c4iw_debug = 1;
+TUNABLE_INT("hw.iw_cxgbe.c4iw_debug", &c4iw_debug);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RW, &c4iw_debug, 0,
+		"Enable debug logging (default = 1)");
+
+static int peer2peer;
+TUNABLE_INT("hw.iw_cxgbe.peer2peer", &peer2peer);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 0,
+		"Support peer2peer ULPs (default = 0)");
+
+static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
+TUNABLE_INT("hw.iw_cxgbe.p2p_type", &p2p_type);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RW, &p2p_type, 0,
+		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ, 0 = RDMA_WRITE (default = 1)");
+
+static int ep_timeout_secs = 60;
+TUNABLE_INT("hw.iw_cxgbe.ep_timeout_secs", &ep_timeout_secs);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
+		"CM Endpoint operation timeout in seconds (default = 60)");
+
+static int mpa_rev = 1;
+TUNABLE_INT("hw.iw_cxgbe.mpa_rev", &mpa_rev);
+#ifdef IW_CM_MPAV2
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
+		"MPA Revision, 0 supports amso1100, 1 is RFC 5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
+#else
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
+		"MPA Revision, 0 supports amso1100, 1 is RFC 5044 spec compliant (default = 1)");
+#endif
+
+static int markers_enabled;
+TUNABLE_INT("hw.iw_cxgbe.markers_enabled", &markers_enabled);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
+		"Enable MPA MARKERS (default(0) = disabled)");
+
+static int crc_enabled = 1;
+TUNABLE_INT("hw.iw_cxgbe.crc_enabled", &crc_enabled);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
+		"Enable MPA CRC (default(1) = enabled)");
+
+static int rcv_win = 256 * 1024;
+TUNABLE_INT("hw.iw_cxgbe.rcv_win", &rcv_win);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
+		"TCP receive window in bytes (default = 256KB)");
+
+static int snd_win = 128 * 1024;
+TUNABLE_INT("hw.iw_cxgbe.snd_win", &snd_win);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
+		"TCP send window in bytes (default = 128KB)");
+
+int db_fc_threshold = 2000;
+TUNABLE_INT("hw.iw_cxgbe.db_fc_threshold", &db_fc_threshold);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RW, &db_fc_threshold, 0,
+		"QP count/threshold that triggers automatic doorbell flow control (default = 2000)");
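+/*
+ * Each knob above is registered both as a loader tunable (TUNABLE_INT)
+ * and as a sysctl (SYSCTL_INT), so it can be set in loader.conf(5),
+ * e.g. hw.iw_cxgbe.rcv_win="262144", or adjusted at runtime.
+ */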
+
+static void
+start_ep_timer(struct c4iw_ep *ep)
+{
+
+	if (timer_pending(&ep->timer)) {
+		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
+		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
+		    ep);
+		return;
+	}
+	clear_bit(TIMEOUT, &ep->com.flags);
+	c4iw_get_ep(&ep->com);
+	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
+	ep->timer.data = (unsigned long)ep;
+	ep->timer.function = ep_timeout;
+	add_timer(&ep->timer);
+}
+
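+/*
+ * start_ep_timer() takes a reference on the endpoint for the timer; the
+ * TIMEOUT bit arbitrates whether stop_ep_timer() or the expiry path
+ * drops it, so the reference cannot be released twice.
+ */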
+static void
+stop_ep_timer(struct c4iw_ep *ep)
+{
+
+	del_timer_sync(&ep->timer);
+	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
+		c4iw_put_ep(&ep->com);
+	}
+}
+
+static enum c4iw_ep_state
+state_read(struct c4iw_ep_common *epc)
+{
+	enum c4iw_ep_state state;
+
+	mutex_lock(&epc->mutex);
+	state = epc->state;
+	mutex_unlock(&epc->mutex);
+
+	return (state);
+}
+
+static void
+__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+
+	epc->state = new;
+}
+
+static void
+state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+
+	mutex_lock(&epc->mutex);
+	__state_set(epc, new);
+	mutex_unlock(&epc->mutex);
+}
+
+static void *
+alloc_ep(int size, gfp_t gfp)
+{
+	struct c4iw_ep_common *epc;
+
+	epc = kzalloc(size, gfp);
+	if (epc == NULL)
+		return (NULL);
+
+	kref_init(&epc->kref);
+	mutex_init(&epc->mutex);
+	c4iw_init_wr_wait(&epc->wr_wait);
+
+	return (epc);
+}
+
+void
+__free_ep(struct c4iw_ep_common *epc)
+{
+	CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc);
+	KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so));
+	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc));
+	free(epc, M_DEVBUF);
+	CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc);
+}
+
+void
+_c4iw_free_ep(struct kref *kref)
+{
+	struct c4iw_ep *ep;
+	struct c4iw_ep_common *epc;
+
+	ep = container_of(kref, struct c4iw_ep, com.kref);
+	epc = &ep->com;
+	KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so));
+	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
+	    __func__, epc));
+	kfree(ep);
+}
+
+static void
+release_ep_resources(struct c4iw_ep *ep)
+{
+	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
+	set_bit(RELEASE_RESOURCES, &ep->com.flags);
+	c4iw_put_ep(&ep->com);
+	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
+}
+
+static void
+send_mpa_req(struct c4iw_ep *ep)
+{
+	int mpalen;
+	struct mpa_message *mpa;
+	struct mpa_v2_conn_params mpa_v2_params;
+	struct mbuf *m;
+	char mpa_rev_to_use = mpa_rev;
+	int err;
+
+	if (ep->retry_with_mpa_v1)
+		mpa_rev_to_use = 1;
+	mpalen = sizeof(*mpa) + ep->plen;
+	if (mpa_rev_to_use == 2)
+		mpalen += sizeof(struct mpa_v2_conn_params);
+
+	if (mpalen > MHLEN)
+		CXGBE_UNIMPLEMENTED(__func__);
+
+	m = m_gethdr(M_NOWAIT, MT_DATA);
+	if (m == NULL) {
+		connect_reply_upcall(ep, -ENOMEM);
+		return;
+	}
+
+	mpa = mtod(m, struct mpa_message *);
+	m->m_len = mpalen;
+	m->m_pkthdr.len = mpalen;
+	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+		(markers_enabled ? MPA_MARKERS : 0) |
+		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
+	mpa->private_data_size = htons(ep->plen);
+	mpa->revision = mpa_rev_to_use;
+
+	if (mpa_rev_to_use == 1) {
+		ep->tried_with_mpa_v1 = 1;
+		ep->retry_with_mpa_v1 = 0;
+	}

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
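
For context, cm.c implements the kernel half of iWARP connection setup
that userspace drives through librdmacm.  Below is a minimal sketch of
the active-open path this file services, using the synchronous
librdmacm API; the address, port, and queue sizes are illustrative and
not part of the commit:

    #include <err.h>
    #include <string.h>
    #include <rdma/rdma_cma.h>

    int
    main(void)
    {
        struct rdma_addrinfo hints, *res;
        struct ibv_qp_init_attr attr;
        struct rdma_cm_id *id;

        /* Resolve the server; 192.0.2.1:7471 is a placeholder. */
        memset(&hints, 0, sizeof(hints));
        hints.ai_port_space = RDMA_PS_TCP;
        if (rdma_getaddrinfo("192.0.2.1", "7471", &hints, &res) != 0)
            err(1, "rdma_getaddrinfo");

        /* One call creates the cm_id, PD, CQs, and QP. */
        memset(&attr, 0, sizeof(attr));
        attr.cap.max_send_wr = attr.cap.max_recv_wr = 4;
        attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
        attr.sq_sig_all = 1;
        if (rdma_create_ep(&id, res, NULL, &attr) != 0)
            err(1, "rdma_create_ep");

        /*
         * Kicks off the kernel active open: the TOE establishes the
         * TCP connection, then send_mpa_req()/process_mpa_reply()
         * above run the MPA exchange.
         */
        if (rdma_connect(id, NULL) != 0)
            err(1, "rdma_connect");

        /* ... post sends/receives on id->qp ... */

        rdma_disconnect(id);
        rdma_destroy_ep(id);
        rdma_freeaddrinfo(res);
        return (0);
    }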

