svn commit: r294610 - in head/sys: dev/cxgb/ulp/iw_cxgb dev/cxgbe/iw_cxgbe ofed/drivers/infiniband/core ofed/include/rdma

Navdeep Parhar np at FreeBSD.org
Fri Jan 22 23:33:37 UTC 2016


Author: np
Date: Fri Jan 22 23:33:34 2016
New Revision: 294610
URL: https://svnweb.freebsd.org/changeset/base/294610

Log:
  Fix for iWARP servers that listen on INADDR_ANY.
  
  The iWARP Connection Manager (CM) on FreeBSD creates a TCP socket to
  represent an iWARP endpoint when the connection is over TCP. For
  servers, the current approach is to invoke the create_listen callback
  for each iWARP RNIC registered with the CM. This doesn't work well for
  INADDR_ANY because a listen on any TCP socket already notifies all
  hardware TOEs/RNICs of the new listener. This patch fixes the server
  side of things for FreeBSD. We've tried to keep all these modifications
  in the iWARP/TCP-specific parts of the OFED infrastructure as much as
  possible.
  
  Submitted by:	Krishnamraju Eraparaju @ Chelsio (with design inputs from Steve Wise)
  Sponsored by:	Chelsio Communications
  Differential Revision:	https://reviews.freebsd.org/D4801

Modified:
  head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
  head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
  head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
  head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
  head/sys/dev/cxgbe/iw_cxgbe/cm.c
  head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
  head/sys/dev/cxgbe/iw_cxgbe/provider.c
  head/sys/ofed/drivers/infiniband/core/cma.c
  head/sys/ofed/drivers/infiniband/core/iwcm.c
  head/sys/ofed/include/rdma/iw_cm.h
  head/sys/ofed/include/rdma/rdma_cm.h

Modified: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
==============================================================================
--- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h	Fri Jan 22 23:33:34 2016	(r294610)
@@ -174,4 +174,5 @@ static inline void remove_handle(struct 
 }
 
 void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *);
+void process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so);
 #endif

Modified: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
==============================================================================
--- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c	Fri Jan 22 23:33:34 2016	(r294610)
@@ -260,7 +260,6 @@ alloc_ep(int size, int flags)
 void __free_ep(struct iwch_ep_common *epc)
 {
 	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
-	KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
 	free(epc, M_DEVBUF);
 }
@@ -1361,7 +1360,7 @@ out:
 }
 
 int
-iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
+iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
 {
 	int err = 0;
 	struct iwch_listen_ep *ep;
@@ -1381,35 +1380,22 @@ iwch_create_listen(struct iw_cm_id *cm_i
 	state_set(&ep->com, LISTEN);
 
 	ep->com.so = cm_id->so;
-	err = init_sock(&ep->com);
-	if (err)
-		goto fail;
-
-	err = solisten(ep->com.so, ep->backlog, ep->com.thread);
-	if (!err) {
-		cm_id->provider_data = ep;
-		goto out;
-	}
-	close_socket(&ep->com, 0);
-fail:
-	cm_id->rem_ref(cm_id);
-	put_ep(&ep->com);
+	cm_id->provider_data = ep;
 out:
 	return err;
 }
 
-int
-iwch_destroy_listen(struct iw_cm_id *cm_id)
+void
+iwch_destroy_listen_ep(struct iw_cm_id *cm_id)
 {
 	struct iwch_listen_ep *ep = to_listen_ep(cm_id);
 
 	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
 
 	state_set(&ep->com, DEAD);
-	close_socket(&ep->com, 0);
 	cm_id->rem_ref(cm_id);
 	put_ep(&ep->com);
-	return 0;
+	return;
 }
 
 int
@@ -1526,54 +1512,32 @@ process_connected(struct iwch_ep *ep)
 	}
 }
 
-static struct socket *
-dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
-{
-	struct socket *so;
-
-	ACCEPT_LOCK();
-	so = TAILQ_FIRST(&head->so_comp);
-	if (!so) {
-		ACCEPT_UNLOCK();
-		return NULL;
-	}
-	TAILQ_REMOVE(&head->so_comp, so, so_list);
-	head->so_qlen--;
-	SOCK_LOCK(so);
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-	soref(so);
-	soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
-	so->so_state |= SS_NBIO;
-	PANIC_IF(!(so->so_state & SS_ISCONNECTED));
-	PANIC_IF(so->so_error);
-	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
-	soaccept(so, (struct sockaddr **)remote);
-	return so;
-}
-
-static void
-process_newconn(struct iwch_ep *parent_ep)
+void
+process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
 {
-	struct socket *child_so;
 	struct iwch_ep *child_ep;
+	struct sockaddr_in *local;
 	struct sockaddr_in *remote;
+	struct iwch_ep *parent_ep = parent_cm_id->provider_data;
 
 	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
+	if (!child_so) {
+		log(LOG_ERR, "%s - invalid child socket!\n", __func__);
+		return;
+	}
 	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
 	if (!child_ep) {
 		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
 		       __FUNCTION__);
 		return;
 	}
-	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
-	if (!child_so) {
-		log(LOG_ERR, "%s - failed to dequeue child socket!\n",
-		       __FUNCTION__);
-		__free_ep(&child_ep->com);
-		return;
-	}
+	SOCKBUF_LOCK(&child_so->so_rcv);
+	soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep);
+	SOCKBUF_UNLOCK(&child_so->so_rcv);
+
+	in_getsockaddr(child_so, (struct sockaddr **)&local);
+	in_getpeeraddr(child_so, (struct sockaddr **)&remote);
+
 	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 
 		inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
 	child_ep->com.tdev = parent_ep->com.tdev;
@@ -1590,9 +1554,9 @@ process_newconn(struct iwch_ep *parent_e
 	child_ep->com.thread = parent_ep->com.thread;
 	child_ep->parent_ep = parent_ep;
 
+	free(local, M_SONAME);
 	free(remote, M_SONAME);
 	get_ep(&parent_ep->com);
-	child_ep->parent_ep = parent_ep;
 	callout_init(&child_ep->timer, 1);
 	state_set(&child_ep->com, MPA_REQ_WAIT);
 	start_ep_timer(child_ep);
@@ -1630,7 +1594,10 @@ process_socket_event(struct iwch_ep *ep)
 	}
 
 	if (state == LISTEN) {
-		process_newconn(ep);
+		/* socket listening events are handled at IWCM */
+		CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__,
+			ep->com.state, ep);
+		BUG();
 		return;
 	}
 

Modified: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
==============================================================================
--- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h	Fri Jan 22 23:33:34 2016	(r294610)
@@ -231,8 +231,8 @@ iwch_wakeup(struct cv *cv, struct mtx *l
 /* CM prototypes */
 
 int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
-int iwch_destroy_listen(struct iw_cm_id *cm_id);
+int iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog);
+void iwch_destroy_listen_ep(struct iw_cm_id *cm_id);
 int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
 int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags);

Modified: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
==============================================================================
--- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c	Fri Jan 22 23:33:34 2016	(r294610)
@@ -1140,8 +1140,9 @@ int iwch_register_device(struct iwch_dev
 	dev->ibdev.iwcm->connect = iwch_connect;
 	dev->ibdev.iwcm->accept = iwch_accept_cr;
 	dev->ibdev.iwcm->reject = iwch_reject_cr;
-	dev->ibdev.iwcm->create_listen = iwch_create_listen;
-	dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
+	dev->ibdev.iwcm->create_listen_ep = iwch_create_listen_ep;
+	dev->ibdev.iwcm->destroy_listen_ep = iwch_destroy_listen_ep;
+	dev->ibdev.iwcm->newconn = process_newconn;
 	dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
 	dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
 	dev->ibdev.iwcm->get_qp = iwch_get_qp;

Modified: head/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/cm.c	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/dev/cxgbe/iw_cxgbe/cm.c	Fri Jan 22 23:33:34 2016	(r294610)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -111,8 +111,6 @@ static void ep_timeout(unsigned long arg
 static void init_sock(struct c4iw_ep_common *epc);
 static void process_data(struct c4iw_ep *ep);
 static void process_connected(struct c4iw_ep *ep);
-static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep);
-static void process_newconn(struct c4iw_ep *parent_ep);
 static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
 static void process_socket_event(struct c4iw_ep *ep);
 static void release_ep_resources(struct c4iw_ep *ep);
@@ -623,40 +621,21 @@ process_connected(struct c4iw_ep *ep)
 	}
 }
 
-static struct socket *
-dequeue_socket(struct socket *head, struct sockaddr_in **remote,
-    struct c4iw_ep *child_ep)
-{
-	struct socket *so;
-
-	ACCEPT_LOCK();
-	so = TAILQ_FIRST(&head->so_comp);
-	if (!so) {
-		ACCEPT_UNLOCK();
-		return (NULL);
-	}
-	TAILQ_REMOVE(&head->so_comp, so, so_list);
-	head->so_qlen--;
-	SOCK_LOCK(so);
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-	soref(so);
-	soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep);
-	so->so_state |= SS_NBIO;
-	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
-	soaccept(so, (struct sockaddr **)remote);
-
-	return (so);
-}
-
-static void
-process_newconn(struct c4iw_ep *parent_ep)
+void
+process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
 {
-	struct socket *child_so;
 	struct c4iw_ep *child_ep;
+	struct sockaddr_in *local;
 	struct sockaddr_in *remote;
+	struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
 
+	if (!child_so) {
+		CTR4(KTR_IW_CXGBE,
+		    "%s: parent so %p, parent ep %p, child so %p, invalid so",
+		    __func__, parent_ep->com.so, parent_ep, child_so);
+		log(LOG_ERR, "%s: invalid child socket\n", __func__);
+		return;
+	}
 	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
 	if (!child_ep) {
 		CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
@@ -664,23 +643,18 @@ process_newconn(struct c4iw_ep *parent_e
 		log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
 		return;
 	}
-
-	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
-	if (!child_so) {
-		CTR4(KTR_IW_CXGBE,
-		    "%s: parent so %p, parent ep %p, child ep %p, dequeue err",
-		    __func__, parent_ep->com.so, parent_ep, child_ep);
-		log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__);
-		__free_ep(&child_ep->com);
-		return;
-
-	}
+	SOCKBUF_LOCK(&child_so->so_rcv);
+	soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep);
+	SOCKBUF_UNLOCK(&child_so->so_rcv);
 
 	CTR5(KTR_IW_CXGBE,
 	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
 	     __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
 
-	child_ep->com.local_addr = parent_ep->com.local_addr;
+	in_getsockaddr(child_so, (struct sockaddr **)&local);
+	in_getpeeraddr(child_so, (struct sockaddr **)&remote);
+
+	child_ep->com.local_addr = *local;
 	child_ep->com.remote_addr = *remote;
 	child_ep->com.dev = parent_ep->com.dev;
 	child_ep->com.so = child_so;
@@ -688,15 +662,17 @@ process_newconn(struct c4iw_ep *parent_e
 	child_ep->com.thread = parent_ep->com.thread;
 	child_ep->parent_ep = parent_ep;
 
+	free(local, M_SONAME);
 	free(remote, M_SONAME);
+
 	c4iw_get_ep(&parent_ep->com);
-	child_ep->parent_ep = parent_ep;
 	init_timer(&child_ep->timer);
 	state_set(&child_ep->com, MPA_REQ_WAIT);
 	START_EP_TIMER(child_ep);
 
 	/* maybe the request has already been queued up on the socket... */
 	process_mpa_request(child_ep);
+	return;
 }
 
 static int
@@ -738,7 +714,10 @@ process_socket_event(struct c4iw_ep *ep)
 	}
 
 	if (state == LISTEN) {
-		process_newconn(ep);
+		/* socket listening events are handled at IWCM */
+		CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
+			    ep->com.state, ep);
+		BUG();
 		return;
 	}
 
@@ -919,7 +898,6 @@ void _c4iw_free_ep(struct kref *kref)
 
 	ep = container_of(kref, struct c4iw_ep, com.kref);
 	epc = &ep->com;
-	KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so));
 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
 	    __func__, epc));
 	kfree(ep);
@@ -2126,10 +2104,10 @@ out:
 }
 
 /*
- * iwcm->create_listen.  Returns -errno on failure.
+ * iwcm->create_listen_ep.  Returns -errno on failure.
  */
 int
-c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
 {
 	int rc;
 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
@@ -2154,17 +2132,6 @@ c4iw_create_listen(struct iw_cm_id *cm_i
 	ep->com.thread = curthread;
 	state_set(&ep->com, LISTEN);
 	ep->com.so = so;
-	init_sock(&ep->com);
-
-	rc = solisten(so, ep->backlog, ep->com.thread);
-	if (rc != 0) {
-		log(LOG_ERR, "%s: failed to start listener: %d\n", __func__,
-		    rc);
-		close_socket(&ep->com, 0);
-		cm_id->rem_ref(cm_id);
-		c4iw_put_ep(&ep->com);
-		goto failed;
-	}
 
 	cm_id->provider_data = ep;
 	return (0);
@@ -2174,21 +2141,19 @@ failed:
 	return (-rc);
 }
 
-int
-c4iw_destroy_listen(struct iw_cm_id *cm_id)
+void
+c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
 {
-	int rc;
 	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
 
-	CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id,
-	    cm_id->so, cm_id->so->so_pcb);
+	CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id,
+	    cm_id->so, states[ep->com.state]);
 
 	state_set(&ep->com, DEAD);
-	rc = close_socket(&ep->com, 0);
 	cm_id->rem_ref(cm_id);
 	c4iw_put_ep(&ep->com);
 
-	return (rc);
+	return;
 }
 
 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)

Modified: head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Fri Jan 22 23:33:34 2016	(r294610)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -850,8 +850,8 @@ int c4iw_post_receive(struct ib_qp *ibqp
 int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
 		 struct ib_mw_bind *mw_bind);
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
-int c4iw_destroy_listen(struct iw_cm_id *cm_id);
+int c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog);
+void c4iw_destroy_listen_ep(struct iw_cm_id *cm_id);
 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
 void c4iw_qp_add_ref(struct ib_qp *qp);
@@ -914,6 +914,8 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev
 void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
 		struct c4iw_dev_ucontext *uctx);
 void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe);
+void process_newconn(struct iw_cm_id *parent_cm_id,
+		struct socket *child_so);
 
 extern struct cxgb4_client t4c_client;
 extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];

Modified: head/sys/dev/cxgbe/iw_cxgbe/provider.c
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/provider.c	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/dev/cxgbe/iw_cxgbe/provider.c	Fri Jan 22 23:33:34 2016	(r294610)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -474,8 +474,9 @@ c4iw_register_device(struct c4iw_dev *de
 	iwcm->connect = c4iw_connect;
 	iwcm->accept = c4iw_accept_cr;
 	iwcm->reject = c4iw_reject_cr;
-	iwcm->create_listen = c4iw_create_listen;
-	iwcm->destroy_listen = c4iw_destroy_listen;
+	iwcm->create_listen_ep = c4iw_create_listen_ep;
+	iwcm->destroy_listen_ep = c4iw_destroy_listen_ep;
+	iwcm->newconn = process_newconn;
 	iwcm->add_ref = c4iw_qp_add_ref;
 	iwcm->rem_ref = c4iw_qp_rem_ref;
 	iwcm->get_qp = c4iw_get_qp;

Modified: head/sys/ofed/drivers/infiniband/core/cma.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/cma.c	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/ofed/drivers/infiniband/core/cma.c	Fri Jan 22 23:33:34 2016	(r294610)
@@ -3,6 +3,7 @@
  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -407,6 +408,75 @@ static int find_gid_port(struct ib_devic
 	return -EAGAIN;
 }
 
+int
+rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type,
+							void **cm_id)
+{
+	int ret;
+	u8 port;
+	int found_dev = 0, found_cmid = 0;
+	struct rdma_id_private  *id_priv;
+	struct rdma_id_private  *dev_id_priv;
+	struct cma_device	*cma_dev;
+	struct rdma_dev_addr	dev_addr;
+	union ib_gid		gid;
+	enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ?
+		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
+
+	memset(&dev_addr, 0, sizeof(dev_addr));
+
+	ret = rdma_translate_ip((struct sockaddr *)local_addr,
+							&dev_addr, NULL);
+	if (ret)
+		goto err;
+
+	/* find rdma device based on MAC address/gid */
+	mutex_lock(&lock);
+
+	memcpy(&gid, dev_addr.src_dev_addr +
+	       rdma_addr_gid_offset(&dev_addr), sizeof(gid));
+
+	list_for_each_entry(cma_dev, &dev_list, list)
+		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port)
+			if ((rdma_port_get_link_layer(cma_dev->device, port) ==
+								 dev_ll) &&
+			 (rdma_node_get_transport(cma_dev->device->node_type) ==
+							RDMA_TRANSPORT_IWARP)) {
+					ret = find_gid_port(cma_dev->device,
+								&gid, port);
+					if (!ret) {
+						found_dev = 1;
+						goto out;
+					} else if (ret == 1) {
+						mutex_unlock(&lock);
+						goto err;
+					}
+			}
+out:
+	mutex_unlock(&lock);
+
+	if (!found_dev)
+		goto err;
+
+	/* Traverse through the list of listening cm_id's to find the
+	 * desired cm_id based on rdma device & port number.
+	 */
+	list_for_each_entry(id_priv, &listen_any_list, list)
+		list_for_each_entry(dev_id_priv, &id_priv->listen_list,
+						 listen_list)
+			if (dev_id_priv->cma_dev == cma_dev)
+				if (dev_id_priv->cm_id.iw->local_addr.sin_port
+						== local_addr->sin_port) {
+					*cm_id = (void *)dev_id_priv->cm_id.iw;
+					found_cmid = 1;
+				}
+	return found_cmid ? 0 : -ENODEV;
+
+err:
+	return -ENODEV;
+}
+EXPORT_SYMBOL(rdma_find_cmid_laddr);
+
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -780,6 +850,12 @@ static inline int cma_any_addr(struct so
 {
 	return cma_zero_addr(addr) || cma_loopback_addr(addr);
 }
+int
+rdma_cma_any_addr(struct sockaddr *addr)
+{
+	return cma_any_addr(addr);
+}
+EXPORT_SYMBOL(rdma_cma_any_addr);
 
 static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
 {
@@ -1707,6 +1783,7 @@ static void cma_listen_on_dev(struct rdm
 	dev_id_priv = container_of(id, struct rdma_id_private, id);
 
 	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
+	dev_id_priv->sock = id_priv->sock;
 	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
 	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
 

Modified: head/sys/ofed/drivers/infiniband/core/iwcm.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/iwcm.c	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/ofed/drivers/infiniband/core/iwcm.c	Fri Jan 22 23:33:34 2016	(r294610)
@@ -5,6 +5,7 @@
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,6 +36,8 @@
  * SOFTWARE.
  *
  */
+#include "opt_inet.h"
+
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/idr.h>
@@ -47,7 +50,10 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/string.h>
+#include <netinet/tcp.h>
+#include <sys/mutex.h>
 
+#include <rdma/rdma_cm.h>
 #include <rdma/iw_cm.h>
 #include <rdma/ib_addr.h>
 
@@ -65,6 +71,85 @@ struct iwcm_work {
 	struct iw_cm_event event;
 	struct list_head free_list;
 };
+struct iwcm_listen_work {
+	struct work_struct work;
+	struct iw_cm_id *cm_id;
+};
+
+static LIST_HEAD(listen_port_list);
+
+static DEFINE_MUTEX(listen_port_mutex);
+static DEFINE_MUTEX(dequeue_mutex);
+
+struct listen_port_info {
+	struct list_head list;
+	uint16_t port_num;
+	uint32_t refcnt;
+};
+
+static int32_t
+add_port_to_listenlist(uint16_t port)
+{
+	struct listen_port_info *port_info;
+	int err = 0;
+
+	mutex_lock(&listen_port_mutex);
+
+	list_for_each_entry(port_info, &listen_port_list, list)
+		if (port_info->port_num == port)
+			goto found_port;
+
+	port_info = kmalloc(sizeof(*port_info), GFP_KERNEL);
+	if (!port_info) {
+		err = -ENOMEM;
+		mutex_unlock(&listen_port_mutex);
+		goto out;
+	}
+
+	port_info->port_num = port;
+	port_info->refcnt    = 0;
+
+	list_add(&port_info->list, &listen_port_list);
+
+found_port:
+	++(port_info->refcnt);
+	mutex_unlock(&listen_port_mutex);
+	return port_info->refcnt;
+out:
+	return err;
+}
+
+static int32_t
+rem_port_from_listenlist(uint16_t port)
+{
+	struct listen_port_info *port_info;
+	int ret, found_port = 0;
+
+	mutex_lock(&listen_port_mutex);
+
+	list_for_each_entry(port_info, &listen_port_list, list)
+		if (port_info->port_num == port) {
+			found_port = 1;
+			break;
+		}
+
+	if (found_port) {
+		--(port_info->refcnt);
+		ret = port_info->refcnt;
+		if (port_info->refcnt == 0) {
+			/* Remove this entry from the list as there are no
+			 * more listeners for this port_num.
+			 */
+			list_del(&port_info->list);
+			kfree(port_info);
+		}
+	} else {
+		ret = -EINVAL;
+	}
+	mutex_unlock(&listen_port_mutex);
+	return ret;
+
+}
 
 /*
  * The following services provide a mechanism for pre-allocating iwcm_work
@@ -320,6 +405,167 @@ int iw_cm_disconnect(struct iw_cm_id *cm
 }
 EXPORT_SYMBOL(iw_cm_disconnect);
 
+static struct socket *
+dequeue_socket(struct socket *head)
+{
+	struct socket *so;
+	struct sockaddr_in *remote;
+
+	ACCEPT_LOCK();
+	so = TAILQ_FIRST(&head->so_comp);
+	if (!so) {
+		ACCEPT_UNLOCK();
+		return NULL;
+	}
+
+	SOCK_LOCK(so);
+	/*
+	 * Before changing the flags on the socket, we have to bump the
+	 * reference count.  Otherwise, if the protocol calls sofree(),
+	 * the socket will be released due to a zero refcount.
+	 */
+	soref(so);
+	TAILQ_REMOVE(&head->so_comp, so, so_list);
+	head->so_qlen--;
+	so->so_qstate &= ~SQ_COMP;
+	so->so_head = NULL;
+	so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(so);
+	ACCEPT_UNLOCK();
+	soaccept(so, (struct sockaddr **)&remote);
+
+	free(remote, M_SONAME);
+	return so;
+}
+static void
+iw_so_event_handler(struct work_struct *_work)
+{
+#ifdef INET
+	struct	iwcm_listen_work *work = container_of(_work,
+						struct iwcm_listen_work, work);
+	struct	iw_cm_id *listen_cm_id = work->cm_id;
+	struct	iwcm_id_private *cm_id_priv;
+	struct	iw_cm_id *real_cm_id;
+	struct	sockaddr_in *local;
+	struct	socket *so;
+
+	cm_id_priv = container_of(listen_cm_id, struct iwcm_id_private, id);
+
+	if (cm_id_priv->state != IW_CM_STATE_LISTEN) {
+		kfree(work);
+		return;
+	}
+	mutex_lock(&dequeue_mutex);
+
+	/* Dequeue & process  all new 'so' connection requests for this cmid */
+	while ((so = dequeue_socket(work->cm_id->so)) != NULL) {
+		if (rdma_cma_any_addr((struct sockaddr *)
+					&listen_cm_id->local_addr)) {
+			in_getsockaddr(so, (struct sockaddr **)&local);
+			if (rdma_find_cmid_laddr(local, ARPHRD_ETHER,
+					(void **) &real_cm_id)) {
+				free(local, M_SONAME);
+				goto err;
+			}
+			free(local, M_SONAME);
+
+			real_cm_id->device->iwcm->newconn(real_cm_id, so);
+		} else {
+			listen_cm_id->device->iwcm->newconn(listen_cm_id, so);
+		}
+	}
+err:
+	mutex_unlock(&dequeue_mutex);
+	kfree(work);
+#endif
+	return;
+}
+static int
+iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
+{
+	struct iwcm_listen_work *work;
+	struct socket *so;
+	struct iw_cm_id *cm_id = arg;
+
+	mutex_lock(&dequeue_mutex);
+	/* check whether iw_so_event_handler() already dequeued this 'so' */
+	so = TAILQ_FIRST(&parent_so->so_comp);
+	if (!so)
+		return SU_OK;
+	work = kzalloc(sizeof(*work), M_NOWAIT);
+	if (!work)
+		return -ENOMEM;
+	work->cm_id = cm_id;
+
+	INIT_WORK(&work->work, iw_so_event_handler);
+	queue_work(iwcm_wq, &work->work);
+
+	mutex_unlock(&dequeue_mutex);
+	return SU_OK;
+}
+
+static void
+iw_init_sock(struct iw_cm_id *cm_id)
+{
+	struct sockopt sopt;
+	struct socket *so = cm_id->so;
+	int on = 1;
+
+	SOCK_LOCK(so);
+	soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
+	so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(so);
+	sopt.sopt_dir = SOPT_SET;
+	sopt.sopt_level = IPPROTO_TCP;
+	sopt.sopt_name = TCP_NODELAY;
+	sopt.sopt_val = (caddr_t)&on;
+	sopt.sopt_valsize = sizeof(on);
+	sopt.sopt_td = NULL;
+	sosetopt(so, &sopt);
+}
+
+static int
+iw_close_socket(struct iw_cm_id *cm_id, int close)
+{
+	struct socket *so = cm_id->so;
+	int rc;
+
+
+	SOCK_LOCK(so);
+	soupcall_clear(so, SO_RCV);
+	SOCK_UNLOCK(so);
+
+	if (close)
+		rc = soclose(so);
+	else
+		rc = soshutdown(so, SHUT_WR | SHUT_RD);
+
+	cm_id->so = NULL;
+
+	return rc;
+}
+
+static int
+iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	int rc;
+
+	iw_init_sock(cm_id);
+	rc = solisten(cm_id->so, backlog, curthread);
+	if (rc != 0)
+		iw_close_socket(cm_id, 0);
+	return rc;
+}
+
+static int
+iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+	int rc;
+	rc = iw_close_socket(cm_id, 0);
+	return rc;
+}
+
+
 /*
  * CM_ID <-- DESTROYING
  *
@@ -330,7 +576,7 @@ static void destroy_cm_id(struct iw_cm_i
 {
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
-	int ret;
+	int ret = 0, refcnt;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	/*
@@ -345,8 +591,18 @@ static void destroy_cm_id(struct iw_cm_i
 	case IW_CM_STATE_LISTEN:
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		/* destroy the listening endpoint */
-		ret = cm_id->device->iwcm->destroy_listen(cm_id);
+		if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
+			refcnt =
+			  rem_port_from_listenlist(cm_id->local_addr.sin_port);
+
+			if (refcnt == 0)
+				ret = iw_destroy_listen(cm_id);
+
+			cm_id->device->iwcm->destroy_listen_ep(cm_id);
+		} else {
+			ret = iw_destroy_listen(cm_id);
+			cm_id->device->iwcm->destroy_listen_ep(cm_id);
+		}
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	case IW_CM_STATE_ESTABLISHED:
@@ -418,7 +674,7 @@ int iw_cm_listen(struct iw_cm_id *cm_id,
 {
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
-	int ret;
+	int ret, refcnt;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 
@@ -431,9 +687,33 @@ int iw_cm_listen(struct iw_cm_id *cm_id,
 	case IW_CM_STATE_IDLE:
 		cm_id_priv->state = IW_CM_STATE_LISTEN;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
-		if (ret)
+
+		if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
+			refcnt =
+			  add_port_to_listenlist(cm_id->local_addr.sin_port);
+
+			if (refcnt == 1) {
+				ret = iw_create_listen(cm_id, backlog);
+			} else if (refcnt <= 0) {
+				ret = -EINVAL;
+			} else {
+				/* if refcnt > 1, a socket listener created
+				 * already. And we need not create socket
+				 * listener on other rdma devices/listen cm_id's
+				 * due to TOE. That is when a socket listener is
+				 * created with INADDR_ANY all registered TOE
+				 * devices will get a call to start
+				 * hardware listeners.
+				 */
+			}
+		} else {
+			ret = iw_create_listen(cm_id, backlog);
+		}
+		if (!ret)
+			cm_id->device->iwcm->create_listen_ep(cm_id, backlog);
+		else
 			cm_id_priv->state = IW_CM_STATE_IDLE;
+
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	default:

Modified: head/sys/ofed/include/rdma/iw_cm.h
==============================================================================
--- head/sys/ofed/include/rdma/iw_cm.h	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/ofed/include/rdma/iw_cm.h	Fri Jan 22 23:33:34 2016	(r294610)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -120,10 +121,13 @@ struct iw_cm_verbs {
 	int		(*reject)(struct iw_cm_id *cm_id,
 				  const void *pdata, u8 pdata_len);
 
-	int		(*create_listen)(struct iw_cm_id *cm_id,
+	int		(*create_listen_ep)(struct iw_cm_id *cm_id,
 					 int backlog);
 
-	int		(*destroy_listen)(struct iw_cm_id *cm_id);
+	void		(*destroy_listen_ep)(struct iw_cm_id *cm_id);
+
+	void		(*newconn)(struct iw_cm_id *parent_cm_id,
+						struct socket *so);
 };
 
 /**

Modified: head/sys/ofed/include/rdma/rdma_cm.h
==============================================================================
--- head/sys/ofed/include/rdma/rdma_cm.h	Fri Jan 22 21:50:08 2016	(r294609)
+++ head/sys/ofed/include/rdma/rdma_cm.h	Fri Jan 22 23:33:34 2016	(r294610)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -400,5 +401,7 @@ int rdma_set_afonly(struct rdma_cm_id *i
  * @timeout: QP timeout
  */
 void rdma_set_timeout(struct rdma_cm_id *id, int timeout);
-
+int rdma_cma_any_addr(struct sockaddr *addr);
+int rdma_find_cmid_laddr(struct sockaddr_in *local_addr,
+		unsigned short dev_type, void **cm_id);
 #endif /* RDMA_CM_H */


More information about the svn-src-head mailing list