svn commit: r331318 - projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe
Navdeep Parhar
np at FreeBSD.org
Wed Mar 21 18:57:32 UTC 2018
Author: np
Date: Wed Mar 21 18:57:31 2018
New Revision: 331318
URL: https://svnweb.freebsd.org/changeset/base/331318
Log:
MFC r326169 (cxgbe portion).
Sponsored by: Chelsio Communications
Modified:
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/device.c
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/mem.c
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/provider.c
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/user.h
Directory Properties:
projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/ (props changed)
Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:39:29 2018 (r331317)
+++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:57:31 2018 (r331318)
@@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
+#include <netinet6/in6_pcb.h>
#include <netinet/ip.h>
#include <netinet/in_fib.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/scope6_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
@@ -78,6 +81,8 @@ static struct work_struct c4iw_task;
static struct workqueue_struct *c4iw_taskq;
static LIST_HEAD(err_cqe_list);
static spinlock_t err_cqe_lock;
+static LIST_HEAD(listen_port_list);
+static DEFINE_MUTEX(listen_port_mutex);
static void process_req(struct work_struct *ctx);
static void start_ep_timer(struct c4iw_ep *ep);
@@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep);
static int set_tcpinfo(struct c4iw_ep *ep);
static void process_timeout(struct c4iw_ep *ep);
static void process_err_cqes(void);
-static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
-static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
-static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
static void *alloc_ep(int size, gfp_t flags);
-static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
static void close_socket(struct socket *so);
static int send_mpa_req(struct c4iw_ep *ep);
static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
@@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep);
static int terminate(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m);
static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep);
+static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep);
+static struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so);
+static int get_ifnet_from_raddr(struct sockaddr_storage *raddr,
+ struct ifnet **ifp);
+static void process_newconn(struct c4iw_listen_ep *master_lep,
+ struct socket *new_so);
#define START_EP_TIMER(ep) \
do { \
CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
@@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int
stop_ep_timer(ep); \
})
+#define GET_LOCAL_ADDR(pladdr, so) \
+ do { \
+ struct sockaddr_storage *__a = NULL; \
+ struct inpcb *__inp = sotoinpcb(so); \
+ KASSERT(__inp != NULL, \
+ ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+ if (__inp->inp_vflag & INP_IPV4) \
+ in_getsockaddr(so, (struct sockaddr **)&__a); \
+ else \
+ in6_getsockaddr(so, (struct sockaddr **)&__a); \
+ *(pladdr) = *__a; \
+ free(__a, M_SONAME); \
+ } while (0)
+
+#define GET_REMOTE_ADDR(praddr, so) \
+ do { \
+ struct sockaddr_storage *__a = NULL; \
+ struct inpcb *__inp = sotoinpcb(so); \
+ KASSERT(__inp != NULL, \
+ ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+ if (__inp->inp_vflag & INP_IPV4) \
+ in_getpeeraddr(so, (struct sockaddr **)&__a); \
+ else \
+ in6_getpeeraddr(so, (struct sockaddr **)&__a); \
+ *(praddr) = *__a; \
+ free(__a, M_SONAME); \
+ } while (0)
+
#ifdef KTR
static char *states[] = {
"idle",
@@ -152,7 +189,6 @@ static char *states[] = {
};
#endif
-
static void deref_cm_id(struct c4iw_ep_common *epc)
{
epc->cm_id->rem_ref(epc->cm_id);
@@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep)
set_bit(QP_REFED, &ep->com.history);
c4iw_qp_add_ref(&ep->com.qp->ibqp);
}
+/* allocated per TCP port while listening */
+struct listen_port_info {
+ uint16_t port_num; /* TCP port address */
+ struct list_head list; /* belongs to listen_port_list */
+ struct list_head lep_list; /* per port lep list */
+ uint32_t refcnt; /* number of lep's listening */
+};
+/*
+ * Following two lists are used to manage INADDR_ANY listeners:
+ * 1)listen_port_list
+ * 2)lep_list
+ *
+ * Below is the INADDR_ANY listener lists overview on a system with a two port
+ * adapter:
+ * |------------------|
+ * |listen_port_list |
+ * |------------------|
+ * |
+ * | |-----------| |-----------|
+ * | | port_num:X| | port_num:X|
+ * |--------------|-list------|-------|-list------|-------....
+ * | lep_list----| | lep_list----|
+ * | refcnt | | | refcnt | |
+ * | | | | | |
+ * | | | | | |
+ * |-----------| | |-----------| |
+ * | |
+ * | |
+ * | |
+ * | | lep1 lep2
+ * | | |----------------| |----------------|
+ * | |----| listen_ep_list |----| listen_ep_list |
+ * | |----------------| |----------------|
+ * |
+ * |
+ * | lep1 lep2
+ * | |----------------| |----------------|
+ * |---| listen_ep_list |----| listen_ep_list |
+ * |----------------| |----------------|
+ *
+ * Because the adapter has two ports, there are two lep's (lep1 & lep2) for
+ * each TCP port number.
+ *
+ * Here 'lep1' is always marked as Master lep, because solisten() is always
+ * called through first lep.
+ *
+ */
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep)
+{
+ uint16_t port;
+ struct listen_port_info *port_info = NULL;
+ struct sockaddr_storage *laddr = &lep->com.local_addr;
+
+ port = (laddr->ss_family == AF_INET) ?
+ ((struct sockaddr_in *)laddr)->sin_port :
+ ((struct sockaddr_in6 *)laddr)->sin6_port;
+
+ mutex_lock(&listen_port_mutex);
+
+ list_for_each_entry(port_info, &listen_port_list, list)
+ if (port_info->port_num == port)
+ goto found_port;
+
+ port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK);
+ port_info->port_num = port;
+ port_info->refcnt = 0;
+
+ list_add_tail(&port_info->list, &listen_port_list);
+ INIT_LIST_HEAD(&port_info->lep_list);
+
+found_port:
+ port_info->refcnt++;
+ list_add_tail(&lep->listen_ep_list, &port_info->lep_list);
+ mutex_unlock(&listen_port_mutex);
+ return port_info;
+}
+
+static int
+rem_ep_from_listenlist(struct c4iw_listen_ep *lep)
+{
+ uint16_t port;
+ struct listen_port_info *port_info = NULL;
+ struct sockaddr_storage *laddr = &lep->com.local_addr;
+ int refcnt = 0;
+
+ port = (laddr->ss_family == AF_INET) ?
+ ((struct sockaddr_in *)laddr)->sin_port :
+ ((struct sockaddr_in6 *)laddr)->sin6_port;
+
+ mutex_lock(&listen_port_mutex);
+
+ /* get the port_info structure based on the lep's port address */
+ list_for_each_entry(port_info, &listen_port_list, list) {
+ if (port_info->port_num == port) {
+ port_info->refcnt--;
+ refcnt = port_info->refcnt;
+ /* remove the current lep from the listen list */
+ list_del(&lep->listen_ep_list);
+ if (port_info->refcnt == 0) {
+ /* Remove this entry from the list as there
+ * are no more listeners for this port_num.
+ */
+ list_del(&port_info->list);
+ kfree(port_info);
+ }
+ break;
+ }
+ }
+ mutex_unlock(&listen_port_mutex);
+ return refcnt;
+}
+
+/*
+ * Find the lep that belongs to the ifnet on which the SYN frame was received.
+ */
+struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so)
+{
+ struct adapter *adap = NULL;
+ struct c4iw_listen_ep *lep = NULL;
+ struct sockaddr_storage remote = { 0 };
+ struct ifnet *new_conn_ifp = NULL;
+ struct listen_port_info *port_info = NULL;
+ int err = 0, i = 0,
+ found_portinfo = 0, found_lep = 0;
+ uint16_t port;
+
+ /* STEP 1: get 'ifnet' based on socket's remote address */
+ GET_REMOTE_ADDR(&remote, so);
+
+ err = get_ifnet_from_raddr(&remote, &new_conn_ifp);
+ if (err) {
+ CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, "
+ "master_lep %p err %d",
+ __func__, so, master_lep, err);
+ return (NULL);
+ }
+
+ /* STEP 2: Find 'port_info' with listener local port address. */
+ port = (master_lep->com.local_addr.ss_family == AF_INET) ?
+ ((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port :
+ ((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port;
+
+
+ mutex_lock(&listen_port_mutex);
+ list_for_each_entry(port_info, &listen_port_list, list)
+ if (port_info->port_num == port) {
+ found_portinfo =1;
+ break;
+ }
+ if (!found_portinfo)
+ goto out;
+
+ /* STEP 3: Traverse through list of lep's that are bound to the current
+ * TCP port address and find the lep that belongs to the ifnet on which
+ * the SYN frame was received.
+ */
+ list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) {
+ adap = lep->com.dev->rdev.adap;
+ for_each_port(adap, i) {
+ if (new_conn_ifp == adap->port[i]->vi[0].ifp) {
+ found_lep =1;
+ goto out;
+ }
+ }
+ }
+out:
+ mutex_unlock(&listen_port_mutex);
+ return found_lep ? lep : (NULL);
+}
+
static void process_timeout(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int abort = 1;
- mutex_lock(&ep->com.mutex);
CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
ep, ep->hwtid, ep->com.state);
set_bit(TIMEDOUT, &ep->com.history);
@@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep)
, __func__, ep, ep->hwtid, ep->com.state);
abort = 0;
}
- mutex_unlock(&ep->com.mutex);
if (abort)
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
@@ -273,14 +479,16 @@ process_req(struct work_struct *ctx)
ep_events = epc->ep_events;
epc->ep_events = 0;
spin_unlock_irqrestore(&req_lock, flag);
- CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__,
- epc->so, epc, ep_events);
+ mutex_lock(&epc->mutex);
+ CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x",
+ __func__, epc->so, epc, states[epc->state], ep_events);
if (ep_events & C4IW_EVENT_TERM)
process_terminate((struct c4iw_ep *)epc);
if (ep_events & C4IW_EVENT_TIMEOUT)
process_timeout((struct c4iw_ep *)epc);
if (ep_events & C4IW_EVENT_SOCKET)
process_socket_event((struct c4iw_ep *)epc);
+ mutex_unlock(&epc->mutex);
c4iw_put_ep(epc);
process_err_cqes();
spin_lock_irqsave(&req_lock, flag);
@@ -321,55 +529,67 @@ done:
return (rc);
}
-
static int
-find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
+get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp)
{
- struct in_addr addr;
- int err;
+ int err = 0;
- CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
- peer_ip, ntohs(local_port), ntohs(peer_port));
+ if (raddr->ss_family == AF_INET) {
+ struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr;
+ struct nhop4_extended nh4 = {0};
- addr.s_addr = peer_ip;
- err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);
+ err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr,
+ NHR_REF, 0, &nh4);
+ *ifp = nh4.nh_ifp;
+ if (err)
+ fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
+ } else {
+ struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr;
+ struct nhop6_extended nh6 = {0};
+ struct in6_addr addr6;
+ uint32_t scopeid;
- CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
+ memset(&addr6, 0, sizeof(addr6));
+ in6_splitscope((struct in6_addr *)&raddr6->sin6_addr,
+ &addr6, &scopeid);
+ err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid,
+ NHR_REF, 0, &nh6);
+ *ifp = nh6.nh_ifp;
+ if (err)
+ fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6);
+ }
+
+ CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err);
return err;
}
static void
close_socket(struct socket *so)
{
-
uninit_iwarp_socket(so);
- sodisconnect(so);
+ soclose(so);
}
static void
process_peer_close(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int disconnect = 1;
int release = 0;
CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
ep->com.so, states[ep->com.state]);
- mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case MPA_REQ_WAIT:
- CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
+ CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD",
__func__, ep);
- __state_set(&ep->com, CLOSING);
- break;
-
+ /* Fallthrough */
case MPA_REQ_SENT:
- CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
+ CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
__func__, ep);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
connect_reply_upcall(ep, -ECONNABORTED);
disconnect = 0;
@@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep)
*/
CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
__func__, ep);
- __state_set(&ep->com, CLOSING);
- c4iw_get_ep(&ep->com);
+ ep->com.state = CLOSING;
break;
case MPA_REP_SENT:
CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
__func__, ep);
- __state_set(&ep->com, CLOSING);
+ ep->com.state = CLOSING;
break;
case FPDU_MODE:
CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
__func__, ep);
START_EP_TIMER(ep);
- __state_set(&ep->com, CLOSING);
+ ep->com.state = CLOSING;
attrs.next_state = C4IW_QP_STATE_CLOSING;
c4iw_modify_qp(ep->com.dev, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
@@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep)
case CLOSING:
CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
__func__, ep);
- __state_set(&ep->com, MORIBUND);
+ ep->com.state = MORIBUND;
disconnect = 0;
break;
@@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep)
}
close_socket(ep->com.so);
close_complete_upcall(ep, 0);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
release = 1;
disconnect = 0;
break;
@@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep)
break;
}
- mutex_unlock(&ep->com.mutex);
if (disconnect) {
@@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep)
static void
process_conn_error(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int ret;
int state;
- mutex_lock(&ep->com.mutex);
state = ep->com.state;
CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
__func__, ep, ep->com.so, ep->com.so->so_error,
@@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep)
case MPA_REQ_WAIT:
STOP_EP_TIMER(ep);
+ c4iw_put_ep(&ep->parent_ep->com);
break;
case MPA_REQ_SENT:
@@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep)
break;
case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR.
- */
- c4iw_get_ep(&ep->com);
break;
case MORIBUND:
@@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep)
case DEAD:
CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
__func__, ep->com.so->so_error);
- mutex_unlock(&ep->com.mutex);
return;
default:
@@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep)
if (state != ABORTING) {
close_socket(ep->com.so);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
c4iw_put_ep(&ep->com);
}
- mutex_unlock(&ep->com.mutex);
CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
return;
}
@@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep)
static void
process_close_complete(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int release = 0;
CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
ep->com.so, states[ep->com.state]);
/* The cm_id may be null if we failed to connect */
- mutex_lock(&ep->com.mutex);
set_bit(CLOSE_CON_RPL, &ep->com.history);
switch (ep->com.state) {
@@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep)
case CLOSING:
CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
__func__, ep);
- __state_set(&ep->com, MORIBUND);
+ ep->com.state = MORIBUND;
break;
case MORIBUND:
@@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep)
close_socket(ep->com.so);
close_complete_upcall(ep, 0);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
release = 1;
break;
@@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep)
panic("%s:pcc6 %p unknown ep state", __func__, ep);
break;
}
- mutex_unlock(&ep->com.mutex);
if (release) {
CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
- c4iw_put_ep(&ep->com);
+ release_ep_resources(ep);
}
CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
return;
@@ -639,49 +846,56 @@ setiwsockopt(struct socket *so)
static void
init_iwarp_socket(struct socket *so, void *arg)
{
-
- SOCKBUF_LOCK(&so->so_rcv);
- soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
- so->so_state |= SS_NBIO;
- SOCKBUF_UNLOCK(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, c4iw_so_upcall, arg);
+ so->so_state |= SS_NBIO;
+ SOLISTEN_UNLOCK(so);
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
+ so->so_state |= SS_NBIO;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
static void
uninit_iwarp_socket(struct socket *so)
{
-
- SOCKBUF_LOCK(&so->so_rcv);
- soupcall_clear(so, SO_RCV);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, NULL, NULL);
+ SOLISTEN_UNLOCK(so);
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ soupcall_clear(so, SO_RCV);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
static void
process_data(struct c4iw_ep *ep)
{
- struct sockaddr_in *local, *remote;
int disconnect = 0;
CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
- switch (state_read(&ep->com)) {
+ switch (ep->com.state) {
case MPA_REQ_SENT:
disconnect = process_mpa_reply(ep);
break;
case MPA_REQ_WAIT:
- in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
- in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
- ep->com.local_addr = *local;
- ep->com.remote_addr = *remote;
- free(local, M_SONAME);
- free(remote, M_SONAME);
disconnect = process_mpa_request(ep);
+ if (disconnect)
+ /* Referenced in process_newconn() */
+ c4iw_put_ep(&ep->parent_ep->com);
break;
default:
if (sbused(&ep->com.so->so_rcv))
log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
"state %d, so %p, so_state 0x%x, sbused %u\n",
- __func__, ep, state_read(&ep->com), ep->com.so,
+ __func__, ep, ep->com.state, ep->com.so,
ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
break;
}
@@ -705,58 +919,122 @@ process_connected(struct c4iw_ep *ep)
return;
err:
close_socket(so);
- state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
c4iw_put_ep(&ep->com);
return;
}
-void
-process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
+static inline int c4iw_zero_addr(struct sockaddr *addr)
{
- struct c4iw_ep *child_ep;
- struct sockaddr_in *local;
- struct sockaddr_in *remote;
- struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
+ struct in6_addr *ip6;
+
+ if (addr->sa_family == AF_INET)
+ return IN_ZERONET(
+ ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr));
+ else {
+ ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
+ return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
+ ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
+ }
+}
+
+static inline int c4iw_loopback_addr(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET)
+ return IN_LOOPBACK(
+ ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr));
+ else
+ return IN6_IS_ADDR_LOOPBACK(
+ &((struct sockaddr_in6 *) addr)->sin6_addr);
+}
+
+static inline int c4iw_any_addr(struct sockaddr *addr)
+{
+ return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr);
+}
+
+static void
+process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so)
+{
+ struct c4iw_listen_ep *real_lep = NULL;
+ struct c4iw_ep *new_ep = NULL;
+ struct sockaddr_in *remote = NULL;
int ret = 0;
- MPASS(child_so != NULL);
+ MPASS(new_so != NULL);
- child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
+ if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) {
+ /* Here we need to find the 'real_lep' that belongs to the
+ * incoming socket's network interface, such that the newly
+ * created 'ep' can be attached to the real 'lep'.
+ */
+ real_lep = find_real_listen_ep(master_lep, new_so);
+ if (real_lep == NULL) {
+ CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen "
+ "ep for sock: %p", __func__, new_so);
+ log(LOG_ERR,"%s: Could not find the real listen ep for "
+ "sock: %p\n", __func__, new_so);
+ /* FIXME: properly free the 'new_so' in failure case.
+ * Use of soabort() and soclose() are not legal
+ * here(before soaccept()).
+ */
+ return;
+ }
+ } else /* for Non-Wildcard address, master_lep is always the real_lep */
+ real_lep = master_lep;
- CTR5(KTR_IW_CXGBE,
- "%s: parent so %p, parent ep %p, child so %p, child ep %p",
- __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
+ new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL);
- in_getsockaddr(child_so, (struct sockaddr **)&local);
- in_getpeeraddr(child_so, (struct sockaddr **)&remote);
+ CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, "
+ "listening so %p, new so %p", __func__, master_lep, real_lep,
+ new_ep, master_lep->com.so, new_so);
- child_ep->com.local_addr = *local;
- child_ep->com.remote_addr = *remote;
- child_ep->com.dev = parent_ep->com.dev;
- child_ep->com.so = child_so;
- child_ep->com.cm_id = NULL;
- child_ep->com.thread = parent_ep->com.thread;
- child_ep->parent_ep = parent_ep;
+ new_ep->com.dev = real_lep->com.dev;
+ new_ep->com.so = new_so;
+ new_ep->com.cm_id = NULL;
+ new_ep->com.thread = real_lep->com.thread;
+ new_ep->parent_ep = real_lep;
- free(local, M_SONAME);
+ GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so);
+ GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so);
+ c4iw_get_ep(&real_lep->com);
+ init_timer(&new_ep->timer);
+ new_ep->com.state = MPA_REQ_WAIT;
+ START_EP_TIMER(new_ep);
+
+ setiwsockopt(new_so);
+ ret = soaccept(new_so, (struct sockaddr **)&remote);
+ if (ret != 0) {
+ CTR4(KTR_IW_CXGBE,
+ "%s:listen sock:%p, new sock:%p, ret:%d\n",
+ __func__, master_lep->com.so, new_so, ret);
+ if (remote != NULL)
+ free(remote, M_SONAME);
+ uninit_iwarp_socket(new_so);
+ soclose(new_so);
+ c4iw_put_ep(&new_ep->com);
+ c4iw_put_ep(&real_lep->com);
+ return;
+ }
free(remote, M_SONAME);
- setiwsockopt(child_so);
- init_iwarp_socket(child_so, &child_ep->com);
- c4iw_get_ep(&parent_ep->com);
- init_timer(&child_ep->timer);
- state_set(&child_ep->com, MPA_REQ_WAIT);
- START_EP_TIMER(child_ep);
+ /* MPA request might have been queued up on the socket already, so we
+ * initialize the socket/upcall_handler under lock to prevent processing
+ * MPA request on another thread(via process_req()) simultaneously.
+ */
+ c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below, this is to
+ avoid freeing of ep before ep unlock. */
+ mutex_lock(&new_ep->com.mutex);
+ init_iwarp_socket(new_so, &new_ep->com);
- /* maybe the request has already been queued up on the socket... */
- ret = process_mpa_request(child_ep);
- if (ret == 2)
+ ret = process_mpa_request(new_ep);
+ if (ret) {
/* ABORT */
- c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL);
- else if (ret == 1)
- /* CLOSE */
- c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL);
-
+ c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL);
+ c4iw_put_ep(&real_lep->com);
+ }
+ mutex_unlock(&new_ep->com.mutex);
+ c4iw_put_ep(&new_ep->com);
return;
}
@@ -790,6 +1068,12 @@ c4iw_so_upcall(struct socket *so, void *arg, int waitf
ep->com.entry.tqe_prev);
MPASS(ep->com.so == so);
+ /*
+ * Wake up any threads waiting in rdma_init()/rdma_fini(),
+ * with locks held.
+ */
+ if (so->so_error)
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
add_ep_to_req_list(ep, C4IW_EVENT_SOCKET);
return (SU_OK);
@@ -820,9 +1104,15 @@ terminate(struct sge_iq *iq, const struct rss_header *
static void
process_socket_event(struct c4iw_ep *ep)
{
- int state = state_read(&ep->com);
+ int state = ep->com.state;
struct socket *so = ep->com.so;
+ if (ep->com.state == DEAD) {
+ CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded "
+ "ep %p ep_state %s", __func__, ep, states[state]);
+ return;
+ }
+
CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
"so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
so->so_error, so->so_rcv.sb_state, ep, states[state]);
@@ -833,10 +1123,29 @@ process_socket_event(struct c4iw_ep *ep)
}
if (state == LISTEN) {
- /* socket listening events are handled at IWCM */
- CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
- ep->com.state, ep);
- BUG();
+ struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep;
+ struct socket *listen_so = so, *new_so = NULL;
+ int error = 0;
+
+ SOLISTEN_LOCK(listen_so);
+ do {
+ error = solisten_dequeue(listen_so, &new_so,
+ SOCK_NONBLOCK);
+ if (error) {
+ CTR4(KTR_IW_CXGBE, "%s: lep %p listen_so %p "
+ "error %d", __func__, lep, listen_so,
+ error);
+ return;
+ }
+ process_newconn(lep, new_so);
+
+ /* solisten_dequeue() unlocks on return, so acquire
+ * lock again for sol_qlen and also for next iteration.
+ */
+ SOLISTEN_LOCK(listen_so);
+ } while (listen_so->sol_qlen);
+ SOLISTEN_UNLOCK(listen_so);
+
return;
}
@@ -955,34 +1264,6 @@ stop_ep_timer(struct c4iw_ep *ep)
return 1;
}
-static enum
-c4iw_ep_state state_read(struct c4iw_ep_common *epc)
-{
- enum c4iw_ep_state state;
-
- mutex_lock(&epc->mutex);
- state = epc->state;
- mutex_unlock(&epc->mutex);
-
- return (state);
-}
-
-static void
-__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
-{
-
- epc->state = new;
-}
-
-static void
-state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
-{
-
- mutex_lock(&epc->mutex);
- __state_set(epc, new);
- mutex_unlock(&epc->mutex);
-}
-
static void *
alloc_ep(int size, gfp_t gfp)
{
@@ -1059,8 +1340,8 @@ send_mpa_req(struct c4iw_ep *ep)
}
if (mpa_rev_to_use == 2) {
- mpa->private_data_size +=
- htons(sizeof(struct mpa_v2_conn_params));
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
@@ -1112,7 +1393,7 @@ send_mpa_req(struct c4iw_ep *ep)
}
START_EP_TIMER(ep);
- state_set(&ep->com, MPA_REQ_SENT);
+ ep->com.state = MPA_REQ_SENT;
ep->mpa_attr.initiator = 1;
CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
return 0;
@@ -1155,8 +1436,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size +=
- htons(sizeof(struct mpa_v2_conn_params));
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons(((u16)ep->ird) |
(peer2peer ? MPA_V2_PEER2PEER_MODEL :
0));
@@ -1171,7 +1452,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v
if (ep->plen)
memcpy(mpa->private_data +
- sizeof(struct mpa_v2_conn_params), pdata, plen);
+ sizeof(struct mpa_v2_conn_params), pdata, plen);
CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
} else
@@ -1275,7 +1556,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const vo
free(mpa, M_CXGBE);
- state_set(&ep->com, MPA_REP_SENT);
+ ep->com.state = MPA_REP_SENT;
ep->snd_seq += mpalen;
err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
ep->com.thread);
@@ -1332,17 +1613,17 @@ send_abort(struct c4iw_ep *ep)
}
uninit_iwarp_socket(so);
- sodisconnect(so);
+ soclose(so);
set_bit(ABORT_CONN, &ep->com.history);
/*
* TBD: iw_cxgbe driver should receive ABORT reply for every ABORT
* request it has sent. But the current TOE driver is not propagating
* this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work-
- * around de-refer 'ep' (which was refered before sending ABORT request)
- * here instead of doing it in abort_rpl() handler of iw_cxgbe driver.
+ * around dereference 'ep' here instead of doing it in abort_rpl()
+ * handler(not yet implemented) of iw_cxgbe driver.
*/
- c4iw_put_ep(&ep->com);
+ release_ep_resources(ep);
return (0);
}
@@ -1401,6 +1682,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i
CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
/* this means MPA_v2 is used */
+ event.ord = ep->ird;
+ event.ird = ep->ord;
event.private_data_len = ep->plen -
sizeof(struct mpa_v2_conn_params);
event.private_data = ep->mpa_pkt +
@@ -1410,6 +1693,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i
CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
/* this means MPA_v1 is used */
+ event.ord = c4iw_max_read_depth;
+ event.ird = c4iw_max_read_depth;
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt +
sizeof(struct mpa_message);
@@ -1451,7 +1736,6 @@ static int connect_request_upcall(struct c4iw_ep *ep)
event.local_addr = ep->com.local_addr;
event.remote_addr = ep->com.remote_addr;
event.provider_data = ep;
- event.so = ep->com.so;
if (!ep->tried_with_mpa_v1) {
/* this means MPA_v2 is used */
@@ -1473,11 +1757,18 @@ static int connect_request_upcall(struct c4iw_ep *ep)
c4iw_get_ep(&ep->com);
ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
&event);
- if(ret)
+ if(ret) {
+ CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to"
+ " IWCM, err:%d", __func__, ep, ret);
c4iw_put_ep(&ep->com);
+ } else
+ /* Dereference parent_ep only in success case.
+ * In case of failure, parent_ep is dereferenced by the caller
+ * of process_mpa_request().
+ */
+ c4iw_put_ep(&ep->parent_ep->com);
set_bit(CONNREQ_UPCALL, &ep->com.history);
- c4iw_put_ep(&ep->parent_ep->com);
return ret;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list