svn commit: r325522 - in projects/bsd_rdma_4_9: contrib/ofed/libcxgb4 sys/compat/linuxkpi/common/include/linux sys/dev/cxgbe sys/dev/cxgbe/common sys/dev/cxgbe/iw_cxgbe sys/modules/cxgbe/iw_cxgbe
Navdeep Parhar
np at FreeBSD.org
Tue Nov 7 23:52:16 UTC 2017
Author: np
Date: Tue Nov 7 23:52:14 2017
New Revision: 325522
URL: https://svnweb.freebsd.org/changeset/base/325522
Log:
Update the iw_cxgbe bits in the projects branch.
Submitted by: Krishnamraju Eraparaju @ Chelsio
Sponsored by: Chelsio Communications
Modified:
projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c
projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c
projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c
projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h
projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c
projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h
projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h
projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h
projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cq.c
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/device.c
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/mem.c
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/provider.c
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/qp.c
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/t4.h
projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/user.h
projects/bsd_rdma_4_9/sys/dev/cxgbe/offload.h
projects/bsd_rdma_4_9/sys/dev/cxgbe/t4_main.c
projects/bsd_rdma_4_9/sys/modules/cxgbe/iw_cxgbe/Makefile
Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c Tue Nov 7 23:52:14 2017 (r325522)
@@ -437,7 +437,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq,
if (!*cqe_flushed && CQE_STATUS(hw_cqe))
dump_cqe(hw_cqe);
- BUG_ON((*cqe_flushed == 0) && !SW_CQE(hw_cqe));
+ BUG_ON((cqe_flushed == 0) && !SW_CQE(hw_cqe));
goto proc_cqe;
}
Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c Tue Nov 7 23:52:14 2017 (r325522)
@@ -39,6 +39,7 @@
#include <pthread.h>
#include <string.h>
#include <signal.h>
+#include <stdbool.h>
#include "libcxgb4.h"
#include "cxgb4-abi.h"
@@ -194,6 +195,17 @@ static struct ibv_context *c4iw_alloc_context(struct i
rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *));
if (!rhp->cqid2ptr)
goto err_unmap;
+
+ /* Disable userspace WC if architecture/adapter does not
+ * support WC.
+ * Note: To forcefully disable WC in kernel driver use the
+ * loader tunable "hw.cxl.write_combine=0"
+ */
+ if (t5_en_wc && !context->status_page->wc_supported) {
+ fprintf(stderr, "iw_cxgb4 driver doesn't support Write "
+ "Combine, so regular DB writes will be used\n");
+ t5_en_wc = 0;
+ }
}
return &context->ibv_ctx;
@@ -400,11 +412,44 @@ int c4iw_abi_version = 1;
static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path,
int abi_version)
{
- char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[32], *cp;
+ char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
+ char dev_str[IBV_SYSFS_PATH_MAX];
struct c4iw_dev *dev;
unsigned vendor, device, fw_maj, fw_min;
int i;
+ char devnum;
+ char ib_param[16];
+#ifndef __linux__
+ if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
+ ibdev, sizeof ibdev) < 0)
+ return NULL;
+
+ devnum = atoi(&ibdev[5]);
+
+ if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' &&
+ strstr(&ibdev[2], "nex") && devnum >= 0) {
+ snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1],
+ devnum);
+ } else
+ return NULL;
+
+ if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0)
+ return NULL;
+ else {
+ if (strstr(value, "vendor=")) {
+ strncpy(ib_param, strstr(value, "vendor=") +
+ strlen("vendor="), 6);
+ sscanf(ib_param, "%i", &vendor);
+ }
+
+ if (strstr(value, "device=")) {
+ strncpy(ib_param, strstr(value, "device=") +
+ strlen("device="), 6);
+ sscanf(ib_param, "%i", &device);
+ }
+ }
+#else
if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
value, sizeof value) < 0)
return NULL;
@@ -414,6 +459,7 @@ static struct verbs_device *cxgb4_driver_init(const ch
value, sizeof value) < 0)
return NULL;
sscanf(value, "%i", &device);
+#endif
for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
if (vendor == hca_table[i].vendor &&
@@ -425,6 +471,11 @@ static struct verbs_device *cxgb4_driver_init(const ch
found:
c4iw_abi_version = abi_version;
+#ifndef __linux__
+ if (ibv_read_sysfs_file(dev_str, "firmware_version",
+ value, sizeof value) < 0)
+ return NULL;
+#else
/*
* Verify that the firmware major number matches. Major number
* mismatches are fatal. Minor number mismatches are tolerated.
@@ -438,6 +489,7 @@ found:
ibv_get_sysfs_path(), ibdev);
if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
return NULL;
+#endif
cp = strtok(value+1, ".");
sscanf(cp, "%i", &fw_maj);
Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c Tue Nov 7 23:52:14 2017 (r325522)
@@ -44,10 +44,13 @@ struct c4iw_stats c4iw_stats;
static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
- u64 *src, *dst;
+ void *src, *dst;
+ uintptr_t end;
+ int total, len;
- src = (u64 *)wqe;
- dst = (u64 *)((u8 *)wq->sq.queue + wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+ src = &wqe->flits[0];
+ dst = &wq->sq.queue->flits[wq->sq.wq_pidx *
+ (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
if (t4_sq_onchip(wq)) {
len16 = align(len16, 4);
@@ -57,17 +60,18 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_w
* happens */
mmio_wc_start();
}
- while (len16) {
- *dst++ = *src++;
- if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
- dst = (u64 *)wq->sq.queue;
- *dst++ = *src++;
- if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
- dst = (u64 *)wq->sq.queue;
- len16--;
- /* NOTE len16 cannot be large enough to write to the
- same sq.queue memory twice in this loop */
+ /* NOTE len16 cannot be large enough to write to the
+ same sq.queue memory twice in this loop */
+ total = len16 * 16;
+ end = (uintptr_t)&wq->sq.queue[wq->sq.size];
+ if (__predict_true((uintptr_t)dst + total <= end)) {
+ /* Won't wrap around. */
+ memcpy(dst, src, total);
+ } else {
+ len = end - (uintptr_t)dst;
+ memcpy(dst, src, len);
+ memcpy(wq->sq.queue, src + len, total - len);
}
if (t4_sq_onchip(wq))
@@ -76,18 +80,23 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_w
+/*
+ * Copy a receive work request of len16 16-byte units from 'wqe' into the RQ
+ * ring, starting at the slot selected by rq.wq_pidx.  When the request would
+ * run past &rq.queue[rq.size] the remainder is written at the start of the
+ * ring.
+ */
static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16)
{
- u64 *src, *dst;
+	void *src, *dst;
+	uintptr_t end;
+	size_t total, len;
- src = (u64 *)wqe;
- dst = (u64 *)((u8 *)wq->rq.queue + wq->rq.wq_pidx * T4_EQ_ENTRY_SIZE);
- while (len16) {
- *dst++ = *src++;
- if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
- dst = (u64 *)wq->rq.queue;
- *dst++ = *src++;
- if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
- dst = (u64 *)wq->rq.queue;
- len16--;
+	src = &wqe->flits[0];
+	dst = &wq->rq.queue->flits[wq->rq.wq_pidx *
+	    (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
+
+	total = (size_t)len16 * 16;
+	end = (uintptr_t)&wq->rq.queue[wq->rq.size];
+	if (__predict_true((uintptr_t)dst + total <= end)) {
+		/* Won't wrap around. */
+		memcpy(dst, src, total);
+	} else {
+		/* Fill up to the end of the ring, then continue at its start. */
+		len = end - (uintptr_t)dst;
+		memcpy(dst, src, len);
+		/* Byte arithmetic needs a byte pointer, not a void pointer. */
+		memcpy(wq->rq.queue, (u8 *)src + len, total - len);
+	}
}
Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h Tue Nov 7 23:52:14 2017 (r325522)
@@ -87,7 +87,7 @@
#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
#define T4_MAX_NUM_STAG (1<<15)
#define T4_MAX_MR_SIZE (~0ULL - 1)
-#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
+#define T4_PAGESIZE_MASK 0xffffffff000 /* 4KB-8TB */
#define T4_STAG_UNSET 0xffffffff
#define T4_FW_MAJ 0
@@ -723,7 +723,7 @@ static inline void t4_reset_cq_in_error(struct t4_cq *
struct t4_dev_status_page
{
u8 db_off;
- u8 pad1;
+ u8 wc_supported;
u16 pad2;
u32 pad3;
u64 qp_start;
Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c Tue Nov 7 23:52:14 2017 (r325522)
@@ -468,7 +468,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd,
}
qhp->wq.sq.queue = mmap(NULL, qhp->wq.sq.memsize,
- PROT_WRITE, MAP_SHARED,
+ PROT_READ|PROT_WRITE, MAP_SHARED,
pd->context->cmd_fd, resp.sq_key);
if (qhp->wq.sq.queue == MAP_FAILED)
goto err4;
@@ -490,7 +490,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd,
qhp->wq.rq.udb += 2;
}
qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize,
- PROT_WRITE, MAP_SHARED,
+ PROT_READ|PROT_WRITE, MAP_SHARED,
pd->context->cmd_fd, resp.rq_key);
if (qhp->wq.rq.queue == MAP_FAILED)
goto err6;
Modified: projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h
==============================================================================
--- projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h Tue Nov 7 23:52:14 2017 (r325522)
@@ -127,7 +127,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dm
size_t align;
void *mem;
- if (dev->dma_mask)
+ if (dev != NULL && dev->dma_mask)
high = *dev->dma_mask;
else if (flag & GFP_DMA32)
high = BUS_SPACE_MAXADDR_32BIT;
Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h Tue Nov 7 23:52:14 2017 (r325522)
@@ -801,6 +801,7 @@ struct adapter {
void *tom_softc; /* (struct tom_data *) */
struct tom_tunables tt;
+ struct iw_tunables iwt;
void *iwarp_softc; /* (struct c4iw_dev *) */
void *iscsi_ulp_softc; /* (struct cxgbei_data *) */
void *ccr_softc; /* (struct ccr_softc *) */
Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h Tue Nov 7 23:52:14 2017 (r325522)
@@ -68,6 +68,8 @@ enum {
FEC_RESERVED = 1 << 2,
};
+/* Selects whether a BAR2 SGE register lookup is for an egress or ingress queue. */
+enum t4_bar2_qtype {
+	T4_BAR2_QTYPE_EGRESS,
+	T4_BAR2_QTYPE_INGRESS
+};
+
struct port_stats {
u64 tx_octets; /* total # of octets in good frames */
u64 tx_frames; /* all good frames */
@@ -843,5 +845,8 @@ int t4vf_get_sge_params(struct adapter *adapter);
int t4vf_get_rss_glb_config(struct adapter *adapter);
int t4vf_get_vfres(struct adapter *adapter);
int t4vf_prep_adapter(struct adapter *adapter);
+int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid,
+ enum t4_bar2_qtype qtype, int user, u64 *pbar2_qoffset,
+ unsigned int *pbar2_qid);
#endif /* __CHELSIO_COMMON_H */
Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c Tue Nov 7 23:52:14 2017 (r325522)
@@ -8081,6 +8081,98 @@ int t4_shutdown_adapter(struct adapter *adapter)
}
/**
+ *	t4_bar2_sge_qregs - look up the BAR2 SGE registers of a queue
+ *	@adapter: the adapter
+ *	@qid: the Absolute Queue ID
+ *	@qtype: T4_BAR2_QTYPE_EGRESS or T4_BAR2_QTYPE_INGRESS, matching @qid
+ *	@user: true if this request is for a user mode queue
+ *	@pbar2_qoffset: receives the BAR2 offset of the @qid registers
+ *	@pbar2_qid: receives the BAR2 Queue ID, or 0 if the Queue ID is inferred
+ *
+ *	Translates an Absolute Queue ID into the location of its SGE Queue
+ *	registers in BAR2.  Returns 0 and fills in both output pointers on
+ *	success, or -EINVAL when BAR2 SGE Queue registers are not available
+ *	(kernel mode queues on T4).
+ *
+ *	A returned BAR2 Queue ID of 0 means the hardware infers the queue
+ *	from the register offset alone, so registers that need the "Inferred
+ *	Queue ID" ability (e.g. the Write Combining Doorbell Buffer) may be
+ *	used.  A non-zero BAR2 Queue ID means those registers may not be used.
+ */
+int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid,
+		      enum t4_bar2_qtype qtype, int user,
+		      u64 *pbar2_qoffset, unsigned int *pbar2_qid)
+{
+	unsigned int pg_shift, pg_size, per_page_shift;
+	unsigned int rel_qid, rel_off;
+	u64 qoffset;
+
+	/* T4 has no BAR2 SGE Queue registers for kernel mode queues. */
+	if (!user && is_t4(adapter))
+		return -EINVAL;
+
+	/* SGE page size. */
+	pg_shift = adapter->params.sge.page_shift;
+	pg_size = 1 << pg_shift;
+
+	/* Queues-per-page shift for the requested queue type. */
+	if (qtype == T4_BAR2_QTYPE_EGRESS)
+		per_page_shift = adapter->params.sge.eq_s_qpp;
+	else
+		per_page_shift = adapter->params.sge.iq_s_qpp;
+
+	/*
+	 * Split the Absolute Queue ID into the BAR2 page that holds its
+	 * registers, its Queue ID within that page, and the byte offset of
+	 * that Queue ID's register area inside the page.
+	 */
+	rel_qid = qid & ((1 << per_page_shift) - 1);
+	rel_off = rel_qid * SGE_UDB_SIZE;
+	qoffset = (u64)(qid >> per_page_shift) << pg_shift;
+
+	/*
+	 * If the per-queue offset still lies inside the page, the hardware
+	 * infers the Absolute Queue ID from where the write lands, and the
+	 * BAR2 Queue ID used for such writes must be 0.  Otherwise all
+	 * writes go to the first register area of the page and carry the
+	 * BAR2 Queue ID explicitly.
+	 *
+	 * One consequence: registers with a "Queue ID" field can be used
+	 * either way, but registers that derive the queue purely from the
+	 * write itself (the Write Combined Doorbell Buffer, for instance)
+	 * are only usable in the inferred case.
+	 */
+	if (rel_off < pg_size) {
+		qoffset += rel_off;
+		rel_qid = 0;
+	}
+
+	*pbar2_qoffset = qoffset;
+	*pbar2_qid = rel_qid;
+	return 0;
+}
+
+/**
* t4_init_devlog_params - initialize adapter->params.devlog
* @adap: the adapter
* @fw_attach: whether we can talk to the firmware
Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c Tue Nov 7 19:12:20 2017 (r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c Tue Nov 7 23:52:14 2017 (r325522)
@@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
+#include <netinet6/in6_pcb.h>
#include <netinet/ip.h>
#include <netinet/in_fib.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/scope6_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
@@ -78,6 +81,8 @@ static struct work_struct c4iw_task;
static struct workqueue_struct *c4iw_taskq;
static LIST_HEAD(err_cqe_list);
static spinlock_t err_cqe_lock;
+static LIST_HEAD(listen_port_list);
+static DEFINE_MUTEX(listen_port_mutex);
static void process_req(struct work_struct *ctx);
static void start_ep_timer(struct c4iw_ep *ep);
@@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep);
static int set_tcpinfo(struct c4iw_ep *ep);
static void process_timeout(struct c4iw_ep *ep);
static void process_err_cqes(void);
-static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
-static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
-static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
static void *alloc_ep(int size, gfp_t flags);
-static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
static void close_socket(struct socket *so);
static int send_mpa_req(struct c4iw_ep *ep);
static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
@@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep);
static int terminate(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m);
static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep);
+static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep);
+static struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so);
+static int get_ifnet_from_raddr(struct sockaddr_storage *raddr,
+ struct ifnet **ifp);
+static void process_newconn(struct c4iw_listen_ep *master_lep,
+ struct socket *new_so);
#define START_EP_TIMER(ep) \
do { \
CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
@@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int
stop_ep_timer(ep); \
})
+/*
+ * Copy the local address of socket 'so' into *(pladdr), which must point to a
+ * struct sockaddr_storage.
+ *
+ * in_getsockaddr()/in6_getsockaddr() hand back a malloc'ed sockaddr that is
+ * only as large as its own ss_len (a sockaddr_in or sockaddr_in6), so only
+ * that many bytes are copied and the rest of *(pladdr) is zeroed; assigning a
+ * whole sockaddr_storage would read past the end of the allocation.  If the
+ * lookup returns no address, *(pladdr) is left all zero.
+ */
+#define GET_LOCAL_ADDR(pladdr, so) \
+	do { \
+		struct sockaddr_storage *__a = NULL; \
+		struct inpcb *__inp = sotoinpcb(so); \
+		KASSERT(__inp != NULL, \
+		    ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+		if (__inp->inp_vflag & INP_IPV4) \
+			in_getsockaddr(so, (struct sockaddr **)&__a); \
+		else \
+			in6_getsockaddr(so, (struct sockaddr **)&__a); \
+		memset((pladdr), 0, sizeof(*(pladdr))); \
+		if (__a != NULL) \
+			memcpy((pladdr), __a, \
+			    __a->ss_len < sizeof(*(pladdr)) ? \
+			    __a->ss_len : sizeof(*(pladdr))); \
+		free(__a, M_SONAME); \
+	} while (0)
+
+/*
+ * Copy the peer address of socket 'so' into *(praddr), which must point to a
+ * struct sockaddr_storage.
+ *
+ * in_getpeeraddr()/in6_getpeeraddr() hand back a malloc'ed sockaddr that is
+ * only as large as its own ss_len (a sockaddr_in or sockaddr_in6), so only
+ * that many bytes are copied and the rest of *(praddr) is zeroed; assigning a
+ * whole sockaddr_storage would read past the end of the allocation.  If the
+ * lookup returns no address, *(praddr) is left all zero.
+ */
+#define GET_REMOTE_ADDR(praddr, so) \
+	do { \
+		struct sockaddr_storage *__a = NULL; \
+		struct inpcb *__inp = sotoinpcb(so); \
+		KASSERT(__inp != NULL, \
+		    ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+		if (__inp->inp_vflag & INP_IPV4) \
+			in_getpeeraddr(so, (struct sockaddr **)&__a); \
+		else \
+			in6_getpeeraddr(so, (struct sockaddr **)&__a); \
+		memset((praddr), 0, sizeof(*(praddr))); \
+		if (__a != NULL) \
+			memcpy((praddr), __a, \
+			    __a->ss_len < sizeof(*(praddr)) ? \
+			    __a->ss_len : sizeof(*(praddr))); \
+		free(__a, M_SONAME); \
+	} while (0)
+
#ifdef KTR
static char *states[] = {
"idle",
@@ -152,7 +189,6 @@ static char *states[] = {
};
#endif
-
static void deref_cm_id(struct c4iw_ep_common *epc)
{
epc->cm_id->rem_ref(epc->cm_id);
@@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep)
set_bit(QP_REFED, &ep->com.history);
c4iw_qp_add_ref(&ep->com.qp->ibqp);
}
+/* Bookkeeping for one listening TCP port; allocated on first use, freed at refcnt 0 */
+struct listen_port_info {
+ uint16_t port_num; /* TCP port, exactly as stored in sin_port/sin6_port */
+ struct list_head list; /* linkage on the global listen_port_list */
+ struct list_head lep_list; /* head of the lep's listening on this port */
+ uint32_t refcnt; /* number of lep's currently on lep_list */
+};
+/*
+ * Following two lists are used to manage INADDR_ANY listeners:
+ * 1)listen_port_list
+ * 2)lep_list
+ *
+ * Below is the INADDR_ANY listener lists overview on a system with a two port
+ * adapter:
+ * |------------------|
+ * |listen_port_list |
+ * |------------------|
+ * |
+ * | |-----------| |-----------|
+ * | | port_num:X| | port_num:X|
+ * |--------------|-list------|-------|-list------|-------....
+ * | lep_list----| | lep_list----|
+ * | refcnt | | | refcnt | |
+ * | | | | | |
+ * | | | | | |
+ * |-----------| | |-----------| |
+ * | |
+ * | |
+ * | |
+ * | | lep1 lep2
+ * | | |----------------| |----------------|
+ * | |----| listen_ep_list |----| listen_ep_list |
+ * | |----------------| |----------------|
+ * |
+ * |
+ * | lep1 lep2
+ * | |----------------| |----------------|
+ * |---| listen_ep_list |----| listen_ep_list |
+ * |----------------| |----------------|
+ *
+ * Because of two port adapter, the number of lep's are two(lep1 & lep2) for
+ * each TCP port number.
+ *
+ * Here 'lep1' is always marked as Master lep, because solisten() is always
+ * called through first lep.
+ *
+ */
+/*
+ * Register 'lep' as a listener on its local TCP port.
+ *
+ * The listen_port_info for the port is looked up on listen_port_list and
+ * created if this is the first listener on that port.  Its refcnt is bumped
+ * and 'lep' is appended to its lep_list.  Runs under listen_port_mutex.
+ *
+ * Returns the listen_port_info that 'lep' was added to.
+ */
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep)
+{
+	struct sockaddr_storage *ss = &lep->com.local_addr;
+	struct listen_port_info *cur, *pi = NULL;
+	uint16_t lport;
+
+	if (ss->ss_family == AF_INET)
+		lport = ((struct sockaddr_in *)ss)->sin_port;
+	else
+		lport = ((struct sockaddr_in6 *)ss)->sin6_port;
+
+	mutex_lock(&listen_port_mutex);
+
+	list_for_each_entry(cur, &listen_port_list, list) {
+		if (cur->port_num == lport) {
+			pi = cur;
+			break;
+		}
+	}
+
+	if (pi == NULL) {
+		/* First listener on this port: create its entry. */
+		pi = malloc(sizeof(*pi), M_CXGBE, M_WAITOK);
+		pi->port_num = lport;
+		pi->refcnt = 0;
+		INIT_LIST_HEAD(&pi->lep_list);
+		list_add_tail(&pi->list, &listen_port_list);
+	}
+
+	pi->refcnt++;
+	list_add_tail(&lep->listen_ep_list, &pi->lep_list);
+	mutex_unlock(&listen_port_mutex);
+	return pi;
+}
+
+/*
+ * Unregister 'lep' from the listener list of its local TCP port.
+ *
+ * Under listen_port_mutex, the listen_port_info matching the lep's port is
+ * located, its refcnt is dropped and 'lep' is unlinked from its lep_list.
+ * When the last listener goes away the listen_port_info itself is unlinked
+ * from listen_port_list and freed.
+ *
+ * Returns the number of listeners left on the port after the removal, or 0
+ * if no listen_port_info exists for the port (in which case nothing is
+ * unlinked).
+ */
+static int
+rem_ep_from_listenlist(struct c4iw_listen_ep *lep)
+{
+	uint16_t port;
+	struct listen_port_info *port_info = NULL;
+	struct sockaddr_storage *laddr = &lep->com.local_addr;
+	int refcnt = 0;
+
+	port = (laddr->ss_family == AF_INET) ?
+	    ((struct sockaddr_in *)laddr)->sin_port :
+	    ((struct sockaddr_in6 *)laddr)->sin6_port;
+
+	mutex_lock(&listen_port_mutex);
+
+	/* get the port_info structure based on the lep's port address */
+	list_for_each_entry(port_info, &listen_port_list, list) {
+		if (port_info->port_num == port) {
+			port_info->refcnt--;
+			refcnt = port_info->refcnt;
+			/* remove the current lep from the listen list */
+			list_del(&lep->listen_ep_list);
+			if (port_info->refcnt == 0) {
+				/* Remove this entry from the list as there
+				 * are no more listeners for this port_num.
+				 * It was allocated with malloc type M_CXGBE
+				 * in add_ep_to_listenlist(), so it must be
+				 * released with the same type, not kfree().
+				 */
+				list_del(&port_info->list);
+				free(port_info, M_CXGBE);
+			}
+			/* The cursor may be freed now; stop iterating. */
+			break;
+		}
+	}
+	mutex_unlock(&listen_port_mutex);
+	return refcnt;
+}
+
+/*
+ * Find the lep that belongs to the ifnet on which the SYN frame was received.
+ *
+ * The ifnet is obtained from a route lookup on the peer address of 'so'.  The
+ * lep's registered for master_lep's local TCP port are then searched for one
+ * whose adapter has a port whose first VI uses that ifnet.
+ *
+ * Returns the matching lep, or NULL if the route lookup fails, no listener
+ * is registered for the port, or no adapter port matches.
+ */
+static struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so)
+{
+	struct adapter *adap;
+	struct c4iw_listen_ep *lep, *real_lep = NULL;
+	struct sockaddr_storage remote = { 0 };
+	struct ifnet *new_conn_ifp = NULL;
+	struct listen_port_info *pi, *port_info = NULL;
+	int err, i;
+	uint16_t port;
+
+	/* STEP 1: get 'ifnet' based on socket's remote address */
+	GET_REMOTE_ADDR(&remote, so);
+
+	err = get_ifnet_from_raddr(&remote, &new_conn_ifp);
+	if (err) {
+		CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, "
+		    "master_lep %p err %d",
+		    __func__, so, master_lep, err);
+		return (NULL);
+	}
+
+	/* STEP 2: Find 'port_info' with listener local port address. */
+	port = (master_lep->com.local_addr.ss_family == AF_INET) ?
+	    ((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port :
+	    ((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port;
+
+	mutex_lock(&listen_port_mutex);
+	list_for_each_entry(pi, &listen_port_list, list) {
+		if (pi->port_num == port) {
+			port_info = pi;
+			break;
+		}
+	}
+	if (port_info == NULL)
+		goto out;
+
+	/* STEP 3: Traverse through list of lep's that are bound to the current
+	 * TCP port address and find the lep that belongs to the ifnet on which
+	 * the SYN frame was received.  The result pointer is only set on a
+	 * match; the loop cursor is not a valid lep once the walk completes.
+	 */
+	list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) {
+		adap = lep->com.dev->rdev.adap;
+		for_each_port(adap, i) {
+			if (new_conn_ifp == adap->port[i]->vi[0].ifp) {
+				real_lep = lep;
+				goto out;
+			}
+		}
+	}
+out:
+	mutex_unlock(&listen_port_mutex);
+	return (real_lep);
+}
+
static void process_timeout(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int abort = 1;
- mutex_lock(&ep->com.mutex);
CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
ep, ep->hwtid, ep->com.state);
set_bit(TIMEDOUT, &ep->com.history);
@@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep)
, __func__, ep, ep->hwtid, ep->com.state);
abort = 0;
}
- mutex_unlock(&ep->com.mutex);
if (abort)
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
@@ -273,14 +479,16 @@ process_req(struct work_struct *ctx)
ep_events = epc->ep_events;
epc->ep_events = 0;
spin_unlock_irqrestore(&req_lock, flag);
- CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__,
- epc->so, epc, ep_events);
+ mutex_lock(&epc->mutex);
+ CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x",
+ __func__, epc->so, epc, states[epc->state], ep_events);
if (ep_events & C4IW_EVENT_TERM)
process_terminate((struct c4iw_ep *)epc);
if (ep_events & C4IW_EVENT_TIMEOUT)
process_timeout((struct c4iw_ep *)epc);
if (ep_events & C4IW_EVENT_SOCKET)
process_socket_event((struct c4iw_ep *)epc);
+ mutex_unlock(&epc->mutex);
c4iw_put_ep(epc);
process_err_cqes();
spin_lock_irqsave(&req_lock, flag);
@@ -321,55 +529,67 @@ done:
return (rc);
}
-
+/*
+ * Look up the route towards 'raddr' (AF_INET, otherwise treated as AF_INET6)
+ * in the default FIB and store the nexthop's interface in *ifp.
+ *
+ * Returns the error code of the FIB lookup (0 on success).  *ifp is always
+ * written; on failure it holds whatever the zero-initialised nexthop
+ * structure contains.
+ *
+ * The lookup is done with NHR_REF; the reference is dropped again as soon as
+ * the interface pointer has been copied out.
+ */
static int
-find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
+get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp)
{
- struct in_addr addr;
- int err;
+	int err = 0;
- CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
- peer_ip, ntohs(local_port), ntohs(peer_port));
+	if (raddr->ss_family == AF_INET) {
+		struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr;
+		struct nhop4_extended nh4 = {0};
- addr.s_addr = peer_ip;
- err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);
+		err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr,
+		    NHR_REF, 0, &nh4);
+		*ifp = nh4.nh_ifp;
+		/* A reference exists only if the lookup succeeded. */
+		if (err == 0)
+			fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
+	} else {
+		struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr;
+		struct nhop6_extended nh6 = {0};
+		struct in6_addr addr6;
+		uint32_t scopeid;
- CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
+		memset(&addr6, 0, sizeof(addr6));
+		in6_splitscope((struct in6_addr *)&raddr6->sin6_addr,
+		    &addr6, &scopeid);
+		err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid,
+		    NHR_REF, 0, &nh6);
+		*ifp = nh6.nh_ifp;
+		/* A reference exists only if the lookup succeeded. */
+		if (err == 0)
+			fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6);
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err);
return err;
}
+/*
+ * Detach the iWARP upcall from 'so' and then close the socket with soclose().
+ */
static void
close_socket(struct socket *so)
{
-
uninit_iwarp_socket(so);
- sodisconnect(so);
+ soclose(so);
}
static void
process_peer_close(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int disconnect = 1;
int release = 0;
CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
ep->com.so, states[ep->com.state]);
- mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case MPA_REQ_WAIT:
- CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
+ CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD",
__func__, ep);
- __state_set(&ep->com, CLOSING);
- break;
-
+ /* Fallthrough */
case MPA_REQ_SENT:
- CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
+ CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
__func__, ep);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
connect_reply_upcall(ep, -ECONNABORTED);
disconnect = 0;
@@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep)
*/
CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
__func__, ep);
- __state_set(&ep->com, CLOSING);
- c4iw_get_ep(&ep->com);
+ ep->com.state = CLOSING;
break;
case MPA_REP_SENT:
CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
__func__, ep);
- __state_set(&ep->com, CLOSING);
+ ep->com.state = CLOSING;
break;
case FPDU_MODE:
CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
__func__, ep);
START_EP_TIMER(ep);
- __state_set(&ep->com, CLOSING);
+ ep->com.state = CLOSING;
attrs.next_state = C4IW_QP_STATE_CLOSING;
c4iw_modify_qp(ep->com.dev, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
@@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep)
case CLOSING:
CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
__func__, ep);
- __state_set(&ep->com, MORIBUND);
+ ep->com.state = MORIBUND;
disconnect = 0;
break;
@@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep)
}
close_socket(ep->com.so);
close_complete_upcall(ep, 0);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
release = 1;
disconnect = 0;
break;
@@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep)
break;
}
- mutex_unlock(&ep->com.mutex);
if (disconnect) {
@@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep)
static void
process_conn_error(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int ret;
int state;
- mutex_lock(&ep->com.mutex);
state = ep->com.state;
CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
__func__, ep, ep->com.so, ep->com.so->so_error,
@@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep)
case MPA_REQ_WAIT:
STOP_EP_TIMER(ep);
+ c4iw_put_ep(&ep->parent_ep->com);
break;
case MPA_REQ_SENT:
@@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep)
break;
case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR.
- */
- c4iw_get_ep(&ep->com);
break;
case MORIBUND:
@@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep)
case DEAD:
CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
__func__, ep->com.so->so_error);
- mutex_unlock(&ep->com.mutex);
return;
default:
@@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep)
if (state != ABORTING) {
close_socket(ep->com.so);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
c4iw_put_ep(&ep->com);
}
- mutex_unlock(&ep->com.mutex);
CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
return;
}
@@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep)
static void
process_close_complete(struct c4iw_ep *ep)
{
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {0};
int release = 0;
CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
ep->com.so, states[ep->com.state]);
/* The cm_id may be null if we failed to connect */
- mutex_lock(&ep->com.mutex);
set_bit(CLOSE_CON_RPL, &ep->com.history);
switch (ep->com.state) {
@@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep)
case CLOSING:
CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
__func__, ep);
- __state_set(&ep->com, MORIBUND);
+ ep->com.state = MORIBUND;
break;
case MORIBUND:
@@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep)
close_socket(ep->com.so);
close_complete_upcall(ep, 0);
- __state_set(&ep->com, DEAD);
+ ep->com.state = DEAD;
release = 1;
break;
@@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep)
panic("%s:pcc6 %p unknown ep state", __func__, ep);
break;
}
- mutex_unlock(&ep->com.mutex);
if (release) {
CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
- c4iw_put_ep(&ep->com);
+ release_ep_resources(ep);
}
CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
return;
@@ -639,49 +846,56 @@ setiwsockopt(struct socket *so)
+/*
+ * Install c4iw_so_upcall (with 'arg') on 'so' and set SS_NBIO on it.
+ * A listening socket gets the listen upcall under SOLISTEN_LOCK; any other
+ * socket gets the SO_RCV upcall under the receive sockbuf lock.
+ */
static void
init_iwarp_socket(struct socket *so, void *arg)
{
-
- SOCKBUF_LOCK(&so->so_rcv);
- soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
- so->so_state |= SS_NBIO;
- SOCKBUF_UNLOCK(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, c4iw_so_upcall, arg);
+ so->so_state |= SS_NBIO;
+ SOLISTEN_UNLOCK(so);
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
+ so->so_state |= SS_NBIO;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
+/*
+ * Remove the upcall from 'so': a listening socket has its listen upcall reset
+ * to NULL under SOLISTEN_LOCK, any other socket has its SO_RCV upcall cleared
+ * under the receive sockbuf lock.
+ */
static void
uninit_iwarp_socket(struct socket *so)
{
-
- SOCKBUF_LOCK(&so->so_rcv);
- soupcall_clear(so, SO_RCV);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, NULL, NULL);
+ SOLISTEN_UNLOCK(so);
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ soupcall_clear(so, SO_RCV);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
static void
process_data(struct c4iw_ep *ep)
{
- struct sockaddr_in *local, *remote;
int disconnect = 0;
CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
- switch (state_read(&ep->com)) {
+ switch (ep->com.state) {
case MPA_REQ_SENT:
disconnect = process_mpa_reply(ep);
break;
case MPA_REQ_WAIT:
- in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
- in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
- ep->com.local_addr = *local;
- ep->com.remote_addr = *remote;
- free(local, M_SONAME);
- free(remote, M_SONAME);
disconnect = process_mpa_request(ep);
+ if (disconnect)
+ /* Referenced in process_newconn() */
+ c4iw_put_ep(&ep->parent_ep->com);
break;
default:
if (sbused(&ep->com.so->so_rcv))
log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
"state %d, so %p, so_state 0x%x, sbused %u\n",
- __func__, ep, state_read(&ep->com), ep->com.so,
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list