svn commit: r278886 - in head/sys: conf contrib/rdma/krping dev/cxgb/ulp/iw_cxgb dev/cxgbe/iw_cxgbe modules/ibcore modules/mlx4ib ofed/drivers/infiniband/core ofed/drivers/infiniband/debug ofed/dri...

Hans Petter Selasky hselasky at FreeBSD.org
Tue Feb 17 08:40:31 UTC 2015


Author: hselasky
Date: Tue Feb 17 08:40:27 2015
New Revision: 278886
URL: https://svnweb.freebsd.org/changeset/base/278886

Log:
  Update the infiniband stack to Mellanox's OFED version 2.1.
  
  Highlights:
   - Multiple verbs API updates
   - Support for RoCE (RDMA over Converged Ethernet)
  
  All hardware drivers depending on the common infiniband stack have been
  updated as well.
  
  Discussed with:	np @
  Sponsored by:	Mellanox Technologies
  MFC after:	1 month

Added:
  head/sys/ofed/drivers/infiniband/core/peer_mem.c   (contents, props changed)
  head/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c   (contents, props changed)
  head/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h   (contents, props changed)
  head/sys/ofed/include/rdma/ib_peer_mem.h   (contents, props changed)
  head/sys/ofed/include/rdma/ib_user_verbs_exp.h   (contents, props changed)
  head/sys/ofed/include/rdma/ib_verbs_exp.h   (contents, props changed)
  head/sys/ofed/include/rdma/peer_mem.h   (contents, props changed)
Deleted:
  head/sys/ofed/drivers/infiniband/core/Makefile
  head/sys/ofed/drivers/infiniband/core/local_sa.c
  head/sys/ofed/drivers/infiniband/core/notice.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
  head/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile
Modified:
  head/sys/conf/files
  head/sys/contrib/rdma/krping/krping.c
  head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
  head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
  head/sys/dev/cxgbe/iw_cxgbe/cq.c
  head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
  head/sys/dev/cxgbe/iw_cxgbe/mem.c
  head/sys/modules/ibcore/Makefile
  head/sys/modules/mlx4ib/Makefile
  head/sys/ofed/drivers/infiniband/core/addr.c
  head/sys/ofed/drivers/infiniband/core/cache.c
  head/sys/ofed/drivers/infiniband/core/cm.c
  head/sys/ofed/drivers/infiniband/core/cm_msgs.h
  head/sys/ofed/drivers/infiniband/core/cma.c
  head/sys/ofed/drivers/infiniband/core/core_priv.h
  head/sys/ofed/drivers/infiniband/core/device.c
  head/sys/ofed/drivers/infiniband/core/fmr_pool.c
  head/sys/ofed/drivers/infiniband/core/iwcm.c
  head/sys/ofed/drivers/infiniband/core/mad.c
  head/sys/ofed/drivers/infiniband/core/mad_priv.h
  head/sys/ofed/drivers/infiniband/core/mad_rmpp.c
  head/sys/ofed/drivers/infiniband/core/multicast.c
  head/sys/ofed/drivers/infiniband/core/packer.c
  head/sys/ofed/drivers/infiniband/core/sa.h
  head/sys/ofed/drivers/infiniband/core/sa_query.c
  head/sys/ofed/drivers/infiniband/core/smi.c
  head/sys/ofed/drivers/infiniband/core/sysfs.c
  head/sys/ofed/drivers/infiniband/core/ucm.c
  head/sys/ofed/drivers/infiniband/core/ucma.c
  head/sys/ofed/drivers/infiniband/core/ud_header.c
  head/sys/ofed/drivers/infiniband/core/umem.c
  head/sys/ofed/drivers/infiniband/core/user_mad.c
  head/sys/ofed/drivers/infiniband/core/uverbs.h
  head/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
  head/sys/ofed/drivers/infiniband/core/uverbs_main.c
  head/sys/ofed/drivers/infiniband/core/uverbs_marshall.c
  head/sys/ofed/drivers/infiniband/core/verbs.c
  head/sys/ofed/drivers/infiniband/debug/memtrack.c
  head/sys/ofed/drivers/infiniband/debug/memtrack.h
  head/sys/ofed/drivers/infiniband/debug/mtrack.h
  head/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/main.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
  head/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
  head/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
  head/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
  head/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  head/sys/ofed/include/rdma/ib_addr.h
  head/sys/ofed/include/rdma/ib_cache.h
  head/sys/ofed/include/rdma/ib_cm.h
  head/sys/ofed/include/rdma/ib_mad.h
  head/sys/ofed/include/rdma/ib_pack.h
  head/sys/ofed/include/rdma/ib_sa.h
  head/sys/ofed/include/rdma/ib_umem.h
  head/sys/ofed/include/rdma/ib_user_verbs.h
  head/sys/ofed/include/rdma/ib_verbs.h
  head/sys/ofed/include/rdma/iw_cm.h
  head/sys/ofed/include/rdma/rdma_cm.h
  head/sys/ofed/include/rdma/rdma_user_cm.h

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/conf/files	Tue Feb 17 08:40:27 2015	(r278886)
@@ -3624,19 +3624,16 @@ ofed/drivers/infiniband/core/fmr_pool.c	
 ofed/drivers/infiniband/core/iwcm.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
-ofed/drivers/infiniband/core/local_sa.c		optional ofed		\
-	no-depend							\
-	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/mad_rmpp.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/multicast.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
-ofed/drivers/infiniband/core/notice.c		optional ofed		\
+ofed/drivers/infiniband/core/packer.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
-ofed/drivers/infiniband/core/packer.c		optional ofed		\
+ofed/drivers/infiniband/core/peer_mem.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/sa_query.c		optional ofed		\
@@ -3741,6 +3738,9 @@ ofed/drivers/infiniband/hw/mlx4/mad.c		o
 ofed/drivers/infiniband/hw/mlx4/main.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
+ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c	optional mlx4ib		\
+	no-depend \
+	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mr.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"

Modified: head/sys/contrib/rdma/krping/krping.c
==============================================================================
--- head/sys/contrib/rdma/krping/krping.c	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/contrib/rdma/krping/krping.c	Tue Feb 17 08:40:27 2015	(r278886)
@@ -525,7 +525,7 @@ static void krping_setup_wr(struct krpin
 	case MW:
 		cb->bind_attr.wr_id = 0xabbaabba;
 		cb->bind_attr.send_flags = 0; /* unsignaled */
-		cb->bind_attr.length = cb->size;
+		cb->bind_attr.bind_info.length = cb->size;
 		break;
 	default:
 		break;
@@ -627,7 +627,7 @@ static int krping_setup_buffers(struct k
 				cb->page_list, cb->page_list_len);
 			break;
 		case MW:
-			cb->mw = ib_alloc_mw(cb->pd);
+			cb->mw = ib_alloc_mw(cb->pd,IB_MW_TYPE_1);
 			if (IS_ERR(cb->mw)) {
 				DEBUG_LOG(cb, "recv_buf alloc_mw failed\n");
 				ret = PTR_ERR(cb->mw);
@@ -898,15 +898,15 @@ static u32 krping_rdma_rkey(struct krpin
 		 * Update the MW with new buf info.
 		 */
 		if (buf == (u64)cb->start_dma_addr) {
-			cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
-			cb->bind_attr.mr = cb->start_mr;
+			cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
+			cb->bind_attr.bind_info.mr = cb->start_mr;
 		} else {
-			cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
-			cb->bind_attr.mr = cb->rdma_mr;
+			cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+			cb->bind_attr.bind_info.mr = cb->rdma_mr;
 		}
-		cb->bind_attr.addr = buf;
+		cb->bind_attr.bind_info.addr = buf;
 		DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
-			cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+			cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
 		ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
 		if (ret) {
 			PRINTF(cb, "bind mw error %d\n", ret);
@@ -2304,7 +2304,7 @@ int krping_doit(char *cmd, void *cookie)
 		goto out;
 	}
 
-	cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP);
+	cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(cb->cm_id)) {
 		ret = PTR_ERR(cb->cm_id);
 		PRINTF(cb, "rdma_create_id error %d\n", ret);

Modified: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
==============================================================================
--- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c	Tue Feb 17 08:40:27 2015	(r278886)
@@ -176,7 +176,7 @@ iwch_destroy_cq(struct ib_cq *ib_cq)
 }
 
 static struct ib_cq *
-iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
+iwch_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
 			     struct ib_ucontext *ib_context,
 			     struct ib_udata *udata)
 {
@@ -187,6 +187,7 @@ iwch_create_cq(struct ib_device *ibdev, 
 	struct iwch_ucontext *ucontext = NULL;
 	static int warned;
 	size_t resplen;
+	int entries = attr->cqe;
 
 	CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries);
 	rhp = to_iwch_dev(ibdev);
@@ -545,16 +546,14 @@ static struct ib_mr *iwch_reg_user_mr(st
 				      int mr_id)
 {
 	__be64 *pages;
-	int shift, i, n;
+	int shift, n, len;
+	int i, k, entry;
 	int err = 0;
-	struct ib_umem_chunk *chunk;
 	struct iwch_dev *rhp;
 	struct iwch_pd *php;
 	struct iwch_mr *mhp;
 	struct iwch_reg_user_mr_resp uresp;
-#ifdef notyet
-	int j, k, len;
-#endif	
+	struct scatterlist *sg;
 	
 	CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd);
 
@@ -575,9 +574,7 @@ static struct ib_mr *iwch_reg_user_mr(st
 
 	shift = ffs(mhp->umem->page_size) - 1;
 
-	n = 0;
-	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
-		n += chunk->nents;
+	n = mhp->umem->nmap;
 
 	err = iwch_alloc_pbl(mhp, n);
 	if (err)
@@ -591,7 +588,21 @@ static struct ib_mr *iwch_reg_user_mr(st
 
 	i = n = 0;
 
-#ifdef notyet
+	for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+		len = sg_dma_len(sg) >> shift;
+		for (k = 0; k < len; ++k) {
+			pages[i++] = cpu_to_be64(sg_dma_address(sg) +
+					mhp->umem->page_size * k);
+			if (i == PAGE_SIZE / sizeof *pages) {
+				err = iwch_write_pbl(mhp, pages, i, n);
+				if (err)
+					goto pbl_done;
+				n += i;
+				i = 0;
+			}
+		}
+	}
+#if 0
 	TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -612,9 +623,7 @@ static struct ib_mr *iwch_reg_user_mr(st
 
 	if (i)
 		err = iwch_write_pbl(mhp, pages, i, n);
-#ifdef notyet
 pbl_done:
-#endif
 	cxfree(pages);
 	if (err)
 		goto err_pbl;
@@ -672,7 +681,7 @@ static struct ib_mr *iwch_get_dma_mr(str
 	return ibmr;
 }
 
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
 	struct iwch_dev *rhp;
 	struct iwch_pd *php;

Modified: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
==============================================================================
--- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c	Tue Feb 17 08:40:27 2015	(r278886)
@@ -551,18 +551,18 @@ int iwch_bind_mw(struct ib_qp *qp,
 	if (mw_bind->send_flags & IB_SEND_SIGNALED)
 		t3_wr_flags = T3_COMPLETION_FLAG;
 
-	sgl.addr = mw_bind->addr;
-	sgl.lkey = mw_bind->mr->lkey;
-	sgl.length = mw_bind->length;
+	sgl.addr = mw_bind->bind_info.addr;
+	sgl.lkey = mw_bind->bind_info.mr->lkey;
+	sgl.length = mw_bind->bind_info.length;
 	wqe->bind.reserved = 0;
 	wqe->bind.type = T3_VA_BASED_TO;
 
 	/* TBD: check perms */
-	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
-	wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
+	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->bind_info.mw_access_flags);
+	wqe->bind.mr_stag = htobe32(mw_bind->bind_info.mr->lkey);
 	wqe->bind.mw_stag = htobe32(mw->rkey);
-	wqe->bind.mw_len = htobe32(mw_bind->length);
-	wqe->bind.mw_va = htobe64(mw_bind->addr);
+	wqe->bind.mw_len = htobe32(mw_bind->bind_info.length);
+	wqe->bind.mw_va = htobe64(mw_bind->bind_info.addr);
 	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
 	if (err) {
 		mtx_unlock(&qhp->lock);

Modified: head/sys/dev/cxgbe/iw_cxgbe/cq.c
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/cq.c	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/dev/cxgbe/iw_cxgbe/cq.c	Tue Feb 17 08:40:27 2015	(r278886)
@@ -775,7 +775,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
 }
 
 struct ib_cq *
-c4iw_create_cq(struct ib_device *ibdev, int entries, int vector,
+c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
     struct ib_ucontext *ib_context, struct ib_udata *udata)
 {
 	struct c4iw_dev *rhp;
@@ -785,6 +785,7 @@ c4iw_create_cq(struct ib_device *ibdev, 
 	int ret;
 	size_t memsize, hwentries;
 	struct c4iw_mm_entry *mm, *mm2;
+	int entries = attr->cqe;
 
 	CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
 

Modified: head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Tue Feb 17 08:40:27 2015	(r278886)
@@ -864,7 +864,7 @@ struct ib_fast_reg_page_list *c4iw_alloc
 					int page_list_len);
 struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
 int c4iw_dealloc_mw(struct ib_mw *mw);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd);
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64
     virt, int acc, struct ib_udata *udata, int mr_id);
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
@@ -881,8 +881,7 @@ int c4iw_reregister_phys_mem(struct ib_m
 				     int acc, u64 *iova_start);
 int c4iw_dereg_mr(struct ib_mr *ib_mr);
 int c4iw_destroy_cq(struct ib_cq *ib_cq);
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
-					int vector,
+struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
 					struct ib_ucontext *ib_context,
 					struct ib_udata *udata);
 int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);

Modified: head/sys/dev/cxgbe/iw_cxgbe/mem.c
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/mem.c	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/dev/cxgbe/iw_cxgbe/mem.c	Tue Feb 17 08:40:27 2015	(r278886)
@@ -563,9 +563,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib
 {
 	__be64 *pages;
 	int shift, n, len;
-	int i, j, k;
+	int i, k, entry;
 	int err = 0;
-	struct ib_umem_chunk *chunk;
+	struct scatterlist *sg;
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
 	struct c4iw_mr *mhp;
@@ -594,11 +594,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib
 	}
 
 	shift = ffs(mhp->umem->page_size) - 1;
-
-	n = 0;
-	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
-		n += chunk->nents;
-
+	
+	n = mhp->umem->nmap;
 	err = alloc_pbl(mhp, n);
 	if (err)
 		goto err;
@@ -610,25 +607,23 @@ struct ib_mr *c4iw_reg_user_mr(struct ib
 	}
 
 	i = n = 0;
-
-	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
-		for (j = 0; j < chunk->nmap; ++j) {
-			len = sg_dma_len(&chunk->page_list[j]) >> shift;
-			for (k = 0; k < len; ++k) {
-				pages[i++] = cpu_to_be64(sg_dma_address(
-					&chunk->page_list[j]) +
+	for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+		len = sg_dma_len(sg) >> shift;
+		for (k = 0; k < len; ++k) {
+			pages[i++] = cpu_to_be64(sg_dma_address(sg) +
 					mhp->umem->page_size * k);
-				if (i == PAGE_SIZE / sizeof *pages) {
-					err = write_pbl(&mhp->rhp->rdev,
-					      pages,
-					      mhp->attr.pbl_addr + (n << 3), i);
-					if (err)
-						goto pbl_done;
-					n += i;
-					i = 0;
-				}
+			if (i == PAGE_SIZE / sizeof *pages) {
+				err = write_pbl(&mhp->rhp->rdev,
+						pages,
+						mhp->attr.pbl_addr + (n << 3), i);
+				if (err)
+					goto pbl_done;
+				n += i;
+				i = 0;
+
 			}
 		}
+	}
 
 	if (i)
 		err = write_pbl(&mhp->rhp->rdev, pages,
@@ -662,7 +657,7 @@ err:
 	return ERR_PTR(err);
 }
 
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;

Modified: head/sys/modules/ibcore/Makefile
==============================================================================
--- head/sys/modules/ibcore/Makefile	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/modules/ibcore/Makefile	Tue Feb 17 08:40:27 2015	(r278886)
@@ -4,8 +4,8 @@
 
 KMOD=	ibcore
 SRCS=	addr.c iwcm.c sa_query.c ucma.c uverbs_cmd.c \
-	agent.c local_sa.c multicast.c smi.c ud_header.c uverbs_main.c \
-	mad.c notice.c umem.c uverbs_marshall.c \
+	agent.c multicast.c smi.c ud_header.c uverbs_main.c \
+	mad.c peer_mem.c umem.c uverbs_marshall.c \
 	cache.c device.c packer.c sysfs.c user_mad.c verbs.c \
 	cm.c fmr_pool.c mad_rmpp.c ucm.c cma.c \
 	vnode_if.h device_if.h bus_if.h pci_if.h \

Modified: head/sys/modules/mlx4ib/Makefile
==============================================================================
--- head/sys/modules/mlx4ib/Makefile	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/modules/mlx4ib/Makefile	Tue Feb 17 08:40:27 2015	(r278886)
@@ -6,6 +6,7 @@ KMOD=	mlx4ib
 SRCS=	device_if.h bus_if.h vnode_if.h pci_if.h \
 	opt_inet.h opt_inet6.h \
 	alias_GUID.c mcg.c sysfs.c ah.c cq.c \
+	mlx4_exp.c \
 	doorbell.c mad.c main.c mr.c qp.c srq.c wc.c cm.c
 
 CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4

Modified: head/sys/ofed/drivers/infiniband/core/addr.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/addr.c	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/ofed/drivers/infiniband/core/addr.c	Tue Feb 17 08:40:27 2015	(r278886)
@@ -69,6 +69,7 @@ static LIST_HEAD(req_list);
 static struct delayed_work work;
 static struct workqueue_struct *addr_wq;
 
+static struct rdma_addr_client self;
 void rdma_addr_register_client(struct rdma_addr_client *client)
 {
 	atomic_set(&client->refcount, 1);
@@ -89,19 +90,6 @@ void rdma_addr_unregister_client(struct 
 }
 EXPORT_SYMBOL(rdma_addr_unregister_client);
 
-#ifdef __linux__
-int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-		     const unsigned char *dst_dev_addr)
-{
-	dev_addr->dev_type = dev->type;
-	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
-	if (dst_dev_addr)
-		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
-	dev_addr->bound_dev_if = dev->ifindex;
-	return 0;
-}
-#else
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
 		     const unsigned char *dst_dev_addr)
 {
@@ -119,10 +107,10 @@ int rdma_copy_addr(struct rdma_dev_addr 
 	dev_addr->bound_dev_if = dev->if_index;
 	return 0;
 }
-#endif
 EXPORT_SYMBOL(rdma_copy_addr);
 
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+		      u16 *vlan_id)
 {
 	struct net_device *dev;
 	int ret = -EADDRNOTAVAIL;
@@ -137,33 +125,21 @@ int rdma_translate_ip(struct sockaddr *a
 	}
 
 	switch (addr->sa_family) {
-#ifdef INET
 	case AF_INET:
-		dev = ip_dev_find(NULL,
+		dev = ip_dev_find(&init_net,
 			((struct sockaddr_in *) addr)->sin_addr.s_addr);
 
 		if (!dev)
 			return ret;
 
 		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		if (vlan_id)
+			*vlan_id = rdma_vlan_dev_vlan_id(dev);
 		dev_put(dev);
 		break;
-#endif
 
 #if defined(INET6)
 	case AF_INET6:
-#ifdef __linux__
-		read_lock(&dev_base_lock);
-		for_each_netdev(&init_net, dev) {
-			if (ipv6_chk_addr(&init_net,
-					  &((struct sockaddr_in6 *) addr)->sin6_addr,
-					  dev, 1)) {
-				ret = rdma_copy_addr(dev_addr, dev, NULL);
-				break;
-			}
-		}
-		read_unlock(&dev_base_lock);
-#else
 		{
 			struct sockaddr_in6 *sin6;
 			struct ifaddr *ifa;
@@ -179,12 +155,12 @@ int rdma_translate_ip(struct sockaddr *a
 				break;
 			}
 			ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
+			if (vlan_id)
+				*vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
 			ifa_free(ifa);
 			break;
 		}
 #endif
-		break;
-#endif
 	}
 	return ret;
 }
@@ -218,127 +194,6 @@ static void queue_req(struct addr_req *r
 	mutex_unlock(&lock);
 }
 
-#ifdef __linux__
-static int addr4_resolve(struct sockaddr_in *src_in,
-			 struct sockaddr_in *dst_in,
-			 struct rdma_dev_addr *addr)
-{
-	__be32 src_ip = src_in->sin_addr.s_addr;
-	__be32 dst_ip = dst_in->sin_addr.s_addr;
-	struct flowi fl;
-	struct rtable *rt;
-	struct neighbour *neigh;
-	int ret;
-
-	memset(&fl, 0, sizeof fl);
-	fl.nl_u.ip4_u.daddr = dst_ip;
-	fl.nl_u.ip4_u.saddr = src_ip;
-	fl.oif = addr->bound_dev_if;
-
-	ret = ip_route_output_key(&init_net, &rt, &fl);
-	if (ret)
-		goto out;
-
-	src_in->sin_family = AF_INET;
-	src_in->sin_addr.s_addr = rt->rt_src;
-
-	if (rt->idev->dev->flags & IFF_LOOPBACK) {
-		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
-		if (!ret)
-			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-		goto put;
-	}
-
-	/* If the device does ARP internally, return 'done' */
-	if (rt->idev->dev->flags & IFF_NOARP) {
-		rdma_copy_addr(addr, rt->idev->dev, NULL);
-		goto put;
-	}
-
-	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
-	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
-		neigh_event_send(rt->u.dst.neighbour, NULL);
-		ret = -ENODATA;
-		if (neigh)
-			goto release;
-		goto put;
-	}
-
-	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
-release:
-	neigh_release(neigh);
-put:
-	ip_rt_put(rt);
-out:
-	return ret;
-}
-
-#if defined(INET6)
-static int addr6_resolve(struct sockaddr_in6 *src_in,
-			 struct sockaddr_in6 *dst_in,
-			 struct rdma_dev_addr *addr)
-{
-	struct flowi fl;
-	struct neighbour *neigh;
-	struct dst_entry *dst;
-	int ret;
-
-	memset(&fl, 0, sizeof fl);
-	ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
-	ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
-	fl.oif = addr->bound_dev_if;
-
-	dst = ip6_route_output(&init_net, NULL, &fl);
-	if ((ret = dst->error))
-		goto put;
-
-	if (ipv6_addr_any(&fl.fl6_src)) {
-		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
-					 &fl.fl6_dst, 0, &fl.fl6_src);
-		if (ret)
-			goto put;
-
-		src_in->sin6_family = AF_INET6;
-		ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
-	}
-
-	if (dst->dev->flags & IFF_LOOPBACK) {
-		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
-		if (!ret)
-			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-		goto put;
-	}
-
-	/* If the device does ARP internally, return 'done' */
-	if (dst->dev->flags & IFF_NOARP) {
-		ret = rdma_copy_addr(addr, dst->dev, NULL);
-		goto put;
-	}
-	
-	neigh = dst->neighbour;
-	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
-		neigh_event_send(dst->neighbour, NULL);
-		ret = -ENODATA;
-		goto put;
-	}
-
-	ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
-put:
-	dst_release(dst);
-	return ret;
-}
-#else
-static int addr6_resolve(struct sockaddr_in6 *src_in,
-			 struct sockaddr_in6 *dst_in,
-			 struct rdma_dev_addr *addr)
-{
-	return -EADDRNOTAVAIL;
-}
-#endif
-
-#else
-#include <netinet/if_ether.h>
-
 static int addr_resolve(struct sockaddr *src_in,
 			struct sockaddr *dst_in,
 			struct rdma_dev_addr *addr)
@@ -354,7 +209,6 @@ static int addr_resolve(struct sockaddr 
 	int bcast;
 	int is_gw = 0;
 	int error = 0;
-
 	/*
 	 * Determine whether the address is unicast, multicast, or broadcast
 	 * and whether the source interface is valid.
@@ -382,8 +236,7 @@ static int addr_resolve(struct sockaddr 
 			port = sin->sin_port;
 			sin->sin_port = 0;
 			memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
-		} else
-			src_in = NULL; 
+		}
 		break;
 #endif
 #ifdef INET6
@@ -406,7 +259,7 @@ static int addr_resolve(struct sockaddr 
 	 * If we have a source address to use look it up first and verify
 	 * that it is a local interface.
 	 */
-	if (src_in) {
+	if (sin->sin_addr.s_addr != INADDR_ANY) {
 		ifa = ifa_ifwithaddr(src_in);
 		if (sin)
 			sin->sin_port = port;
@@ -436,15 +289,20 @@ static int addr_resolve(struct sockaddr 
 	 * correct interface pointer and unlock the route.
 	 */
 	if (multi || bcast) {
-		if (ifp == NULL)
+		if (ifp == NULL) {
 			ifp = rte->rt_ifp;
+			/* rt_ifa holds the route answer source address */
+			ifa = rte->rt_ifa;
+		}
 		RTFREE_LOCKED(rte);
 	} else if (ifp && ifp != rte->rt_ifp) {
 		RTFREE_LOCKED(rte);
 		return -ENETUNREACH;
 	} else {
-		if (ifp == NULL)
+		if (ifp == NULL) {
 			ifp = rte->rt_ifp;
+			ifa = rte->rt_ifa;
+		}
 		RT_UNLOCK(rte);
 	}
 mcast:
@@ -459,6 +317,8 @@ mcast:
 		error = rdma_copy_addr(addr, ifp,
 		    LLADDR((struct sockaddr_dl *)llsa));
 		free(llsa, M_IFMADDR);
+		if (error == 0)
+			memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
 		return error;
 	}
 	/*
@@ -472,7 +332,7 @@ mcast:
 #endif
 #ifdef INET6
 	case AF_INET6:
-		error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst,NULL);
+		error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, NULL);
 		break;
 #endif
 	default:
@@ -480,15 +340,15 @@ mcast:
 		error = -EINVAL;
 	}
 	RTFREE(rte);
-	if (error == 0)
+	if (error == 0) {
+		memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
 		return rdma_copy_addr(addr, ifp, edst);
+	}
 	if (error == EWOULDBLOCK)
 		return -ENODATA;
 	return -error;
 }
 
-#endif
-
 static void process_req(struct work_struct *work)
 {
 	struct addr_req *req, *temp_req;
@@ -602,20 +462,94 @@ void rdma_addr_cancel(struct rdma_dev_ad
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
+struct resolve_cb_context {
+	struct rdma_dev_addr *addr;
+	struct completion comp;
+};
+
+static void resolve_cb(int status, struct sockaddr *src_addr,
+	     struct rdma_dev_addr *addr, void *context)
+{
+	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
+				rdma_dev_addr));
+	complete(&((struct resolve_cb_context *)context)->comp);
+}
+
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
+			       u16 *vlan_id)
+{
+	int ret = 0;
+	struct rdma_dev_addr dev_addr;
+	struct resolve_cb_context ctx;
+	struct net_device *dev;
+
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} sgid_addr, dgid_addr;
+
+
+	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+	if (ret)
+		return ret;
+
+	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+	if (ret)
+		return ret;
+
+	memset(&dev_addr, 0, sizeof(dev_addr));
+
+	ctx.addr = &dev_addr;
+	init_completion(&ctx.comp);
+	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
+			&dev_addr, 1000, resolve_cb, &ctx);
+	if (ret)
+		return ret;
+
+	wait_for_completion(&ctx.comp);
+
+	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
+	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
+	if (!dev)
+		return -ENODEV;
+	if (vlan_id)
+		*vlan_id = rdma_vlan_dev_vlan_id(dev);
+	dev_put(dev);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
+{
+	int ret = 0;
+	struct rdma_dev_addr dev_addr;
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} gid_addr;
+
+	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+
+	if (ret)
+		return ret;
+	memset(&dev_addr, 0, sizeof(dev_addr));
+	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
+	if (ret)
+		return ret;
+
+	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
+
 static int netevent_callback(struct notifier_block *self, unsigned long event,
 	void *ctx)
 {
 	if (event == NETEVENT_NEIGH_UPDATE) {
-#ifdef __linux__
-		struct neighbour *neigh = ctx;
-
-		if (neigh->nud_state & NUD_VALID) {
 			set_timeout(jiffies);
 		}
-#else
-		set_timeout(jiffies);
-#endif
-	}
 	return 0;
 }
 
@@ -631,11 +565,13 @@ static int __init addr_init(void)
 		return -ENOMEM;
 
 	register_netevent_notifier(&nb);
+	rdma_addr_register_client(&self);
 	return 0;
 }
 
 static void __exit addr_cleanup(void)
 {
+	rdma_addr_unregister_client(&self);
 	unregister_netevent_notifier(&nb);
 	destroy_workqueue(addr_wq);
 }

Modified: head/sys/ofed/drivers/infiniband/core/cache.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/cache.c	Tue Feb 17 08:38:02 2015	(r278885)
+++ head/sys/ofed/drivers/infiniband/core/cache.c	Tue Feb 17 08:40:27 2015	(r278886)
@@ -76,19 +76,21 @@ int ib_get_cached_gid(struct ib_device *
 {
 	struct ib_gid_cache *cache;
 	unsigned long flags;
-	int ret = 0;
+	int ret = -EINVAL;
 
 	if (port_num < start_port(device) || port_num > end_port(device))
 		return -EINVAL;
 
 	read_lock_irqsave(&device->cache.lock, flags);
 
-	cache = device->cache.gid_cache[port_num - start_port(device)];
+	if (device->cache.gid_cache) {
+		cache = device->cache.gid_cache[port_num - start_port(device)];
 
-	if (index < 0 || index >= cache->table_len)
-		ret = -EINVAL;
-	else
-		*gid = cache->table[index];
+		if (cache && index >= 0 && index < cache->table_len) {
+			*gid = cache->table[index];
+			ret = 0;
+		}
+	}
 
 	read_unlock_irqrestore(&device->cache.lock, flags);
 
@@ -111,22 +113,24 @@ int ib_find_cached_gid(struct ib_device 
 		*index = -1;
 
 	read_lock_irqsave(&device->cache.lock, flags);
-
+	if (!device->cache.gid_cache)
+		goto out;
 	for (p = 0; p <= end_port(device) - start_port(device); ++p) {
 		cache = device->cache.gid_cache[p];
+		if (!cache)
+			continue;
 		for (i = 0; i < cache->table_len; ++i) {
 			if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
 				*port_num = p + start_port(device);
 				if (index)
 					*index = i;
 				ret = 0;
-				goto found;
+				goto out;
 			}
 		}
 	}
-found:
+out:
 	read_unlock_irqrestore(&device->cache.lock, flags);
-
 	return ret;
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
@@ -138,19 +142,21 @@ int ib_get_cached_pkey(struct ib_device 
 {
 	struct ib_pkey_cache *cache;
 	unsigned long flags;
-	int ret = 0;
+	int ret = -EINVAL;
 
 	if (port_num < start_port(device) || port_num > end_port(device))
 		return -EINVAL;
 
 	read_lock_irqsave(&device->cache.lock, flags);
 
-	cache = device->cache.pkey_cache[port_num - start_port(device)];
+	if (device->cache.pkey_cache) {
+		cache = device->cache.pkey_cache[port_num - start_port(device)];
 
-	if (index < 0 || index >= cache->table_len)
-		ret = -EINVAL;
-	else
-		*pkey = cache->table[index];
+		if (cache && index >= 0 && index < cache->table_len) {
+			*pkey = cache->table[index];
+			ret = 0;
+		}
+	}
 
 	read_unlock_irqrestore(&device->cache.lock, flags);
 
@@ -167,41 +173,93 @@ int ib_find_cached_pkey(struct ib_device
 	unsigned long flags;
 	int i;
 	int ret = -ENOENT;
+	int partial_ix = -1;
 
 	if (port_num < start_port(device) || port_num > end_port(device))
 		return -EINVAL;
 
+	*index = -1;
+
 	read_lock_irqsave(&device->cache.lock, flags);
 
+	if (!device->cache.pkey_cache)
+		goto out;
+
 	cache = device->cache.pkey_cache[port_num - start_port(device)];
+	if (!cache)
+		goto out;
+
+	for (i = 0; i < cache->table_len; ++i)
+		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+			if (cache->table[i] & 0x8000) {
+				*index = i;
+				ret = 0;
+				break;
+			} else
+				partial_ix = i;
+		}
+
+	if (ret && partial_ix >= 0) {
+		*index = partial_ix;
+		ret = 0;
+	}
+out:
+	read_unlock_irqrestore(&device->cache.lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_pkey);
+
+int ib_find_exact_cached_pkey(struct ib_device *device,
+			      u8                port_num,
+			      u16               pkey,
+			      u16              *index)
+{
+	struct ib_pkey_cache *cache;
+	unsigned long flags;
+	int i;
+	int ret = -ENOENT;
+
+	if (port_num < start_port(device) || port_num > end_port(device))
+		return -EINVAL;
 
 	*index = -1;
 
+	read_lock_irqsave(&device->cache.lock, flags);
+
+	if (!device->cache.pkey_cache)
+		goto out;
+
+	cache = device->cache.pkey_cache[port_num - start_port(device)];
+	if (!cache)
+		goto out;
+
 	for (i = 0; i < cache->table_len; ++i)
-		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+		if (cache->table[i] == pkey) {
 			*index = i;
 			ret = 0;
 			break;
 		}
-
+out:
 	read_unlock_irqrestore(&device->cache.lock, flags);
-
 	return ret;
 }
-EXPORT_SYMBOL(ib_find_cached_pkey);
+EXPORT_SYMBOL(ib_find_exact_cached_pkey);
 
 int ib_get_cached_lmc(struct ib_device *device,
 		      u8                port_num,
 		      u8                *lmc)
 {
 	unsigned long flags;
-	int ret = 0;
+	int ret = -EINVAL;
 
 	if (port_num < start_port(device) || port_num > end_port(device))
 		return -EINVAL;
 
 	read_lock_irqsave(&device->cache.lock, flags);
-	*lmc = device->cache.lmc_cache[port_num - start_port(device)];
+	if (device->cache.lmc_cache) {
+		*lmc = device->cache.lmc_cache[port_num - start_port(device)];
+		ret = 0;
+	}
 	read_unlock_irqrestore(&device->cache.lock, flags);
 
 	return ret;
@@ -217,6 +275,10 @@ static void ib_cache_update(struct ib_de
 	int                        i;
 	int                        ret;
 
+	if (!(device->cache.pkey_cache && device->cache.gid_cache &&
+	      device->cache.lmc_cache))
+		return;
+
 	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
 	if (!tprops)
 		return;
@@ -309,7 +371,7 @@ static void ib_cache_event(struct ib_eve
 			INIT_WORK(&work->work, ib_cache_task);
 			work->device   = event->device;
 			work->port_num = event->element.port_num;
-			schedule_work(&work->work);
+			queue_work(ib_wq, &work->work);
 		}
 	}
 }
@@ -362,14 +424,21 @@ err:
 	kfree(device->cache.pkey_cache);
 	kfree(device->cache.gid_cache);
 	kfree(device->cache.lmc_cache);
+	device->cache.pkey_cache = NULL;
+	device->cache.gid_cache = NULL;
+	device->cache.lmc_cache = NULL;
 }
 
 static void ib_cache_cleanup_one(struct ib_device *device)
 {
 	int p;
 
+	if (!(device->cache.pkey_cache && device->cache.gid_cache &&
+	      device->cache.lmc_cache))
+		return;
+
 	ib_unregister_event_handler(&device->cache.event_handler);
-	flush_scheduled_work();
+	flush_workqueue(ib_wq);
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list