svn commit: r297879 - head/sys/contrib/rdma/krping

Navdeep Parhar np at FreeBSD.org
Tue Apr 12 21:34:05 UTC 2016


Author: np
Date: Tue Apr 12 21:34:04 2016
New Revision: 297879
URL: https://svnweb.freebsd.org/changeset/base/297879

Log:
  Add fastreg support to krping (ported from upstream).
  
  Submitted by:	Krishnamraju Eraparaju @ Chelsio
  Sponsored by:	Chelsio Communications
  Differential Revision:	https://reviews.freebsd.org/D5777

Modified:
  head/sys/contrib/rdma/krping/krping.c

Modified: head/sys/contrib/rdma/krping/krping.c
==============================================================================
--- head/sys/contrib/rdma/krping/krping.c	Tue Apr 12 21:29:06 2016	(r297878)
+++ head/sys/contrib/rdma/krping/krping.c	Tue Apr 12 21:34:04 2016	(r297879)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 extern int krping_debug;
 #define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x)
 #define PRINTF(cb, x...) krping_printf((cb)->cookie, x)
+#define BIND_INFO 1
 
 MODULE_AUTHOR("Steve Wise");
 MODULE_DESCRIPTION("RDMA ping client/server");
@@ -99,7 +100,7 @@ static const struct krping_option krping
  	{"poll", OPT_NOPARAM, 'P'},
  	{"local_dma_lkey", OPT_NOPARAM, 'Z'},
  	{"read_inv", OPT_NOPARAM, 'R'},
- 	{"fr", OPT_NOPARAM, 'f'},
+ 	{"fr", OPT_INT, 'f'},
 	{NULL, 0, 0}
 };
 
@@ -232,6 +233,7 @@ struct krping_cb {
 	int txdepth;			/* SQ depth */
 	int local_dma_lkey;		/* use 0 for lkey */
 	int frtest;			/* fastreg test */
+	int testnum;
 
 	/* CM stuff */
 	struct rdma_cm_id *cm_id;	/* connection on client side,*/
@@ -365,11 +367,7 @@ static void krping_cq_event_handler(stru
 		PRINTF(cb, "cq completion in ERROR state\n");
 		return;
 	}
-	if (cb->frtest) {
-		PRINTF(cb, "cq completion event in frtest!\n");
-		return;
-	}
-	if (!cb->wlat && !cb->rlat && !cb->bw)
+	if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest)
 		ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
 	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
 		if (wc.status) {
@@ -411,7 +409,7 @@ static void krping_cq_event_handler(stru
 			DEBUG_LOG(cb, "recv completion\n");
 			cb->stats.recv_bytes += sizeof(cb->recv_buf);
 			cb->stats.recv_msgs++;
-			if (cb->wlat || cb->rlat || cb->bw)
+			if (cb->wlat || cb->rlat || cb->bw || cb->frtest)
 				ret = server_recv(cb, &wc);
 			else
 				ret = cb->server ? server_recv(cb, &wc) :
@@ -464,7 +462,7 @@ static int krping_accept(struct krping_c
 		return ret;
 	}
 
-	if (!cb->wlat && !cb->rlat && !cb->bw) {
+	if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
 		wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
 		if (cb->state == ERROR) {
 			PRINTF(cb, "wait for CONNECTED state %d\n", 
@@ -502,7 +500,7 @@ static void krping_setup_wr(struct krpin
 	cb->sq_wr.sg_list = &cb->send_sgl;
 	cb->sq_wr.num_sge = 1;
 
-	if (cb->server || cb->wlat || cb->rlat || cb->bw) {
+	if (cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
 		cb->rdma_sgl.addr = cb->rdma_dma_addr;
 		if (cb->mem == MR)
 			cb->rdma_sgl.lkey = cb->rdma_mr->lkey;
@@ -531,7 +529,11 @@ static void krping_setup_wr(struct krpin
 	case MW:
 		cb->bind_attr.wr_id = 0xabbaabba;
 		cb->bind_attr.send_flags = 0; /* unsignaled */
+#ifdef BIND_INFO
 		cb->bind_attr.bind_info.length = cb->size;
+#else
+		cb->bind_attr.length = cb->size;
+#endif
 		break;
 	default:
 		break;
@@ -646,7 +648,7 @@ static int krping_setup_buffers(struct k
 			buf.size = cb->size;
 			iovbase = cb->rdma_dma_addr;
 			cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
-			    		     IB_ACCESS_LOCAL_WRITE|
+						IB_ACCESS_LOCAL_WRITE|
 					     IB_ACCESS_REMOTE_READ| 
 					     IB_ACCESS_REMOTE_WRITE, 
 					     &iovbase);
@@ -665,7 +667,7 @@ static int krping_setup_buffers(struct k
 		}
 	}
 
-	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+	if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
 
 		cb->start_buf = kmalloc(cb->size, GFP_KERNEL);
 		if (!cb->start_buf) {
@@ -682,9 +684,9 @@ static int krping_setup_buffers(struct k
 		if (cb->mem == MR || cb->mem == MW) {
 			unsigned flags = IB_ACCESS_REMOTE_READ;
 
-			if (cb->wlat || cb->rlat || cb->bw) {
+			if (cb->wlat || cb->rlat || cb->bw || cb->frtest) {
 				flags |= IB_ACCESS_LOCAL_WRITE |
-				    IB_ACCESS_REMOTE_WRITE;
+					IB_ACCESS_REMOTE_WRITE;
 			}
 
 			buf.addr = cb->start_dma_addr;
@@ -907,15 +909,33 @@ static u32 krping_rdma_rkey(struct krpin
 		 * Update the MW with new buf info.
 		 */
 		if (buf == (u64)cb->start_dma_addr) {
+#ifdef BIND_INFO
 			cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
 			cb->bind_attr.bind_info.mr = cb->start_mr;
+#else
+			cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
+			cb->bind_attr.mr = cb->start_mr;
+#endif
 		} else {
+#ifdef BIND_INFO
 			cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
 			cb->bind_attr.bind_info.mr = cb->rdma_mr;
+#else
+			cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+			cb->bind_attr.mr = cb->rdma_mr;
+#endif
 		}
+#ifdef BIND_INFO
 		cb->bind_attr.bind_info.addr = buf;
+#else
+		cb->bind_attr.addr = buf;
+#endif
 		DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
+#ifdef BIND_INFO
 			cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
+#else
+			cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+#endif
 		ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
 		if (ret) {
 			PRINTF(cb, "bind mw error %d\n", ret);
@@ -950,7 +970,7 @@ static void krping_format_send(struct kr
 	 * advertising the rdma buffer.  Server side
 	 * sends have no data.
 	 */
-	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+	if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
 		rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
 		info->buf = htonll(buf);
 		info->rkey = htonl(rkey);
@@ -980,7 +1000,6 @@ static void krping_test_server(struct kr
 		cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
 		cb->rdma_sq_wr.sg_list->length = cb->remote_len;
 		cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1);
-		cb->rdma_sq_wr.next = NULL;
 
 		/* Issue RDMA Read. */
 		if (cb->read_inv)
@@ -1484,7 +1503,6 @@ static void krping_rlat_test_server(stru
 		PRINTF(cb, "send completiong error %d\n", wc.status);
 		return;
 	}
-
 	wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
@@ -1557,9 +1575,10 @@ static void krping_bw_test_server(struct
 	wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
-static int fastreg_supported(struct krping_cb *cb)
+static int fastreg_supported(struct krping_cb *cb, int server)
 {
-	struct ib_device *dev = cb->child_cm_id->device;
+	struct ib_device *dev = server?cb->child_cm_id->device:
+					cb->cm_id->device;
 	struct ib_device_attr attr;
 	int ret;
 
@@ -1610,158 +1629,259 @@ static int krping_bind_server(struct krp
 		return -1;
 	}
 
-	if (cb->mem == FASTREG && !fastreg_supported(cb))
+	if (cb->mem == FASTREG && !fastreg_supported(cb, 1))
 		return -EINVAL;
 
 	return 0;
 }
 
-static void krping_run_server(struct krping_cb *cb)
+/*
+ * sq-depth worth of fastreg + 0B read-inv pairs, reposting them as the reads
+ * complete.
+ * NOTE: every 9 seconds we pause for 1 tick to keep the kernel happy.
+ */
+static void krping_fr_test5(struct krping_cb *cb)
 {
-	struct ib_recv_wr *bad_wr;
+	struct ib_fast_reg_page_list **pl;
+	struct ib_send_wr *fr, *read, *bad;
+	struct ib_wc wc;
+	struct ib_sge *sgl;
+	u8 key = 0;
+	struct ib_mr **mr;
+	u8 **buf;
+	dma_addr_t *dma_addr;
+	int i;
 	int ret;
+	int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	time_t start;
+	int count = 0;
+	int scnt;
+	int depth = cb->txdepth >> 1;
 
-	ret = krping_bind_server(cb);
-	if (ret)
+	if (!depth) {
+		PRINTF(cb, "txdepth must be > 1 for this test!\n");
 		return;
-
-	ret = krping_setup_qp(cb, cb->child_cm_id);
-	if (ret) {
-		PRINTF(cb, "setup_qp failed: %d\n", ret);
-		goto err0;
 	}
 
-	ret = krping_setup_buffers(cb);
-	if (ret) {
-		PRINTF(cb, "krping_setup_buffers failed: %d\n", ret);
+	pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+	mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+	fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+	sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+	read = kzalloc(sizeof *read * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s read %p size %lu\n", __func__, read, sizeof *read * depth);
+	buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+	dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+	if (!pl || !mr || !fr || !read || !sgl || !buf || !dma_addr) {
+		PRINTF(cb, "kzalloc failed\n");
 		goto err1;
 	}
 
-	ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
-	if (ret) {
-		PRINTF(cb, "ib_post_recv failed: %d\n", ret);
-		goto err2;
-	}
-
-	ret = krping_accept(cb);
-	if (ret) {
-		PRINTF(cb, "connect error %d\n", ret);
-		goto err2;
-	}
-
-	if (cb->wlat)
-		krping_wlat_test_server(cb);
-	else if (cb->rlat)
-		krping_rlat_test_server(cb);
-	else if (cb->bw)
-		krping_bw_test_server(cb);
-	else
-		krping_test_server(cb);
-	rdma_disconnect(cb->child_cm_id);
-err2:
-	krping_free_buffers(cb);
-err1:
-	krping_free_qp(cb);
-err0:
-	rdma_destroy_id(cb->child_cm_id);
-}
-
-static void krping_test_client(struct krping_cb *cb)
-{
-	int ping, start, cc, i, ret;
-	struct ib_send_wr *bad_wr;
-	unsigned char c;
-
-	start = 65;
-	for (ping = 0; !cb->count || ping < cb->count; ping++) {
-		cb->state = RDMA_READ_ADV;
-
-		/* Put some ascii text in the buffer. */
-		cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
-		for (i = cc, c = start; i < cb->size; i++) {
-			cb->start_buf[i] = c;
-			c++;
-			if (c > 122)
-				c = 65;
+	for (scnt = 0; scnt < depth; scnt++) {
+		pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+		if (IS_ERR(pl[scnt])) {
+			PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+			       PTR_ERR(pl[scnt]));
+			goto err2;
 		}
-		start++;
-		if (start > 122)
-			start = 65;
-		cb->start_buf[cb->size - 1] = 0;
+		DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
 
-		krping_format_send(cb, cb->start_dma_addr);
-		if (cb->state == ERROR) {
-			PRINTF(cb, "krping_format_send failed\n");
-			break;
-		}
-		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
-		if (ret) {
-			PRINTF(cb, "post send error %d\n", ret);
-			break;
+		mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+		if (IS_ERR(mr[scnt])) {
+			PRINTF(cb, "alloc_fr failed %ld\n",
+			       PTR_ERR(mr[scnt]));
+			goto err2;
 		}
+		DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+		ib_update_fast_reg_key(mr[scnt], ++key);
 
-		/* Wait for server to ACK */
-		wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
-		if (cb->state != RDMA_WRITE_ADV) {
-			PRINTF(cb, 
-			       "wait for RDMA_WRITE_ADV state %d\n",
-			       cb->state);
-			break;
+		buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+		if (!buf[scnt]) {
+			PRINTF(cb, "kmalloc failed\n");
+			ret = -ENOMEM;
+			goto err2;
 		}
-
-		krping_format_send(cb, cb->rdma_dma_addr);
-		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+		DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+		dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+						   buf[scnt], cb->size,
+						   DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(cb->pd->device->dma_device,
+		    dma_addr[scnt])) {
+			PRINTF(cb, "dma_map failed\n");
+			ret = -ENOMEM;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+		for (i=0; i<plen; i++) {
+			pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+			DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+				  __func__, scnt, i,  pl[scnt]->page_list[i]);
+		}
+
+		sgl[scnt].lkey = mr[scnt]->rkey;
+		sgl[scnt].length = cb->size;
+		sgl[scnt].addr = (u64)buf[scnt];
+		DEBUG_LOG(cb, "%s sgl[%u].lkey 0x%x length %u addr 0x%llx\n",
+			  __func__, scnt,  sgl[scnt].lkey, sgl[scnt].length,
+			  sgl[scnt].addr);
+
+		fr[scnt].opcode = IB_WR_FAST_REG_MR;
+		fr[scnt].wr_id = scnt;
+		fr[scnt].send_flags = 0;
+		fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+		fr[scnt].wr.fast_reg.length = cb->size;
+		fr[scnt].wr.fast_reg.page_list = pl[scnt];
+		fr[scnt].wr.fast_reg.page_list_len = plen;
+		fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+		fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+		fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+		fr[scnt].next = &read[scnt];
+		read[scnt].opcode = IB_WR_RDMA_READ_WITH_INV;
+		read[scnt].wr_id = scnt;
+		read[scnt].send_flags = IB_SEND_SIGNALED;
+		read[scnt].wr.rdma.rkey = cb->remote_rkey;
+		read[scnt].wr.rdma.remote_addr = cb->remote_addr;
+		read[scnt].num_sge = 1;
+		read[scnt].sg_list = &sgl[scnt];
+		ret = ib_post_send(cb->qp, &fr[scnt], &bad);
 		if (ret) {
-			PRINTF(cb, "post send error %d\n", ret);
-			break;
+			PRINTF(cb, "ib_post_send failed %d\n", ret);
+			goto err2;
 		}
+	}
 
-		/* Wait for the server to say the RDMA Write is complete. */
-		wait_event_interruptible(cb->sem, 
-					 cb->state >= RDMA_WRITE_COMPLETE);
-		if (cb->state != RDMA_WRITE_COMPLETE) {
-			PRINTF(cb, 
-			       "wait for RDMA_WRITE_COMPLETE state %d\n",
-			       cb->state);
+	start = time_uptime;
+	DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+	while (!cb->count || cb->server || count < cb->count) {
+		if ((time_uptime - start) >= 9) {
+			DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+				  count);
+			wait_event_interruptible_timeout(cb->sem,
+							 cb->state == ERROR,
+							 1);
+			if (cb->state == ERROR)
+				break;
+			start = time_uptime;
+		}
+		do {
+			ret = ib_poll_cq(cb->cq, 1, &wc);
+			if (ret < 0) {
+				PRINTF(cb, "ib_poll_cq failed %d\n",
+				       ret);
+				goto err2;
+			}
+			if (ret == 1) {
+				if (wc.status) {
+					PRINTF(cb,
+					       "completion error %u wr_id %lld "
+					       "opcode %d\n", wc.status,
+					       wc.wr_id, wc.opcode);
+					goto err2;
+				}
+				count++;
+				if (count == cb->count)
+					break;
+				ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+				fr[wc.wr_id].wr.fast_reg.rkey =
+					mr[wc.wr_id]->rkey;
+				sgl[wc.wr_id].lkey = mr[wc.wr_id]->rkey;
+				ret = ib_post_send(cb->qp, &fr[wc.wr_id], &bad);
+				if (ret) {
+					PRINTF(cb,
+					       "ib_post_send failed %d\n", ret);
+					goto err2;
+				}
+			} else if (krping_sigpending()) {
+				PRINTF(cb, "signal!\n");
+				goto err2;
+			}
+		} while (ret == 1);
+	}
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
 			break;
 		}
-
-		if (cb->validate)
-			if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
-				PRINTF(cb, "data mismatch!\n");
-				break;
+		if (ret == 1) {
+			if (wc.status) {
+				PRINTF(cb, "completion error %u "
+				       "opcode %u\n", wc.status, wc.opcode);
 			}
+		}
+	} while (ret == 1);
 
-		if (cb->verbose) {
-			if (strlen(cb->rdma_buf) > 128) {
-				char msgbuf[128];
-
-				strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
-				PRINTF(cb, "ping data stripped: %s\n",
-				       msgbuf);
-			} else
-				PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+	DEBUG_LOG(cb, "destroying fr mrs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (mr[scnt]) {
+			ib_dereg_mr(mr[scnt]);
+			DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (buf[scnt]) {
+			dma_unmap_single(cb->pd->device->dma_device,
+					 dma_addr[scnt], cb->size,
+					 DMA_BIDIRECTIONAL);
+			kfree(buf[scnt]);
+			DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "destroying fr page lists!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (pl[scnt]) {
+			DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+			ib_free_fast_reg_page_list(pl[scnt]);
 		}
-#ifdef SLOW_KRPING
-		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
-#endif
 	}
+err1:
+	if (pl)
+		kfree(pl);
+	if (mr)
+		kfree(mr);
+	if (fr)
+		kfree(fr);
+	if (read)
+		kfree(read);
+	if (sgl)
+		kfree(sgl);
+	if (buf)
+		kfree(buf);
+	if (dma_addr)
+		kfree(dma_addr);
+}
+static void krping_fr_test_server(struct krping_cb *cb)
+{
+	DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+	wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
-static void krping_rlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_server(struct krping_cb *cb)
 {
 	struct ib_send_wr *bad_wr;
 	struct ib_wc wc;
 	int ret;
 
-	cb->state = RDMA_READ_ADV;
+	/* Spin waiting for client's Start STAG/TO/Len */
+	while (cb->state < RDMA_READ_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+		  cb->remote_rkey, cb->remote_addr);
 
 	/* Send STAG/TO/Len to client */
 	krping_format_send(cb, cb->start_dma_addr);
-	if (cb->state == ERROR) {
-		PRINTF(cb, "krping_format_send failed\n");
-		return;
-	}
 	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 	if (ret) {
 		PRINTF(cb, "post send error %d\n", ret);
@@ -1775,84 +1895,31 @@ static void krping_rlat_test_client(stru
 		return;
 	}
 	if (wc.status) {
-		PRINTF(cb, "send completion error %d\n", wc.status);
+		PRINTF(cb, "send completiong error %d\n", wc.status);
 		return;
 	}
 
-	/* Spin waiting for server's Start STAG/TO/Len */
-	while (cb->state < RDMA_WRITE_ADV) {
-		krping_cq_event_handler(cb->cq, cb);
-	}
-
-#if 0
-{
-	int i;
-	struct timeval start, stop;
-	time_t sec;
-	suseconds_t usec;
-	unsigned long long elapsed;
-	struct ib_wc wc;
-	struct ib_send_wr *bad_wr;
-	int ne;
-	
-	cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
-	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
-	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
-	cb->rdma_sq_wr.sg_list->length = 0;
-	cb->rdma_sq_wr.num_sge = 0;
-
-	microtime(&start);
-	for (i=0; i < 100000; i++) {
-		if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
-			PRINTF(cb, "Couldn't post send\n");
-			return;
-		}
-		do {
-			ne = ib_poll_cq(cb->cq, 1, &wc);
-		} while (ne == 0);
-		if (ne < 0) {
-			PRINTF(cb, "poll CQ failed %d\n", ne);
-			return;
-		}
-		if (wc.status != IB_WC_SUCCESS) {
-			PRINTF(cb, "Completion wth error at %s:\n",
-				cb->server ? "server" : "client");
-			PRINTF(cb, "Failed status %d: wr_id %d\n",
-				wc.status, (int) wc.wr_id);
-			return;
-		}
-	}
-	microtime(&stop);
-	
-	if (stop.tv_usec < start.tv_usec) {
-		stop.tv_usec += 1000000;
-		stop.tv_sec  -= 1;
-	}
-	sec     = stop.tv_sec - start.tv_sec;
-	usec    = stop.tv_usec - start.tv_usec;
-	elapsed = sec * 1000000 + usec;
-	PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
-}
-#endif
-
-	rlat_test(cb);
+	if (cb->duplex)
+		krping_fr_test5(cb);
+	DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+	wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
-static void krping_wlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_client(struct krping_cb *cb)
 {
-	struct ib_send_wr *bad_wr;
+	struct ib_send_wr *bad;
 	struct ib_wc wc;
 	int ret;
 
 	cb->state = RDMA_READ_ADV;
 
-	/* Send STAG/TO/Len to client */
+	/* Send STAG/TO/Len to server */
 	krping_format_send(cb, cb->start_dma_addr);
 	if (cb->state == ERROR) {
 		PRINTF(cb, "krping_format_send failed\n");
 		return;
 	}
-	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
 	if (ret) {
 		PRINTF(cb, "post send error %d\n", ret);
 		return;
@@ -1873,15 +1940,619 @@ static void krping_wlat_test_client(stru
 	while (cb->state < RDMA_WRITE_ADV) {
 		krping_cq_event_handler(cb->cq, cb);
 	}
+	DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
 
-	wlat_test(cb);
+	return krping_fr_test5(cb);
 }
 
-static void krping_bw_test_client(struct krping_cb *cb)
+/*
+ * sq-depth worth of write + fastreg + inv, reposting them as the invs
+ * complete.
+ * NOTE: every 9 seconds we pause for 1 tick to keep the kernel happy.
+ * If a count is given, then the last IO will have a bogus lkey in the
+ * write work request.  This reproduces a fw bug where the connection
+ * will get stuck if a fastreg is processed while the ulptx is failing
+ * the bad write.
+ */
+static void krping_fr_test6(struct krping_cb *cb)
 {
-	struct ib_send_wr *bad_wr;
+	struct ib_fast_reg_page_list **pl;
+	struct ib_send_wr *fr, *write, *inv, *bad;
 	struct ib_wc wc;
-	int ret;
+	struct ib_sge *sgl;
+	u8 key = 0;
+	struct ib_mr **mr;
+	u8 **buf;
+	dma_addr_t *dma_addr;
+	int i;
+	int ret;
+	int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	unsigned long start;
+	int count = 0;
+	int scnt;
+	int depth = cb->txdepth  / 3;
+
+	if (!depth) {
+		PRINTF(cb, "txdepth must be > 3 for this test!\n");
+		return;
+	}
+
+	pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+
+	mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+
+	fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+
+	sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+
+	write = kzalloc(sizeof *write * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s read %p size %lu\n", __func__, write, sizeof *write * depth);
+
+	inv = kzalloc(sizeof *inv * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s inv %p size %lu\n", __func__, inv, sizeof *inv * depth);
+
+	buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+
+	dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+
+	if (!pl || !mr || !fr || !write || !sgl || !buf || !dma_addr) {
+		PRINTF(cb, "kzalloc failed\n");
+		goto err1;
+	}
+
+	for (scnt = 0; scnt < depth; scnt++) {
+		pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+		if (IS_ERR(pl[scnt])) {
+			PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+			       PTR_ERR(pl[scnt]));
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
+
+		mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+		if (IS_ERR(mr[scnt])) {
+			PRINTF(cb, "alloc_fr failed %ld\n",
+			       PTR_ERR(mr[scnt]));
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+		ib_update_fast_reg_key(mr[scnt], ++key);
+
+		buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+		if (!buf[scnt]) {
+			PRINTF(cb, "kmalloc failed\n");
+			ret = -ENOMEM;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+		dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+						   buf[scnt], cb->size,
+						   DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(cb->pd->device->dma_device,
+		    dma_addr[scnt])) {
+			PRINTF(cb, "dma_map failed\n");
+			ret = -ENOMEM;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+		for (i=0; i<plen; i++) {
+			pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+			DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+				  __func__, scnt, i,  pl[scnt]->page_list[i]);
+		}
+
+		write[scnt].opcode = IB_WR_RDMA_WRITE;
+		write[scnt].wr_id = scnt;
+		write[scnt].wr.rdma.rkey = cb->remote_rkey;
+		write[scnt].wr.rdma.remote_addr = cb->remote_addr;
+		write[scnt].num_sge = 1;
+		write[scnt].sg_list = &cb->rdma_sgl;
+		write[scnt].sg_list->length = cb->size;
+		write[scnt].next = &fr[scnt];
+
+		fr[scnt].opcode = IB_WR_FAST_REG_MR;
+		fr[scnt].wr_id = scnt;
+		fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+		fr[scnt].wr.fast_reg.length = cb->size;
+		fr[scnt].wr.fast_reg.page_list = pl[scnt];
+		fr[scnt].wr.fast_reg.page_list_len = plen;
+		fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+		fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+		fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+		fr[scnt].next = &inv[scnt];
+
+		inv[scnt].opcode = IB_WR_LOCAL_INV;
+		inv[scnt].send_flags = IB_SEND_SIGNALED;
+		inv[scnt].ex.invalidate_rkey = mr[scnt]->rkey;
+
+		ret = ib_post_send(cb->qp, &write[scnt], &bad);
+		if (ret) {
+			PRINTF(cb, "ib_post_send failed %d\n", ret);
+			goto err2;
+		}
+	}
+
+	start = time_uptime;
+	DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+	while (!cb->count || cb->server || count < cb->count) {
+		if ((time_uptime - start) >= 9) {
+			DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+				  count);
+			wait_event_interruptible_timeout(cb->sem,
+							 cb->state == ERROR,
+							 1);
+			if (cb->state == ERROR)
+				break;
+			start = time_uptime;
+		}
+		do {
+			ret = ib_poll_cq(cb->cq, 1, &wc);
+			if (ret < 0) {
+				PRINTF(cb, "ib_poll_cq failed %d\n",
+				       ret);
+				goto err2;
+			}
+			if (ret == 1) {
+				if (wc.status) {
+					PRINTF(cb,
+					       "completion error %u wr_id %lld "
+					       "opcode %d\n", wc.status,
+					       wc.wr_id, wc.opcode);
+					goto err2;
+				}
+				count++;
+				if (count == (cb->count -1))
+					cb->rdma_sgl.lkey = 0x00dead;
+				if (count == cb->count)
+					break;
+				ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+				fr[wc.wr_id].wr.fast_reg.rkey =
+					mr[wc.wr_id]->rkey;
+				inv[wc.wr_id].ex.invalidate_rkey =
+					mr[wc.wr_id]->rkey;
+				ret = ib_post_send(cb->qp, &write[wc.wr_id], &bad);
+				if (ret) {
+					PRINTF(cb,
+					       "ib_post_send failed %d\n", ret);
+					goto err2;
+				}
+			} else if (krping_sigpending()){
+				PRINTF(cb, "signal!\n");
+				goto err2;
+			}
+		} while (ret == 1);
+	}
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			if (wc.status) {
+				PRINTF(cb, "completion error %u "
+				       "opcode %u\n", wc.status, wc.opcode);
+			}
+		}
+	} while (ret == 1);
+
+	DEBUG_LOG(cb, "destroying fr mrs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (mr[scnt]) {
+			ib_dereg_mr(mr[scnt]);
+			DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (buf[scnt]) {
+			dma_unmap_single(cb->pd->device->dma_device,
+					 dma_addr[scnt], cb->size,
+					 DMA_BIDIRECTIONAL);
+			kfree(buf[scnt]);
+			DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "destroying fr page lists!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (pl[scnt]) {
+			DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+			ib_free_fast_reg_page_list(pl[scnt]);
+		}
+	}
+err1:
+	if (pl)
+		kfree(pl);
+	if (mr)
+		kfree(mr);
+	if (fr)
+		kfree(fr);
+	if (write)
+		kfree(write);
+	if (inv)
+		kfree(inv);
+	if (sgl)
+		kfree(sgl);
+	if (buf)
+		kfree(buf);
+	if (dma_addr)
+		kfree(dma_addr);
+}
+
+static void krping_fr_test6_server(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	/* Spin waiting for client's Start STAG/TO/Len */
+	while (cb->state < RDMA_READ_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+		  cb->remote_rkey, cb->remote_addr);
+
+	/* Send STAG/TO/Len to client */
+	krping_format_send(cb, cb->start_dma_addr);
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/* Spin waiting for send completion */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completiong error %d\n", wc.status);
+		return;
+	}
+
+	if (cb->duplex)
+		krping_fr_test6(cb);
+	DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+	wait_event_interruptible(cb->sem, cb->state == ERROR);
+}
+
+static void krping_fr_test6_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/* Spin waiting for send completion */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
+
+	return krping_fr_test6(cb);
+}
+
+static void krping_run_server(struct krping_cb *cb)
+{
+	struct ib_recv_wr *bad_wr;
+	int ret;
+
+	ret = krping_bind_server(cb);
+	if (ret)
+		return;
+
+	ret = krping_setup_qp(cb, cb->child_cm_id);
+	if (ret) {
+		PRINTF(cb, "setup_qp failed: %d\n", ret);
+		goto err0;
+	}
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list