PERFORCE change 133966 for review
Kip Macy
kmacy at FreeBSD.org
Wed Jan 23 18:05:56 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=133966
Change 133966 by kmacy at kmacy:storage:toehead on 2008/01/24 02:04:55
first pass integration of ddp support code
Affected files ...
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#4 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#3 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 edit
Differences ...
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#4 (text+ko) ====
@@ -867,14 +867,6 @@
return (idx);
}
-void
-t3_release_ddp_resources(struct toepcb *toep)
-{
- /*
- * This is a no-op until we have DDP support
- */
-}
-
static inline void
free_atid(struct t3cdev *cdev, unsigned int tid)
{
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#3 (text+ko) ====
@@ -679,7 +679,7 @@
p->dgl_length = len;
p->dgl_offset = pg_off;
p->dgl_nelem = npages;
-#if 0
+#ifdef notyet
p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
PAGE_SIZE - pg_off,
PCI_DMA_FROMDEVICE) - pg_off;
@@ -708,6 +708,47 @@
return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
}
+
+static void
+unmap_ddp_gl(const struct ddp_gather_list *gl)
+{
+#ifdef notyet
+ int i;
+
+ if (!gl->nelem)
+ return;
+
+ pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
+ PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
+ for (i = 1; i < gl->nelem; ++i)
+ pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+
+#endif
+}
+
+static void
+ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
+{
+#ifdef notyet
+ int i;
+
+ for (i = 0; i < gl->nelem; ++i) {
+ if (dirty)
+ set_page_dirty_lock(gl->pages[i]);
+ put_page(gl->pages[i]);
+ }
+#endif
+}
+
+void
+t3_free_ddp_gl(struct ddp_gather_list *gl)
+{
+ unmap_ddp_gl(gl);
+ ddp_gl_free_pages(gl, 0);
+ free(gl, M_DEVBUF);
+}
+
/* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */
#define MAX_PPODS 64U
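
As a sanity check on pages2ppods() and MAX_PPODS: each page pod covers
PPOD_PAGES pages, so a 1MB buffer of 4KB pages needs 256/4 = 64 pods. A
minimal user-space sketch of the arithmetic (the PPOD_PAGES and
NUM_SENTINEL_PPODS values are assumptions taken from the usual cxgb
definitions):

	#include <stdio.h>

	/* Assumed values; the real ones live in the cxgb DDP headers. */
	#define PPOD_PAGES          4
	#define NUM_SENTINEL_PPODS  0

	static unsigned int
	pages2ppods(unsigned int pages)
	{
		/* Round up to whole pods, then add the sentinel pods. */
		return (pages + PPOD_PAGES - 1) / PPOD_PAGES +
		    NUM_SENTINEL_PPODS;
	}

	int
	main(void)
	{
		unsigned int pages = (1U << 20) >> 12; /* 1MB of 4KB pages */

		printf("%u\n", pages2ppods(pages));    /* prints 64 */
		return (0);
	}
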
@@ -746,6 +787,46 @@
return 0;
}
+
+/*
+ * Reposts the kernel DDP buffer after it has previously become full and
+ * been invalidated. We just need to reset the offset and adjust the DDP
+ * flags. Conveniently, we can set the flags and the offset with a single
+ * message. Note that this function does not set the buffer length; again
+ * conveniently, our kernel buffer is of fixed size. If the length needs
+ * to change, that must be done separately.
+ */
+static void
+t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate,
+ int activate)
+{
+ struct toepcb *toep = sototcpcb(so)->t_toe;
+ struct ddp_state *p = &toep->tp_ddp_state;
+
+ p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
+ p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
+ p->buf_state[bufidx].gl = p->kbuf[bufidx];
+ p->cur_buf = bufidx;
+ p->kbuf_idx = bufidx;
+ if (!bufidx)
+ t3_setup_ddpbufs(so, 0, 0, 0, 0,
+ V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
+ V_TF_DDP_BUF0_VALID(1),
+ V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
+ V_TF_DDP_BUF0_VALID(1) |
+ V_TF_DDP_ACTIVE_BUF(activate), modulate);
+ else
+ t3_setup_ddpbufs(so, 0, 0, 0, 0,
+ V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
+ V_TF_DDP_BUF1_VALID(1) |
+ V_TF_DDP_ACTIVE_BUF(activate),
+ V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
+ V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
+ modulate);
+}
+
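
The paired flag arguments in the t3_setup_ddpbufs() calls above follow
the usual TCB value/mask update idiom: the first word carries the new
bit values, the second selects which bits may change, which is how
DDP_OFF can be cleared and a buffer marked valid in a single message. A
toy model with made-up bit positions (the real S_TF_*/V_TF_* macros
live in the firmware headers):

	#include <stdio.h>
	#include <stdint.h>

	/* Hypothetical bit positions, for illustration only. */
	#define V_TF_DDP_OFF(x)        ((uint64_t)(x) << 0)
	#define V_TF_DDP_BUF0_VALID(x) ((uint64_t)(x) << 1)
	#define V_TF_DDP_ACTIVE_BUF(x) ((uint64_t)(x) << 2)

	/* Read-modify-write under a mask: only bits in 'mask' change. */
	static uint64_t
	apply_flags(uint64_t word, uint64_t val, uint64_t mask)
	{
		return (word & ~mask) | (val & mask);
	}

	int
	main(void)
	{
		uint64_t tcb = V_TF_DDP_OFF(1);        /* DDP currently off */
		uint64_t val = V_TF_DDP_BUF0_VALID(1); /* buffer 0 valid */
		uint64_t mask = V_TF_DDP_OFF(1) | V_TF_DDP_BUF0_VALID(1) |
		    V_TF_DDP_ACTIVE_BUF(1);            /* also clear DDP_OFF */

		/* 0x1 -> 0x2: DDP_OFF cleared, BUF0_VALID set. */
		printf("0x%llx\n",
		    (unsigned long long)apply_flags(tcb, val, mask));
		return (0);
	}
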
/*
* Starting offset for the user DDP buffer. A non-0 value ensures a DDP flush
* won't block indefinitely if there's nothing to place (which should be rare).
@@ -820,7 +901,7 @@
return err;
if (gl) {
if (p->ubuf)
- t3_free_ddp_gl(p->pdev, p->ubuf);
+ t3_free_ddp_gl(p->ubuf);
p->ubuf = gl;
t3_setup_ppods(so, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
gl->dgl_offset, 0);
@@ -847,7 +928,7 @@
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
- if (!p || !p->pdev)
+ if (!p)
return -1;
len = setup_iovec_ppods(so, iov, 0);
@@ -888,10 +969,251 @@
OVERLAY_MASK | flags, 1);
}
#ifdef T3_TRACE
- T3_TRACE5(TIDTB(sk),
+ T3_TRACE5(TIDTB(so),
"t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
" kbuf_idx %d",
p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
#endif
return 0;
}
+
+/*
+ * Returns whether a connection should enable DDP. This happens when all of
+ * the following conditions are met:
+ * - the connection's ULP mode is DDP
+ * - DDP is not already enabled
+ * - the last receive was above the DDP threshold
+ * - receive buffers are in user space
+ * - receive side isn't shutdown (handled by caller)
+ * - the connection's receive window is big enough so that sizable buffers
+ * can be posted without closing the window in the middle of DDP (checked
+ * when the connection is offloaded)
+ */
+#ifdef notyet
+static int
+so_should_ddp(const struct toepcb *toep, int last_recv_len)
+{
+ return toep->tp_ulp_mode == ULP_MODE_TCPDDP && !toep->tp_ddp_state.cur_buf &&
+ last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
+ toep->tp_tp->rcv_wnd >
+ (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) +
+ DDP_RSVD_WIN);
+}
+#endif
+
+/*
+ * Clean up DDP state that needs to survive until socket close time, such as the
+ * DDP buffers. The buffers are already unmapped at this point as unmapping
+ * needs the PCI device and a socket may close long after the device is removed.
+ */
+void
+t3_cleanup_ddp(struct socket *so)
+{
+ struct toepcb *toep = sototcpcb(so)->t_toe;
+ struct ddp_state *p = &toep->tp_ddp_state;
+ int idx;
+
+ if (!p)
+ return;
+
+ for (idx = 0; idx < NUM_DDP_KBUF; idx++)
+ if (p->kbuf[idx]) {
+ ddp_gl_free_pages(p->kbuf[idx], 0);
+ free(p->kbuf[idx], M_DEVBUF);
+ }
+
+ if (p->ubuf) {
+ ddp_gl_free_pages(p->ubuf, 0);
+ free(p->ubuf, M_DEVBUF);
+ }
+ toep->tp_ulp_mode = 0;
+}
+
+/*
+ * This is a companion to t3_cleanup_ddp() and releases the HW resources
+ * associated with a connection's DDP state, such as the page pods.
+ * It's called when HW is done with a connection. The rest of the state
+ * remains available until both HW and the app are done with the connection.
+ */
+void
+t3_release_ddp_resources(struct toepcb *toep)
+{
+ struct ddp_state *p = &toep->tp_ddp_state;
+ struct tom_data *d = TOM_DATA(toep->tp_toedev);
+ int idx;
+
+ for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
+ t3_free_ppods(d, p->kbuf_tag[idx],
+ p->kbuf_nppods[idx]);
+ unmap_ddp_gl(p->kbuf[idx]);
+ }
+
+ if (p->ubuf_nppods) {
+ t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
+ p->ubuf_nppods = 0;
+ }
+ if (p->ubuf)
+ unmap_ddp_gl(p->ubuf);
+}
+
+void
+t3_post_kbuf(struct socket *so, int modulate)
+{
+ struct toepcb *toep = sototcpcb(so)->t_toe;
+ struct ddp_state *p = &toep->tp_ddp_state;
+
+ t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
+ t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
+ t3_repost_kbuf(so, p->cur_buf, modulate, 1);
+
+#ifdef T3_TRACE
+ T3_TRACE1(TIDTB(so),
+ "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
+#endif
+}
+
+/*
+ * Prepare a socket for DDP. Must be called when the socket is known to be
+ * open.
+ */
+int
+t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall)
+{
+ int err = ENOMEM;
+ unsigned int nppods, kbuf_pages, idx = 0;
+ struct toepcb *toep = sototcpcb(so)->t_toe;
+ struct ddp_state *p = &toep->tp_ddp_state;
+ struct tom_data *d = TOM_DATA(toep->tp_toedev);
+
+ if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
+ return (EINVAL);
+
+ kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ nppods = pages2ppods(kbuf_pages);
+
+ p->kbuf_noinval = !!waitall;
+
+ p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
+ for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
+ p->kbuf[idx] =
+ malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
+ sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!p->kbuf[idx])
+ goto err;
+
+ p->kbuf_tag[idx] = t3_alloc_ppods(d, nppods);
+ if (p->kbuf_tag[idx] < 0)
+ goto err;
+
+ p->kbuf_nppods[idx] = nppods;
+ p->kbuf[idx]->dgl_length = kbuf_size;
+ p->kbuf[idx]->dgl_offset = 0;
+ p->kbuf[idx]->dgl_nelem = kbuf_pages;
+#ifdef notyet
+ int i;
+
+ p->kbuf[idx]->pages =
+ (struct page **)&p->kbuf[idx]->phys_addr[kbuf_pages];
+
+ for (i = 0; i < kbuf_pages; ++i) {
+ p->kbuf[idx]->pages[i] = alloc_page(sk->sk_allocation);
+ if (!p->kbuf[idx]->pages[i]) {
+ p->kbuf[idx]->nelem = i;
+ goto err;
+ }
+ }
+
+ for (i = 0; i < kbuf_pages; ++i)
+ p->kbuf[idx]->phys_addr[i] =
+ pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
+ 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+#endif
+ t3_setup_ppods(so, p->kbuf[idx], nppods, p->kbuf_tag[idx],
+ p->kbuf[idx]->dgl_length, 0, 0);
+ }
+ t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6);
+ t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length);
+ t3_repost_kbuf(so, 0, 0, 1);
+ t3_set_rcv_coalesce_enable(so,
+ TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce));
+
+#ifdef T3_TRACE
+ T3_TRACE4(TIDTB(so),
+ "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
+ kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
+#endif
+
+ return 0;
+
+err:
+ t3_release_ddp_resources(toep);
+ t3_cleanup_ddp(so);
+ return err;
+}
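
For concreteness, the sizing chain above applied to a hypothetical 64KB
kernel buffer, together with the <<6 shift the t3_set_ddp_tag() calls
apply to the pod tag (reading the low 6 bits as the page-pod color
field is an assumption based on the color parameter of
t3_setup_ppods()):

	#include <stdio.h>

	#define DDP_PAGE_SHIFT 12  /* assumed 4KB pages */
	#define DDP_PAGE_SIZE  (1U << DDP_PAGE_SHIFT)
	#define PPOD_PAGES     4   /* assumed, as in the earlier sketch */

	int
	main(void)
	{
		unsigned int kbuf_size = 64 * 1024;  /* hypothetical */
		unsigned int kbuf_pages =
		    (kbuf_size + DDP_PAGE_SIZE - 1) >> DDP_PAGE_SHIFT;
		unsigned int nppods =
		    (kbuf_pages + PPOD_PAGES - 1) / PPOD_PAGES;
		unsigned int tag = 3;                /* hypothetical tag */

		/* 16 pages, 4 pods; tag 3 -> 0xc0 after the shift. */
		printf("pages %u nppods %u ddp tag 0x%x\n",
		    kbuf_pages, nppods, tag << 6);
		return (0);
	}
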
+
+int
+t3_ddp_copy(const struct mbuf *m, int offset, struct iovec *to, int len)
+{
+#ifdef notyet
+ int err, page_no, page_off;
+ struct ddp_gather_list *gl = (struct ddp_gather_list *)skb->mac.raw;
+
+ if (!gl->pages) {
+ dump_stack();
+ BUG_ON(1);
+ }
+
+ offset += gl->offset + TCP_SKB_CB(skb)->when;
+ page_no = offset >> PAGE_SHIFT;
+ page_off = offset & ~PAGE_MASK;
+
+ while (len) {
+ int copy = min_t(int, len, PAGE_SIZE - page_off);
+
+ err = memcpy_toiovec(to, page_address(gl->pages[page_no]) +
+ page_off, copy);
+ if (err)
+ return -EFAULT;
+ page_no++;
+ page_off = 0;
+ len -= copy;
+ }
+#endif
+ return 0;
+}
+
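
The disabled body above leans on Linux's memcpy_toiovec(), which drains
an iovec array in place as it copies. A user-space model of that loop
(the helper name and advance semantics here are assumptions; an
in-kernel FreeBSD version would more likely go through uiomove()):

	#include <stdio.h>
	#include <string.h>
	#include <sys/uio.h>

	/*
	 * Copy len bytes from src into an iovec array, advancing the
	 * iovec entries in place. The caller guarantees the iovec has
	 * room for len bytes, as recvmsg()-style callers do.
	 */
	static void
	copy_to_iovec(struct iovec *iov, const char *src, size_t len)
	{
		while (len > 0) {
			size_t n = iov->iov_len < len ? iov->iov_len : len;

			if (n == 0) {		/* skip exhausted entries */
				iov++;
				continue;
			}
			memcpy(iov->iov_base, src, n);
			iov->iov_base = (char *)iov->iov_base + n;
			iov->iov_len -= n;
			src += n;
			len -= n;
		}
	}

	int
	main(void)
	{
		char a[4], b[8];
		struct iovec iov[2] = {
			{ .iov_base = a, .iov_len = sizeof(a) },
			{ .iov_base = b, .iov_len = sizeof(b) },
		};

		copy_to_iovec(iov, "hello, ddp!", 12);
		printf("%.4s%.8s\n", a, b);	/* "hello, ddp!" */
		return (0);
	}
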
+/*
+ * Allocate n page pods. Returns -1 on failure or the page pod tag.
+ */
+int
+t3_alloc_ppods(struct tom_data *td, unsigned int n)
+{
+ unsigned int i, j;
+
+ if (__predict_false(!td->ppod_map))
+ return (-1);
+
+ mtx_lock(&td->ppod_map_lock);
+ for (i = 0; i + n <= td->nppods; ) {
+ for (j = 0; j < n; ++j) /* scan ppod_map[i..i+n-1] */
+ if (td->ppod_map[i + j]) {
+ i = i + j + 1;
+ goto next;
+ }
+
+ memset(&td->ppod_map[i], 1, n); /* allocate range */
+ mtx_unlock(&td->ppod_map_lock);
+ return (i);
+next: ;
+ }
+ mtx_unlock(&td->ppod_map_lock);
+ return (-1);
+}
+
+void
+t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
+{
+ /* No need to take ppod_map_lock here */
+ memset(&td->ppod_map[tag], 0, n);
+}
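
t3_alloc_ppods() is a first-fit scan over a byte map, one byte per pod.
The same logic in a standalone harness (map size and test values are
invented for illustration; locking elided):

	#include <stdio.h>
	#include <string.h>

	#define NPPODS 16

	static unsigned char ppod_map[NPPODS];

	static int
	alloc_ppods(unsigned int n)
	{
		unsigned int i, j;

		for (i = 0; i + n <= NPPODS; ) {
			for (j = 0; j < n; ++j)	/* scan for a used pod */
				if (ppod_map[i + j]) {
					i = i + j + 1;
					goto next;
				}
			memset(&ppod_map[i], 1, n);	/* claim the run */
			return (i);
	next:		;
		}
		return (-1);			/* no free run of size n */
	}

	int
	main(void)
	{
		int t0 = alloc_ppods(4);	/* first fit: tag 0 */
		int t1 = alloc_ppods(4);	/* next free run: tag 4 */

		memset(&ppod_map[t0], 0, 4);	/* free the first run */

		/* The freed run is reused: prints "0 4 0". */
		printf("%d %d %d\n", t0, t1, alloc_ppods(2));
		return (0);
	}
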
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 (text+ko) ====
@@ -85,7 +85,6 @@
#define M_PPOD_PGSZ 0x3
#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
-struct pci_dev;
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <machine/bus.h>
@@ -105,7 +104,6 @@
};
struct ddp_state {
- struct pci_dev *pdev;
struct ddp_buf_state buf_state[2]; /* per buffer state */
int cur_buf;
unsigned short kbuf_noinval;
@@ -132,33 +130,30 @@
PSH flag set */
};
-#ifdef notyet
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+
/*
* Returns 1 if a UBUF DMA buffer might be active.
*/
-static inline int t3_ddp_ubuf_pending(struct sock *so)
+static inline int t3_ddp_ubuf_pending(struct socket *so)
{
- struct tcp_sock *tp = tcp_sk(sk);
- struct ddp_state *p = DDP_STATE(tp);
+ struct toepcb *toep = sototcpcb(so)->t_toe;
+ struct ddp_state *p = &toep->tp_ddp_state;
/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
* but DDP_STATE() is only valid if the connection actually enabled
* DDP.
*/
- if (!p)
- return 0;
-
return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) ||
(p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
}
-#endif
int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
unsigned int nppods, unsigned int tag, unsigned int maxoff,
unsigned int pg_off, unsigned int color);
int t3_alloc_ppods(struct tom_data *td, unsigned int n);
void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
-void t3_free_ddp_gl(struct pci_dev *pdev, struct ddp_gather_list *gl);
+void t3_free_ddp_gl(struct ddp_gather_list *gl);
int t3_ddp_copy(const struct mbuf *skb, int offset, struct iovec *to,
int len);
//void t3_repost_kbuf(struct socket *so, int modulate, int activate);