PERFORCE change 134065 for review
Kip Macy <kmacy at FreeBSD.org>
Thu Jan 24 23:08:25 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=134065
Change 134065 by kmacy at kmacy:storage:toehead on 2008/01/25 07:07:45
split ddp support and vm functions into separate files
Affected files ...
.. //depot/projects/toehead/sys/dev/cxgb/sys/mbufq.h#3 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#7 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#6 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#6 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h#3 edit
.. //depot/projects/toehead/sys/modules/cxgb/tom/Makefile#2 edit
Differences ...
==== //depot/projects/toehead/sys/dev/cxgb/sys/mbufq.h#3 (text+ko) ====
@@ -103,7 +103,7 @@
}
static __inline struct mbuf *
-mbufq_peek(struct mbuf_head *l)
+mbufq_peek(const struct mbuf_head *l)
{
return (l->head);
}
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#7 (text+ko) ====
@@ -490,12 +490,10 @@
m = m_gethdr_nofail(sizeof(*req));
-#ifdef notyet
- req = (struct cpl_rx_data_ack *)__skb_put(skb, sizeof(*req));
+ req = mtod(m, struct cpl_rx_data_ack *);
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-#else
- req = mtod(m, struct cpl_rx_data_ack *);
-#endif
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
V_RX_DACK_MODE(1) |
@@ -1163,6 +1161,20 @@
return V_FLAVORS_VALID(flv_valid) |
V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0);
}
+
+#if DEBUG_WR > 1
+static int
+count_pending_wrs(const struct toepcb *toep)
+{
+ const struct mbuf *m;
+ int n = 0;
+
+ wr_queue_walk(toep, m)
+ n += m->m_pkthdr.csum_data;
+ return (n);
+}
+#endif
+
#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif
@@ -2132,7 +2144,7 @@
unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
if (tp->rcv_nxt == rcv_nxt) /* no data */
- return 0;
+ return (0);
if (__predict_false(so_no_receive(so))) {
handle_excess_rx(toep, m);
@@ -2189,7 +2201,6 @@
goto out;
}
-
if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
keep = handle_peer_close_data(so, m);
if (keep < 0)
@@ -3533,7 +3544,6 @@
if (__predict_false(credits < p->m_pkthdr.csum_data)) {
#if DEBUG_WR > 1
struct tx_data_wr *w = cplhdr(p);
-#ifdef notyet
log(LOG_ERR,
"TID %u got %u WR credits, need %u, len %u, "
"main body %u, frags %u, seq # %u, ACK una %u,"
@@ -3541,8 +3551,7 @@
toep->tp_tid, credits, p->csum, p->len,
p->len - p->data_len, skb_shinfo(p)->nr_frags,
ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
- WR_AVAIL(tp), count_pending_wrs(tp) - credits);
-#endif
+ toep->tp_wr_avail, count_pending_wrs(tp) - credits);
#endif
p->m_pkthdr.csum_data -= credits;
break;
@@ -3880,11 +3889,9 @@
wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
m = m_gethdr_nofail(wrlen);
m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-#ifdef notyet
- wr = (struct work_request_hdr *)__skb_put(skb, wrlen);
-#else
wr = mtod(m, struct work_request_hdr *);
-#endif
+ m->m_pkthdr.len = m->m_len = wrlen;
+
/* Set the ATOMIC flag to make sure that TP processes the following
* CPLs in an atomic manner and no wire segments can be interleaved.
*/
@@ -3955,12 +3962,10 @@
(modulate ? sizeof(struct cpl_rx_data_ack) : 0);
m = m_gethdr_nofail(wrlen);
m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
-#ifdef notyet
- wr = (struct work_request_hdr *)__skb_put(skb, wrlen);
+ wr = mtod(m, struct work_request_hdr *);
wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
-#else
- wr = mtod(m, struct work_request_hdr *);
-#endif
+ m->m_pkthdr.len = m->m_len = wrlen;
+
req = (struct cpl_set_tcb_field *)(wr + 1);
if (len0) { /* program buffer 0 offset and length */
mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
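
The cxgb_cpl_io.c hunks above repeatedly replace the leftover Linux __skb_put() idiom (previously parked under #ifdef notyet) with the native mbuf pattern: point at the data area with mtod() and record the request size in both m_len and m_pkthdr.len. A minimal self-contained sketch of that pattern follows; the request structure and field values are illustrative only, not the real CPL headers, and plain m_gethdr() stands in for the driver's m_gethdr_nofail():

#include <sys/param.h>
#include <sys/mbuf.h>

/* Illustrative fixed-size request; the real CPL structs live in the firmware headers. */
struct example_req {
        uint32_t wr_hi;
        uint32_t wr_lo;
};

static struct mbuf *
build_example_req(void)
{
        struct mbuf *m;
        struct example_req *req;

        m = m_gethdr(M_NOWAIT, MT_DATA);        /* driver uses m_gethdr_nofail() */
        if (m == NULL)
                return (NULL);

        /*
         * Where Linux code would call __skb_put(skb, sizeof(*req)) to
         * reserve space and advance skb->len, an mbuf consumer takes the
         * data pointer with mtod() and then sets m_len and m_pkthdr.len
         * explicitly, exactly as the hunks above now do.
         */
        req = mtod(m, struct example_req *);
        req->wr_hi = 0;
        req->wr_lo = 0;
        m->m_pkthdr.len = m->m_len = sizeof(*req);

        return (m);
}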
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#6 (text+ko) ====
@@ -73,6 +73,7 @@
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/cxgb_offload.h>
+
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
@@ -86,6 +87,7 @@
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
+#include <dev/cxgb/ulp/tom/cxgb_vm.h>
static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
@@ -95,9 +97,6 @@
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
-#define VM_HOLD_WRITEABLE 0x1
-static int vm_fault_hold_user_pages(vm_offset_t addr, vm_page_t *mp, int count, int flags);
-static void vm_fault_unhold_pages(vm_page_t *m, int count);
#define TMP_IOV_MAX 16
#ifndef PG_FRAME
#define PG_FRAME ~PAGE_MASK
@@ -240,6 +239,29 @@
return (0);
}
+/*
+ * Returns whether a connection should enable DDP. This happens when all of
+ * the following conditions are met:
+ * - the connection's ULP mode is DDP
+ * - DDP is not already enabled
+ * - the last receive was above the DDP threshold
+ * - receive buffers are in user space
+ * - receive side isn't shutdown (handled by caller)
+ * - the connection's receive window is big enough so that sizable buffers
+ * can be posted without closing the window in the middle of DDP (checked
+ * when the connection is offloaded)
+ */
+#ifdef notyet
+static int
+so_should_ddp(const struct toepcb *toep, int last_recv_len)
+{
+ return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.ubuf == NULL) &&
+ last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
+ toep->tp_tp->rcv_wnd >
+ (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN);
+}
+#endif
+
static void
cxgb_wait_dma_completion(struct toepcb *toep)
{
@@ -501,749 +523,3 @@
so->so_proto->pr_usrreqs->pru_sosend = cxgb_sosend;
so->so_proto->pr_usrreqs->pru_soreceive = cxgb_soreceive;
}
-
-/*
- * This routine takes a user address range and does the following:
- * - validate that the user has access to those pages (flags indicates read or write) - if not fail
- * - validate that count is enough to hold range number of pages - if not fail
- * - fault in any non-resident pages
- * - if the user is doing a read force a write fault for any COWed pages
- * - if the user is doing a read mark all pages as dirty
- * - hold all pages
- * - return number of pages in count
- */
-static int
-vm_fault_hold_user_pages(vm_offset_t addr, vm_page_t *mp, int count, int flags)
-{
-
- vm_offset_t start, va;
- vm_paddr_t pa;
- int pageslen, faults, rv;
-
- struct thread *td;
- vm_map_t map;
- pmap_t pmap;
- vm_page_t m, *pages;
- vm_prot_t prot;
-
- /*
- * Check that virtual address range is legal
- * This check is somewhat bogus as on some architectures kernel
- * and user do not share VA - however, it appears that all FreeBSD
- * architectures define it
- */
- pageslen = count * PAGE_SIZE;
- if (addr + pageslen > VM_MAXUSER_ADDRESS)
- return (EFAULT);
-
- td = curthread;
- map = &td->td_proc->p_vmspace->vm_map;
- pmap = &td->td_proc->p_vmspace->vm_pmap;
- pages = mp;
-
- prot = (flags & VM_HOLD_WRITEABLE) ? VM_PROT_WRITE : VM_PROT_READ;
- bzero(pages, sizeof(vm_page_t *) * count);
-retry:
-
- /*
- * First optimistically assume that all pages are resident (and R/W if for write)
- * if so just mark pages as held (and dirty if for write) and return
- */
- vm_page_lock_queues();
- for (start = addr, pages = mp, faults = 0, va = addr; va < start + pageslen; va += PAGE_SIZE, pages++) {
- /*
- * Assure that we only hold the page once
- */
- if (*pages == NULL) {
- /*
- * page queue mutex is recursable so this is OK
- * it would be really nice if we had an unlocked version of this so
- * we were only acquiring the pmap lock 1 time as opposed to potentially
- * many dozens of times
- */
- m = pmap_extract_and_hold(pmap, va, prot);
- if (m == NULL) {
- faults++;
- continue;
- }
-
- *pages = m;
- if (flags & VM_HOLD_WRITEABLE)
- vm_page_dirty(m);
- }
- }
- vm_page_unlock_queues();
-
- if (faults == 0)
- return (0);
- /*
- * Pages either have insufficient permissions or are not present
- * trigger a fault where neccessary
- *
- */
- for (va = start; va < pageslen; va += PAGE_SIZE) {
- m = NULL;
- pa = pmap_extract(pmap, va);
- rv = 0;
- if (pa)
- m = PHYS_TO_VM_PAGE(pa);
- if (flags & VM_HOLD_WRITEABLE) {
- if (m == NULL || (m->flags & PG_WRITEABLE) == 0)
- rv = vm_fault(map, va, VM_PROT_WRITE, VM_FAULT_DIRTY);
- } else if (m == NULL)
- rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_NORMAL);
- if (rv)
- goto error;
- }
- goto retry;
-
-error:
- vm_page_lock_queues();
- for (pages = mp,
- va = start; va < start + pageslen;
- va += PAGE_SIZE,
- pages++)
- if (*pages)
- vm_page_unhold(*pages);
- vm_page_unlock_queues();
- return (EFAULT);
-}
-
-static void
-vm_fault_unhold_pages(vm_page_t *mp, int count)
-{
-
- KASSERT(count >= 0, ("negative count %d", count));
- vm_page_lock_queues();
- while (count--) {
- vm_page_unhold(*mp);
- mp++;
- }
- vm_page_unlock_queues();
-}
-
-/**
- * t3_pin_pages - pin a user memory range and prepare it for DDP
- * @addr - the starting address
- * @len - the length of the range
- * @newgl - contains the pages and physical addresses of the pinned range
- * @gl - an existing gather list, may be %NULL
- *
- * Pins the pages in the user-space memory range [addr, addr + len) and
- * maps them for DMA. Returns a gather list with the pinned pages and
- * their physical addresses. If @gl is non NULL the pages it describes
- * are compared against the pages for [addr, addr + len), and if the
- * existing gather list already covers the range a new list is not
- * allocated. Returns 0 on success, or a negative errno. On success if
- * a new gather list was allocated it is returned in @newgl.
- */
-static int
-t3_pin_pages(bus_dma_tag_t tag, bus_dmamap_t map, unsigned long addr,
- size_t len, struct ddp_gather_list **newgl,
- const struct ddp_gather_list *gl)
-{
- int i, err;
- size_t pg_off;
- unsigned int npages;
- struct ddp_gather_list *p;
-
- if (addr >= VM_MAXUSER_ADDRESS)
- return (EINVAL);
-#if 0
- if (!access_ok(VERIFY_WRITE, addr, len))
- return (EFAULT);
-#endif
- pg_off = addr & ~PAGE_MASK;
- npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t *),
- M_DEVBUF, M_NOWAIT);
- if (!p)
- return (ENOMEM);
-
-
- err = vm_fault_hold_user_pages(addr, p->dgl_pages, npages, VM_HOLD_WRITEABLE);
-
- if (err)
- goto free_gl;
-
- if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
- gl->dgl_length >= len) {
- for (i = 0; i < npages; ++i)
- if (p->dgl_pages[i] != gl->dgl_pages[i])
- goto different_gl;
- err = 0;
- goto unpin;
- }
-
-different_gl:
- p->dgl_length = len;
- p->dgl_offset = pg_off;
- p->dgl_nelem = npages;
-#ifdef notyet
- p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
- PAGE_SIZE - pg_off,
- PCI_DMA_FROMDEVICE) - pg_off;
- for (i = 1; i < npages; ++i)
- p->phys_addr[i] = pci_map_page(pdev, p->pages[i], 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
-#endif
-
- *newgl = p;
- return 0;
-unpin:
- vm_fault_unhold_pages(p->dgl_pages, npages);
-
-free_gl:
- free(p, M_DEVBUF);
- *newgl = NULL;
- return err;
-}
-
-/*
- * Return the # of page pods needed to accommodate a # of pages.
- */
-static inline unsigned int
-pages2ppods(unsigned int pages)
-{
- return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
-}
-
-
-static void
-unmap_ddp_gl(const struct ddp_gather_list *gl)
-{
-#ifdef notyet
- int i;
-
- if (!gl->nelem)
- return;
-
- pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
- PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
- for (i = 1; i < gl->nelem; ++i)
- pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
-
-#endif
-}
-
-static void
-ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
-{
-#ifdef notyet
- int i;
-
- for (i = 0; i < gl->nelem; ++i) {
- if (dirty)
- set_page_dirty_lock(gl->pages[i]);
- put_page(gl->pages[i]);
- }
-#endif
-}
-
-void
-t3_free_ddp_gl(struct ddp_gather_list *gl)
-{
- unmap_ddp_gl(gl);
- ddp_gl_free_pages(gl, 0);
- free(gl, M_DEVBUF);
-}
-
-/* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */
-#define MAX_PPODS 64U
-
-/*
- * Allocate page pods for DDP buffer 1 (the user buffer) and set up the tag in
- * the TCB. We allocate page pods in multiples of PPOD_CLUSTER_SIZE. First we
- * try to allocate enough page pods to accommodate the whole buffer, subject to
- * the MAX_PPODS limit. If that fails we try to allocate PPOD_CLUSTER_SIZE page
- * pods before failing entirely.
- */
-static int
-alloc_buf1_ppods(struct socket *so, struct ddp_state *p,
- unsigned long addr, unsigned int len)
-{
- int tag, npages, nppods;
- struct tom_data *d = TOM_DATA(TOE_DEV(so));
-
- npages = ((addr & ~PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- nppods = min(pages2ppods(npages), MAX_PPODS);
-#ifdef notyet
- nppods = ALIGN(nppods, PPOD_CLUSTER_SIZE);
-#endif
- tag = t3_alloc_ppods(d, nppods);
- if (tag < 0 && nppods > PPOD_CLUSTER_SIZE) {
- nppods = PPOD_CLUSTER_SIZE;
- tag = t3_alloc_ppods(d, nppods);
- }
- if (tag < 0)
- return (ENOMEM);
-
- p->ubuf_nppods = nppods;
- p->ubuf_tag = tag;
-#if NUM_DDP_KBUF == 1
- t3_set_ddp_tag(so, 1, tag << 6);
-#endif
- return 0;
-}
-
-
-
-/*
- * Reposts the kernel DDP buffer after it has been previously become full and
- * invalidated. We just need to reset the offset and adjust the DDP flags.
- * Conveniently, we can set the flags and the offset with a single message.
- * Note that this function does not set the buffer length. Again conveniently
- * our kernel buffer is of fixed size. If the length needs to be changed it
- * needs to be done separately.
- */
-static void
-t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate,
- int activate)
-{
- struct toepcb *toep = sototcpcb(so)->t_toe;
- struct ddp_state *p = &toep->tp_ddp_state;
-
- p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
- p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
- p->buf_state[bufidx].gl = p->kbuf[bufidx];
- p->cur_buf = bufidx;
- p->kbuf_idx = bufidx;
- if (!bufidx)
- t3_setup_ddpbufs(toep, 0, 0, 0, 0,
- V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
- V_TF_DDP_BUF0_VALID(1),
- V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
- V_TF_DDP_BUF0_VALID(1) |
- V_TF_DDP_ACTIVE_BUF(activate), modulate);
- else
- t3_setup_ddpbufs(toep, 0, 0, 0, 0,
- V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
- V_TF_DDP_BUF1_VALID(1) |
- V_TF_DDP_ACTIVE_BUF(activate),
- V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
- V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
- modulate);
-
-}
-
-/*
- * Starting offset for the user DDP buffer. A non-0 value ensures a DDP flush
- * won't block indefinitely if there's nothing to place (which should be rare).
- */
-#define UBUF_OFFSET 1
-
-static __inline unsigned long
-select_ddp_flags(const struct socket *so, int buf_idx,
- int nonblock, int rcv_flags)
-{
- if (buf_idx == 1) {
- if (__predict_false(rcv_flags & MSG_WAITALL))
- return V_TF_DDP_PSH_NO_INVALIDATE(1) |
- V_TF_DDP_PUSH_DISABLE_1(1);
- if (nonblock)
- return V_TF_DDP_BUF1_FLUSH(1);
-
- return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(TOE_DEV(so),
- ddp_push_wait));
- }
-
- if (__predict_false(rcv_flags & MSG_WAITALL))
- return V_TF_DDP_PSH_NO_INVALIDATE(1) |
- V_TF_DDP_PUSH_DISABLE_0(1);
- if (nonblock)
- return V_TF_DDP_BUF0_FLUSH(1);
-
- return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(TOE_DEV(so), ddp_push_wait));
-}
-
-/**
- * setup_iovec_ppods - setup HW page pods for a user iovec
- * @sk: the associated socket
- * @iov: the iovec
- * @oft: additional bytes to map before the start of the buffer
- *
- * Pins a user iovec and sets up HW page pods for DDP into it. We allocate
- * page pods for user buffers on the first call per socket. Afterwards we
- * limit the buffer length to whatever the existing page pods can accommodate.
- * Returns a negative error code or the length of the mapped buffer.
- *
- * The current implementation handles iovecs with only one entry.
- */
-static int
-setup_iovec_ppods(struct socket *so, const struct iovec *iov, int oft)
-{
- int err;
- unsigned int len;
- struct ddp_gather_list *gl = NULL;
- struct toepcb *toep = sototcpcb(so)->t_toe;
- struct ddp_state *p = &toep->tp_ddp_state;
- unsigned long addr = (unsigned long)iov->iov_base - oft;
-
- if (__predict_false(!p->ubuf_nppods)) {
- err = alloc_buf1_ppods(so, p, addr, iov->iov_len + oft);
- if (err)
- return err;
- }
-
- len = (p->ubuf_nppods - NUM_SENTINEL_PPODS) * PPOD_PAGES * PAGE_SIZE;
- len -= addr & ~PAGE_MASK;
- if (len > M_TCB_RX_DDP_BUF0_LEN)
- len = M_TCB_RX_DDP_BUF0_LEN;
- len = min(len, sototcpcb(so)->rcv_wnd - 32768);
- len = min(len, iov->iov_len + oft);
-
- if (len <= p->kbuf[0]->dgl_length)
- return -EINVAL;
-
- err = t3_pin_pages(toep->tp_rx_dmat, toep->tp_dmamap, addr, len, &gl, p->ubuf);
- if (err < 0)
- return err;
- if (gl) {
- if (p->ubuf)
- t3_free_ddp_gl(p->ubuf);
- p->ubuf = gl;
- t3_setup_ppods(so, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
- gl->dgl_offset, 0);
- }
- return len;
-}
-
-#define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE(1) | \
- V_TF_DDP_BUF1_FLUSH(1) | \
- V_TF_DDP_BUF0_FLUSH(1) | \
- V_TF_DDP_PUSH_DISABLE_1(1) | \
- V_TF_DDP_PUSH_DISABLE_0(1) | \
- V_TF_DDP_INDICATE_OUT(1))
-
-/*
- * Post a user buffer as an overlay on top of the current kernel buffer.
- */
-int
-t3_overlay_ubuf(struct socket *so, const struct iovec *iov,
- int nonblock, int rcv_flags, int modulate, int post_kbuf)
-{
- int len, ubuf_idx;
- unsigned long flags;
- struct toepcb *toep = sototcpcb(so)->t_toe;
- struct ddp_state *p = &toep->tp_ddp_state;
-
- if (!p)
- return -1;
-
- len = setup_iovec_ppods(so, iov, 0);
- if (len < 0)
- return len;
-
- ubuf_idx = p->kbuf_idx;
- p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP;
- /* Use existing offset */
- /* Don't need to update .gl, user buffer isn't copied. */
- p->cur_buf = ubuf_idx;
-
- flags = select_ddp_flags(so, ubuf_idx, nonblock, rcv_flags);
-
- if (post_kbuf) {
- struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1];
-
- dbs->cur_offset = 0;
- dbs->flags = 0;
- dbs->gl = p->kbuf[ubuf_idx ^ 1];
- p->kbuf_idx ^= 1;
- flags |= p->kbuf_idx ?
- V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_PUSH_DISABLE_1(0) :
- V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_PUSH_DISABLE_0(0);
- }
-
- if (ubuf_idx == 0) {
- t3_overlay_ddpbuf(toep, 0, p->ubuf_tag << 6, p->kbuf_tag[1] << 6,
- len);
- t3_setup_ddpbufs(toep, 0, 0, p->kbuf[1]->dgl_length, 0,
- flags,
- OVERLAY_MASK | flags, 1);
- } else {
- t3_overlay_ddpbuf(toep, 1, p->kbuf_tag[0] << 6, p->ubuf_tag << 6,
- len);
- t3_setup_ddpbufs(toep, p->kbuf[0]->dgl_length, 0, 0, 0,
- flags,
- OVERLAY_MASK | flags, 1);
- }
-#ifdef T3_TRACE
- T3_TRACE5(TIDTB(so),
- "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
- " kbuf_idx %d",
- p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
-#endif
- return 0;
-}
-
-
-
-/*
- * Returns whether a connection should enable DDP. This happens when all of
- * the following conditions are met:
- * - the connection's ULP mode is DDP
- * - DDP is not already enabled
- * - the last receive was above the DDP threshold
- * - receive buffers are in user space
- * - receive side isn't shutdown (handled by caller)
- * - the connection's receive window is big enough so that sizable buffers
- * can be posted without closing the window in the middle of DDP (checked
- * when the connection is offloaded)
- */
-#ifdef notyet
-static int
-so_should_ddp(const struct toepcb *toep, int last_recv_len)
-{
- return toep->tp_ulp_mode == ULP_MODE_TCPDDP && !toep->tp_dpp_state.cur_buf &&
- last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
- toep->tp_tp->rcv_wnd >
- (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) +
- DDP_RSVD_WIN);
-}
-
-static inline int
-is_ddp(const struct mbuf *m)
-{
- return (m->m_flags & M_DDP);
-}
-
-static inline int
-is_ddp_psh(const struct mbuf *m)
-{
- return is_ddp(skb) && (m->m_pkthdr.csum_flags & DDP_BF_PSH);
-}
-
-/*
- * Copy data from an sk_buff to an iovec. Deals with RX_DATA, which carry the
- * data in the sk_buff body, and with RX_DATA_DDP, which place the data in a
- * DDP buffer.
- */
-static inline int
-copy_data(const struct mbuf *m, int offset, struct iovec *to, int len)
-{
- if (__predict_true(!is_ddp(m))) /* RX_DATA */
- return mbuf_copy_datagram_iovec(m, offset, to, len);
- if (__predict_true(m->pkthdr.csum_flags & DDP_BF_NOCOPY)) { /* user DDP */
- to->iov_len -= len;
- to->iov_base += len;
- return 0;
- }
- return t3_ddp_copy(m, offset, to, len); /* kernel DDP */
-}
-
-
-#endif
-/*
- * Clean up DDP state that needs to survive until socket close time, such as the
- * DDP buffers. The buffers are already unmapped at this point as unmapping
- * needs the PCI device and a socket may close long after the device is removed.
- */
-void
-t3_cleanup_ddp(struct socket *so)
-{
- struct toepcb *toep = sototcpcb(so)->t_toe;
- struct ddp_state *p = &toep->tp_ddp_state;
- int idx;
-
- for (idx = 0; idx < NUM_DDP_KBUF; idx++)
- if (p->kbuf[idx]) {
- ddp_gl_free_pages(p->kbuf[idx], 0);
- free(p->kbuf[idx], M_DEVBUF);
- }
-
- if (p->ubuf) {
- ddp_gl_free_pages(p->ubuf, 0);
- free(p->ubuf, M_DEVBUF);
- p->ubuf = NULL;
- }
- toep->tp_ulp_mode = 0;
-}
-
-/*
- * This is a companion to t3_cleanup_ddp() and releases the HW resources
- * associated with a connection's DDP state, such as the page pods.
- * It's called when HW is done with a connection. The rest of the state
- * remains available until both HW and the app are done with the connection.
- */
-void
-t3_release_ddp_resources(struct toepcb *toep)
-{
- struct ddp_state *p = &toep->tp_ddp_state;
- struct tom_data *d = TOM_DATA(toep->tp_toedev);
- int idx;
-
- for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
- t3_free_ppods(d, p->kbuf_tag[idx],
- p->kbuf_nppods[idx]);
- unmap_ddp_gl(p->kbuf[idx]);
- }
-
- if (p->ubuf_nppods) {
- t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
- p->ubuf_nppods = 0;
- }
- if (p->ubuf)
- unmap_ddp_gl(p->ubuf);
-
-}
-
-void
-t3_post_kbuf(struct socket *so, int modulate)
-{
- struct toepcb *toep = sototcpcb(so)->t_toe;
- struct ddp_state *p = &toep->tp_ddp_state;
-
- t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
- t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
- t3_repost_kbuf(so, p->cur_buf, modulate, 1);
-
-#ifdef T3_TRACE
- T3_TRACE1(TIDTB(so),
- "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
-#endif
-}
-
-/*
- * Prepare a socket for DDP. Must be called when the socket is known to be
- * open.
- */
-int
-t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall)
-{
- int err = ENOMEM;
- unsigned int nppods, kbuf_pages, idx = 0;
- struct toepcb *toep = sototcpcb(so)->t_toe;
- struct ddp_state *p = &toep->tp_ddp_state;
- struct tom_data *d = TOM_DATA(toep->tp_toedev);
-
- if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
- return (EINVAL);
-
- kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- nppods = pages2ppods(kbuf_pages);
-
- p->kbuf_noinval = !!waitall;
-
- p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
- for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
- p->kbuf[idx] =
- malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
- sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
- if (!p->kbuf[idx])
- goto err;
-
- p->kbuf_tag[idx] = t3_alloc_ppods(d, nppods);
- if (p->kbuf_tag[idx] < 0)
- goto err;
-
- p->kbuf_nppods[idx] = nppods;
- p->kbuf[idx]->dgl_length = kbuf_size;
- p->kbuf[idx]->dgl_offset = 0;
- p->kbuf[idx]->dgl_nelem = kbuf_pages;
-#ifdef notyet
- p->kbuf[idx]->pages =
- (struct page **)&p->kbuf[idx]->phys_addr[kbuf_pages];
-
-
- for (i = 0; i < kbuf_pages; ++i) {
-
- p->kbuf[idx]->pages[i] = alloc_page(sk->sk_allocation);
- if (!p->kbuf[idx]->pages[i]) {
- p->kbuf[idx]->nelem = i;
- goto err;
- }
-
- }
-
- for (i = 0; i < kbuf_pages; ++i)
- p->kbuf[idx]->phys_addr[i] =
- pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
- 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-#endif
- t3_setup_ppods(so, p->kbuf[idx], nppods, p->kbuf_tag[idx],
- p->kbuf[idx]->dgl_length, 0, 0);
- }
- t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6);
- t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length);
- t3_repost_kbuf(so, 0, 0, 1);
- t3_set_rcv_coalesce_enable(so,
- TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce));
-
-#ifdef T3_TRACE
- T3_TRACE4(TIDTB(so),
- "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
- kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
-#endif
-
- return 0;
-
-err:
- t3_release_ddp_resources(toep);
- t3_cleanup_ddp(so);
- return err;
-}
-
-int
-t3_ddp_copy(const struct mbuf *m, int offset, struct iovec *to, int len)
-{
-#ifdef notyet
- int err, page_no, page_off;
- struct ddp_gather_list *gl = (struct ddp_gather_list *)skb->mac.raw;
-
- if (!gl->pages) {
- dump_stack();
- BUG_ON(1);
- }
-
- offset += gl->offset + TCP_SKB_CB(skb)->when;
- page_no = offset >> PAGE_SHIFT;
- page_off = offset & ~PAGE_MASK;
-
- while (len) {
- int copy = min_t(int, len, PAGE_SIZE - page_off);
-
- err = memcpy_toiovec(to, page_address(gl->pages[page_no]) +
- page_off, copy);
- if (err)
- return -EFAULT;
- page_no++;
- page_off = 0;
- len -= copy;
- }
-#endif
- return 0;
-}
-
-/*
- * Allocate n page pods. Returns -1 on failure or the page pod tag.
- */
-int t3_alloc_ppods(struct tom_data *td, unsigned int n)
-{
- unsigned int i, j;
-
- if (__predict_false(!td->ppod_map))
- return -1;
-
- mtx_lock(&td->ppod_map_lock);
- for (i = 0; i < td->nppods; ) {
- for (j = 0; j < n; ++j) /* scan ppod_map[i..i+n-1] */
- if (td->ppod_map[i + j]) {
- i = i + j + 1;
- goto next;
- }
-
- memset(&td->ppod_map[i], 1, n); /* allocate range */
- mtx_unlock(&td->ppod_map_lock);
- return i;
-next: ;
- }
- mtx_unlock(&td->ppod_map_lock);
- return (0);
-}
-
-void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
-{
- /* No need to take ppod_lock here */
- memset(&td->ppod_map[tag], 0, n);
-}
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#6 (text+ko) ====
@@ -150,20 +150,20 @@
int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
unsigned int nppods, unsigned int tag, unsigned int maxoff,
unsigned int pg_off, unsigned int color);
-int t3_alloc_ppods(struct tom_data *td, unsigned int n);
+int t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag);
void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
void t3_free_ddp_gl(struct ddp_gather_list *gl);
-int t3_ddp_copy(const struct mbuf *skb, int offset, struct iovec *to,
+int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio,
int len);
//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
void t3_post_kbuf(struct socket *so, int modulate);
int t3_post_ubuf(struct socket *so, const struct iovec *iov, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
-void t3_cancel_ubuf(struct socket *so);
+void t3_cancel_ubuf(struct toepcb *toep);
int t3_overlay_ubuf(struct socket *so, const struct iovec *iov, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
int t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall);
-void t3_cleanup_ddp(struct socket *so);
+void t3_cleanup_ddp(struct toepcb *toep);
void t3_release_ddp_resources(struct toepcb *toep);
void t3_cancel_ddpbuf(struct toepcb *, unsigned int bufidx);
void t3_overlay_ddpbuf(struct toepcb *, unsigned int bufidx, unsigned int tag0,
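
The t3_alloc_ppods() prototype now returns a status and passes the page-pod tag back through an int *tag out parameter. In the old inline implementation removed above, the function is documented to return -1 on failure yet falls through to return (0) when the map is exhausted, which cannot be distinguished from a successful allocation of tag 0; the new signature removes that ambiguity. The real replacement body lives in the new cxgb_ddp.c, which is truncated from this mail, so the following is only a hypothetical sketch of an allocator matching the new prototype, reusing the ppod_map/nppods/ppod_map_lock fields from the old code and tightening the loop bound so the scan cannot run past the end of the map:

/*
 * Hypothetical allocator for the new prototype: returns 0 and stores the
 * tag in *tag on success, or ENOMEM when no run of n free page pods exists.
 */
int
t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag)
{
        unsigned int i, j;

        if (__predict_false(td->ppod_map == NULL))
                return (ENOMEM);

        mtx_lock(&td->ppod_map_lock);
        for (i = 0; i + n <= td->nppods; ) {
                for (j = 0; j < n; ++j)         /* scan ppod_map[i..i+n-1] */
                        if (td->ppod_map[i + j]) {
                                i = i + j + 1;
                                goto next;
                        }
                memset(&td->ppod_map[i], 1, n); /* claim the range */
                mtx_unlock(&td->ppod_map_lock);
                *tag = i;
                return (0);
next:           ;
        }
        mtx_unlock(&td->ppod_map_lock);
        return (ENOMEM);
}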
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h#3 (text+ko) ====
@@ -99,7 +99,7 @@
}
static inline struct mbuf *
-peek_wr(struct toepcb *toep)
+peek_wr(const struct toepcb *toep)
{
return (mbufq_peek(&toep->wr_list));
@@ -112,5 +112,10 @@
return (mbufq_dequeue(&toep->wr_list));
}
+#define wr_queue_walk(toep, m) \
+ for (m = peek_wr(toep); m; m = m->m_nextpkt)
+
+
+
#endif
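
wr_queue_walk() gives read-only iteration over the pending work-request queue and, together with the const-qualified peek_wr()/mbufq_peek() changes above, lets diagnostics take a const struct toepcb *. The new count_pending_wrs() in the cxgb_cpl_io.c hunk is the in-tree user; a further, purely hypothetical example of the same idiom would be a debug dump of the queued WRs:

#if DEBUG_WR > 1
/* Hypothetical helper: log the size and credit count of every pending WR. */
static void
dump_pending_wrs(const struct toepcb *toep)
{
        const struct mbuf *m;
        int i = 0;

        wr_queue_walk(toep, m)
                log(LOG_DEBUG, "WR %d: %d bytes, %u credits\n",
                    i++, m->m_pkthdr.len, m->m_pkthdr.csum_data);
}
#endif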
==== //depot/projects/toehead/sys/modules/cxgb/tom/Makefile#2 (text+ko) ====
@@ -4,7 +4,7 @@
KMOD= tom
SRCS= cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_tom_sysctl.c cxgb_cpl_socket.c
-#SRCS+= cxgb_tcp_subr.c cxgb_tcp_usrreq.c
+SRCS+= cxgb_ddp.c cxgb_vm.c
>>> TRUNCATED FOR MAIL (1000 lines) <<<