PERFORCE change 134527 for review
Kip Macy
kmacy at FreeBSD.org
Wed Jan 30 22:21:36 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=134527
Change 134527 by kmacy at kmacy:storage:toehead on 2008/01/31 06:21:26
fix ppod setup, rcv_wnd setting, locking on calls to handle_excess_rx,
accounting for copied_seq, and socket ddp initialization
Affected files ...
.. //depot/projects/toehead/sys/dev/cxgb/cxgb_offload.c#3 edit
.. //depot/projects/toehead/sys/dev/cxgb/cxgb_sge.c#6 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#11 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#12 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#4 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#4 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_tom.c#3 edit
Differences ...
==== //depot/projects/toehead/sys/dev/cxgb/cxgb_offload.c#3 (text+ko) ====
@@ -380,6 +380,8 @@
struct iff_mac *iffmacp;
struct ddp_params *ddpp;
struct adap_ports *ports;
+ struct ofld_page_info *rx_page_info;
+ struct tp_params *tp = &adapter->params.tp;
int port;
switch (req) {
@@ -444,6 +446,11 @@
case FAILOVER_CLEAR:
t3_failover_clear(adapter);
break;
+ case GET_RX_PAGE_INFO:
+ rx_page_info = data;
+ rx_page_info->page_size = tp->rx_pg_size;
+ rx_page_info->num = tp->rx_num_pgs;
+ break;
case ULP_ISCSI_GET_PARAMS:
case ULP_ISCSI_SET_PARAMS:
if (!offload_running(adapter))
==== //depot/projects/toehead/sys/dev/cxgb/cxgb_sge.c#6 (text+ko) ====
@@ -73,7 +73,11 @@
#endif
int txq_fills = 0;
-static int recycle_enable = 1;
+/*
+ * XXX don't re-enable this until TOE stops assuming
+ * we have an m_ext
+ */
+static int recycle_enable = 0;
extern int cxgb_txq_buf_ring_size;
int cxgb_cached_allocations;
int cxgb_cached;
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#11 (text+ko) ====
@@ -1052,10 +1052,9 @@
/*
* Determine the receive window size for a socket.
*/
-static unsigned int
-select_rcv_wnd(struct socket *so)
+static unsigned long
+select_rcv_wnd(struct toedev *dev, struct socket *so)
{
- struct toedev *dev = TOE_DEV(so);
struct tom_data *d = TOM_DATA(dev);
unsigned int wnd;
unsigned int max_rcv_wnd;
@@ -1063,7 +1062,9 @@
if (tcp_do_autorcvbuf)
wnd = tcp_autorcvbuf_max;
else
- wnd = sbspace(&so->so_rcv);
+ wnd = so->so_rcv.sb_hiwat;
+
+
/* XXX
* For receive coalescing to work effectively we need a receive window
@@ -1076,8 +1077,14 @@
max_rcv_wnd = (dev->tod_ttid == TOE_ID_CHELSIO_T3B ?
(uint32_t)d->rx_page_size * 23 :
MAX_RCV_WND);
-
- return min(wnd, max_rcv_wnd);
+
+ if (wnd > max_rcv_wnd)
+ wnd = max_rcv_wnd;
+
+ printf("tcp_autorcvbuf_max=%d hiwat=%d min_win=%d max_win=%d returned=%d\n",
+ tcp_autorcvbuf_max, so->so_rcv.sb_hiwat, MIN_RCV_WND, MAX_RCV_WND, wnd);
+
+ return ((unsigned long) wnd);
}
/*
@@ -1116,7 +1123,7 @@
* XXX broken
*
*/
- tp->rcv_wnd = select_rcv_wnd(so);
+ tp->rcv_wnd = select_rcv_wnd(dev, so);
toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
toep->tp_qset_idx = 0;
@@ -1775,7 +1782,6 @@
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
struct tcpcb *tp = toep->tp_tp;
- struct socket *so;
struct ddp_state *q;
struct ddp_buf_state *bsp;
struct cpl_rx_data *hdr = cplhdr(m);
@@ -1806,10 +1812,6 @@
bsp->cur_offset += m->m_pkthdr.len;
if (!(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1;
- tp->t_rcvtime = ticks;
-
- so = toeptoso(toep);
- sbappend(&so->so_rcv, m);
/*
* For now, don't re-enable DDP after a connection fell out of DDP
* mode.
@@ -1837,7 +1839,7 @@
return;
}
- if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
+ if (toep->tp_ulp_mode == ULP_MODE_TCPDDP && toep->tp_ddp_state.kbuf[0])
handle_ddp_data(toep, m);
m->m_seq = ntohl(hdr->seq);
@@ -1939,7 +1941,11 @@
struct socket *so = toeptoso(toep);
if (__predict_false(so_no_receive(so))) {
+ struct inpcb *inp = sotoinpcb(so);
+
+ INP_LOCK(inp);
handle_excess_rx(toep, m);
+ INP_UNLOCK(inp);
return;
}
@@ -2056,7 +2062,11 @@
unsigned int ddp_report, buf_idx, when;
if (__predict_false(so_no_receive(so))) {
+ struct inpcb *inp = sotoinpcb(so);
+
+ INP_LOCK(inp);
handle_excess_rx(toep, m);
+ INP_UNLOCK(inp);
return;
}
TRACE_ENTER;
@@ -3044,14 +3054,15 @@
newtoep->tp_flags = TP_SYN_RCVD;
newtoep->tp_tid = tid;
newtoep->tp_toedev = tdev;
+ tp->rcv_wnd = select_rcv_wnd(tdev, so);
- printf("inserting tid=%d\n", tid);
+ printf("inserting tid=%d rcv_wnd=%ld\n", tid, tp->rcv_wnd);
cxgb_insert_tid(cdev, d->client, newtoep, tid);
SOCK_LOCK(so);
LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
SOCK_UNLOCK(so);
-
+#ifdef notyet
if (lctx->ulp_mode) {
ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
@@ -3060,7 +3071,22 @@
else
newtoep->tp_ulp_mode = lctx->ulp_mode;
}
+#else
+ newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && /* !sock_flag(sk, NO_DDP) && */
+ tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
+ printf("ddp=%d rcv_wnd=%ld min_win=%d\n",
+ TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN);
+
+ if (newtoep->tp_ulp_mode) {
+ ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+
+ if (ddp_mbuf == NULL)
+ newtoep->tp_ulp_mode = 0;
+ }
+
+#endif
+
set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
DPRINTF("adding request to syn cache\n");
@@ -3342,7 +3368,7 @@
toep->tp_flags = 0;
tp->t_toe = toep;
reset_wr_list(toep);
- tp->rcv_wnd = select_rcv_wnd(so);
+ tp->rcv_wnd = select_rcv_wnd(tdev, so);
DPRINTF("rcv_wnd=%ld\n", tp->rcv_wnd);
install_offload_ops(so);
@@ -3725,7 +3751,6 @@
const struct tom_data *td = TOM_DATA(TOE_DEV(so));
unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
- TRACE_ENTER;
for (i = 0; i < nppods; ++i) {
m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
@@ -3753,8 +3778,6 @@
send_or_defer(toep, m, 0);
ppod_addr += PPOD_SIZE;
}
-
- TRACE_EXIT;
return (0);
}
@@ -3975,7 +3998,6 @@
struct work_request_hdr *wr;
struct cpl_set_tcb_field *req;
- TRACE_ENTER;
wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
(len1 ? sizeof(*req) : 0) +
(modulate ? sizeof(struct cpl_rx_data_ack) : 0);
@@ -4021,7 +4043,6 @@
#endif
cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
- TRACE_EXIT;
}
void
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#12 (text+ko) ====
@@ -255,6 +255,11 @@
static int
so_should_ddp(const struct toepcb *toep, int last_recv_len)
{
+
+ printf("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n",
+ toep->tp_ulp_mode, last_recv_len, TOM_TUNABLE(toep->tp_toedev, ddp_thres),
+ toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN));
+
return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) &&
last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
toep->tp_tp->rcv_wnd >
@@ -276,28 +281,37 @@
static int
m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio)
{
- int curlen, err = 0;
+ int curlen, startlen, resid_init, err = 0;
caddr_t buf;
-
+
+ DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n",
+ m, offset, len);
+
+ startlen = len;
+ resid_init = uio->uio_resid;
while (m && len) {
buf = mtod(m, caddr_t);
curlen = m->m_len;
- if (offset < curlen) {
+ if (offset && (offset < curlen)) {
curlen -= offset;
buf += offset;
offset = 0;
- } else {
+ } else if (offset) {
offset -= curlen;
m = m->m_next;
continue;
}
+ err = uiomove(buf, min(len, curlen), uio);
+ if (err) {
+ printf("uiomove_frombuf returned %d\n", err);
+ return (err);
+ }
- err = uiomove_frombuf(buf, min(len, curlen), uio);
- if (err)
- return (err);
- len -= min(len, m->m_len);
+ len -= min(len, curlen);
m = m->m_next;
}
+ DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n",
+ startlen - len, resid_init, uio->uio_resid);
return (err);
}
@@ -310,16 +324,24 @@
copy_data(const struct mbuf *m, int offset, int len, struct uio *uio)
{
struct iovec *to = uio->uio_iov;
+ int err;
+
- if (__predict_true(!is_ddp(m))) /* RX_DATA */
+ if (__predict_true(!is_ddp(m))) { /* RX_DATA */
return m_uiomove(m, offset, len, uio);
- if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
+ } if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
+ TRACE_ENTER;
to->iov_len -= len;
to->iov_base = ((caddr_t)to->iov_base) + len;
uio->uio_iov = to;
+ uio->uio_resid -= len;
+ TRACE_EXIT;
return (0);
}
- return t3_ddp_copy(m, offset, uio, len); /* kernel DDP */
+ TRACE_ENTER;
+ err = t3_ddp_copy(m, offset, uio, len); /* kernel DDP */
+ TRACE_EXIT;
+ return (err);
}
static void
@@ -508,6 +530,9 @@
}
+
+#define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO)
+
static int
t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
{
@@ -515,18 +540,21 @@
struct toepcb *toep = tp->t_toe;
struct mbuf *m;
uint32_t offset;
- int err, flags, avail, len, buffers_freed = 0, copied = 0;
+ int err, flags, avail, len, buffers_freed, copied, copied_unacked;
int target; /* Read at least this many bytes */
int user_ddp_ok, user_ddp_pending = 0;
struct ddp_state *p;
struct inpcb *inp = sotoinpcb(so);
-
+
+
+ copied = copied_unacked = buffers_freed = 0;
flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
err = sblock(&so->so_rcv, SBLOCKWAIT(flags));
+
if (err)
return (err);
- TRACE_ENTER;
+
SOCKBUF_LOCK(&so->so_rcv);
restart:
len = uio->uio_resid;
@@ -572,13 +600,14 @@
t3_cleanup_rbuf(tp);
INP_UNLOCK(inp);
SOCKBUF_LOCK(&so->so_rcv);
+ copied_unacked = 0;
goto restart;
}
if (p->ubuf && user_ddp_ok && !user_ddp_pending &&
uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
p->ubuf_ddp_ready) {
user_ddp_pending =
- !t3_overlay_ubuf(so, uio, (so->so_state & SS_NBIO), flags);
+ !t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags);
if (user_ddp_pending) {
p->kbuf_posted++;
user_ddp_ok = 0;
@@ -588,6 +617,7 @@
/* One shot at DDP if we already have enough data */
if (copied >= target)
user_ddp_ok = 0;
+ printf("sbwaiting 1\n");
if ((err = sbwait(&so->so_rcv)) != 0)
goto done;
//for timers to work await_ddp_completion(sk, flags, &timeo);
@@ -598,7 +628,10 @@
INP_LOCK(inp);
t3_cleanup_rbuf(tp);
INP_UNLOCK(inp);
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_rcv);
+ copied_unacked = 0;
+ printf("sbwaiting 2\n");
+
if ((err = sbwait(&so->so_rcv)) != 0)
goto done;
}
@@ -612,10 +645,12 @@
m = so->so_rcv.sb_mb = m_free(m);
goto done;
}
- offset = toep->tp_copied_seq - m->m_seq + 1 /* OFF by one somewhere :-{ */;
+ offset = toep->tp_copied_seq + copied_unacked - m->m_seq + 1 /* OFF by one somewhere :-{ */;
+ DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d\n",
+ m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset);
if (offset >= m->m_pkthdr.len)
- panic("t3_soreceive: BUG: OFFSET > LEN offset %d copied_seq 0x%x seq 0x%x "
- "pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq, m->m_seq,
+ panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x "
+ "pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq,
m->m_pkthdr.len, m->m_ddp_flags);
avail = m->m_pkthdr.len - offset;
if (len < avail) {
@@ -630,7 +665,7 @@
* first and we are not delivering urgent data inline.
*/
if (__predict_false(toep->tp_urg_data)) {
- uint32_t urg_offset = tp->rcv_up - tp->copied_seq;
+ uint32_t urg_offset = tp->rcv_up - tp->copied_seq + copied_unacked;
if (urg_offset < avail) {
if (urg_offset) {
@@ -672,19 +707,28 @@
* If MSG_TRUNC is specified the data is discarded.
* XXX need to check pr_atomic
*/
- if (__predict_true(!(flags & MSG_TRUNC)))
+ if (__predict_true(!(flags & MSG_TRUNC))) {
+ int resid = uio->uio_resid;
+ SOCKBUF_UNLOCK(&so->so_rcv);
if ((err = copy_data(m, offset, avail, uio))) {
if (err)
err = EFAULT;
- goto done;
+ goto done_unlocked;
}
-
- toep->tp_copied_seq += avail;
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (!(resid > uio->uio_resid))
+ printf("copied zero bytes :-/ resid=%d uio_resid=%d copied=%d copied_unacked=%d\n",
+ resid, uio->uio_resid, copied, copied_unacked);
+ }
+
+ sbdrop_locked(&so->so_rcv, avail);
+ buffers_freed++;
copied += avail;
+ copied_unacked += avail;
len -= avail;
#ifdef URGENT_DATA_SUPPORTED
skip_copy:
- if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
+ if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq))
tp->urg_data = 0;
#endif
/*
@@ -706,9 +750,6 @@
p->ubuf_ddp_ready = 1;
}
}
- sbfree(&so->so_rcv, m);
- m = so->so_rcv.sb_mb = m_free(m); /* XXX need to clean mbuf first */
- buffers_freed++;
if ((so->so_rcv.sb_mb == NULL) && got_psh)
goto done;
@@ -733,7 +774,7 @@
}
user_ddp_pending = 0;
}
- if (p->kbuf_posted == 0) {
+ if ((p->kbuf[0] != NULL) && (p->kbuf_posted == 0)) {
#ifdef T3_TRACE
T3_TRACE0(TIDTB(so),
"chelsio_recvmsg: about to exit, repost kbuf");
@@ -741,15 +782,19 @@
t3_post_kbuf(so, 1);
p->kbuf_posted++;
- } else if (so_should_ddp(toep, copied)) {
+ } else if (so_should_ddp(toep, copied)
+#ifdef notyet
+ && !IS_NONBLOCKING(so)
+#endif
+ ) {
printf("entering ddp\n");
t3_enter_ddp(so, TOM_TUNABLE(TOE_DEV(so),
ddp_copy_limit), 0);
p->kbuf_posted = 1;
- }
+ } else
+ printf("user_ddp_pending=%d kbuf[0]=%p kbuf_posted=%d so_should_ddp=%d\n",
+ user_ddp_pending, p->kbuf[0], p->kbuf_posted, so_should_ddp(toep, copied));
}
- if (buffers_freed)
- t3_cleanup_rbuf(tp);
#ifdef T3_TRACE
T3_TRACE5(TIDTB(so),
"chelsio_recvmsg <-: copied %d len %d buffers_freed %d "
@@ -758,9 +803,15 @@
user_ddp_pending);
#endif
SOCKBUF_UNLOCK(&so->so_rcv);
+done_unlocked:
+ if (copied) {
+ INP_LOCK(inp);
+ t3_cleanup_rbuf(tp);
+ INP_UNLOCK(inp);
+ }
+
sbunlock(&so->so_rcv);
- TRACE_EXIT;
return (err);
}
@@ -808,10 +859,6 @@
return (rv);
}
}
- if (uio->uio_resid > PAGE_SIZE)
- printf("flags=0x%x nonblocking=0x%x iovcnt=%d mp0=%p uio_resid=%d \n",
- flags, !!(so->so_state && SS_NBIO), uio->uio_iovcnt, mp0, uio->uio_resid);
-
return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
}
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#4 (text+ko) ====
@@ -568,9 +568,11 @@
if (p->kbuf[idx] == NULL)
goto err;
err = t3_alloc_ppods(d, nppods, &p->kbuf_tag[idx]);
- if (err)
+ if (err) {
+ printf("t3_alloc_ppods failed err=%d\n", err);
goto err;
-
+ }
+
p->kbuf_nppods[idx] = nppods;
p->kbuf[idx]->dgl_length = kbuf_size;
p->kbuf[idx]->dgl_offset = 0;
@@ -582,6 +584,7 @@
VM_ALLOC_ZERO);
if (p->kbuf[idx]->dgl_pages[i] == NULL) {
p->kbuf[idx]->dgl_nelem = i;
+ printf("failed to allocate kbuf pages\n");
goto err;
}
}
@@ -623,13 +626,15 @@
{
int page_off;
struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl;
-
+
+ TRACE_ENTER;
if (!gl->dgl_pages)
panic("pages not set\n");
offset += gl->dgl_offset + m->m_cur_offset;
page_off = offset & ~PAGE_MASK;
+ TRACE_EXIT;
return uiomove_fromphys(gl->dgl_pages, page_off, len, uio);
}
@@ -641,8 +646,10 @@
{
unsigned int i, j;
- if (__predict_false(!td->ppod_map))
+ if (__predict_false(!td->ppod_map)) {
+ printf("ppod_map not set\n");
return (EINVAL);
+ }
mtx_lock(&td->ppod_map_lock);
for (i = 0; i < td->nppods; ) {
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#4 (text+ko) ====
@@ -40,8 +40,8 @@
#define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket)
#define sototoep(so) (sototcpcb((so))->t_toe)
-#define TRACE_ENTER printf("%s:%s entered", __FUNCTION__, __FILE__)
-#define TRACE_EXIT printf("%s:%s:%d exited", __FUNCTION__, __FILE__, __LINE__)
+#define TRACE_ENTER printf("%s:%s entered\n", __FUNCTION__, __FILE__)
+#define TRACE_EXIT printf("%s:%s:%d exited\n", __FUNCTION__, __FILE__, __LINE__)
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_tom.c#3 (text+ko) ====
@@ -376,14 +376,12 @@
t->ddp_ulimit = ddp.ulimit;
t->pdev = ddp.pdev;
t->rx_page_size = rx_page_info.page_size;
-#ifdef notyet
/* OK if this fails, we just can't do DDP */
t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
- t->ppod_map = t3_alloc_mem(t->nppods);
-#endif
+ t->ppod_map = malloc(t->nppods, M_DEVBUF, M_WAITOK);
+ mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF);
#if 0
- spin_lock_init(&t->ppod_map_lock);
tom_proc_init(dev);
#ifdef CONFIG_SYSCTL
t->sysctl = t3_sysctl_register(dev, &t->conf);
More information about the p4-projects
mailing list