PERFORCE change 134944 for review
Kip Macy
kmacy at FreeBSD.org
Wed Feb 6 16:52:34 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=134944
Change 134944 by kmacy at kmacy:entropy:iwarp on 2008/02/07 00:52:21
IFtoehead 134879
fix various issues tied to ddp setup
fix dependencies for cxgb module unload
Affected files ...
.. //depot/projects/iwarp/sys/dev/cxgb/cxgb_offload.c#14 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/cxgb_sge.c#11 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/sys/mvec.h#6 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#8 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#7 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#2 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_defs.h#7 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#5 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_tom.c#5 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_vm.c#2 integrate
.. //depot/projects/iwarp/usr.sbin/cxgbtool/cxgbtool.c#5 integrate
Differences ...
==== //depot/projects/iwarp/sys/dev/cxgb/cxgb_offload.c#14 (text+ko) ====
@@ -448,7 +448,7 @@
case GET_RX_PAGE_INFO:
rx_page_info = data;
rx_page_info->page_size = tp->rx_pg_size;
- rx_page_info->num = tp->rx_num_pgs;
+ rx_page_info->num = tp->rx_num_pgs;
break;
case ULP_ISCSI_GET_PARAMS:
case ULP_ISCSI_SET_PARAMS:
==== //depot/projects/iwarp/sys/dev/cxgb/cxgb_sge.c#11 (text+ko) ====
@@ -73,11 +73,16 @@
#endif
int txq_fills = 0;
-static int recycle_enable = 1;
+/*
+ * XXX don't re-enable this until TOE stops assuming
+ * we have an m_ext
+ */
+static int recycle_enable = 0;
extern int cxgb_txq_buf_ring_size;
int cxgb_cached_allocations;
int cxgb_cached;
-int cxgb_ext_freed;
+int cxgb_ext_freed = 0;
+int cxgb_ext_inited = 0;
extern int cxgb_use_16k_clusters;
extern int cxgb_pcpu_cache_enable;
@@ -775,14 +780,6 @@
void
t3_sge_deinit_sw(adapter_t *sc)
{
- int i;
-
- callout_drain(&sc->sge_timer_ch);
- if (sc->tq)
- taskqueue_drain(sc->tq, &sc->slow_intr_task);
- for (i = 0; i < sc->params.nports; i++)
- if (sc->port[i].tq != NULL)
- taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
mi_deinit();
}
@@ -2441,7 +2438,8 @@
bzero(cl, header_size);
m = (struct mbuf *)cl;
-
+
+ cxgb_ext_inited++;
SLIST_INIT(&m->m_pkthdr.tags);
m->m_type = MT_DATA;
m->m_flags = flags | M_NOFREE | M_EXT;
@@ -3003,12 +3001,8 @@
return (err);
}
-
-/*
- * broken by recent mbuf changes
- */
static int
-t3_dump_txq(SYSCTL_HANDLER_ARGS)
+t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
{
struct sge_txq *txq;
struct sge_qset *qs;
@@ -3037,7 +3031,7 @@
txq->txq_dump_start = 0;
return (EINVAL);
}
- err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data);
+ err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
if (err)
return (err);
@@ -3081,7 +3075,68 @@
return (err);
}
+static int
+t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
+{
+ struct sge_txq *txq;
+ struct sge_qset *qs;
+ int i, j, err, dump_end;
+ static int multiplier = 1;
+ struct sbuf *sb;
+ struct tx_desc *txd;
+ uint32_t *WR, wr_hi, wr_lo, gen;
+
+ txq = arg1;
+ qs = txq_to_qset(txq, TXQ_CTRL);
+ if (txq->txq_dump_count == 0) {
+ return (0);
+ }
+ if (txq->txq_dump_count > 256) {
+ log(LOG_WARNING,
+ "dump count is too large %d\n", txq->txq_dump_count);
+ txq->txq_dump_count = 1;
+ return (EINVAL);
+ }
+ if (txq->txq_dump_start > 255) {
+ log(LOG_WARNING,
+ "dump start of %d is greater than queue size\n",
+ txq->txq_dump_start);
+ txq->txq_dump_start = 0;
+ return (EINVAL);
+ }
+retry_sbufops:
+ sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
+ sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
+ txq->txq_dump_start,
+ (txq->txq_dump_start + txq->txq_dump_count) & 255);
+
+ dump_end = txq->txq_dump_start + txq->txq_dump_count;
+ for (i = txq->txq_dump_start; i < dump_end; i++) {
+ txd = &txq->desc[i & (255)];
+ WR = (uint32_t *)txd->flit;
+ wr_hi = ntohl(WR[0]);
+ wr_lo = ntohl(WR[1]);
+ gen = G_WR_GEN(wr_lo);
+
+ sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
+ wr_hi, wr_lo, gen);
+ for (j = 2; j < 30; j += 4)
+ sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
+ WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
+
+ }
+ if (sbuf_overflowed(sb)) {
+ sbuf_delete(sb);
+ multiplier++;
+ goto retry_sbufops;
+ }
+ sbuf_finish(sb);
+ err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
+ sbuf_delete(sb);
+ return (err);
+}
+
static int
t3_lro_enable(SYSCTL_HANDLER_ARGS)
{
@@ -3206,6 +3261,10 @@
CTLFLAG_RD, &cxgb_ext_freed,
0, "#times a cluster was freed through ext_free");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "ext_inited",
+ CTLFLAG_RD, &cxgb_ext_inited,
+ 0, "#times a cluster was initialized for ext_free");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"mbufs_outstanding",
CTLFLAG_RD, &cxgb_mbufs_outstanding,
0, "#mbufs in flight in the driver");
@@ -3255,8 +3314,8 @@
for (j = 0; j < pi->nqsets; j++) {
struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
- struct sysctl_oid *qspoid, *rspqpoid, *txqpoid;
- struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist;
+ struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid;
+ struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist;
struct sge_txq *txq = &qs->txq[TXQ_ETH];
snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
@@ -3273,8 +3332,10 @@
txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
txqpoidlist = SYSCTL_CHILDREN(txqpoid);
-
-
+ ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
+ txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
+ ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
+
SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
CTLFLAG_RD, &qs->rspq.size,
0, "#entries in response queue");
@@ -3297,8 +3358,7 @@
CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
0, t3_dump_rspq, "A", "dump of the response queue");
-
-
+
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
0, "#tunneled packets dropped");
@@ -3355,7 +3415,22 @@
0, "txq #entries to dump");
SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
- 0, t3_dump_txq, "A", "dump of the transmit queue");
+ 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
+
+ SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
+ CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
+ 0, "ctrlq start idx for dump");
+ SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
+ CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
+ 0, "ctrl #entries to dump");
+ SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
+ CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
+ 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
+
+
+
+
+
}
}
}
==== //depot/projects/iwarp/sys/dev/cxgb/sys/mvec.h#6 (text+ko) ====
@@ -66,7 +66,7 @@
#define m_seq m_pkthdr.csum_data /* stored sequence */
#define m_ddp_gl m_ext.ext_buf /* ddp list */
#define m_ddp_flags m_pkthdr.csum_flags /* ddp flags */
-#define m_ulp_mode m_ext.ext_type /* upper level protocol */
+#define m_ulp_mode m_pkthdr.tso_segsz /* upper level protocol */
extern uma_zone_t zone_miovec;
==== //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#8 (text+ko) ====
@@ -579,7 +579,7 @@
* to the HW for the amount of data processed.
*/
void
-t3_cleanup_rbuf(struct tcpcb *tp)
+t3_cleanup_rbuf(struct tcpcb *tp, int copied)
{
struct toepcb *toep = tp->t_toe;
struct socket *so;
@@ -587,17 +587,28 @@
int dack_mode, must_send, read;
u32 thres, credits, dack = 0;
+ so = tp->t_inpcb->inp_socket;
if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
- (tp->t_state == TCPS_FIN_WAIT_2)))
+ (tp->t_state == TCPS_FIN_WAIT_2))) {
+ if (copied) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ toep->tp_copied_seq += copied;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
+
return;
- INP_LOCK_ASSERT(tp->t_inpcb);
+ }
- so = tp->t_inpcb->inp_socket;
+ INP_LOCK_ASSERT(tp->t_inpcb);
SOCKBUF_LOCK(&so->so_rcv);
- read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
- toep->tp_copied_seq += read;
- toep->tp_enqueued_bytes -= read;
+ if (copied)
+ toep->tp_copied_seq += copied;
+ else {
+ read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
+ toep->tp_copied_seq += read;
+ }
credits = toep->tp_copied_seq - toep->tp_rcv_wup;
+ toep->tp_enqueued_bytes = so->so_rcv.sb_cc;
SOCKBUF_UNLOCK(&so->so_rcv);
if (credits > so->so_rcv.sb_mbmax)
@@ -687,7 +698,7 @@
cxgb_toe_rcvd(struct tcpcb *tp)
{
INP_LOCK_ASSERT(tp->t_inpcb);
- t3_cleanup_rbuf(tp);
+ t3_cleanup_rbuf(tp, 0);
return (0);
}
@@ -1054,10 +1065,9 @@
/*
* Determine the receive window size for a socket.
*/
-static unsigned int
-select_rcv_wnd(struct socket *so)
+static unsigned long
+select_rcv_wnd(struct toedev *dev, struct socket *so)
{
- struct toedev *dev = TOE_DEV(so);
struct tom_data *d = TOM_DATA(dev);
unsigned int wnd;
unsigned int max_rcv_wnd;
@@ -1065,7 +1075,9 @@
if (tcp_do_autorcvbuf)
wnd = tcp_autorcvbuf_max;
else
- wnd = sbspace(&so->so_rcv);
+ wnd = so->so_rcv.sb_hiwat;
+
+
/* XXX
* For receive coalescing to work effectively we need a receive window
@@ -1079,7 +1091,7 @@
(uint32_t)d->rx_page_size * 23 :
MAX_RCV_WND);
- return (min(wnd, max_rcv_wnd));
+ return min(wnd, max_rcv_wnd);
}
/*
@@ -1118,8 +1130,8 @@
* XXX broken
*
*/
- tp->rcv_wnd = select_rcv_wnd(so);
-
+ tp->rcv_wnd = select_rcv_wnd(dev, so);
+
toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
toep->tp_qset_idx = 0;
@@ -1608,7 +1620,6 @@
uint64_t t;
__be64 *tcb;
- TRACE_ENTER;
/* Note that we only accout for CPL_GET_TCB issued by the DDP code. We
* really need a cookie in order to dispatch the RPLs.
*/
@@ -1625,7 +1636,6 @@
m_freem(m);
if (__predict_true((so->so_state & SS_NOFDREF) == 0))
sorwakeup(so);
- TRACE_EXIT;
return;
}
@@ -1684,7 +1694,6 @@
if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
handle_excess_rx(toep, m);
- TRACE_EXIT;
return;
}
@@ -1704,7 +1713,7 @@
}
#endif
m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
- bsp->flags &= ~DDP_BF_NOCOPY;
+ bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
q->cur_buf ^= 1;
} else if (bsp->flags & DDP_BF_NOFLIP) {
@@ -1720,11 +1729,11 @@
* and we need to decrement the posted count.
*/
if (m->m_pkthdr.len == 0) {
- if (ddp_offset == 0)
+ if (ddp_offset == 0) {
q->kbuf_posted--;
- panic("length not set");
+ bsp->flags |= DDP_BF_NODATA;
+ }
m_free(m);
- TRACE_EXIT;
return;
}
} else {
@@ -1732,12 +1741,12 @@
* but it got here way late and nobody cares anymore.
*/
m_free(m);
- TRACE_EXIT;
return;
}
tp = toep->tp_tp;
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
m->m_seq = tp->rcv_nxt;
tp->rcv_nxt += m->m_pkthdr.len;
tp->t_rcvtime = ticks;
@@ -1750,10 +1759,12 @@
"tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
m->m_seq, q->cur_buf, m->m_pkthdr.len);
#endif
- sbappend(&so->so_rcv, m);
+ SOCKBUF_LOCK(&so->so_rcv);
+ sbappendstream_locked(&so->so_rcv, m);
if (__predict_true((so->so_state & SS_NOFDREF) == 0))
- sorwakeup(so);
- TRACE_EXIT;
+ sorwakeup_locked(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
}
/*
@@ -1766,9 +1777,10 @@
struct toepcb *toep = (struct toepcb *)ctx;
/* OK if socket doesn't exist */
- if (toep == NULL)
+ if (toep == NULL) {
+ printf("null toep in do_get_tcb_rpl\n");
return (CPL_RET_BUF_DONE);
-
+ }
tcb_rpl_as_ddp_complete(toep, m);
return (0);
@@ -1778,7 +1790,6 @@
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
struct tcpcb *tp = toep->tp_tp;
- struct socket *so;
struct ddp_state *q;
struct ddp_buf_state *bsp;
struct cpl_rx_data *hdr = cplhdr(m);
@@ -1790,7 +1801,10 @@
TRACE_ENTER;
q = &toep->tp_ddp_state;
bsp = &q->buf_state[q->cur_buf];
- m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+ m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+
+ printf("rcv_nxt=0x%x tp->rcv_next=0x%x len=%d\n",
+ rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
#ifdef T3_TRACE
if ((int)m->m_pkthdr.len < 0) {
@@ -1799,20 +1813,19 @@
#endif
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
m->m_cur_offset = bsp->cur_offset;
m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
if (bsp->flags & DDP_BF_NOCOPY)
bsp->flags &= ~DDP_BF_NOCOPY;
+ printf("ddp flags=0x%x\n", m->m_ddp_flags);
+
m->m_seq = tp->rcv_nxt;
tp->rcv_nxt = rcv_nxt;
bsp->cur_offset += m->m_pkthdr.len;
if (!(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1;
- tp->t_rcvtime = ticks;
-
- so = toeptoso(toep);
- sbappend(&so->so_rcv, m);
/*
* For now, don't re-enable DDP after a connection fell out of DDP
* mode.
@@ -1837,14 +1850,14 @@
if (__predict_false(so_no_receive(so))) {
handle_excess_rx(toep, m);
INP_UNLOCK(tp->t_inpcb);
+ TRACE_EXIT;
return;
}
if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
handle_ddp_data(toep, m);
-
+
m->m_seq = ntohl(hdr->seq);
- m->m_ddp_flags = 0;
m->m_ulp_mode = 0; /* for iSCSI */
#if VALIDATE_SEQ
@@ -1889,11 +1902,12 @@
"new_rx_data: seq 0x%x len %u",
m->m_seq, m->m_pkthdr.len);
#endif
+ INP_UNLOCK(tp->t_inpcb);
SOCKBUF_LOCK(&so->so_rcv);
if (sb_notify(&so->so_rcv))
DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);
- sbappend_locked(&so->so_rcv, m);
+ sbappendstream_locked(&so->so_rcv, m);
#ifdef notyet
/*
@@ -1906,7 +1920,7 @@
so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
#endif
- INP_UNLOCK(tp->t_inpcb);
+
DPRINTF("sb_cc=%d sb_mbcnt=%d\n",
so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);
@@ -1940,13 +1954,17 @@
struct cpl_rx_data_ddp *hdr;
unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
struct socket *so = toeptoso(toep);
+ int nomoredata = 0;
+
+ if (__predict_false(so_no_receive(so))) {
+ struct inpcb *inp = sotoinpcb(so);
- if (__predict_false(so_no_receive(so))) {
+ INP_LOCK(inp);
handle_excess_rx(toep, m);
+ INP_UNLOCK(inp);
return;
}
- TRACE_ENTER;
tp = sototcpcb(so);
q = &toep->tp_ddp_state;
hdr = cplhdr(m);
@@ -1971,7 +1989,7 @@
/*
* Overload to store old RCV_NXT
*/
- m->m_pkthdr.csum_data = tp->rcv_nxt;
+ m->m_seq = tp->rcv_nxt;
tp->rcv_nxt = rcv_nxt;
/*
@@ -1979,7 +1997,14 @@
* m->m_len here, we need to be very careful that nothing from now on
* interprets ->len of this packet the usual way.
*/
- m->m_len = tp->rcv_nxt - m->m_pkthdr.csum_data;
+ m->m_len = m->m_pkthdr.len = tp->rcv_nxt - m->m_seq;
+ /*
+ * Length is only meaningful for kbuf
+ */
+ if (!(bsp->flags & DDP_BF_NOCOPY))
+ KASSERT(m->m_len <= bsp->gl->dgl_length,
+ ("length received exceeds ddp pages: len=%d dgl_length=%d",
+ m->m_len, bsp->gl->dgl_length));
/*
* Figure out where the new data was placed in the buffer and store it
@@ -1989,8 +2014,9 @@
end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
m->m_cur_offset = end_offset - m->m_pkthdr.len;
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
bsp->cur_offset = end_offset;
-
+ toep->tp_enqueued_bytes += m->m_pkthdr.len;
/*
* Bit 0 of flags stores whether the DDP buffer is completed.
* Note that other parts of the code depend on this being in bit 0.
@@ -1998,26 +2024,30 @@
if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
panic("spurious ddp completion");
} else {
- m->m_pkthdr.csum_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
- if (m->m_pkthdr.csum_flags && !(bsp->flags & DDP_BF_NOFLIP))
+ m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
+ if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1; /* flip buffers */
}
if (bsp->flags & DDP_BF_NOCOPY) {
- m->m_pkthdr.csum_flags |= (bsp->flags & DDP_BF_NOCOPY);
+ m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
bsp->flags &= ~DDP_BF_NOCOPY;
}
if (ddp_report & F_DDP_PSH)
- m->m_pkthdr.csum_flags |= DDP_BF_PSH;
-
+ m->m_ddp_flags |= DDP_BF_PSH;
+ if (nomoredata)
+ m->m_ddp_flags |= DDP_BF_NODATA;
+
tp->t_rcvtime = ticks;
+
+ SOCKBUF_LOCK(&so->so_rcv);
sbappendstream_locked(&so->so_rcv, m);
if ((so->so_state & SS_NOFDREF) == 0)
sorwakeup_locked(so);
-
- TRACE_EXIT;
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
}
#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
@@ -2057,9 +2087,14 @@
struct ddp_buf_state *bsp;
struct cpl_rx_ddp_complete *hdr;
unsigned int ddp_report, buf_idx, when;
+ int nomoredata = 0;
if (__predict_false(so_no_receive(so))) {
+ struct inpcb *inp = sotoinpcb(so);
+
+ INP_LOCK(inp);
handle_excess_rx(toep, m);
+ INP_UNLOCK(inp);
return;
}
TRACE_ENTER;
@@ -2070,7 +2105,7 @@
bsp = &q->buf_state[buf_idx];
when = bsp->cur_offset;
- m->m_len = G_DDP_OFFSET(ddp_report) - when;
+ m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
#ifdef T3_TRACE
T3_TRACE5(TIDTB(sk),
@@ -2082,9 +2117,12 @@
bsp->cur_offset += m->m_len;
- if (!(bsp->flags & DDP_BF_NOFLIP))
+ if (!(bsp->flags & DDP_BF_NOFLIP)) {
q->cur_buf ^= 1; /* flip buffers */
-
+ if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
+ nomoredata=1;
+ }
+
#ifdef T3_TRACE
T3_TRACE4(TIDTB(sk),
"process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
@@ -2093,18 +2131,23 @@
G_DDP_OFFSET(ddp_report));
#endif
m->m_ddp_gl = (unsigned char *)bsp->gl;
- m->m_pkthdr.csum_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
+ m->m_flags |= M_DDP;
+ m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
if (bsp->flags & DDP_BF_NOCOPY)
bsp->flags &= ~DDP_BF_NOCOPY;
+ if (nomoredata)
+ m->m_ddp_flags |= DDP_BF_NODATA;
+
m->m_pkthdr.csum_data = tp->rcv_nxt;
tp->rcv_nxt += m->m_len;
tp->t_rcvtime = ticks;
+ SOCKBUF_LOCK(&so->so_rcv);
sbappendstream_locked(&so->so_rcv, m);
if ((so->so_state & SS_NOFDREF) == 0)
sorwakeup_locked(so);
-
+ SOCKBUF_UNLOCK(&so->so_rcv);
TRACE_EXIT;
}
@@ -2184,8 +2227,9 @@
q = &toep->tp_ddp_state;
bsp = &q->buf_state[q->cur_buf];
- m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+ m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
m->m_cur_offset = bsp->cur_offset;
m->m_ddp_flags =
DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
@@ -2195,7 +2239,7 @@
if (!(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1;
tp->t_rcvtime = ticks;
- sbappend(&so->so_rcv, m);
+ sbappendstream(&so->so_rcv, m);
if (__predict_true((so->so_state & SS_NOFDREF) == 0))
sorwakeup(so);
return (1);
@@ -2918,7 +2962,8 @@
th.th_seq = req->rcv_isn;
th.th_flags = TH_SYN;
- toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn;
+ toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
+
inc.inc_isipv6 = 0;
inc.inc_len = 0;
@@ -3047,14 +3092,15 @@
newtoep->tp_flags = TP_SYN_RCVD;
newtoep->tp_tid = tid;
newtoep->tp_toedev = tdev;
+ tp->rcv_wnd = select_rcv_wnd(tdev, so);
- printf("inserting tid=%d\n", tid);
+ printf("inserting tid=%d rcv_wnd=%ld\n", tid, tp->rcv_wnd);
cxgb_insert_tid(cdev, d->client, newtoep, tid);
SOCK_LOCK(so);
LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
SOCK_UNLOCK(so);
-
+#ifdef notyet
if (lctx->ulp_mode) {
ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
@@ -3063,7 +3109,22 @@
else
newtoep->tp_ulp_mode = lctx->ulp_mode;
}
+#else
+ newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && /* !sock_flag(sk, NO_DDP) && */
+ tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
+
+ if (newtoep->tp_ulp_mode) {
+ ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+
+ if (ddp_mbuf == NULL)
+ newtoep->tp_ulp_mode = 0;
+ }
+
+ printf("ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d\n",
+ TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
+#endif
+
set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
DPRINTF("adding request to syn cache\n");
@@ -3346,7 +3407,8 @@
toep->tp_flags = 0;
tp->t_toe = toep;
reset_wr_list(toep);
- tp->rcv_wnd = select_rcv_wnd(so);
+ tp->rcv_wnd = select_rcv_wnd(tdev, so);
+ tp->rcv_nxt = toep->tp_copied_seq;
install_offload_ops(so);
toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
@@ -3728,7 +3790,6 @@
const struct tom_data *td = TOM_DATA(TOE_DEV(so));
unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
- TRACE_ENTER;
for (i = 0; i < nppods; ++i) {
m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
@@ -3756,8 +3817,6 @@
send_or_defer(toep, m, 0);
ppod_addr += PPOD_SIZE;
}
-
- TRACE_EXIT;
return (0);
}
@@ -3905,7 +3964,6 @@
struct cpl_set_tcb_field *req;
struct ddp_state *p = &toep->tp_ddp_state;
- TRACE_ENTER;
wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
m = m_gethdr_nofail(wrlen);
m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
@@ -3960,7 +4018,6 @@
bufidx, tag0, tag1, len);
#endif
cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
- TRACE_EXIT;
}
/*
@@ -3978,7 +4035,6 @@
struct work_request_hdr *wr;
struct cpl_set_tcb_field *req;
- TRACE_ENTER;
wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
(len1 ? sizeof(*req) : 0) +
(modulate ? sizeof(struct cpl_rx_data_ack) : 0);
@@ -4024,7 +4080,6 @@
#endif
cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
- TRACE_EXIT;
}
void
==== //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#7 (text+ko) ====
@@ -41,6 +41,7 @@
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
@@ -48,6 +49,7 @@
#include <sys/file.h>
#include <machine/bus.h>
+#include <machine/cpu.h>
#include <net/if.h>
#include <net/route.h>
@@ -257,6 +259,11 @@
static int
so_should_ddp(const struct toepcb *toep, int last_recv_len)
{
+
+ DPRINTF("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n",
+ toep->tp_ulp_mode, last_recv_len, TOM_TUNABLE(toep->tp_toedev, ddp_thres),
+ toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN));
+
return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) &&
last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
toep->tp_tp->rcv_wnd >
@@ -278,28 +285,37 @@
static int
m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio)
{
- int curlen, err = 0;
+ int curlen, startlen, resid_init, err = 0;
caddr_t buf;
-
+
+ DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n",
+ m, offset, len);
+
+ startlen = len;
+ resid_init = uio->uio_resid;
while (m && len) {
buf = mtod(m, caddr_t);
curlen = m->m_len;
- if (offset < curlen) {
+ if (offset && (offset < curlen)) {
curlen -= offset;
buf += offset;
offset = 0;
- } else {
+ } else if (offset) {
offset -= curlen;
m = m->m_next;
continue;
}
+ err = uiomove(buf, min(len, curlen), uio);
+ if (err) {
+ printf("uiomove returned %d\n", err);
+ return (err);
+ }
- err = uiomove_frombuf(buf, min(len, curlen), uio);
- if (err)
- return (err);
- len -= min(len, m->m_len);
+ len -= min(len, curlen);
m = m->m_next;
}
+ DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n",
+ startlen - len, resid_init, uio->uio_resid);
return (err);
}
@@ -312,16 +328,20 @@
copy_data(const struct mbuf *m, int offset, int len, struct uio *uio)
{
struct iovec *to = uio->uio_iov;
+ int err;
+
- if (__predict_true(!is_ddp(m))) /* RX_DATA */
+ if (__predict_true(!is_ddp(m))) { /* RX_DATA */
return m_uiomove(m, offset, len, uio);
- if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
+ } if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
to->iov_len -= len;
to->iov_base = ((caddr_t)to->iov_base) + len;
uio->uio_iov = to;
+ uio->uio_resid -= len;
return (0);
}
- return t3_ddp_copy(m, offset, uio, len); /* kernel DDP */
+ err = t3_ddp_copy(m, offset, uio, len); /* kernel DDP */
+ return (err);
}
static void
@@ -509,7 +529,45 @@
return pru_sosend(so, addr, uio, top, control, flags, td);
}
+/*
+ * Following replacement or removal of the first mbuf on the first mbuf chain
+ * of a socket buffer, push necessary state changes back into the socket
+ * buffer so that other consumers see the values consistently. 'nextrecord'
+ * is the callers locally stored value of the original value of
+ * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
+ * NOTE: 'nextrecord' may be NULL.
+ */
+#if 1
+static __inline void
+sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ /*
+ * First, update for the new value of nextrecord. If necessary, make
+ * it the first record.
+ */
+ if (sb->sb_mb != NULL)
+ sb->sb_mb->m_nextpkt = nextrecord;
+ else
+ sb->sb_mb = nextrecord;
+ /*
+ * Now update any dependent socket buffer fields to reflect the new
+ * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
+ * addition of a second clause that takes care of the case where
+ * sb_mb has been updated, but remains the last record.
+ */
+ if (sb->sb_mb == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (sb->sb_mb->m_nextpkt == NULL)
+ sb->sb_lastrecord = sb->sb_mb;
+}
+#endif
+
+#define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO)
+
static int
t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
{
@@ -517,23 +575,25 @@
struct toepcb *toep = tp->t_toe;
struct mbuf *m;
uint32_t offset;
- int err, flags, avail, len, buffers_freed = 0, copied = 0;
+ int err, flags, avail, len, copied, copied_unacked;
int target; /* Read at least this many bytes */
int user_ddp_ok, user_ddp_pending = 0;
struct ddp_state *p;
struct inpcb *inp = sotoinpcb(so);
-
+
+ avail = offset = copied = copied_unacked = 0;
flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
err = sblock(&so->so_rcv, SBLOCKWAIT(flags));
+
if (err)
return (err);
- TRACE_ENTER;
+
SOCKBUF_LOCK(&so->so_rcv);
restart:
len = uio->uio_resid;
m = so->so_rcv.sb_mb;
- target = (flags & MSG_WAITALL) ? min(len, so->so_rcv.sb_hiwat) : so->so_rcv.sb_lowat;
+ target = (flags & MSG_WAITALL) ? len : so->so_rcv.sb_lowat;
p = &toep->tp_ddp_state;
user_ddp_ok = p->ubuf_ddp_ready;
p->cancel_ubuf = 0;
@@ -561,6 +621,8 @@
so->so_error = 0;
goto done;
}
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ goto done;
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects mailing list