PERFORCE change 134697 for review
Kip Macy
kmacy at FreeBSD.org
Sat Feb 2 23:18:55 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=134697
Change 134697 by kmacy at kmacy:storage:toehead on 2008/02/03 07:18:30
- fix socket buffer accounting
- use sbappendstream
Affected files ...
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#14 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#15 edit
Differences ...
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#14 (text+ko) ====
@@ -586,12 +586,19 @@
int dack_mode, must_send, read;
u32 thres, credits, dack = 0;
+ so = tp->t_inpcb->inp_socket;
if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
- (tp->t_state == TCPS_FIN_WAIT_2)))
+ (tp->t_state == TCPS_FIN_WAIT_2))) {
+ if (copied) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ toep->tp_copied_seq += copied;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
+
return;
- INP_LOCK_ASSERT(tp->t_inpcb);
+ }
- so = tp->t_inpcb->inp_socket;
+ INP_LOCK_ASSERT(tp->t_inpcb);
SOCKBUF_LOCK(&so->so_rcv);
if (copied)
toep->tp_copied_seq += copied;
@@ -1755,9 +1762,12 @@
"tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
m->m_seq, q->cur_buf, m->m_pkthdr.len);
#endif
- sbappend(&so->so_rcv, m);
+ SOCKBUF_LOCK(&so->so_rcv);
+ sbappendstream_locked(&so->so_rcv, m);
if (__predict_true((so->so_state & SS_NOFDREF) == 0))
- sorwakeup(so);
+ sorwakeup_locked(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
}
/*
@@ -1895,11 +1905,12 @@
"new_rx_data: seq 0x%x len %u",
m->m_seq, m->m_pkthdr.len);
#endif
+ INP_UNLOCK(tp->t_inpcb);
SOCKBUF_LOCK(&so->so_rcv);
if (sb_notify(&so->so_rcv))
DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);
- sbappend_locked(&so->so_rcv, m);
+ sbappendstream_locked(&so->so_rcv, m);
#ifdef notyet
/*
@@ -1912,7 +1923,7 @@
so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
#endif
- INP_UNLOCK(tp->t_inpcb);
+
DPRINTF("sb_cc=%d sb_mbcnt=%d\n",
so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);
@@ -2134,11 +2145,12 @@
tp->rcv_nxt += m->m_len;
tp->t_rcvtime = ticks;
+ SOCKBUF_LOCK(&so->so_rcv);
sbappendstream_locked(&so->so_rcv, m);
if ((so->so_state & SS_NOFDREF) == 0)
sorwakeup_locked(so);
-
+ SOCKBUF_UNLOCK(&so->so_rcv);
TRACE_EXIT;
}
@@ -2230,7 +2242,7 @@
if (!(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1;
tp->t_rcvtime = ticks;
- sbappend(&so->so_rcv, m);
+ sbappendstream(&so->so_rcv, m);
if (__predict_true((so->so_state & SS_NOFDREF) == 0))
sorwakeup(so);
return (1);
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#15 (text+ko) ====
@@ -41,12 +41,14 @@
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <machine/bus.h>
+#include <machine/cpu.h>
#include <net/if.h>
#include <net/route.h>
@@ -525,7 +527,42 @@
return pru_sosend(so, addr, uio, top, control, flags, td);
}
+/*
+ * Following replacement or removal of the first mbuf on the first mbuf chain
+ * of a socket buffer, push necessary state changes back into the socket
+ * buffer so that other consumers see the values consistently. 'nextrecord'
+ * is the callers locally stored value of the original value of
+ * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
+ * NOTE: 'nextrecord' may be NULL.
+ */
+#if 1
+static __inline void
+sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ /*
+ * First, update for the new value of nextrecord. If necessary, make
+ * it the first record.
+ */
+ if (sb->sb_mb != NULL)
+ sb->sb_mb->m_nextpkt = nextrecord;
+ else
+ sb->sb_mb = nextrecord;
+ /*
+ * Now update any dependent socket buffer fields to reflect the new
+ * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
+ * addition of a second clause that takes care of the case where
+ * sb_mb has been updated, but remains the last record.
+ */
+ if (sb->sb_mb == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (sb->sb_mb->m_nextpkt == NULL)
+ sb->sb_lastrecord = sb->sb_mb;
+}
+#endif
#define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO)
@@ -536,14 +573,13 @@
struct toepcb *toep = tp->t_toe;
struct mbuf *m;
uint32_t offset;
- int err, flags, avail, len, buffers_freed, copied, copied_unacked;
+ int err, flags, avail, len, copied, copied_unacked;
int target; /* Read at least this many bytes */
int user_ddp_ok, user_ddp_pending = 0;
struct ddp_state *p;
struct inpcb *inp = sotoinpcb(so);
-
- copied = copied_unacked = buffers_freed = 0;
+ avail = offset = copied = copied_unacked = 0;
flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
err = sblock(&so->so_rcv, SBLOCKWAIT(flags));
@@ -583,6 +619,8 @@
so->so_error = 0;
goto done;
}
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ goto done;
if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
goto done;
if (tp->t_state == TCPS_CLOSED) {
@@ -625,15 +663,23 @@
} else if (copied >= target)
goto done;
else {
- SOCKBUF_UNLOCK(&so->so_rcv);
- INP_LOCK(inp);
- t3_cleanup_rbuf(tp, copied_unacked);
- INP_UNLOCK(inp);
- SOCKBUF_LOCK(&so->so_rcv);
- copied_unacked = 0;
+ int i = 0;
+ if (copied_unacked) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ INP_LOCK(inp);
+ t3_cleanup_rbuf(tp, copied_unacked);
+ INP_UNLOCK(inp);
+ copied_unacked = 0;
+ if (mp_ncpus > 1)
+ while (i++ < 200 && so->so_rcv.sb_mb == NULL)
+ cpu_spinwait();
+ SOCKBUF_LOCK(&so->so_rcv);
+ }
+
if (so->so_rcv.sb_mb)
goto restart;
- printf("sbwaiting 2 copied=%d target=%d\n", copied, target);
+ printf("sbwaiting 2 copied=%d target=%d avail=%d so=%p mb=%p cc=%d\n", copied, target, avail, so,
+ so->so_rcv.sb_mb, so->so_rcv.sb_cc);
if ((err = sbwait(&so->so_rcv)) != 0)
goto done;
}
@@ -642,24 +688,27 @@
if (m->m_pkthdr.len == 0) {
if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0)
panic("empty mbuf and NOCOPY not set\n");
+ printf("dropping empty mbuf\n");
user_ddp_pending = 0;
- sbfree(&so->so_rcv, m);
- m = so->so_rcv.sb_mb = m_free(m);
+ sbdroprecord_locked(&so->so_rcv);
goto done;
}
+
offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d pktlen=%d is_ddp(m)=%d\n",
m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset, m->m_pkthdr.len, !!is_ddp(m));
+
if (offset >= m->m_pkthdr.len)
panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x "
"pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq,
m->m_pkthdr.len, m->m_ddp_flags);
+
avail = m->m_pkthdr.len - offset;
if (len < avail) {
- if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY))
+ if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY))
panic("bad state in t3_soreceive\n");
avail = len;
- }
+ }
#ifdef URGENT_DATA_SUPPORTED
/*
* Check if the data we are preparing to copy contains urgent
@@ -720,16 +769,15 @@
goto done_unlocked;
}
SOCKBUF_LOCK(&so->so_rcv);
- if (!(resid > uio->uio_resid))
- printf("copied zero bytes :-/ resid=%d uio_resid=%d copied=%d copied_unacked=%d\n",
- resid, uio->uio_resid, copied, copied_unacked);
- }
-
- sbdrop_locked(&so->so_rcv, avail);
- buffers_freed++;
+ if (avail != (resid - uio->uio_resid))
+ printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n",
+ avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m));
+ }
+
copied += avail;
copied_unacked += avail;
len -= avail;
+
#ifdef URGENT_DATA_SUPPORTED
skip_copy:
if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq))
@@ -741,7 +789,8 @@
*/
if (avail + offset >= m->m_pkthdr.len) {
unsigned int fl = m->m_ddp_flags;
- int exitnow, got_psh = 0, nomoredata = 0;
+ int exitnow, count, got_psh = 0, nomoredata = 0;
+ struct mbuf *nextrecord;
if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) {
if (is_ddp_psh(m) && user_ddp_pending)
@@ -757,25 +806,47 @@
p->ubuf_ddp_ready = 1;
}
}
+
+ nextrecord = m->m_nextpkt;
+ count = m->m_pkthdr.len;
+ while (count) {
+ count -= m->m_len;
+ sbfree(&so->so_rcv, m);
+ so->so_rcv.sb_mb = m_free(m);
+ m = so->so_rcv.sb_mb;
+ }
+ sockbuf_pushsync(&so->so_rcv, nextrecord);
+
exitnow = got_psh || nomoredata;
- if ((so->so_rcv.sb_mb == NULL) && exitnow)
+ if ((so->so_rcv.sb_mb == NULL) && exitnow) {
+ printf("exiting\n");
goto done;
- }
- if (len > 0)
+ }
+ if (copied_unacked > (so->so_rcv.sb_hiwat >> 2)) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ INP_LOCK(inp);
+ t3_cleanup_rbuf(tp, copied_unacked);
+ INP_UNLOCK(inp);
+ copied_unacked = 0;
+ SOCKBUF_LOCK(&so->so_rcv);
+ }
+ }
+ if (len > 0)
goto restart;
+
+ done:
-done:
/*
* If we can still receive decide what to do in preparation for the
* next receive. Note that RCV_SHUTDOWN is set if the connection
* transitioned to CLOSE but not if it was in that state to begin with.
*/
if (__predict_true((so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) {
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_rcv);
if (user_ddp_pending) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_rcv);
user_ddp_ok = 0;
t3_cancel_ubuf(toep);
if (so->so_rcv.sb_mb) {
@@ -810,12 +881,11 @@
#endif
SOCKBUF_UNLOCK(&so->so_rcv);
done_unlocked:
- if (copied) {
+ if (copied_unacked) {
INP_LOCK(inp);
t3_cleanup_rbuf(tp, copied_unacked);
INP_UNLOCK(inp);
}
-
sbunlock(&so->so_rcv);
return (err);
@@ -844,27 +914,22 @@
* - iovcnt is 1
*
*/
+
if ((tp->t_flags & TF_TOE) && ((flags & (MSG_WAITALL|MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0)
-#ifdef notyet
- && ((so->so_state & SS_NBIO) == 0)
-#endif
&& (uio->uio_iovcnt == 1) &&
((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) && (mp0 == NULL)) {
tdev = TOE_DEV(so);
zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
zcopy_enabled = TOM_TUNABLE(tdev, ddp);
-
if ((uio->uio_resid > zcopy_thres) &&
(uio->uio_iovcnt == 1)
-#if 0
- && ((so->so_state & SS_NBIO) == 0)
-#endif
&& zcopy_enabled) {
rv = t3_soreceive(so, flagsp, uio);
if (rv != EAGAIN)
return (rv);
- }
- }
+ }
+ } else if (tp->t_flags & TF_TOE)
+ printf("skipping t3_soreceive\n");
return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
}
More information about the p4-projects
mailing list