PERFORCE change 122495 for review
Kip Macy
kmacy at FreeBSD.org
Thu Jun 28 17:13:05 UTC 2007
http://perforce.freebsd.org/chv.cgi?CH=122495
Change 122495 by kmacy at kmacy_vt-x:opentoe_init on 2007/06/28 17:12:16
implement chelsio_usr_sosend down to t3_push_frames
Affected files ...
.. //depot/projects/opentoe/sys/dev/cxgb/notes.txt#1 add
.. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_io.c#6 edit
.. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_socket.c#4 edit
.. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_defs.h#4 edit
Differences ...
==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_io.c#6 (text+ko) ====
@@ -66,9 +66,6 @@
#include <dev/cxgb/ulp/t3_tom/t3_ddp.h>
#include <dev/cxgb/ulp/toecore/toedev.h>
-
-
-
#define DEBUG_WR 0
extern struct protosw t3_tcp_proto;
@@ -471,9 +468,9 @@
* Returns true if an mbuf carries urgent data.
*/
static inline int
-skb_urgent(struct mbuf *skb)
+mbuf_urgent(struct mbuf *m)
{
- return (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG) != 0;
+ return (m->m_flags & TCPCB_FLAG_URG) != 0;
}
/*
@@ -491,8 +488,7 @@
struct tx_data_wr *req;
struct tcpcb *tp = sototcpcb(so);
- skb->h.raw = skb->data;
- req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
+ req = mtod(m, struct tx_data_wr *);
req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
req->wr_lo = htonl(V_WR_TID(TID(so)));
req->sndseq = htonl(tp->snd_nxt);
@@ -501,24 +497,18 @@
req->param = htonl(V_TX_PORT(L2T_ENTRY(so)->smt_idx));
/* V_TX_ULP_SUBMODE sets both the mode and submode */
req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(m)) |
- V_TX_URG(skb_urgent(m)) |
- V_TX_SHOVE((!sock_flag(sk, TX_MORE_DATA)) &&
- (skb_peek(&sk->sk_write_queue) ? 0 : 1)));
+ V_TX_URG(mbuf_urgent(m)) |
+ V_TX_SHOVE((!GET_TOE_FLAG(so, TX_MORE_DATA)) &&
+ (so->so_snd.sb_mb != NULL));
- if (!sock_flag(sk, TX_DATA_SENT)) {
- req->flags |= htonl(F_TX_INIT | V_TX_CPU_IDX(qset(so)));
- /*
- * The send buffer size is in 32KB. In addition Linux doubles
- * what the user requested to account for header and mbuf
- * overhead. We care about pure payload here so divide by an
- * extra 2 to get the user's requested value.
- */
- req->param |= htonl((sk->sk_userlocks & SOCK_SNDBUF_LOCK) ?
- V_TX_SNDBUF(sk->sk_sndbuf >> 16) :
- V_TX_SNDBUF(M_TX_SNDBUF));
- sock_set_flag(sk, TX_DATA_SENT);
- }
-}
+ if (GET_TOE_FLAG(so, TX_DATA_SENT) == 0) {
+ req->flags |= htonl(F_TX_INIT | V_TX_CPU_IDX(qset(so)));
+ req->param |= htonl((so->so_snd.sb_flags & SB_AUTOSIZE) ?
+ V_TX_SNDBUF(tcp_autosndbuf_max) :
+ V_TX_SNDBUF(so->so_cred->cr_uidinfo->ui_sbsize));
+ SET_TOE_FLAG(so, TX_DATA_SENT);
+ }
+
/*
* Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a
@@ -535,25 +525,27 @@
struct toedev *cdev;
struct tom_data *d;
- if (__predict_false(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
+ if (__predict_false((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_CLOSE)))
return 0;
/*
* We shouldn't really be called at all after an abort but check just
* in case.
- */
+ * XXX not clear how to check this on FreeBSD
+ */
+#ifdef notyet
if (__predict_false(sock_flag(sk, ABORT_SHUTDOWN)))
return 0;
-
+#endif
d = TOM_DATA(TOE_DEV(so));
cdev = d->cdev;
- while (WR_AVAIL(so) && (skb = skb_peek(&sk->sk_write_queue)) != NULL &&
- !sock_flag(sk, TX_WAIT_IDLE) &&
- (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_HOLD) ||
- skb_queue_len(&sk->sk_write_queue) > 1)) {
+ while (WR_AVAIL(so) && (m = so->so_snd.sb_mb) != NULL &&
+ !(TOE_FLAGS(so) & TX_WAIT_IDLE) &&
+ (so->so_snd.sb_mb != so->so_snd.sb_lastrecord)) {
- int len = skb->len; /* length before skb_push */
+ int len = m->m_pkthdr.len; /* length before skb_push */
int frags = skb_shinfo(skb)->nr_frags + (len != skb->data_len);
int wrs_needed = skb_wrs[frags];
@@ -561,39 +553,40 @@
if (WR_AVAIL(so) < wrs_needed)
break;
- __skb_unlink(skb, &sk->sk_write_queue);
+ so->so_snd.sb_mb = m->m_nextpkt;
+
m->m_priority = mkprio(CPL_PRIORITY_DATA, so);
m->csum_data = wrs_needed; /* remember this until the WR_ACK */
WR_AVAIL(so) -= wrs_needed;
WR_UNACKED(so) += wrs_needed;
enqueue_wr(tp, m);
- if (__predict_true(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_NEED_HDR)) {
+ if (__predict_true(m->m_flags & TCPCB_FLAG_NEED_HDR)) {
len += ulp_extra_len(m);
make_tx_data_wr(so, m, len);
tp->snd_nxt += len;
- tp->lsndtime = tcp_time_stamp;
+ tp->ts_recent_age = ticks;
#if defined(CONFIG_T3_ZCOPY_SENDMSG) || defined(CONFIG_T3_ZCOPY_SENDMSG_MODULE)
atomic_add(skb->len - sizeof (struct tx_data_wr),
&d->tx_dma_pending);
m->m_pkthdr.priv = so;
#endif
if ((req_completion && WR_UNACKED(so) == wrs_needed) ||
- (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_COMPL) ||
+ (m->m_flags & TCPCB_FLAG_COMPL) ||
WR_UNACKED(so) >= WR_MAX(so) / 2) {
struct work_request_hdr *wr = cplhdr(m);
wr->wr_hi |= htonl(F_WR_COMPL);
WR_UNACKED(so) = 0;
}
- TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_NEED_HDR;
+ m->m_flags &= ~TCPCB_FLAG_NEED_HDR;
} else if (skb->data[0] == FW_WROPCODE_OFLD_CLOSE_CON)
sock_set_flag(sk, CLOSE_CON_REQUESTED);
total_size += skb->truesize;
- if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_BARRIER)
- sock_set_flag(sk, TX_WAIT_IDLE);
- set_arp_failure_handler(skb, arp_failure_discard);
+ if (m->m_flags & TCPCB_FLAG_BARRIER)
+ SET_TOE_FLAG(so, TX_WAIT_IDLE);
+ set_arp_failure_handler(m, arp_failure_discard);
l2t_send(cdev, m, L2T_ENTRY(so));
}
@@ -1686,15 +1679,15 @@
/*
* XXX ?
*/
- sototcpcb(so)->ts_recent = tcp_time_stamp;
+ sototcpcb(so)->ts_recent_age = ticks;
+
skb->h.th = tcphdr_skb->h.th;
#ifdef T3_TRACE
T3_TRACE3(TB(q),
"tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
TCP_SKB_CB(skb)->seq, q->cur_buf, m->m_len);
-#endif
-
+#endif
sbappend(&so->so_rcv, m);
if (!sock_flag(so, SOCK_DEAD))
@@ -1753,7 +1746,7 @@
if (!(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1;
- sototcpcb(so)->ts_recent = tcp_time_stamp;
+ sototcpcb(so)->ts_recent = ticks;
sbappend(so->so_rcv, m);
/* For now, don't re-enable DDP after a connection fell out of DDP
@@ -1815,7 +1808,7 @@
"new_rx_data: seq 0x%x len %u",
TCP_SKB_CB(skb)->seq, m->m_len);
#endif
- sototcpcb(so)->ts_recent = tcp_time_stamp;
+ sototcpcb(so)->ts_recent = ticks;
sbappend(so->so_rcv, m);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
@@ -1919,7 +1912,7 @@
if (ddp_report & F_DDP_PSH)
TCP_SKB_CB(skb)->flags |= DDP_BF_PSH;
- sototcpcb(so)->ts_recent = tcp_time_stamp;
+ sototcpcb(so)->ts_recent = ticks;
sbappend(&so->so_rcv, m);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
@@ -2004,7 +1997,7 @@
TCP_SKB_CB(skb)->seq = tp->rcv_nxt;
tp->rcv_nxt += skb->len;
- sototcpcb(so)->ts_recent = tcp_time_stamp;
+ sototcpcb(so)->ts_recent = ticks;
sbappend(so->so_rcv, m);
if (!sock_flag(sk, SOCK_DEAD))
@@ -2094,7 +2087,7 @@
bsp->cur_offset += skb->len;
if (!(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1;
- sototcpcb(so)->ts_recent = tcp_time_stamp;
+ sototcpcb(so)->ts_recent = ticks;
sbappend(&so->so_rcv, m);
if (!sock_flag(so, SOCK_DEAD))
sk->sk_data_ready(so, 0);
@@ -3135,7 +3128,7 @@
unsigned int tid = TID(so);
skb_queue_walk(&sk->sk_write_queue, skb) {
- if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_NEED_HDR) {
+ if (m->m_flags & TCPCB_FLAG_NEED_HDR) {
TCP_SKB_CB(skb)->seq = tp->write_seq;
tp->write_seq += skb->len + ulp_extra_len(m);
} else {
@@ -3162,7 +3155,7 @@
log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
TID(so), tp->t_state);
- tp->rcv_tstamp = tcp_time_stamp;
+ tp->ts_recent_age = ticks;
DELACK_SEQ(tp) = tp->copied_seq = tp->rcv_wup = tp->rcv_nxt = rcv_isn;
make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt));
@@ -3285,7 +3278,7 @@
if (tp->snd_una != snd_una) {
tp->snd_una = snd_una;
dst_confirm(so->sk_dst_cache);
- tp->rcv_tstamp = tcp_time_stamp;
+ tp->ts_recent_age = ticks;
if (tp->snd_una == tp->snd_nxt)
sock_reset_flag(so, TX_WAIT_IDLE);
}
==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_socket.c#4 (text+ko) ====
@@ -90,6 +90,62 @@
#define TCPDEBUG2(req)
#endif
+
+/*
+ * Returns true if a connection should send more data to the TOE ASAP.
+ *
+ * The result is non-zero when any one of these holds:
+ *  - WR_MAX(so) and WR_AVAIL(so) are equal,
+ *  - snd_nxt - snd_una is at or below the tx_hold_thres tunable of the
+ *    socket's TOE device,
+ *  - TF_NODELAY is set in the connection's t_flags.
+ *
+ * The WR_* accessors are given the socket rather than the tcpcb, the same
+ * way t3_push_frames invokes WR_AVAIL(so) and the way the other per-socket
+ * accessors in t3_defs.h (TOE_DEV, TOE_FLAGS, L2T_ENTRY) are invoked.
+ */
+static inline int
+should_push(const struct socket *so)
+{
+	struct tcpcb *tp = sototcpcb(so);
+	struct toedev *dev = TOE_DEV(so);
+
+	return !(WR_MAX(so) - WR_AVAIL(so)) ||
+	    tp->snd_nxt - tp->snd_una <= TOM_TUNABLE(dev, tx_hold_thres) ||
+	    (tp->t_flags & TF_NODELAY);
+}
+
+/*
+ * Final push step for the send*() paths: called for the last packet they
+ * queue.  Nothing is sent when the send buffer's first record is also its
+ * last record, or when should_push() declines; otherwise the queue is
+ * handed to t3_push_frames().
+ *
+ * Urgent-data annotation of the tail packet is compiled out (notyet), so
+ * `flags' is currently consulted only inside that disabled section.
+ */
+static void
+tcp_push(struct socket *so, int flags)
+{
+
+	if (so->so_snd.sb_mb == so->so_snd.sb_lastrecord || !should_push(so))
+		return;
+#ifdef notyet
+	{
+		/*
+		 * XXX first cut doesn't support OOB data
+		 */
+		struct tcpcb *tp = sototcpcb(so);
+		struct sk_buff *skb = sk->sk_write_queue.prev;
+
+		mark_urg(tp, flags, skb);
+	}
+#endif
+	t3_push_frames(so, 1);
+}
+
+/*
+ * Try to transmit the send queue if it has just one packet.  This is intended
+ * to be called as full packets are added to the send queue by the various
+ * send*() APIs when we expect additional packets to be generated by the
+ * current API call.  It should not be called for the last packet generated,
+ * use the full tcp_push call above for that.
+ *
+ * The argument is a FreeBSD struct socket: the body reads so->so_snd and the
+ * sosend path passes its struct socket pointer here.
+ */
+static inline void
+push_frames_if_head(struct socket *so)
+{
+	/* First record is also the last record: at most one record is queued. */
+	if (so->so_snd.sb_mb == so->so_snd.sb_lastrecord)
+		t3_push_frames(so, 1);
+}
+
+
static int
chelsio_ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
@@ -105,7 +161,7 @@
sizeof optval);
if (inp->inp_ip_tos != optval) {
inp->inp_ip_tos = optval;
-#ifdef notyet
+#ifdef notyet
sk->sk_priority = rt_tos2priority(optval);
#endif
t3_set_tos(so);
@@ -147,7 +203,8 @@
}
tp = intotcpcb(inp);
- if (sopt->sopt_name == TCP_NODELAY) {
+ switch (sopt->sopt_name) {
+ case TCP_NODELAY: {
switch (sopt->sopt_dir) {
case SOPT_SET:
int oldflags = tp->t_flags;
@@ -162,7 +219,8 @@
else
tp->t_flags &= ~TF_NODELAY;
- if ((oldflags & TF_NODELAY) == 0)
+ if ((oldflags & TF_NODELAY) !=
+ (tp->t_flags & TF_NODELAY))
t3_set_nagle(so);
break;
@@ -175,8 +233,42 @@
break;
}
- } else
+ break;
+ }
+ case TCP_NOPUSH: {
+ switch (sopt->sopt_dir) {
+ case SOPT_SET:
+ int oldflags = tp->t_flags;
+
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+
+ if (optval > 0)
+ tp->t_flags |= TF_NOPUSH;
+ else
+ tp->t_flags &= ~TF_NODELAY;
+
+ if ((oldflags & TF_NOPUSH) &&
+ ((tp->t_flags & TF_NOPUSH) == 0))
+ tcp_push(so, 0);
+
+ break;
+ case SOPT_GET:
+ optval = tp->t_flags & TF_NOPUSH;
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+ default:
+ error = ENOPROTOOPT;
+ break;
+
+ }
+ break;
+ }
+ default:
error = tcp_ctloutput(so, sopt);
+ }
out:
INP_UNLOCK(inp);
return (error);
@@ -401,14 +493,14 @@
chelsio_usr_sosend(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
- long space, resid;
+ long space, resid, resid_init;
int clen = 0, error, dontroute;
int atomic = sosendallatonce(so) || top;
if (uio != NULL)
- resid = uio->uio_resid;
+ resid_init = resid = uio->uio_resid;
else
- resid = top->m_pkthdr.len;
+ resid_init = resid = top->m_pkthdr.len;
/*
* In theory resid should be unsigned. However, space must be
* signed, as it might be less than 0 if we over-committed, and we
@@ -521,8 +613,40 @@
#endif
resid = uio->uio_resid;
}
-
-
+ if (dontroute) {
+ SOCK_LOCK(so);
+ so->so_options |= SO_DONTROUTE;
+ SOCK_UNLOCK(so);
+ }
+ /*
+ * XXX ignore OOB data for now
+ */
+ if (resid > 0 && space > 0)
+ push_frames_if_head(so);
+
+ if (dontroute) {
+ SOCK_LOCK(so);
+ so->so_options &= ~SO_DONTROUTE;
+ SOCK_UNLOCK(so);
+ }
+ clen = 0;
+ control = NULL;
+ top = NULL;
+ if (error)
+ goto release;
+ } while (resid && space > 0);
+ } while (resid);
+
+ if (resid_init != resid)
+ tcp_push(so, flags);
+release:
+ sbunlock(&so->so_snd);
+out:
+ if (top != NULL)
+ m_freem(top);
+ if (control != NULL)
+ m_freem(control);
+ return (error);
}
/*
==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_defs.h#4 (text+ko) ====
@@ -22,6 +22,24 @@
struct toedev;
struct tom_data;
+struct t3_toe_private { /* per-socket TOE state; the accessor macros in this header reach these members through T3_TOE(so) */
+ unsigned int t3_tid; /* presumably the value TID(so) yields -- TODO confirm against the TID macro */
+ struct toedev t3_toedev; /* read via TOE_DEV(so); NOTE(review): embedded by value, so struct toedev must be a complete type here -- confirm */
+ struct l2t_entry t3_l2t_entry; /* read via L2T_ENTRY(so) */
+ unsigned int t3_wr_avail; /* NOTE(review): presumably backs WR_AVAIL(so) -- confirm */
+ unsigned int t3_wr_unacked; /* NOTE(review): presumably backs WR_UNACKED(so) -- confirm */
+ unsigned int t3_wr_max; /* NOTE(review): presumably backs WR_MAX(so) -- confirm */
+ struct ddp_state t3_ddp_state;
+ struct mbuf *t3_ctrl_mbuf_cache;
+ struct mbuf *t3_mbuf_ulp_lhdr;
+ uint8_t t3_mtu_idx;
+ uint8_t t3_ulp_mode; /* NOTE(review): presumably what ULP_MODE(so) reads -- confirm */
+ uint8_t t3_hw_qset_idx; /* NOTE(review): qset(so) expands to ->t3_qset, which is not a member of this struct */
+ uint8_t t3_rss_qset_idx;
+ uint32_t t3_flags; /* bit flags read via TOE_FLAGS(so), tested with GET_TOE_FLAG, set with SET_TOE_FLAG */
+};
+
+
/*
* These flags track some close related events. They share the same space as
* the sock_flags in include/net/sock.h, make sure there are no collisions.
@@ -39,6 +57,8 @@
TX_FAILOVER // Tx traffic failing over
};
+#define
+
/*
* Flags for tcp_skb_cb.flags. Make sure there are no collisions with the
* ones already defined, we use only TCPCB_FLAG_URG currently.
@@ -53,8 +73,11 @@
TCPCB_FLAG_ZCOPY = 1 << 6,
TCPCB_FLAG_ZCOPY_COW = 1 << 7,
#endif
+};
+
+#define TCPCB_FLAG_NEED_HDR M_PROTO1
+#define TCPCB_FLAG_BARRIER M_PROTO2
-};
#define mtoso(m) ((struct socket *)m->m_pkthdr.priv)
/* The ULP mode/submode of an mbuf */
@@ -94,6 +117,9 @@
/* the TOE device */
#define TOE_DEV(so) (T3_TOE(so)->t3_toedev)
+/* socket TOE flags */
+#define TOE_FLAGS(so) (T3_TOE(so)->t3_flags)
+
/* pointer to the L2T entry. */
#define L2T_ENTRY(so) (T3_TOE(so)->t3_l2t_entry)
@@ -133,6 +159,8 @@
/* TOE RSS queue set */
#define qset(so) (T3_TOE(so)->t3_qset)
+#define GET_TOE_FLAG(so, flag) (TOE_FLAGS((so)) & (flag))
+#define SET_TOE_FLAG(so, flag) (TOE_FLAGS((so)) |= (flag))
typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m);
@@ -200,19 +228,6 @@
#endif
}
-/*
- * Set the ULP mode and submode for a Tx packet.
- */
-static inline void skb_set_ulp_mode(struct mbuf *m, int mode, int submode)
-{
-#ifdef notyet
- /*
- * XXX need to flesh out t3_priv structure
- */
- skb_ulp_mode(m) = (mode << 4) | submode;
-#endif
-}
-
extern const unsigned int t3_ulp_extra_len[];
/*
* Return the length of any HW additions that will be made to a Tx packet.
@@ -220,11 +235,7 @@
*/
static inline unsigned int ulp_extra_len(const struct mbuf *m)
{
-#ifdef notyet
- return t3_ulp_extra_len[skb_ulp_mode(m) & 3];
-#else
- return (0);
-#endif
+ return t3_ulp_extra_len[ULP_MODE(mtoso(m)) & 3];
}
#include <dev/cxgb/cxgb_osdep.h>
More information about the p4-projects
mailing list