ALTQ with IPFW
Brian Fundakowski Feldman
green at FreeBSD.org
Thu Sep 30 20:13:10 PDT 2004
Okay, here's a newer revision that turns ALTQ into an action modifier
similar to O_LOG. To use the previous behavior, you would specify
"ipfw add count altq <qname> ..."
--
Brian Fundakowski Feldman \'[ FreeBSD ]''''''''''\
<> green at FreeBSD.org \ The Power to Serve! \
Opinions expressed are my own. \,,,,,,,,,,,,,,,,,,,,,,\
-------------- next part --------------
Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.99
diff -u -r1.99 ip_divert.c
--- sys/netinet/ip_divert.c 5 Sep 2004 02:34:12 -0000 1.99
+++ sys/netinet/ip_divert.c 29 Sep 2004 14:44:19 -0000
@@ -66,6 +66,7 @@
#include <netinet/ip.h>
#include <netinet/ip_divert.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
/*
* Divert sockets
@@ -268,6 +269,8 @@
div_output(struct socket *so, struct mbuf *m,
struct sockaddr_in *sin, struct mbuf *control)
{
+ struct m_tag *mtag;
+ struct divert_tag *dt;
int error = 0;
KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null"));
@@ -275,23 +278,22 @@
if (control)
m_freem(control); /* XXX */
+ mtag = m_tag_get(PACKET_TAG_DIVERT,
+ sizeof(struct divert_tag), M_NOWAIT);
+ if (mtag == NULL) {
+ error = ENOBUFS;
+ goto cantsend;
+ }
+ dt = (struct divert_tag *)(mtag+1);
+ dt->info = 0;
+ dt->cookie = 0;
+ m_tag_prepend(m, mtag);
+
/* Loopback avoidance and state recovery */
if (sin) {
- struct m_tag *mtag;
- struct divert_tag *dt;
int i;
- mtag = m_tag_get(PACKET_TAG_DIVERT,
- sizeof(struct divert_tag), M_NOWAIT);
- if (mtag == NULL) {
- error = ENOBUFS;
- goto cantsend;
- }
- dt = (struct divert_tag *)(mtag+1);
- dt->info = 0;
dt->cookie = sin->sin_port;
- m_tag_prepend(m, mtag);
-
/*
* Find receive interface with the given name, stuffed
* (if it exists) in the sin_zero[] field.
@@ -309,6 +311,7 @@
struct ip *const ip = mtod(m, struct ip *);
struct inpcb *inp;
+ dt->info |= IP_FW_DIVERT_OUTPUT_FLAG;
INP_INFO_WLOCK(&divcbinfo);
inp = sotoinpcb(so);
INP_LOCK(inp);
@@ -341,6 +344,7 @@
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&divcbinfo);
} else {
+ dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG;
if (m->m_pkthdr.rcvif == NULL) {
/*
* No luck with the name, check by IP address.
Index: sys/netinet/ip_fw.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/ip_fw.h,v
retrieving revision 1.91
diff -u -r1.91 ip_fw.h
--- sys/netinet/ip_fw.h 29 Sep 2004 04:54:33 -0000 1.91
+++ sys/netinet/ip_fw.h 30 Sep 2004 05:41:01 -0000
@@ -134,6 +134,9 @@
O_IP_DST_LOOKUP, /* arg1=table number, u32=value */
O_ANTISPOOF, /* none */
O_JAIL, /* u32 = id */
+ O_ALTQ, /* u32 = altq classif. qid */
+ O_DIVERTED, /* arg1=bitmap (1:loop, 2:out) */
+ O_TCPDATALEN, /* arg1 = len */
O_LAST_OPCODE /* not an opcode! */
};
@@ -251,6 +254,14 @@
} ipfw_insn_pipe;
/*
+ * This is used for storing an altq queue id number.
+ */
+typedef struct _ipfw_insn_altq {
+ ipfw_insn o;
+ u_int32_t qid;
+} ipfw_insn_altq;
+
+/*
* This is used for limit rules.
*/
typedef struct _ipfw_insn_limit {
@@ -293,6 +304,7 @@
* first instruction (at r->cmd) MUST BE an O_PROBE_STATE
* + if a rule has a "log" option, then the first action
* (at ACTION_PTR(r)) MUST be O_LOG
+ * + if a rule has an "altq" option, it comes after "log"
*
* NOTE: we use a simple linked list of rules because we never need
* to delete a rule without scanning the list. We do not use
@@ -405,9 +417,11 @@
*/
#ifdef _KERNEL
-#define IP_FW_PORT_DYNT_FLAG 0x10000
-#define IP_FW_PORT_TEE_FLAG 0x20000
-#define IP_FW_PORT_DENY_FLAG 0x40000
+#define IP_FW_PORT_DYNT_FLAG 0x00010000
+#define IP_FW_PORT_TEE_FLAG 0x00020000
+#define IP_FW_PORT_DENY_FLAG 0x00040000
+#define IP_FW_DIVERT_LOOPBACK_FLAG 0x00080000
+#define IP_FW_DIVERT_OUTPUT_FLAG 0x00100000
/*
* Arguments for calling ipfw_chk() and dummynet_io(). We put them
Index: sys/netinet/ip_fw2.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/ip_fw2.c,v
retrieving revision 1.77
diff -u -r1.77 ip_fw2.c
--- sys/netinet/ip_fw2.c 29 Sep 2004 04:54:33 -0000 1.77
+++ sys/netinet/ip_fw2.c 30 Sep 2004 08:15:18 -0000
@@ -77,6 +77,7 @@
#include <netinet/tcpip.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <altq/if_altq.h>
#ifdef IPSEC
#include <netinet6/ipsec.h>
@@ -553,6 +554,13 @@
if (l->log_left == 0)
limit_reached = l->max_log;
cmd += F_LEN(cmd); /* point to first action */
+ if (cmd->opcode == O_ALTQ) {
+ ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+ snprintf(SNPARGS(action2, 0), "Altq %d",
+ altq->qid);
+ cmd += F_LEN(cmd);
+ }
if (cmd->opcode == O_PROB)
cmd += F_LEN(cmd);
@@ -1324,6 +1332,8 @@
cmd = ACTION_PTR(me);
if (cmd->opcode == O_LOG)
cmd += F_LEN(cmd);
+ if (cmd->opcode == O_ALTQ)
+ cmd += F_LEN(cmd);
if ( cmd->opcode == O_SKIPTO )
for (rule = me->next; rule ; rule = rule->next)
if (rule->rulenum >= cmd->arg1)
@@ -1708,6 +1718,14 @@
int ugid_lookup = 0;
/*
+ * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG
+ * associated with a packet input on a divert socket. This
+ * will allow us to distinguish traffic and its direction when
+ * it originates from a divert socket.
+ */
+ u_int divinput_flags = 0;
+
+ /*
* oif | args->oif If NULL, ipfw_chk has been called on the
* inbound path (ether_input, bdg_forward, ip_input).
* If non-NULL, ipfw_chk has been called on the outbound path
@@ -1883,8 +1901,11 @@
}
}
/* reset divert rule to avoid confusion later */
- if (mtag)
+ if (mtag) {
+ divinput_flags = divert_info(mtag) &
+ (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG);
m_tag_delete(m, mtag);
+ }
/*
* Now scan the rules, and parse microinstructions for each rule.
@@ -2017,6 +2038,13 @@
match = (args->eh != NULL);
break;
+ case O_DIVERTED:
+ match = (cmd->arg1 & 1 && divinput_flags &
+ IP_FW_DIVERT_LOOPBACK_FLAG) ||
+ (cmd->arg1 & 2 && divinput_flags &
+ IP_FW_DIVERT_OUTPUT_FLAG);
+ break;
+
case O_PROTO:
/*
* We do not allow an arg of 0 so the
@@ -2175,6 +2203,28 @@
flags_match(cmd, ip->ip_tos));
break;
+ case O_TCPDATALEN:
+ if (proto == IPPROTO_TCP && offset == 0) {
+ struct tcphdr *tcp;
+ uint16_t x;
+ uint16_t *p;
+ int i;
+
+ tcp = L3HDR(struct tcphdr,ip);
+ x = ip_len -
+ ((ip->ip_hl + tcp->th_off) << 2);
+ if (cmdlen == 1) {
+ match = (cmd->arg1 == x);
+ break;
+ }
+ /* otherwise we have ranges */
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for (; !match && i>0; i--, p += 2)
+ match = (x >= p[0] && x <= p[1]);
+ }
+ break;
+
case O_TCPFLAGS:
match = (proto == IPPROTO_TCP && offset == 0 &&
flags_match(cmd,
@@ -2212,6 +2262,32 @@
(TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
break;
+ case O_ALTQ: {
+ struct altq_tag *at;
+ ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+ match = 1;
+ mtag = m_tag_get(PACKET_TAG_PF_QID,
+ sizeof(struct altq_tag),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ /*
+ * Let the packet fall back to the
+ * default ALTQ.
+ */
+ break;
+ }
+ at = (struct altq_tag *)(mtag+1);
+ at->qid = altq->qid;
+ if (hlen != 0)
+ at->af = AF_INET;
+ else
+ at->af = AF_LINK;
+ at->hdr = ip;
+ m_tag_prepend(m, mtag);
+ break;
+ }
+
case O_LOG:
if (fw_verbose)
ipfw_log(f, hlen, args->eh, m, oif);
@@ -2275,6 +2351,9 @@
* or to the SKIPTO target ('goto again' after
* having set f, cmd and l), respectively.
*
+ * O_LOG and O_ALTQ action parameters:
+ * perform some action and set match = 1;
+ *
* O_LIMIT and O_KEEP_STATE: these opcodes are
* not real 'actions', and are stored right
* before the 'action' part of the rule.
@@ -2846,6 +2925,11 @@
printf("ipfw: size mismatch (have %d want %d)\n", size, l);
return (EINVAL);
}
+ if (rule->act_ofs >= rule->cmd_len) {
+ printf("ipfw: bogus action offset (%u > %u)\n",
+ rule->act_ofs, rule->cmd_len - 1);
+ return (EINVAL);
+ }
/*
* Now go for the individual checks. Very simple ones, basically only
* instruction sizes.
@@ -2868,6 +2952,7 @@
case O_LAYER2:
case O_IN:
case O_FRAG:
+ case O_DIVERTED:
case O_IPOPT:
case O_IPTOS:
case O_IPPRECEDENCE:
@@ -2951,6 +3036,7 @@
case O_IPID:
case O_IPTTL:
case O_IPLEN:
+ case O_TCPDATALEN:
if (cmdlen < 1 || cmdlen > 31)
goto bad_size;
break;
@@ -2969,6 +3055,11 @@
goto bad_size;
break;
+ case O_ALTQ:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
+ goto bad_size;
+ break;
+
case O_PIPE:
case O_QUEUE:
if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
Index: sys/netinet/tcp.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp.h,v
retrieving revision 1.26
diff -u -r1.26 tcp.h
--- sys/netinet/tcp.h 16 Aug 2004 18:32:07 -0000 1.26
+++ sys/netinet/tcp.h 29 Sep 2004 05:09:59 -0000
@@ -161,12 +161,14 @@
/*
* User-settable options (used with setsockopt).
*/
-#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
+#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
#if __BSD_VISIBLE
-#define TCP_MAXSEG 0x02 /* set maximum segment size */
-#define TCP_NOPUSH 0x04 /* don't push last block of write */
-#define TCP_NOOPT 0x08 /* don't use TCP options */
-#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
+#define TCP_MAXSEG 0x02 /* set maximum segment size */
+#define TCP_NOPUSH 0x04 /* don't push last block of write */
+#define TCP_NOOPT 0x08 /* don't use TCP options */
+#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
+#define TCP_DELACKTIME 0x20 /* delayed ack time (timeval, 0 disables) */
+#define TCP_REXMITJITTER 0x40 /* retransmit slop time (timeval, 0 disables) */
#endif
#endif /* !_NETINET_TCP_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.252
diff -u -r1.252 tcp_input.c
--- sys/netinet/tcp_input.c 17 Aug 2004 22:05:54 -0000 1.252
+++ sys/netinet/tcp_input.c 29 Sep 2004 05:33:17 -0000
@@ -195,7 +195,7 @@
#define DELAY_ACK(tp) \
((!callout_active(tp->tt_delack) && \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
- (tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+ (tp->t_delacktime > 0 || (tp->t_flags & TF_NEEDSYN)))
/* Initialize TCP reassembly queue */
uma_zone_t tcp_reass_zone;
@@ -1416,8 +1416,8 @@
* ACKNOW will be turned on later.
*/
if (DELAY_ACK(tp) && tlen != 0)
- callout_reset(tp->tt_delack, tcp_delacktime,
- tcp_timer_delack, tp);
+ callout_reset(tp->tt_delack,
+ TCP_DELACKTICKS(tp), tcp_timer_delack, tp);
else
tp->t_flags |= TF_ACKNOW;
/*
@@ -2509,7 +2509,7 @@
INP_LOCK_ASSERT(inp);
if (tp->t_flags & TF_DELACK) {
tp->t_flags &= ~TF_DELACK;
- callout_reset(tp->tt_delack, tcp_delacktime,
+ callout_reset(tp->tt_delack, TCP_DELACKTICKS(tp),
tcp_timer_delack, tp);
}
INP_UNLOCK(inp);
@@ -2841,7 +2841,7 @@
* statistical, we have to test that we don't drop below
* the minimum feasible timer (which is 2 ticks).
*/
- TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ TCPT_RANGESET(tp, tp->t_rxtcur, TCP_REXMTVAL(tp),
max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
/*
@@ -3082,7 +3082,7 @@
tp->t_rttvar =
tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
}
- TCPT_RANGESET(tp->t_rxtcur,
+ TCPT_RANGESET(tp, tp->t_rxtcur,
((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
tp->t_rttmin, TCPTV_REXMTMAX);
}
Index: sys/netinet/tcp_output.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.101
diff -u -r1.101 tcp_output.c
--- sys/netinet/tcp_output.c 5 Sep 2004 02:34:12 -0000 1.101
+++ sys/netinet/tcp_output.c 29 Sep 2004 04:41:12 -0000
@@ -1169,7 +1169,7 @@
/*
* Start/restart persistance timer.
*/
- TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
+ TCPT_RANGESET(tp, tt, t * tcp_backoff[tp->t_rxtshift],
TCPTV_PERSMIN, TCPTV_PERSMAX);
callout_reset(tp->tt_persist, tt, tcp_timer_persist, tp);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
Index: sys/netinet/tcp_subr.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.203
diff -u -r1.203 tcp_subr.c
--- sys/netinet/tcp_subr.c 5 Sep 2004 02:34:12 -0000 1.203
+++ sys/netinet/tcp_subr.c 29 Sep 2004 05:37:00 -0000
@@ -620,6 +620,8 @@
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
if (tcp_do_rfc1644)
tp->t_flags |= TF_REQ_CC;
+ if (tcp_delack_enabled)
+ tp->t_delacktime = max(tcp_delacktime, 1);
tp->sack_enable = tcp_do_sack;
tp->t_inpcb = inp; /* XXX */
/*
@@ -631,6 +633,7 @@
tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
tp->t_rttmin = tcp_rexmit_min;
tp->t_rxtcur = TCPTV_RTOBASE;
+ tp->t_rxtjitter = max(tcp_rexmit_slop, 0);
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.66
diff -u -r1.66 tcp_timer.c
--- sys/netinet/tcp_timer.c 16 Aug 2004 18:32:07 -0000 1.66
+++ sys/netinet/tcp_timer.c 29 Sep 2004 04:41:16 -0000
@@ -538,7 +538,7 @@
rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
- TCPT_RANGESET(tp->t_rxtcur, rexmt,
+ TCPT_RANGESET(tp, tp->t_rxtcur, rexmt,
tp->t_rttmin, TCPTV_REXMTMAX);
/*
* Disable rfc1323 and rfc1644 if we havn't got any response to
Index: sys/netinet/tcp_timer.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.26
diff -u -r1.26 tcp_timer.h
--- sys/netinet/tcp_timer.h 16 Aug 2004 18:32:07 -0000 1.26
+++ sys/netinet/tcp_timer.h 29 Sep 2004 05:31:10 -0000
@@ -126,8 +126,8 @@
/*
* Force a time value to be in a certain range.
*/
-#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
- (tv) = (value) + tcp_rexmit_slop; \
+#define TCPT_RANGESET(tp, tv, value, tvmin, tvmax) do { \
+ (tv) = (value) + TCP_REXMITJITTERTICKS(tp); \
if ((u_long)(tv) < (u_long)(tvmin)) \
(tv) = (tvmin); \
else if ((u_long)(tv) > (u_long)(tvmax)) \
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.107
diff -u -r1.107 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c 16 Aug 2004 18:32:07 -0000 1.107
+++ sys/netinet/tcp_usrreq.c 29 Sep 2004 05:37:25 -0000
@@ -997,9 +997,12 @@
struct socket *so;
struct sockopt *sopt;
{
+ struct timeval opttv;
int error, opt, optval;
struct inpcb *inp;
struct tcpcb *tp;
+ void *optout;
+ socklen_t optlen;
error = 0;
INP_INFO_RLOCK(&tcbinfo);
@@ -1090,6 +1093,32 @@
error = EINVAL;
break;
+ case TCP_DELACKTIME:
+ error = sooptcopyin(sopt, &opttv, sizeof opttv,
+ sizeof opttv);
+ if (error)
+ break;
+
+ if (opttv.tv_sec == 0 && opttv.tv_usec == 0)
+ tp->t_delacktime = 0;
+ else
+ tp->t_delacktime = tvtohz(&opttv);
+ error = 0;
+ break;
+
+ case TCP_REXMITJITTER:
+ error = sooptcopyin(sopt, &opttv, sizeof opttv,
+ sizeof opttv);
+ if (error)
+ break;
+
+ if (opttv.tv_sec == 0 && opttv.tv_usec == 0)
+ tp->t_rxtjitter = 0;
+ else
+ tp->t_rxtjitter = tvtohz(&opttv);
+ error = 0;
+ break;
+
default:
error = ENOPROTOOPT;
break;
@@ -1097,6 +1126,8 @@
break;
case SOPT_GET:
+ optout = &optval;
+ optlen = sizeof(optval);
switch (sopt->sopt_name) {
#ifdef TCP_SIGNATURE
case TCP_MD5SIG:
@@ -1115,12 +1146,36 @@
case TCP_NOPUSH:
optval = tp->t_flags & TF_NOPUSH;
break;
+ case TCP_DELACKTIME:
+ optout = &opttv;
+ optlen = sizeof(opttv);
+ if (tp->t_delacktime == 0) {
+ opttv.tv_sec = 0;
+ opttv.tv_usec = 0;
+ } else {
+ opttv.tv_sec = tp->t_delacktime / hz;
+ opttv.tv_usec = (tp->t_delacktime % hz) *
+ (1000000 / hz);
+ }
+ break;
+ case TCP_REXMITJITTER:
+ optout = &opttv;
+ optlen = sizeof(opttv);
+ if (tp->t_rxtjitter == 0) {
+ opttv.tv_sec = 0;
+ opttv.tv_usec = 0;
+ } else {
+ opttv.tv_sec = tp->t_rxtjitter / hz;
+ opttv.tv_usec = (tp->t_rxtjitter % hz) *
+ (1000000 / hz);
+ }
+ break;
default:
error = ENOPROTOOPT;
break;
}
if (error == 0)
- error = sooptcopyout(sopt, &optval, sizeof optval);
+ error = sooptcopyout(sopt, optout, optlen);
break;
}
INP_UNLOCK(inp);
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.109
diff -u -r1.109 tcp_var.h
--- sys/netinet/tcp_var.h 16 Aug 2004 18:32:07 -0000 1.109
+++ sys/netinet/tcp_var.h 29 Sep 2004 05:36:47 -0000
@@ -200,6 +200,8 @@
tcp_seq rcv_lastsack; /* last seq number(+1) sack'd by rcv'r*/
int rcv_numsacks; /* # distinct sack blks present */
struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
+ int t_rxtjitter; /* retransmission slop ticks to use. */
+ int t_delacktime; /* delayed ack ticks to use. */
};
#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
@@ -368,6 +370,20 @@
+ (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
/*
+ * Per-socket retransmit slop setting (0 for off, else value in ticks).
+ */
+#define TCP_REXMITJITTERTICKS(tp) \
+ (tp)->t_rxtjitter
+
+/*
+ * Per-socket delayed ack timer setting (0 for off, else value in ticks).
+ * If off, and using T/TCP, the value will fall back to the system value as
+ * delayed ack will be a necessity.
+ */
+#define TCP_DELACKTICKS(tp) \
+ ((tp)->t_delacktime == 0 ? tcp_delacktime : (tp)->t_delacktime)
+
+/*
* TCP statistics.
* Many of these should be kept per connection,
* but that's inconvenient at the moment.
Index: sbin/ipfw/Makefile
===================================================================
RCS file: /usr/ncvs/src/sbin/ipfw/Makefile,v
retrieving revision 1.12
diff -u -r1.12 Makefile
--- sbin/ipfw/Makefile 11 Jul 2002 17:33:37 -0000 1.12
+++ sbin/ipfw/Makefile 29 Sep 2004 03:09:15 -0000
@@ -4,5 +4,6 @@
SRCS= ipfw2.c
WARNS?= 0
MAN= ipfw.8
+CFLAGS+= -I${.CURDIR}/../../sys/contrib/pf
.include <bsd.prog.mk>
Index: sbin/ipfw/ipfw2.c
===================================================================
RCS file: /usr/ncvs/src/sbin/ipfw/ipfw2.c,v
retrieving revision 1.59
diff -u -r1.59 ipfw2.c
--- sbin/ipfw/ipfw2.c 21 Sep 2004 22:12:43 -0000 1.59
+++ sbin/ipfw/ipfw2.c 1 Oct 2004 01:27:08 -0000
@@ -27,6 +27,7 @@
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/wait.h>
+#include <sys/queue.h>
#include <ctype.h>
#include <err.h>
@@ -43,8 +44,11 @@
#include <timeconv.h> /* XXX do we need this ? */
#include <unistd.h>
#include <sysexits.h>
+#include <unistd.h>
+#include <fcntl.h>
#include <net/if.h>
+#include <net/pfvar.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
@@ -202,6 +206,9 @@
TOK_UNREACH,
TOK_CHECKSTATE,
+ TOK_ALTQ,
+ TOK_LOG,
+
TOK_UID,
TOK_GID,
TOK_JAIL,
@@ -210,6 +217,9 @@
TOK_KEEPSTATE,
TOK_LAYER2,
TOK_OUT,
+ TOK_DIVERTED,
+ TOK_DIVERTEDLOOPBACK,
+ TOK_DIVERTEDOUTPUT,
TOK_XMIT,
TOK_RECV,
TOK_VIA,
@@ -223,6 +233,7 @@
TOK_IPVER,
TOK_ESTAB,
TOK_SETUP,
+ TOK_TCPDATALEN,
TOK_TCPFLAGS,
TOK_TCPOPTS,
TOK_TCPSEQ,
@@ -302,6 +313,12 @@
{ NULL, 0 } /* terminator */
};
+struct _s_x rule_action_params[] = {
+ { "altq", TOK_ALTQ },
+ { "log", TOK_LOG },
+ { NULL, 0 } /* terminator */
+};
+
struct _s_x rule_options[] = {
{ "uid", TOK_UID },
{ "gid", TOK_GID },
@@ -312,6 +329,9 @@
{ "bridged", TOK_LAYER2 },
{ "layer2", TOK_LAYER2 },
{ "out", TOK_OUT },
+ { "diverted", TOK_DIVERTED },
+ { "diverted-loopback", TOK_DIVERTEDLOOPBACK },
+ { "diverted-output", TOK_DIVERTEDOUTPUT },
{ "xmit", TOK_XMIT },
{ "recv", TOK_RECV },
{ "via", TOK_VIA },
@@ -329,6 +349,7 @@
{ "estab", TOK_ESTAB },
{ "established", TOK_ESTAB },
{ "setup", TOK_SETUP },
+ { "tcpdatalen", TOK_TCPDATALEN },
{ "tcpflags", TOK_TCPFLAGS },
{ "tcpflgs", TOK_TCPFLAGS },
{ "tcpoptions", TOK_TCPOPTS },
@@ -462,6 +483,7 @@
{"iplen", O_IPLEN},
{"ipttl", O_IPTTL},
{"mac-type", O_MAC_TYPE},
+ {"tcpdatalen", O_TCPDATALEN},
{NULL, 0}
};
@@ -563,6 +585,107 @@
}
/*
+ * Map between current altq queue id numbers and names.
+ */
+static int altq_fetched = 0;
+static TAILQ_HEAD(, pf_altq) altq_entries =
+ TAILQ_HEAD_INITIALIZER(altq_entries);
+
+static void
+altq_set_enabled(int enabled)
+{
+ int pffd;
+
+ pffd = open("/dev/pf", O_RDWR);
+ if (pffd == -1)
+ err(EX_UNAVAILABLE,
+ "altq support opening pf(4) control device");
+ if (enabled) {
+ if (ioctl(pffd, DIOCSTARTALTQ) != 0 && errno != EEXIST)
+ err(EX_UNAVAILABLE, "enabling altq");
+ } else {
+ if (ioctl(pffd, DIOCSTOPALTQ) != 0 && errno != ENOENT)
+ err(EX_UNAVAILABLE, "disabling altq");
+ }
+ close(pffd);
+}
+
+static void
+altq_fetch()
+{
+ struct pfioc_altq pfioc;
+ struct pf_altq *altq;
+ int pffd, mnr;
+
+ if (altq_fetched)
+ return;
+ altq_fetched = 1;
+ pffd = open("/dev/pf", O_RDONLY);
+ if (pffd == -1) {
+ warn("altq support opening pf(4) control device");
+ return;
+ }
+ bzero(&pfioc, sizeof(pfioc));
+ if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) {
+ warn("altq support getting queue list");
+ close(pffd);
+ return;
+ }
+ mnr = pfioc.nr;
+ for (pfioc.nr = 0; pfioc.nr < mnr; pfioc.nr++) {
+ if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) {
+ if (errno == EBUSY)
+ break;
+ warn("altq support getting queue list");
+ close(pffd);
+ return;
+ }
+ if (pfioc.altq.qid == 0)
+ continue;
+ altq = malloc(sizeof(*altq));
+ if (altq == NULL)
+ err(EX_OSERR, "malloc");
+ *altq = pfioc.altq;
+ TAILQ_INSERT_TAIL(&altq_entries, altq, entries);
+ }
+ close(pffd);
+}
+
+static u_int32_t
+altq_name_to_qid(const char *name)
+{
+ struct pf_altq *altq;
+
+ altq_fetch();
+ TAILQ_FOREACH(altq, &altq_entries, entries)
+ if (strcmp(name, altq->qname) == 0)
+ break;
+ if (altq == NULL)
+ errx(EX_DATAERR, "altq has no queue named `%s'", name);
+ return altq->qid;
+}
+
+static const char *
+altq_qid_to_name(u_int32_t qid)
+{
+ struct pf_altq *altq;
+
+ altq_fetch();
+ TAILQ_FOREACH(altq, &altq_entries, entries)
+ if (qid == altq->qid)
+ break;
+ if (altq == NULL)
+ return NULL;
+ return altq->qname;
+}
+
+static void
+fill_altq_qid(u_int32_t *qid, const char *av)
+{
+ *qid = altq_name_to_qid(av);
+}
+
+/*
* Fill the body of the command with the list of port ranges.
*/
static int
@@ -908,6 +1031,7 @@
int proto = 0; /* default */
int flags = 0; /* prerequisites */
ipfw_insn_log *logptr = NULL; /* set if we find an O_LOG */
+ ipfw_insn_altq *altqptr = NULL; /* set if we find an O_ALTQ */
int or_block = 0; /* we are in an or block */
uint32_t set_disable;
@@ -1033,6 +1157,10 @@
logptr = (ipfw_insn_log *)cmd;
break;
+ case O_ALTQ: /* O_ALTQ is printed after O_LOG */
+ altqptr = (ipfw_insn_altq *)cmd;
+ break;
+
default:
printf("** unrecognized action %d len %d",
cmd->opcode, cmd->len);
@@ -1044,6 +1172,15 @@
else
printf(" log");
}
+ if (altqptr) {
+ const char *qname;
+
+ qname = altq_qid_to_name(altqptr->qid);
+ if (qname == NULL)
+ printf(" altq ?<%u>", altqptr->qid);
+ else
+ printf(" altq %s", qname);
+ }
/*
* then print the body.
@@ -1174,6 +1311,23 @@
printf(cmd->len & F_NOT ? " out" : " in");
break;
+ case O_DIVERTED:
+ switch (cmd->arg1) {
+ case 3:
+ printf(" diverted");
+ break;
+ case 1:
+ printf(" diverted-loopback");
+ break;
+ case 2:
+ printf(" diverted-output");
+ break;
+ default:
+ printf(" diverted-?<%u>", cmd->arg1);
+ break;
+ }
+ break;
+
case O_LAYER2:
printf(" layer2");
break;
@@ -1244,6 +1398,14 @@
printf(" established");
break;
+ case O_TCPDATALEN:
+ if (F_LEN(cmd) == 1)
+ printf(" tcpdatalen %u", cmd->arg1 );
+ else
+ print_newports((ipfw_insn_u16 *)cmd, 0,
+ O_TCPDATALEN);
+ break;
+
case O_TCPFLAGS:
print_flags("tcpflags", cmd, f_tcpflags);
break;
@@ -1709,6 +1871,8 @@
} else if (strncmp(*av, "dyn_keepalive", strlen(*av)) == 0) {
sysctlbyname("net.inet.ip.fw.dyn_keepalive", NULL, 0,
&which, sizeof(which));
+ } else if (strncmp(*av, "altq", strlen(*av)) == 0) {
+ altq_set_enabled(which);
} else {
warnx("unrecognize enable/disable keyword: %s\n", *av);
}
@@ -1903,21 +2067,23 @@
"set [disable N... enable N...] | move [rule] X to Y | swap X Y | show\n"
"table N {add ip[/bits] [value] | delete ip[/bits] | flush | list}\n"
"\n"
-"RULE-BODY: check-state [LOG] | ACTION [LOG] ADDR [OPTION_LIST]\n"
+"RULE-BODY: check-state [PARAMS] | ACTION [PARAMS] ADDR [OPTION_LIST]\n"
"ACTION: check-state | allow | count | deny | reject | skipto N |\n"
" {divert|tee} PORT | forward ADDR | pipe N | queue N\n"
+"PARAMS: [log [logamount LOGLIMIT]] [altq QUEUE_NAME]\n"
"ADDR: [ MAC dst src ether_type ] \n"
" [ from IPADDR [ PORT ] to IPADDR [ PORTLIST ] ]\n"
"IPADDR: [not] { any | me | ip/bits{x,y,z} | table(t[,v]) | IPLIST }\n"
"IPLIST: { ip | ip/bits | ip:mask }[,IPLIST]\n"
"OPTION_LIST: OPTION [OPTION_LIST]\n"
-"OPTION: bridged | {dst-ip|src-ip} ADDR | {dst-port|src-port} LIST |\n"
+"OPTION: bridged | diverted | diverted-loopback | diverted-output |\n"
+" {dst-ip|src-ip} ADDR | {dst-port|src-port} LIST |\n"
" estab | frag | {gid|uid} N | icmptypes LIST | in | out | ipid LIST |\n"
" iplen LIST | ipoptions SPEC | ipprecedence | ipsec | iptos SPEC |\n"
" ipttl LIST | ipversion VER | keep-state | layer2 | limit ... |\n"
" mac ... | mac-type LIST | proto LIST | {recv|xmit|via} {IF|IPADDR} |\n"
" setup | {tcpack|tcpseq|tcpwin} NN | tcpflags SPEC | tcpoptions SPEC |\n"
-" verrevpath | versrcreach | antispoof\n"
+" tcpdatalen LIST | verrevpath | versrcreach | antispoof\n"
);
exit(0);
}
@@ -2756,11 +2922,11 @@
* Rules are added into the 'rulebuf' and then copied in the correct order
* into the actual rule.
*
- * The syntax for a rule starts with the action, followed by an
- * optional log action, and the various match patterns.
+ * The syntax for a rule starts with the action, followed by
+ * optional action parameters, and the various match patterns.
* In the assembled microcode, the first opcode must be an O_PROBE_STATE
* (generated if the rule includes a keep-state option), then the
- * various match patterns, the "log" action, and the actual action.
+ * various match patterns, log/altq actions, and the actual action.
*
*/
static void
@@ -2783,6 +2949,7 @@
* various flags used to record that we entered some fields.
*/
ipfw_insn *have_state = NULL; /* check-state or keep-state */
+ ipfw_insn *have_log = NULL, *have_altq = NULL;
size_t len;
int i;
@@ -2945,32 +3112,63 @@
action = next_cmd(action);
/*
+ * [altq queuename] -- altq tag, optional
* [log [logamount N]] -- log, optional
*
- * If exists, it goes first in the cmdbuf, but then it is
+ * If they exist, they go first in the cmdbuf, but then they are
* skipped in the copy section to the end of the buffer.
*/
- if (ac && !strncmp(*av, "log", strlen(*av))) {
- ipfw_insn_log *c = (ipfw_insn_log *)cmd;
- int l;
+ while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) {
+ ac--; av++;
+ switch (i) {
+ case TOK_LOG:
+ {
+ ipfw_insn_log *c = (ipfw_insn_log *)cmd;
+ int l;
- cmd->len = F_INSN_SIZE(ipfw_insn_log);
- cmd->opcode = O_LOG;
- av++; ac--;
- if (ac && !strncmp(*av, "logamount", strlen(*av))) {
- ac--; av++;
- NEED1("logamount requires argument");
- l = atoi(*av);
- if (l < 0)
- errx(EX_DATAERR, "logamount must be positive");
- c->max_log = l;
+ if (have_log)
+ errx(EX_DATAERR,
+ "log cannot be specified more than once");
+ have_log = (ipfw_insn *)c;
+ cmd->len = F_INSN_SIZE(ipfw_insn_log);
+ cmd->opcode = O_LOG;
+ if (ac && !strncmp(*av, "logamount", strlen(*av))) {
+ ac--; av++;
+ NEED1("logamount requires argument");
+ l = atoi(*av);
+ if (l < 0)
+ errx(EX_DATAERR,
+ "logamount must be positive");
+ c->max_log = l;
+ ac--; av++;
+ } else {
+ len = sizeof(c->max_log);
+ if (sysctlbyname("net.inet.ip.fw.verbose_limit",
+ &c->max_log, &len, NULL, 0) == -1)
+ errx(1, "sysctlbyname(\"%s\")",
+ "net.inet.ip.fw.verbose_limit");
+ }
+ }
+ break;
+
+ case TOK_ALTQ:
+ {
+ ipfw_insn_altq *a = (ipfw_insn_altq *)cmd;
+
+ NEED1("missing altq queue name");
+ if (have_altq)
+ errx(EX_DATAERR,
+ "altq cannot be specified more than once");
+ have_altq = (ipfw_insn *)a;
+ cmd->len = F_INSN_SIZE(ipfw_insn_altq);
+ cmd->opcode = O_ALTQ;
+ fill_altq_qid(&a->qid, *av);
ac--; av++;
- } else {
- len = sizeof(c->max_log);
- if (sysctlbyname("net.inet.ip.fw.verbose_limit",
- &c->max_log, &len, NULL, 0) == -1)
- errx(1, "sysctlbyname(\"%s\")",
- "net.inet.ip.fw.verbose_limit");
+ }
+ break;
+
+ default:
+ abort();
}
cmd = next_cmd(cmd);
}
@@ -3197,6 +3395,18 @@
fill_cmd(cmd, O_IN, 0, 0);
break;
+ case TOK_DIVERTED:
+ fill_cmd(cmd, O_DIVERTED, 0, 3);
+ break;
+
+ case TOK_DIVERTEDLOOPBACK:
+ fill_cmd(cmd, O_DIVERTED, 0, 1);
+ break;
+
+ case TOK_DIVERTEDOUTPUT:
+ fill_cmd(cmd, O_DIVERTED, 0, 2);
+ break;
+
case TOK_FRAG:
fill_cmd(cmd, O_FRAG, 0, 0);
break;
@@ -3344,6 +3554,17 @@
(TH_SYN) | ( (TH_ACK) & 0xff) <<8 );
break;
+ case TOK_TCPDATALEN:
+ NEED1("tcpdatalen requires length");
+ if (strpbrk(*av, "-,")) {
+ if (!add_ports(cmd, *av, 0, O_TCPDATALEN))
+ errx(EX_DATAERR, "invalid tcpdata len %s", *av);
+ } else
+ fill_cmd(cmd, O_TCPDATALEN, 0,
+ strtoul(*av, NULL, 0));
+ ac--; av++;
+ break;
+
case TOK_TCPOPTS:
NEED1("missing argument for tcpoptions");
fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av);
@@ -3533,7 +3754,7 @@
dst = next_cmd(dst);
}
/*
- * copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT
+ * copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT, O_ALTQ
*/
for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) {
i = F_LEN(src);
@@ -3542,6 +3763,7 @@
case O_LOG:
case O_KEEP_STATE:
case O_LIMIT:
+ case O_ALTQ:
break;
default:
bcopy(src, dst, i * sizeof(uint32_t));
@@ -3563,12 +3785,16 @@
rule->act_ofs = dst - rule->cmd;
/*
- * put back O_LOG if necessary
+ * put back O_LOG, O_ALTQ if necessary
*/
- src = (ipfw_insn *)cmdbuf;
- if (src->opcode == O_LOG) {
- i = F_LEN(src);
- bcopy(src, dst, i * sizeof(uint32_t));
+ if (have_log) {
+ i = F_LEN(have_log);
+ bcopy(have_log, dst, i * sizeof(uint32_t));
+ dst += i;
+ }
+ if (have_altq) {
+ i = F_LEN(have_altq);
+ bcopy(have_altq, dst, i * sizeof(uint32_t));
dst += i;
}
/*
More information about the freebsd-ipfw
mailing list