svn commit: r199619 - in head/sys: amd64/amd64 i386/i386
Jung-uk Kim
jkim at FreeBSD.org
Sat Nov 21 00:19:09 UTC 2009
Author: jkim
Date: Sat Nov 21 00:19:09 2009
New Revision: 199619
URL: http://svn.freebsd.org/changeset/base/199619
Log:
Add an experimental and rudimentary JIT optimizer to reduce unncessary
overhead from short BPF filter programs such as "get the first 96 bytes".
Modified:
head/sys/amd64/amd64/bpf_jit_machdep.c
head/sys/amd64/amd64/bpf_jit_machdep.h
head/sys/i386/i386/bpf_jit_machdep.c
head/sys/i386/i386/bpf_jit_machdep.h
Modified: head/sys/amd64/amd64/bpf_jit_machdep.c
==============================================================================
--- head/sys/amd64/amd64/bpf_jit_machdep.c Fri Nov 20 23:14:08 2009 (r199618)
+++ head/sys/amd64/amd64/bpf_jit_machdep.c Sat Nov 21 00:19:09 2009 (r199619)
@@ -57,18 +57,19 @@ __FBSDID("$FreeBSD$");
bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *);
/*
- * emit routine to update the jump table
+ * Emit routine to update the jump table.
*/
static void
emit_length(bpf_bin_stream *stream, __unused u_int value, u_int len)
{
- (stream->refs)[stream->bpf_pc] += len;
+ if (stream->refs != NULL)
+ (stream->refs)[stream->bpf_pc] += len;
stream->cur_ip += len;
}
/*
- * emit routine to output the actual binary code
+ * Emit routine to output the actual binary code.
*/
static void
emit_code(bpf_bin_stream *stream, u_int value, u_int len)
@@ -95,37 +96,79 @@ emit_code(bpf_bin_stream *stream, u_int
}
/*
- * Function that does the real stuff
+ * Scan the filter program and find possible optimization.
+ */
+static int
+bpf_jit_optimize(struct bpf_insn *prog, u_int nins)
+{
+ const struct bpf_insn *p;
+ int flags;
+ u_int i;
+
+ /* Do we return immediately? */
+ if (BPF_CLASS(prog[0].code) == BPF_RET)
+ return (BPF_JIT_FLAG_RET);
+
+ for (flags = 0, i = 0; i < nins; i++) {
+ p = &prog[i];
+
+ /* Do we need reference table? */
+ if ((flags & BPF_JIT_FLAG_JMP) == 0 &&
+ BPF_CLASS(p->code) == BPF_JMP)
+ flags |= BPF_JIT_FLAG_JMP;
+
+ /* Do we need scratch memory? */
+ if ((flags & BPF_JIT_FLAG_MEM) == 0 &&
+ (p->code == BPF_ST || p->code == BPF_STX ||
+ p->code == (BPF_LD|BPF_MEM) ||
+ p->code == (BPF_LDX|BPF_MEM)))
+ flags |= BPF_JIT_FLAG_MEM;
+
+ if (flags == BPF_JIT_FLAG_ALL)
+ break;
+ }
+
+ return (flags);
+}
+
+/*
+ * Function that does the real stuff.
*/
bpf_filter_func
bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
{
bpf_bin_stream stream;
struct bpf_insn *ins;
+ int flags, flag_ret, flag_jmp, flag_mem;
u_int i, pass;
+ flags = bpf_jit_optimize(prog, nins);
+ flag_ret = (flags & BPF_JIT_FLAG_RET) != 0;
+ flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0;
+ flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0;
+
/*
* NOTE: Do not modify the name of this variable, as it's used by
* the macros to emit code.
*/
emit_func emitm;
+ memset(&stream, 0, sizeof(stream));
+
/* Allocate the reference table for the jumps. */
+ if (flag_jmp) {
#ifdef _KERNEL
- stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
- M_NOWAIT | M_ZERO);
+ stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
+ M_NOWAIT | M_ZERO);
#else
- stream.refs = malloc((nins + 1) * sizeof(u_int));
+ stream.refs = malloc((nins + 1) * sizeof(u_int));
#endif
- if (stream.refs == NULL)
- return (NULL);
+ if (stream.refs == NULL)
+ return (NULL);
#ifndef _KERNEL
- memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
+ memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
#endif
-
- stream.cur_ip = 0;
- stream.bpf_pc = 0;
- stream.ibuf = NULL;
+ }
/*
* The first pass will emit the lengths of the instructions
@@ -137,12 +180,16 @@ bpf_jit_compile(struct bpf_insn *prog, u
ins = prog;
/* Create the procedure header. */
- PUSH(RBP);
- MOVrq(RSP, RBP);
- SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP);
- MOVrq2(RDI, R8);
- MOVrd2(ESI, R9D);
- MOVrd(EDX, EDI);
+ if (flag_mem) {
+ PUSH(RBP);
+ MOVrq(RSP, RBP);
+ SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP);
+ }
+ if (!flag_ret) {
+ MOVrq2(RDI, R8);
+ MOVrd2(ESI, R9D);
+ MOVrd(EDX, EDI);
+ }
for (i = 0; i < nins; i++) {
stream.bpf_pc++;
@@ -157,11 +204,15 @@ bpf_jit_compile(struct bpf_insn *prog, u
case BPF_RET|BPF_K:
MOVid(ins->k, EAX);
- LEAVE_RET();
+ if (flag_mem)
+ LEAVE();
+ RET();
break;
case BPF_RET|BPF_A:
- LEAVE_RET();
+ if (flag_mem)
+ LEAVE();
+ RET();
break;
case BPF_LD|BPF_W|BPF_ABS:
@@ -171,9 +222,15 @@ bpf_jit_compile(struct bpf_insn *prog, u
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int32_t), ECX);
- JAEb(4);
- ZEROrd(EAX);
- LEAVE_RET();
+ if (flag_mem) {
+ JAEb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JAEb(3);
+ ZEROrd(EAX);
+ }
+ RET();
MOVrq3(R8, RCX);
MOVobd(RCX, RSI, EAX);
BSWAP(EAX);
@@ -187,8 +244,12 @@ bpf_jit_compile(struct bpf_insn *prog, u
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int16_t), ECX);
- JAEb(2);
- LEAVE_RET();
+ if (flag_mem) {
+ JAEb(2);
+ LEAVE();
+ } else
+ JAEb(1);
+ RET();
MOVrq3(R8, RCX);
MOVobw(RCX, RSI, AX);
SWAP_AX();
@@ -198,8 +259,12 @@ bpf_jit_compile(struct bpf_insn *prog, u
ZEROrd(EAX);
MOVid(ins->k, ESI);
CMPrd(EDI, ESI);
- JBb(2);
- LEAVE_RET();
+ if (flag_mem) {
+ JBb(2);
+ LEAVE();
+ } else
+ JBb(1);
+ RET();
MOVrq3(R8, RCX);
MOVobb(RCX, RSI, AL);
break;
@@ -224,9 +289,15 @@ bpf_jit_compile(struct bpf_insn *prog, u
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int32_t), ECX);
- JAEb(4);
- ZEROrd(EAX);
- LEAVE_RET();
+ if (flag_mem) {
+ JAEb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JAEb(3);
+ ZEROrd(EAX);
+ }
+ RET();
MOVrq3(R8, RCX);
MOVobd(RCX, RSI, EAX);
BSWAP(EAX);
@@ -245,8 +316,12 @@ bpf_jit_compile(struct bpf_insn *prog, u
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int16_t), ECX);
- JAEb(2);
- LEAVE_RET();
+ if (flag_mem) {
+ JAEb(2);
+ LEAVE();
+ } else
+ JAEb(1);
+ RET();
MOVrq3(R8, RCX);
MOVobw(RCX, RSI, AX);
SWAP_AX();
@@ -260,8 +335,12 @@ bpf_jit_compile(struct bpf_insn *prog, u
MOVrd(EDI, ECX);
SUBrd(EDX, ECX);
CMPrd(ESI, ECX);
- JAb(2);
- LEAVE_RET();
+ if (flag_mem) {
+ JAb(2);
+ LEAVE();
+ } else
+ JAb(1);
+ RET();
MOVrq3(R8, RCX);
ADDrd(EDX, ESI);
MOVobb(RCX, RSI, AL);
@@ -270,9 +349,15 @@ bpf_jit_compile(struct bpf_insn *prog, u
case BPF_LDX|BPF_MSH|BPF_B:
MOVid(ins->k, ESI);
CMPrd(EDI, ESI);
- JBb(4);
- ZEROrd(EAX);
- LEAVE_RET();
+ if (flag_mem) {
+ JBb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JBb(3);
+ ZEROrd(EAX);
+ }
+ RET();
ZEROrd(EDX);
MOVrq3(R8, RCX);
MOVobb(RCX, RSI, DL);
@@ -390,9 +475,15 @@ bpf_jit_compile(struct bpf_insn *prog, u
case BPF_ALU|BPF_DIV|BPF_X:
TESTrd(EDX, EDX);
- JNEb(4);
- ZEROrd(EAX);
- LEAVE_RET();
+ if (flag_mem) {
+ JNEb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JNEb(3);
+ ZEROrd(EAX);
+ }
+ RET();
MOVrd(EDX, ECX);
ZEROrd(EDX);
DIVrd(ECX);
@@ -492,8 +583,9 @@ bpf_jit_compile(struct bpf_insn *prog, u
* Modify the reference table to contain the offsets and
* not the lengths of the instructions.
*/
- for (i = 1; i < nins + 1; i++)
- stream.refs[i] += stream.refs[i - 1];
+ if (flag_jmp)
+ for (i = 1; i < nins + 1; i++)
+ stream.refs[i] += stream.refs[i - 1];
/* Reset the counters. */
stream.cur_ip = 0;
@@ -507,10 +599,14 @@ bpf_jit_compile(struct bpf_insn *prog, u
* The reference table is needed only during compilation,
* now we can free it.
*/
+ if (flag_jmp)
#ifdef _KERNEL
- free(stream.refs, M_BPFJIT);
+ free(stream.refs, M_BPFJIT);
#else
- free(stream.refs);
+ free(stream.refs);
+#endif
+
+#ifndef _KERNEL
if (stream.ibuf != NULL &&
mprotect(stream.ibuf, *size, PROT_READ | PROT_EXEC) != 0) {
munmap(stream.ibuf, *size);
Modified: head/sys/amd64/amd64/bpf_jit_machdep.h
==============================================================================
--- head/sys/amd64/amd64/bpf_jit_machdep.h Fri Nov 20 23:14:08 2009 (r199618)
+++ head/sys/amd64/amd64/bpf_jit_machdep.h Sat Nov 21 00:19:09 2009 (r199619)
@@ -85,7 +85,15 @@
#define DL 2
#define BL 3
-/* A stream of native binary code.*/
+/* Optimization flags */
+#define BPF_JIT_FLAG_RET 0x01
+#define BPF_JIT_FLAG_JMP 0x02
+#define BPF_JIT_FLAG_MEM 0x04
+
+#define BPF_JIT_FLAG_ALL \
+ (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM)
+
+/* A stream of native binary code */
typedef struct bpf_bin_stream {
/* Current native instruction pointer. */
int cur_ip;
@@ -117,7 +125,7 @@ typedef struct bpf_bin_stream {
typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n);
/*
- * native Instruction Macros
+ * Native instruction macros
*/
/* movl i32,r32 */
@@ -220,9 +228,14 @@ typedef void (*emit_func)(bpf_bin_stream
emitm(&stream, (5 << 4) | (0 << 3) | (r64 & 0x7), 1); \
} while (0)
-/* leave/ret */
-#define LEAVE_RET() do { \
- emitm(&stream, 0xc3c9, 2); \
+/* leaveq */
+#define LEAVE() do { \
+ emitm(&stream, 0xc9, 1); \
+} while (0)
+
+/* retq */
+#define RET() do { \
+ emitm(&stream, 0xc3, 1); \
} while (0)
/* addl sr32,dr32 */
Modified: head/sys/i386/i386/bpf_jit_machdep.c
==============================================================================
--- head/sys/i386/i386/bpf_jit_machdep.c Fri Nov 20 23:14:08 2009 (r199618)
+++ head/sys/i386/i386/bpf_jit_machdep.c Sat Nov 21 00:19:09 2009 (r199619)
@@ -57,18 +57,19 @@ __FBSDID("$FreeBSD$");
bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *);
/*
- * emit routine to update the jump table
+ * Emit routine to update the jump table.
*/
static void
emit_length(bpf_bin_stream *stream, __unused u_int value, u_int len)
{
- (stream->refs)[stream->bpf_pc] += len;
+ if (stream->refs != NULL)
+ (stream->refs)[stream->bpf_pc] += len;
stream->cur_ip += len;
}
/*
- * emit routine to output the actual binary code
+ * Emit routine to output the actual binary code.
*/
static void
emit_code(bpf_bin_stream *stream, u_int value, u_int len)
@@ -95,37 +96,79 @@ emit_code(bpf_bin_stream *stream, u_int
}
/*
- * Function that does the real stuff
+ * Scan the filter program and find possible optimization.
+ */
+static int
+bpf_jit_optimize(struct bpf_insn *prog, u_int nins)
+{
+ const struct bpf_insn *p;
+ int flags;
+ u_int i;
+
+ /* Do we return immediately? */
+ if (BPF_CLASS(prog[0].code) == BPF_RET)
+ return (BPF_JIT_FLAG_RET);
+
+ for (flags = 0, i = 0; i < nins; i++) {
+ p = &prog[i];
+
+ /* Do we need reference table? */
+ if ((flags & BPF_JIT_FLAG_JMP) == 0 &&
+ BPF_CLASS(p->code) == BPF_JMP)
+ flags |= BPF_JIT_FLAG_JMP;
+
+ /* Do we need scratch memory? */
+ if ((flags & BPF_JIT_FLAG_MEM) == 0 &&
+ (p->code == BPF_ST || p->code == BPF_STX ||
+ p->code == (BPF_LD|BPF_MEM) ||
+ p->code == (BPF_LDX|BPF_MEM)))
+ flags |= BPF_JIT_FLAG_MEM;
+
+ if (flags == BPF_JIT_FLAG_ALL)
+ break;
+ }
+
+ return (flags);
+}
+
+/*
+ * Function that does the real stuff.
*/
bpf_filter_func
bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
{
bpf_bin_stream stream;
struct bpf_insn *ins;
+ int flags, flag_ret, flag_jmp, flag_mem;
u_int i, pass;
+ flags = bpf_jit_optimize(prog, nins);
+ flag_ret = (flags & BPF_JIT_FLAG_RET) != 0;
+ flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0;
+ flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0;
+
/*
* NOTE: Do not modify the name of this variable, as it's used by
* the macros to emit code.
*/
emit_func emitm;
+ memset(&stream, 0, sizeof(stream));
+
/* Allocate the reference table for the jumps. */
+ if (flag_jmp) {
#ifdef _KERNEL
- stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
- M_NOWAIT | M_ZERO);
+ stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
+ M_NOWAIT | M_ZERO);
#else
- stream.refs = malloc((nins + 1) * sizeof(u_int));
+ stream.refs = malloc((nins + 1) * sizeof(u_int));
#endif
- if (stream.refs == NULL)
- return (NULL);
+ if (stream.refs == NULL)
+ return (NULL);
#ifndef _KERNEL
- memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
+ memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
#endif
-
- stream.cur_ip = 0;
- stream.bpf_pc = 0;
- stream.ibuf = NULL;
+ }
/*
* The first pass will emit the lengths of the instructions
@@ -137,14 +180,19 @@ bpf_jit_compile(struct bpf_insn *prog, u
ins = prog;
/* Create the procedure header. */
- PUSH(EBP);
- MOVrd(ESP, EBP);
- SUBib(BPF_MEMWORDS * sizeof(uint32_t), ESP);
- PUSH(EDI);
- PUSH(ESI);
- PUSH(EBX);
- MOVodd(8, EBP, EBX);
- MOVodd(16, EBP, EDI);
+ if (!flag_ret) {
+ PUSH(EBP);
+ MOVrd(ESP, EBP);
+ }
+ if (flag_mem)
+ SUBib(BPF_MEMWORDS * sizeof(uint32_t), ESP);
+ if (!flag_ret) {
+ PUSH(EDI);
+ PUSH(ESI);
+ PUSH(EBX);
+ MOVodd(8, EBP, EBX);
+ MOVodd(16, EBP, EDI);
+ }
for (i = 0; i < nins; i++) {
stream.bpf_pc++;
@@ -159,17 +207,23 @@ bpf_jit_compile(struct bpf_insn *prog, u
case BPF_RET|BPF_K:
MOVid(ins->k, EAX);
- POP(EBX);
- POP(ESI);
- POP(EDI);
- LEAVE_RET();
+ if (!flag_ret) {
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE();
+ }
+ RET();
break;
case BPF_RET|BPF_A:
- POP(EBX);
- POP(ESI);
- POP(EDI);
- LEAVE_RET();
+ if (!flag_ret) {
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE();
+ }
+ RET();
break;
case BPF_LD|BPF_W|BPF_ABS:
@@ -184,7 +238,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
MOVobd(EBX, ESI, EAX);
BSWAP(EAX);
break;
@@ -201,7 +256,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
MOVobw(EBX, ESI, AX);
SWAP_AX();
break;
@@ -214,7 +270,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
MOVobb(EBX, ESI, AL);
break;
@@ -243,7 +300,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
MOVobd(EBX, ESI, EAX);
BSWAP(EAX);
break;
@@ -265,7 +323,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
MOVobw(EBX, ESI, AX);
SWAP_AX();
break;
@@ -282,7 +341,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
ADDrd(EDX, ESI);
MOVobb(EBX, ESI, AL);
break;
@@ -295,7 +355,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
ZEROrd(EDX);
MOVobb(EBX, ESI, DL);
ANDib(0x0f, DL);
@@ -425,7 +486,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
POP(EBX);
POP(ESI);
POP(EDI);
- LEAVE_RET();
+ LEAVE();
+ RET();
MOVrd(EDX, ECX);
ZEROrd(EDX);
DIVrd(ECX);
@@ -525,8 +587,9 @@ bpf_jit_compile(struct bpf_insn *prog, u
* Modify the reference table to contain the offsets and
* not the lengths of the instructions.
*/
- for (i = 1; i < nins + 1; i++)
- stream.refs[i] += stream.refs[i - 1];
+ if (flag_jmp)
+ for (i = 1; i < nins + 1; i++)
+ stream.refs[i] += stream.refs[i - 1];
/* Reset the counters. */
stream.cur_ip = 0;
@@ -540,10 +603,14 @@ bpf_jit_compile(struct bpf_insn *prog, u
* The reference table is needed only during compilation,
* now we can free it.
*/
+ if (flag_jmp)
#ifdef _KERNEL
- free(stream.refs, M_BPFJIT);
+ free(stream.refs, M_BPFJIT);
#else
- free(stream.refs);
+ free(stream.refs);
+#endif
+
+#ifndef _KERNEL
if (stream.ibuf != NULL &&
mprotect(stream.ibuf, *size, PROT_READ | PROT_EXEC) != 0) {
munmap(stream.ibuf, *size);
Modified: head/sys/i386/i386/bpf_jit_machdep.h
==============================================================================
--- head/sys/i386/i386/bpf_jit_machdep.h Fri Nov 20 23:14:08 2009 (r199618)
+++ head/sys/i386/i386/bpf_jit_machdep.h Sat Nov 21 00:19:09 2009 (r199619)
@@ -60,7 +60,15 @@
#define DL 2
#define BL 3
-/* A stream of native binary code.*/
+/* Optimization flags */
+#define BPF_JIT_FLAG_RET 0x01
+#define BPF_JIT_FLAG_JMP 0x02
+#define BPF_JIT_FLAG_MEM 0x04
+
+#define BPF_JIT_FLAG_ALL \
+ (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM)
+
+/* A stream of native binary code */
typedef struct bpf_bin_stream {
/* Current native instruction pointer. */
int cur_ip;
@@ -92,7 +100,7 @@ typedef struct bpf_bin_stream {
typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n);
/*
- * native Instruction Macros
+ * Native instruction macros
*/
/* movl i32,r32 */
@@ -165,9 +173,14 @@ typedef void (*emit_func)(bpf_bin_stream
emitm(&stream, (5 << 4) | (1 << 3) | (r32 & 0x7), 1); \
} while (0)
-/* leave/ret */
-#define LEAVE_RET() do { \
- emitm(&stream, 0xc3c9, 2); \
+/* leave */
+#define LEAVE() do { \
+ emitm(&stream, 0xc9, 1); \
+} while (0)
+
+/* ret */
+#define RET() do { \
+ emitm(&stream, 0xc3, 1); \
} while (0)
/* addl sr32,dr32 */
More information about the svn-src-all
mailing list