svn commit: r199721 - in head/sys: amd64/amd64 i386/i386

Jung-uk Kim jkim at FreeBSD.org
Mon Nov 23 22:23:20 UTC 2009


Author: jkim
Date: Mon Nov 23 22:23:19 2009
New Revision: 199721
URL: http://svn.freebsd.org/changeset/base/199721

Log:
  - Add more aggressive BPF JIT optimization.  This favors i386 more,
  while the previous commit was more amd64-centric.
  - Use calloc(3) instead of malloc(3)/memset(3) in userland[1].
  
  Submitted by:	ed[1]
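
  The userland change in bpf_jit_compile() replaces a malloc(3) call followed
  by an explicit memset(3) with a single calloc(3) call, which returns zeroed
  memory and, in most implementations, also checks the size multiplication
  for overflow.  A minimal standalone sketch of the pattern follows; the
  helper names are illustrative only and assume BSD's u_int from
  <sys/types.h>, they are not the FreeBSD code itself:

	#include <sys/types.h>
	#include <stdlib.h>
	#include <string.h>

	/* Old userland path: allocate, then zero by hand. */
	static u_int *
	alloc_refs_old(u_int nins)
	{
		u_int *refs;

		refs = malloc((nins + 1) * sizeof(u_int));
		if (refs == NULL)
			return (NULL);
		memset(refs, 0, (nins + 1) * sizeof(u_int));
		return (refs);
	}

	/* New userland path: calloc(3) zeroes the block in one call. */
	static u_int *
	alloc_refs_new(u_int nins)
	{

		return (calloc(nins + 1, sizeof(u_int)));
	}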

Modified:
  head/sys/amd64/amd64/bpf_jit_machdep.c
  head/sys/amd64/amd64/bpf_jit_machdep.h
  head/sys/i386/i386/bpf_jit_machdep.c
  head/sys/i386/i386/bpf_jit_machdep.h

Modified: head/sys/amd64/amd64/bpf_jit_machdep.c
==============================================================================
--- head/sys/amd64/amd64/bpf_jit_machdep.c	Mon Nov 23 21:17:38 2009	(r199720)
+++ head/sys/amd64/amd64/bpf_jit_machdep.c	Mon Nov 23 22:23:19 2009	(r199721)
@@ -101,29 +101,46 @@ emit_code(bpf_bin_stream *stream, u_int 
 static int
 bpf_jit_optimize(struct bpf_insn *prog, u_int nins)
 {
-	const struct bpf_insn *p;
 	int flags;
 	u_int i;
 
 	/* Do we return immediately? */
 	if (BPF_CLASS(prog[0].code) == BPF_RET)
-		return (BPF_JIT_FLAG_RET);
+		return (BPF_JIT_FRET);
 
 	for (flags = 0, i = 0; i < nins; i++) {
-		p = &prog[i];
-
-		/* Do we need reference table? */
-		if ((flags & BPF_JIT_FLAG_JMP) == 0 &&
-		    BPF_CLASS(p->code) == BPF_JMP)
-			flags |= BPF_JIT_FLAG_JMP;
-
-		/* Do we need scratch memory? */
-		if ((flags & BPF_JIT_FLAG_MEM) == 0 &&
-		    (p->code == BPF_ST || p->code == BPF_STX ||
-		    p->code == (BPF_LD|BPF_MEM) ||
-		    p->code == (BPF_LDX|BPF_MEM)))
-			flags |= BPF_JIT_FLAG_MEM;
-
+		switch (prog[i].code) {
+		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_LDX|BPF_MSH|BPF_B:
+			flags |= BPF_JIT_FPKT;
+			break;
+		case BPF_LD|BPF_MEM:
+		case BPF_LDX|BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+			flags |= BPF_JIT_FMEM;
+			break;
+		case BPF_LD|BPF_W|BPF_LEN:
+		case BPF_LDX|BPF_W|BPF_LEN:
+			flags |= BPF_JIT_FLEN;
+			break;
+		case BPF_JMP|BPF_JA:
+		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_JMP|BPF_JSET|BPF_X:
+			flags |= BPF_JIT_FJMP;
+			break;
+		}
 		if (flags == BPF_JIT_FLAG_ALL)
 			break;
 	}
@@ -139,35 +156,37 @@ bpf_jit_compile(struct bpf_insn *prog, u
 {
 	bpf_bin_stream stream;
 	struct bpf_insn *ins;
-	int flags, flag_ret, flag_jmp, flag_mem;
+	int flags, fret, fpkt, fmem, fjmp, flen;
 	u_int i, pass;
 
-	flags = bpf_jit_optimize(prog, nins);
-	flag_ret = (flags & BPF_JIT_FLAG_RET) != 0;
-	flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0;
-	flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0;
-
 	/*
 	 * NOTE: Do not modify the name of this variable, as it's used by
 	 * the macros to emit code.
 	 */
 	emit_func emitm;
 
+	flags = bpf_jit_optimize(prog, nins);
+	fret = (flags & BPF_JIT_FRET) != 0;
+	fpkt = (flags & BPF_JIT_FPKT) != 0;
+	fmem = (flags & BPF_JIT_FMEM) != 0;
+	fjmp = (flags & BPF_JIT_FJMP) != 0;
+	flen = (flags & BPF_JIT_FLEN) != 0;
+
+	if (fret)
+		nins = 1;
+
 	memset(&stream, 0, sizeof(stream));
 
 	/* Allocate the reference table for the jumps. */
-	if (flag_jmp) {
+	if (fjmp) {
 #ifdef _KERNEL
 		stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
 		    M_NOWAIT | M_ZERO);
 #else
-		stream.refs = malloc((nins + 1) * sizeof(u_int));
+		stream.refs = calloc(nins + 1, sizeof(u_int));
 #endif
 		if (stream.refs == NULL)
 			return (NULL);
-#ifndef _KERNEL
-		memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
-#endif
 	}
 
 	/*
@@ -180,14 +199,15 @@ bpf_jit_compile(struct bpf_insn *prog, u
 		ins = prog;
 
 		/* Create the procedure header. */
-		if (flag_mem) {
+		if (fmem) {
 			PUSH(RBP);
 			MOVrq(RSP, RBP);
 			SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP);
 		}
-		if (!flag_ret) {
-			MOVrq2(RDI, R8);
+		if (flen)
 			MOVrd2(ESI, R9D);
+		if (fpkt) {
+			MOVrq2(RDI, R8);
 			MOVrd(EDX, EDI);
 		}
 
@@ -204,13 +224,13 @@ bpf_jit_compile(struct bpf_insn *prog, u
 
 			case BPF_RET|BPF_K:
 				MOVid(ins->k, EAX);
-				if (flag_mem)
+				if (fmem)
 					LEAVE();
 				RET();
 				break;
 
 			case BPF_RET|BPF_A:
-				if (flag_mem)
+				if (fmem)
 					LEAVE();
 				RET();
 				break;
@@ -222,7 +242,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				MOVrd(EDI, ECX);
 				SUBrd(ESI, ECX);
 				CMPid(sizeof(int32_t), ECX);
-				if (flag_mem) {
+				if (fmem) {
 					JAEb(4);
 					ZEROrd(EAX);
 					LEAVE();
@@ -244,7 +264,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				MOVrd(EDI, ECX);
 				SUBrd(ESI, ECX);
 				CMPid(sizeof(int16_t), ECX);
-				if (flag_mem) {
+				if (fmem) {
 					JAEb(2);
 					LEAVE();
 				} else
@@ -259,7 +279,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				ZEROrd(EAX);
 				MOVid(ins->k, ESI);
 				CMPrd(EDI, ESI);
-				if (flag_mem) {
+				if (fmem) {
 					JBb(2);
 					LEAVE();
 				} else
@@ -289,7 +309,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				MOVrd(EDI, ECX);
 				SUBrd(ESI, ECX);
 				CMPid(sizeof(int32_t), ECX);
-				if (flag_mem) {
+				if (fmem) {
 					JAEb(4);
 					ZEROrd(EAX);
 					LEAVE();
@@ -316,7 +336,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				MOVrd(EDI, ECX);
 				SUBrd(ESI, ECX);
 				CMPid(sizeof(int16_t), ECX);
-				if (flag_mem) {
+				if (fmem) {
 					JAEb(2);
 					LEAVE();
 				} else
@@ -335,7 +355,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				MOVrd(EDI, ECX);
 				SUBrd(EDX, ECX);
 				CMPrd(ESI, ECX);
-				if (flag_mem) {
+				if (fmem) {
 					JAb(2);
 					LEAVE();
 				} else
@@ -349,7 +369,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 			case BPF_LDX|BPF_MSH|BPF_B:
 				MOVid(ins->k, ESI);
 				CMPrd(EDI, ESI);
-				if (flag_mem) {
+				if (fmem) {
 					JBb(4);
 					ZEROrd(EAX);
 					LEAVE();
@@ -475,7 +495,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 
 			case BPF_ALU|BPF_DIV|BPF_X:
 				TESTrd(EDX, EDX);
-				if (flag_mem) {
+				if (fmem) {
 					JNEb(4);
 					ZEROrd(EAX);
 					LEAVE();
@@ -583,7 +603,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 		 * Modify the reference table to contain the offsets and
 		 * not the lengths of the instructions.
 		 */
-		if (flag_jmp)
+		if (fjmp)
 			for (i = 1; i < nins + 1; i++)
 				stream.refs[i] += stream.refs[i - 1];
 
@@ -599,7 +619,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 	 * The reference table is needed only during compilation,
 	 * now we can free it.
 	 */
-	if (flag_jmp)
+	if (fjmp)
 #ifdef _KERNEL
 		free(stream.refs, M_BPFJIT);
 #else

Modified: head/sys/amd64/amd64/bpf_jit_machdep.h
==============================================================================
--- head/sys/amd64/amd64/bpf_jit_machdep.h	Mon Nov 23 21:17:38 2009	(r199720)
+++ head/sys/amd64/amd64/bpf_jit_machdep.h	Mon Nov 23 22:23:19 2009	(r199721)
@@ -86,12 +86,14 @@
 #define BL	3
 
 /* Optimization flags */
-#define	BPF_JIT_FLAG_RET	0x01
-#define	BPF_JIT_FLAG_JMP	0x02
-#define	BPF_JIT_FLAG_MEM	0x04
+#define	BPF_JIT_FRET	0x01
+#define	BPF_JIT_FPKT	0x02
+#define	BPF_JIT_FMEM	0x04
+#define	BPF_JIT_FJMP	0x08
+#define	BPF_JIT_FLEN	0x10
 
 #define	BPF_JIT_FLAG_ALL	\
-    (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM)
+    (BPF_JIT_FPKT | BPF_JIT_FMEM | BPF_JIT_FJMP | BPF_JIT_FLEN)
 
 /* A stream of native binary code */
 typedef struct bpf_bin_stream {

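In rough terms, the expanded flag set lets the compiler emit only the parts of
the procedure header a given filter actually needs: BPF_JIT_FPKT marks
programs that touch packet data (so the buffer pointer and buffer length are
staged in registers), BPF_JIT_FMEM marks use of the scratch memory words (so
a stack frame is set up), BPF_JIT_FJMP marks jumps (so the reference table is
allocated), BPF_JIT_FLEN marks reads of the wire length, and BPF_JIT_FRET
marks programs that return immediately, in which case only one instruction is
compiled.  A standalone sketch of how such a flag word decodes is below; the
struct and helper are purely illustrative, not part of the commit:

	#include <stdio.h>

	#define	BPF_JIT_FRET	0x01
	#define	BPF_JIT_FPKT	0x02
	#define	BPF_JIT_FMEM	0x04
	#define	BPF_JIT_FJMP	0x08
	#define	BPF_JIT_FLEN	0x10

	struct jit_opts {
		int fret;	/* program is a bare BPF_RET */
		int fpkt;	/* loads packet bytes */
		int fmem;	/* uses M[] scratch memory */
		int fjmp;	/* contains jumps */
		int flen;	/* reads the wire length */
	};

	static void
	decode_flags(int flags, struct jit_opts *o)
	{

		o->fret = (flags & BPF_JIT_FRET) != 0;
		o->fpkt = (flags & BPF_JIT_FPKT) != 0;
		o->fmem = (flags & BPF_JIT_FMEM) != 0;
		o->fjmp = (flags & BPF_JIT_FJMP) != 0;
		o->flen = (flags & BPF_JIT_FLEN) != 0;
	}

	int
	main(void)
	{
		struct jit_opts o;

		/* e.g. a filter that loads packet bytes and branches on them */
		decode_flags(BPF_JIT_FPKT | BPF_JIT_FJMP, &o);
		printf("fpkt=%d fmem=%d fjmp=%d flen=%d fret=%d\n",
		    o.fpkt, o.fmem, o.fjmp, o.flen, o.fret);
		return (0);
	}
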
Modified: head/sys/i386/i386/bpf_jit_machdep.c
==============================================================================
--- head/sys/i386/i386/bpf_jit_machdep.c	Mon Nov 23 21:17:38 2009	(r199720)
+++ head/sys/i386/i386/bpf_jit_machdep.c	Mon Nov 23 22:23:19 2009	(r199721)
@@ -101,29 +101,45 @@ emit_code(bpf_bin_stream *stream, u_int 
 static int
 bpf_jit_optimize(struct bpf_insn *prog, u_int nins)
 {
-	const struct bpf_insn *p;
 	int flags;
 	u_int i;
 
 	/* Do we return immediately? */
 	if (BPF_CLASS(prog[0].code) == BPF_RET)
-		return (BPF_JIT_FLAG_RET);
+		return (BPF_JIT_FRET);
 
 	for (flags = 0, i = 0; i < nins; i++) {
-		p = &prog[i];
-
-		/* Do we need reference table? */
-		if ((flags & BPF_JIT_FLAG_JMP) == 0 &&
-		    BPF_CLASS(p->code) == BPF_JMP)
-			flags |= BPF_JIT_FLAG_JMP;
-
-		/* Do we need scratch memory? */
-		if ((flags & BPF_JIT_FLAG_MEM) == 0 &&
-		    (p->code == BPF_ST || p->code == BPF_STX ||
-		    p->code == (BPF_LD|BPF_MEM) ||
-		    p->code == (BPF_LDX|BPF_MEM)))
-			flags |= BPF_JIT_FLAG_MEM;
-
+		switch (prog[i].code) {
+		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_LDX|BPF_MSH|BPF_B:
+			flags |= BPF_JIT_FPKT;
+			break;
+		case BPF_LD|BPF_MEM:
+		case BPF_LDX|BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+			flags |= BPF_JIT_FMEM;
+			break;
+		case BPF_JMP|BPF_JA:
+		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_JMP|BPF_JSET|BPF_X:
+			flags |= BPF_JIT_FJMP;
+			break;
+		case BPF_ALU|BPF_DIV|BPF_K:
+			flags |= BPF_JIT_FADK;
+			break;
+		}
 		if (flags == BPF_JIT_FLAG_ALL)
 			break;
 	}
@@ -139,35 +155,39 @@ bpf_jit_compile(struct bpf_insn *prog, u
 {
 	bpf_bin_stream stream;
 	struct bpf_insn *ins;
-	int flags, flag_ret, flag_jmp, flag_mem;
+	int flags, fret, fpkt, fmem, fjmp, fadk;
+	int save_esp;
 	u_int i, pass;
 
-	flags = bpf_jit_optimize(prog, nins);
-	flag_ret = (flags & BPF_JIT_FLAG_RET) != 0;
-	flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0;
-	flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0;
-
 	/*
 	 * NOTE: Do not modify the name of this variable, as it's used by
 	 * the macros to emit code.
 	 */
 	emit_func emitm;
 
+	flags = bpf_jit_optimize(prog, nins);
+	fret = (flags & BPF_JIT_FRET) != 0;
+	fpkt = (flags & BPF_JIT_FPKT) != 0;
+	fmem = (flags & BPF_JIT_FMEM) != 0;
+	fjmp = (flags & BPF_JIT_FJMP) != 0;
+	fadk = (flags & BPF_JIT_FADK) != 0;
+	save_esp = (fpkt || fmem || fadk);	/* Stack is used. */
+
+	if (fret)
+		nins = 1;
+
 	memset(&stream, 0, sizeof(stream));
 
 	/* Allocate the reference table for the jumps. */
-	if (flag_jmp) {
+	if (fjmp) {
 #ifdef _KERNEL
 		stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
 		    M_NOWAIT | M_ZERO);
 #else
-		stream.refs = malloc((nins + 1) * sizeof(u_int));
+		stream.refs = calloc(nins + 1, sizeof(u_int));
 #endif
 		if (stream.refs == NULL)
 			return (NULL);
-#ifndef _KERNEL
-		memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
-#endif
 	}
 
 	/*
@@ -180,15 +200,16 @@ bpf_jit_compile(struct bpf_insn *prog, u
 		ins = prog;
 
 		/* Create the procedure header. */
-		if (!flag_ret) {
+		if (save_esp) {
 			PUSH(EBP);
 			MOVrd(ESP, EBP);
 		}
-		if (flag_mem)
+		if (fmem)
 			SUBib(BPF_MEMWORDS * sizeof(uint32_t), ESP);
-		if (!flag_ret) {
-			PUSH(EDI);
+		if (save_esp)
 			PUSH(ESI);
+		if (fpkt) {
+			PUSH(EDI);
 			PUSH(EBX);
 			MOVodd(8, EBP, EBX);
 			MOVodd(16, EBP, EDI);
@@ -207,20 +228,24 @@ bpf_jit_compile(struct bpf_insn *prog, u
 
 			case BPF_RET|BPF_K:
 				MOVid(ins->k, EAX);
-				if (!flag_ret) {
-					POP(EBX);
+				if (save_esp) {
+					if (fpkt) {
+						POP(EBX);
+						POP(EDI);
+					}
 					POP(ESI);
-					POP(EDI);
 					LEAVE();
 				}
 				RET();
 				break;
 
 			case BPF_RET|BPF_A:
-				if (!flag_ret) {
-					POP(EBX);
+				if (save_esp) {
+					if (fpkt) {
+						POP(EBX);
+						POP(EDI);
+					}
 					POP(ESI);
-					POP(EDI);
 					LEAVE();
 				}
 				RET();
@@ -236,8 +261,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				JAEb(7);
 				ZEROrd(EAX);
 				POP(EBX);
-				POP(ESI);
 				POP(EDI);
+				POP(ESI);
 				LEAVE();
 				RET();
 				MOVobd(EBX, ESI, EAX);
@@ -254,8 +279,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				CMPid(sizeof(int16_t), ECX);
 				JAEb(5);
 				POP(EBX);
-				POP(ESI);
 				POP(EDI);
+				POP(ESI);
 				LEAVE();
 				RET();
 				MOVobw(EBX, ESI, AX);
@@ -268,19 +293,29 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				CMPrd(EDI, ESI);
 				JBb(5);
 				POP(EBX);
-				POP(ESI);
 				POP(EDI);
+				POP(ESI);
 				LEAVE();
 				RET();
 				MOVobb(EBX, ESI, AL);
 				break;
 
 			case BPF_LD|BPF_W|BPF_LEN:
-				MOVodd(12, EBP, EAX);
+				if (save_esp)
+					MOVodd(12, EBP, EAX);
+				else {
+					MOVrd(ESP, ECX);
+					MOVodd(12, ECX, EAX);
+				}
 				break;
 
 			case BPF_LDX|BPF_W|BPF_LEN:
-				MOVodd(12, EBP, EDX);
+				if (save_esp)
+					MOVodd(12, EBP, EDX);
+				else {
+					MOVrd(ESP, ECX);
+					MOVodd(12, ECX, EDX);
+				}
 				break;
 
 			case BPF_LD|BPF_W|BPF_IND:
@@ -298,8 +333,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				JAEb(7);
 				ZEROrd(EAX);
 				POP(EBX);
-				POP(ESI);
 				POP(EDI);
+				POP(ESI);
 				LEAVE();
 				RET();
 				MOVobd(EBX, ESI, EAX);
@@ -321,8 +356,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				CMPid(sizeof(int16_t), ECX);
 				JAEb(5);
 				POP(EBX);
-				POP(ESI);
 				POP(EDI);
+				POP(ESI);
 				LEAVE();
 				RET();
 				MOVobw(EBX, ESI, AX);
@@ -339,8 +374,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				CMPrd(ESI, ECX);
 				JAb(5);
 				POP(EBX);
-				POP(ESI);
 				POP(EDI);
+				POP(ESI);
 				LEAVE();
 				RET();
 				ADDrd(EDX, ESI);
@@ -353,8 +388,8 @@ bpf_jit_compile(struct bpf_insn *prog, u
 				JBb(7);
 				ZEROrd(EAX);
 				POP(EBX);
-				POP(ESI);
 				POP(EDI);
+				POP(ESI);
 				LEAVE();
 				RET();
 				ZEROrd(EDX);
@@ -481,12 +516,22 @@ bpf_jit_compile(struct bpf_insn *prog, u
 
 			case BPF_ALU|BPF_DIV|BPF_X:
 				TESTrd(EDX, EDX);
-				JNEb(7);
-				ZEROrd(EAX);
-				POP(EBX);
-				POP(ESI);
-				POP(EDI);
-				LEAVE();
+				if (save_esp) {
+					if (fpkt) {
+						JNEb(7);
+						ZEROrd(EAX);
+						POP(EBX);
+						POP(EDI);
+					} else {
+						JNEb(5);
+						ZEROrd(EAX);
+					}
+					POP(ESI);
+					LEAVE();
+				} else {
+					JNEb(3);
+					ZEROrd(EAX);
+				}
 				RET();
 				MOVrd(EDX, ECX);
 				ZEROrd(EDX);
@@ -587,7 +632,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 		 * Modify the reference table to contain the offsets and
 		 * not the lengths of the instructions.
 		 */
-		if (flag_jmp)
+		if (fjmp)
 			for (i = 1; i < nins + 1; i++)
 				stream.refs[i] += stream.refs[i - 1];
 
@@ -603,7 +648,7 @@ bpf_jit_compile(struct bpf_insn *prog, u
 	 * The reference table is needed only during compilation,
 	 * now we can free it.
 	 */
-	if (flag_jmp)
+	if (fjmp)
 #ifdef _KERNEL
 		free(stream.refs, M_BPFJIT);
 #else

Modified: head/sys/i386/i386/bpf_jit_machdep.h
==============================================================================
--- head/sys/i386/i386/bpf_jit_machdep.h	Mon Nov 23 21:17:38 2009	(r199720)
+++ head/sys/i386/i386/bpf_jit_machdep.h	Mon Nov 23 22:23:19 2009	(r199721)
@@ -61,12 +61,14 @@
 #define BL	3
 
 /* Optimization flags */
-#define	BPF_JIT_FLAG_RET	0x01
-#define	BPF_JIT_FLAG_JMP	0x02
-#define	BPF_JIT_FLAG_MEM	0x04
+#define	BPF_JIT_FRET	0x01
+#define	BPF_JIT_FPKT	0x02
+#define	BPF_JIT_FMEM	0x04
+#define	BPF_JIT_FJMP	0x08
+#define	BPF_JIT_FADK	0x10
 
 #define	BPF_JIT_FLAG_ALL	\
-    (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM)
+    (BPF_JIT_FPKT | BPF_JIT_FMEM | BPF_JIT_FJMP | BPF_JIT_FADK)
 
 /* A stream of native binary code */
 typedef struct bpf_bin_stream {

