bpf, x86_32: add eBPF JIT compiler for ia32
The JIT compiler emits ia32 bit instructions. Currently, It supports eBPF only. Classic BPF is supported because of the conversion by BPF core. Almost all instructions from eBPF ISA supported except the following: BPF_ALU64 | BPF_DIV | BPF_K BPF_ALU64 | BPF_DIV | BPF_X BPF_ALU64 | BPF_MOD | BPF_K BPF_ALU64 | BPF_MOD | BPF_X BPF_STX | BPF_XADD | BPF_W BPF_STX | BPF_XADD | BPF_DW It doesn't support BPF_JMP|BPF_CALL with BPF_PSEUDO_CALL at the moment. IA32 has few general purpose registers, EAX|EDX|ECX|EBX|ESI|EDI. I use EAX|EDX|ECX|EBX as temporary registers to simulate instructions in eBPF ISA, and allocate ESI|EDI to BPF_REG_AX for constant blinding, all others eBPF registers, R0-R10, are simulated through scratch space on stack. The reasons behind the hardware registers allocation policy are: 1:MUL need EAX:EDX, shift operation need ECX, so they aren't fit for general eBPF 64bit register simulation. 2:We need at least 4 registers to simulate most eBPF ISA operations on registers operands instead of on register&memory operands. 3:We need to put BPF_REG_AX on hardware registers, or constant blinding will degrade jit performance heavily. Tested on PC (Intel(R) Core(TM) i5-5200U CPU). Testing results on i5-5200U: 1) test_bpf: Summary: 349 PASSED, 0 FAILED, [319/341 JIT'ed] 2) test_progs: Summary: 83 PASSED, 0 FAILED. 3) test_lpm: OK 4) test_lru_map: OK 5) test_verifier: Summary: 828 PASSED, 0 FAILED. Above tests are all done in following two conditions separately: 1:bpf_jit_enable=1 and bpf_jit_harden=0 2:bpf_jit_enable=1 and bpf_jit_harden=2 Below are some numbers for this jit implementation: Note: I run test_progs in kselftest 100 times continuously for every condition, the numbers are in format: total/times=avg. The numbers that test_bpf reports show almost the same relation. a:jit_enable=0 and jit_harden=0 b:jit_enable=1 and jit_harden=0 test_pkt_access:PASS:ipv4:15622/100=156 test_pkt_access:PASS:ipv4:10674/100=106 test_pkt_access:PASS:ipv6:9130/100=91 test_pkt_access:PASS:ipv6:4855/100=48 test_xdp:PASS:ipv4:240198/100=2401 test_xdp:PASS:ipv4:138912/100=1389 test_xdp:PASS:ipv6:137326/100=1373 test_xdp:PASS:ipv6:68542/100=685 test_l4lb:PASS:ipv4:61100/100=611 test_l4lb:PASS:ipv4:37302/100=373 test_l4lb:PASS:ipv6:101000/100=1010 test_l4lb:PASS:ipv6:55030/100=550 c:jit_enable=1 and jit_harden=2 test_pkt_access:PASS:ipv4:10558/100=105 test_pkt_access:PASS:ipv6:5092/100=50 test_xdp:PASS:ipv4:131902/100=1319 test_xdp:PASS:ipv6:77932/100=779 test_l4lb:PASS:ipv4:38924/100=389 test_l4lb:PASS:ipv6:57520/100=575 The numbers show we get 30%~50% improvement. See Documentation/networking/filter.txt for more information. Changelog: Changes v5-v6: 1:Add do {} while (0) to RETPOLINE_RAX_BPF_JIT for consistence reason. 2:Clean up non-standard comments, reported by Daniel Borkmann. 3:Fix a memory leak issue, repoted by Daniel Borkmann. Changes v4-v5: 1:Delete is_on_stack, BPF_REG_AX is the only one on real hardware registers, so just check with it. 2:Apply commit1612a981b7
("bpf, x64: fix JIT emission for dead code"), suggested by Daniel Borkmann. Changes v3-v4: 1:Fix changelog in commit. I install llvm-6.0, then test_progs willn't report errors. I submit another patch: "bpf: fix misaligned access for BPF_PROG_TYPE_PERF_EVENT program type on x86_32 platform" to fix another problem, after that patch, test_verifier willn't report errors too. 2:Fix clear r0[1] twice unnecessarily in *BPF_IND|BPF_ABS* simulation. Changes v2-v3: 1:Move BPF_REG_AX to real hardware registers for performance reason. 3:Using bpf_load_pointer instead of bpf_jit32.S, suggested by Daniel Borkmann. 4:Delete partial codes in1c2a088a66
, suggested by Daniel Borkmann. 5:Some bug fixes and comments improvement. Changes v1-v2: 1:Fix bug in emit_ia32_neg64. 2:Fix bug in emit_ia32_arsh_r64. 3:Delete filename in top level comment, suggested by Thomas Gleixner. 4:Delete unnecessary boiler plate text, suggested by Thomas Gleixner. 5:Rewrite some words in changelog. 6:CodingSytle improvement and a little more comments. Signed-off-by: Wang YanQing <udknight@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Родитель
6f96674dbd
Коммит
03f5781be2
|
@ -45,6 +45,7 @@ through bpf(2) and passing a verifier in the kernel, a JIT will then
|
|||
translate these BPF proglets into native CPU instructions. There are
|
||||
two flavors of JITs, the newer eBPF JIT currently supported on:
|
||||
- x86_64
|
||||
- x86_32
|
||||
- arm64
|
||||
- arm32
|
||||
- ppc64
|
||||
|
|
|
@ -137,7 +137,7 @@ config X86
|
|||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||
select HAVE_EBPF_JIT if X86_64
|
||||
select HAVE_EBPF_JIT
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
select HAVE_EXIT_THREAD
|
||||
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
|
||||
|
|
|
@ -291,16 +291,20 @@ do { \
|
|||
* lfence
|
||||
* jmp spec_trap
|
||||
* do_rop:
|
||||
* mov %rax,(%rsp)
|
||||
* mov %rax,(%rsp) for x86_64
|
||||
* mov %edx,(%esp) for x86_32
|
||||
* retq
|
||||
*
|
||||
* Without retpolines configured:
|
||||
*
|
||||
* jmp *%rax
|
||||
* jmp *%rax for x86_64
|
||||
* jmp *%edx for x86_32
|
||||
*/
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
#ifdef CONFIG_X86_64
|
||||
# define RETPOLINE_RAX_BPF_JIT_SIZE 17
|
||||
# define RETPOLINE_RAX_BPF_JIT() \
|
||||
do { \
|
||||
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
|
||||
/* spec_trap: */ \
|
||||
EMIT2(0xF3, 0x90); /* pause */ \
|
||||
|
@ -308,11 +312,31 @@ do { \
|
|||
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
|
||||
/* do_rop: */ \
|
||||
EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
|
||||
EMIT1(0xC3); /* retq */
|
||||
EMIT1(0xC3); /* retq */ \
|
||||
} while (0)
|
||||
#else
|
||||
# define RETPOLINE_EDX_BPF_JIT() \
|
||||
do { \
|
||||
EMIT1_off32(0xE8, 7); /* call do_rop */ \
|
||||
/* spec_trap: */ \
|
||||
EMIT2(0xF3, 0x90); /* pause */ \
|
||||
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
|
||||
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
|
||||
/* do_rop: */ \
|
||||
EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
|
||||
EMIT1(0xC3); /* ret */ \
|
||||
} while (0)
|
||||
#endif
|
||||
#else /* !CONFIG_RETPOLINE */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# define RETPOLINE_RAX_BPF_JIT_SIZE 2
|
||||
# define RETPOLINE_RAX_BPF_JIT() \
|
||||
EMIT2(0xFF, 0xE0); /* jmp *%rax */
|
||||
#else
|
||||
# define RETPOLINE_EDX_BPF_JIT() \
|
||||
EMIT2(0xFF, 0xE2) /* jmp *%edx */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
#
|
||||
# Arch-specific network modules
|
||||
#
|
||||
OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
|
||||
|
||||
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
|
||||
ifeq ($(CONFIG_X86_32),y)
|
||||
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
|
||||
else
|
||||
OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
|
||||
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
|
||||
endif
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче