bpf, arm64: Support more atomic operations
The "Atomics for eBPF" patch series added support for atomic[64]_fetch_add, atomic[64]_[fetch_]{and,or,xor} and atomic[64]_{xchg,cmpxchg}, but only wired them up for x86-64, so support these atomic operations for arm64 as well.

The implementation is essentially a mechanical translation of the code snippets in atomic_ll_sc.h, atomic_lse.h and cmpxchg.h under arch/arm64/include/asm. When LSE atomics are unavailable, (BPF_ADD | BPF_FETCH) needs an extra temporary register to save the value of the src register; instead of adding a TMP_REG_4, reuse BPF_REG_AX for that purpose. Also provide an inline stub of emit_lse_atomic() that returns -EINVAL when CONFIG_ARM64_LSE_ATOMICS is disabled.

For both the cpus_have_cap(ARM64_HAS_LSE_ATOMICS) case and the no-LSE-atomics case, the following three tests were exercised and passed: "./test_verifier", "./test_progs -t atomic" and "insmod ./test_bpf.ko".

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220217072232.1186625-4-houtao1@huawei.com
Parent: 79e7ce2e51
Commit: 1902472b4f
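For context, the new operations are reached from BPF programs through the compiler's atomic builtins. A minimal sketch (illustrative only, not part of this patch) of a program that makes the JIT emit these instructions, assuming clang with -target bpf -mcpu=v3 so the __sync_* builtins lower to BPF_ATOMIC instructions; all names here are hypothetical:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

__u64 counter;

SEC("tp/syscalls/sys_enter_getpgid")
int test_atomics(void *ctx)
{
	__u64 old;

	__sync_fetch_and_add(&counter, 1);		/* BPF_ADD (| BPF_FETCH if the result is used) */
	old = __sync_fetch_and_or(&counter, 0x10);	/* BPF_OR | BPF_FETCH */
	old = __sync_lock_test_and_set(&counter, old);	/* BPF_XCHG */
	__sync_val_compare_and_swap(&counter, old, 0);	/* BPF_CMPXCHG */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";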
arch/arm64/net/bpf_jit.h

@@ -88,17 +88,42 @@
 /* [Rn] = Rt; (atomic) Rs = [state] */
 #define A64_STXR(sf, Rt, Rn, Rs) \
 	A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
+/* [Rn] = Rt (store release); (atomic) Rs = [state] */
+#define A64_STLXR(sf, Rt, Rn, Rs) \
+	aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
+				       AARCH64_INSN_LDST_STORE_REL_EX)
 
 /*
  * LSE atomics
  *
- * STADD is simply encoded as an alias for LDADD with XZR as
- * the destination register.
+ * ST{ADD,CLR,SET,EOR} is simply encoded as an alias for
+ * LDD{ADD,CLR,SET,EOR} with XZR as the destination register.
  */
-#define A64_STADD(sf, Rn, Rs) \
+#define A64_ST_OP(sf, Rn, Rs, op) \
 	aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \
-		A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \
+		A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_##op, \
 		AARCH64_INSN_MEM_ORDER_NONE)
+/* [Rn] <op>= Rs */
+#define A64_STADD(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, ADD)
+#define A64_STCLR(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, CLR)
+#define A64_STEOR(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, EOR)
+#define A64_STSET(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, SET)
+
+#define A64_LD_OP_AL(sf, Rt, Rn, Rs, op) \
+	aarch64_insn_gen_atomic_ld_op(Rt, Rn, Rs, \
+		A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_##op, \
+		AARCH64_INSN_MEM_ORDER_ACQREL)
+/* Rt = [Rn] (load acquire); [Rn] <op>= Rs (store release) */
+#define A64_LDADDAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, ADD)
+#define A64_LDCLRAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, CLR)
+#define A64_LDEORAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, EOR)
+#define A64_LDSETAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, SET)
+/* Rt = [Rn] (load acquire); [Rn] = Rs (store release) */
+#define A64_SWPAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, SWP)
+/* Rs = CAS(Rn, Rs, Rt) (load acquire & store release) */
+#define A64_CASAL(sf, Rt, Rn, Rs) \
+	aarch64_insn_gen_cas(Rt, Rn, Rs, A64_SIZE(sf), \
+			     AARCH64_INSN_MEM_ORDER_ACQREL)
 
 /* Add/subtract (immediate) */
 #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
@@ -203,6 +228,9 @@
 #define A64_ANDS(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, AND_SETFLAGS)
 /* Rn & Rm; set condition flags */
 #define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm)
+/* Rd = ~Rm (alias of ORN with A64_ZR as Rn) */
+#define A64_MVN(sf, Rd, Rm) \
+	A64_LOGIC_SREG(sf, Rd, A64_ZR, Rm, ORN)
 
 /* Logical (immediate) */
 #define A64_LOGIC_IMM(sf, Rd, Rn, imm, type) ({ \
@@ -226,4 +254,7 @@
 #define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ)
 #define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC)
 
+/* DMB */
+#define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH)
+
 #endif /* _BPF_JIT_H */
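For orientation, the two macro families added above differ in result and ordering semantics: the A64_ST_OP() forms update memory without returning a value and without ordering (AARCH64_INSN_MEM_ORDER_NONE), while A64_LD_OP_AL(), A64_SWPAL() and A64_CASAL() return the old value with acquire-release ordering. A C-level sketch of that contrast, using compiler builtins purely for illustration (not part of the patch):

/* model_st_add() mirrors what STADD provides: a result-less, relaxed RMW.
 * model_ldaddal() mirrors LDADDAL: a fetching RMW with acquire+release
 * ordering.
 */
static inline void model_st_add(unsigned long *addr, unsigned long val)
{
	(void)__atomic_fetch_add(addr, val, __ATOMIC_RELAXED);	/* stadd */
}

static inline unsigned long model_ldaddal(unsigned long *addr, unsigned long val)
{
	return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);	/* ldaddal */
}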
arch/arm64/net/bpf_jit_comp.c

@@ -27,6 +27,17 @@
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
 
+#define check_imm(bits, imm) do {				\
+	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
+	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
+		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
+			i, imm, imm);				\
+		return -EINVAL;					\
+	}							\
+} while (0)
+#define check_imm19(imm) check_imm(19, imm)
+#define check_imm26(imm) check_imm(26, imm)
+
 /* Map BPF registers to A64 registers */
 static const int bpf2a64[] = {
 	/* return value from in-kernel function, and exit value from eBPF */
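The check_imm()/check_imm19()/check_imm26() macros are hoisted from build_insn() to file scope (see the later hunk that removes them there) so the new emit_ll_sc_atomic() helper can also validate its branch offsets. As a standalone illustration of the range check they perform (plain C, not kernel code; the helper name is hypothetical):

#include <stdbool.h>

/* A value passes the check when shifting it right by 'bits' leaves zero
 * (positive values) or when ~imm >> bits is zero (negative values),
 * mirroring the macro above.
 */
static bool imm_in_range(long imm, int bits)
{
	return !(((imm > 0) && (imm >> bits)) ||
		 ((imm < 0) && (~imm >> bits)));
}

/* e.g. imm_in_range(-3, 19) is true, so check_imm19(-3), the backward
 * branch offset used by the LL/SC retry loops, passes.
 */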
@@ -329,6 +340,170 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 #undef jmp_offset
 }
 
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const u8 code = insn->code;
+	const u8 dst = bpf2a64[insn->dst_reg];
+	const u8 src = bpf2a64[insn->src_reg];
+	const u8 tmp = bpf2a64[TMP_REG_1];
+	const u8 tmp2 = bpf2a64[TMP_REG_2];
+	const bool isdw = BPF_SIZE(code) == BPF_DW;
+	const s16 off = insn->off;
+	u8 reg;
+
+	if (!off) {
+		reg = dst;
+	} else {
+		emit_a64_mov_i(1, tmp, off, ctx);
+		emit(A64_ADD(1, tmp, tmp, dst), ctx);
+		reg = tmp;
+	}
+
+	switch (insn->imm) {
+	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
+	case BPF_ADD:
+		emit(A64_STADD(isdw, reg, src), ctx);
+		break;
+	case BPF_AND:
+		emit(A64_MVN(isdw, tmp2, src), ctx);
+		emit(A64_STCLR(isdw, reg, tmp2), ctx);
+		break;
+	case BPF_OR:
+		emit(A64_STSET(isdw, reg, src), ctx);
+		break;
+	case BPF_XOR:
+		emit(A64_STEOR(isdw, reg, src), ctx);
+		break;
+	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
+	case BPF_ADD | BPF_FETCH:
+		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
+		break;
+	case BPF_AND | BPF_FETCH:
+		emit(A64_MVN(isdw, tmp2, src), ctx);
+		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
+		break;
+	case BPF_OR | BPF_FETCH:
+		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
+		break;
+	case BPF_XOR | BPF_FETCH:
+		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
+		break;
+	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+	case BPF_XCHG:
+		emit(A64_SWPAL(isdw, src, reg, src), ctx);
+		break;
+	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+	case BPF_CMPXCHG:
+		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
+		break;
+	default:
+		pr_err_once("unknown atomic op code %02x\n", insn->imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	return -EINVAL;
+}
+#endif
+
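Note how BPF_AND is handled on the LSE path above: there is no atomic AND instruction, only LDCLR/STCLR (atomic bit-clear, i.e. AND with the complement), so the JIT complements the operand with MVN first. A hedged C-level model of what the emitted mvn + ldclral pair computes (illustrative only; the builtin merely stands in for the instruction semantics):

/* Model of BPF_AND | BPF_FETCH as emitted above:
 *	mvn	tmp2, src		// tmp2 = ~src
 *	ldclral	tmp2, src, [reg]	// src = old; [reg] = old & ~tmp2 = old & src
 */
static inline unsigned long model_atomic_fetch_and(unsigned long *addr,
						   unsigned long mask)
{
	/* Sequentially consistent fetch-and approximates LDCLRAL's
	 * acquire+release ordering for illustration. */
	return __atomic_fetch_and(addr, mask, __ATOMIC_SEQ_CST);
}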
+static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const u8 code = insn->code;
+	const u8 dst = bpf2a64[insn->dst_reg];
+	const u8 src = bpf2a64[insn->src_reg];
+	const u8 tmp = bpf2a64[TMP_REG_1];
+	const u8 tmp2 = bpf2a64[TMP_REG_2];
+	const u8 tmp3 = bpf2a64[TMP_REG_3];
+	const int i = insn - ctx->prog->insnsi;
+	const s32 imm = insn->imm;
+	const s16 off = insn->off;
+	const bool isdw = BPF_SIZE(code) == BPF_DW;
+	u8 reg;
+	s32 jmp_offset;
+
+	if (!off) {
+		reg = dst;
+	} else {
+		emit_a64_mov_i(1, tmp, off, ctx);
+		emit(A64_ADD(1, tmp, tmp, dst), ctx);
+		reg = tmp;
+	}
+
+	if (imm == BPF_ADD || imm == BPF_AND ||
+	    imm == BPF_OR || imm == BPF_XOR) {
+		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
+		emit(A64_LDXR(isdw, tmp2, reg), ctx);
+		if (imm == BPF_ADD)
+			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+		else if (imm == BPF_AND)
+			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
+		else if (imm == BPF_OR)
+			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
+		else
+			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
+		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
+		jmp_offset = -3;
+		check_imm19(jmp_offset);
+		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+	} else if (imm == (BPF_ADD | BPF_FETCH) ||
+		   imm == (BPF_AND | BPF_FETCH) ||
+		   imm == (BPF_OR | BPF_FETCH) ||
+		   imm == (BPF_XOR | BPF_FETCH)) {
+		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
+		const u8 ax = bpf2a64[BPF_REG_AX];
+
+		emit(A64_MOV(isdw, ax, src), ctx);
+		emit(A64_LDXR(isdw, src, reg), ctx);
+		if (imm == (BPF_ADD | BPF_FETCH))
+			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
+		else if (imm == (BPF_AND | BPF_FETCH))
+			emit(A64_AND(isdw, tmp2, src, ax), ctx);
+		else if (imm == (BPF_OR | BPF_FETCH))
+			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
+		else
+			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
+		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
+		jmp_offset = -3;
+		check_imm19(jmp_offset);
+		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+		emit(A64_DMB_ISH, ctx);
+	} else if (imm == BPF_XCHG) {
+		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+		emit(A64_MOV(isdw, tmp2, src), ctx);
+		emit(A64_LDXR(isdw, src, reg), ctx);
+		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
+		jmp_offset = -2;
+		check_imm19(jmp_offset);
+		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+		emit(A64_DMB_ISH, ctx);
+	} else if (imm == BPF_CMPXCHG) {
+		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+		const u8 r0 = bpf2a64[BPF_REG_0];
+
+		emit(A64_MOV(isdw, tmp2, r0), ctx);
+		emit(A64_LDXR(isdw, r0, reg), ctx);
+		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
+		jmp_offset = 4;
+		check_imm19(jmp_offset);
+		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
+		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
+		jmp_offset = -4;
+		check_imm19(jmp_offset);
+		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+		emit(A64_DMB_ISH, ctx);
+	} else {
+		pr_err_once("unknown atomic op code %02x\n", imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
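On the LL/SC path, BPF_ADD | BPF_FETCH must preserve the original operand while src is reused for the loaded old value, which is why BPF_REG_AX serves as the scratch register instead of a new TMP_REG_4. The retry loop emitted above, together with a hedged C model of its fetch-add semantics (the CAS loop is only an illustration of the behaviour, not how the JIT implements it):

/* Sequence emitted for BPF_ADD | BPF_FETCH (64-bit case):
 *	mov	ax, src			// save the operand
 * 1:	ldxr	src, [reg]		// load-exclusive old value into src
 *	add	tmp2, src, ax
 *	stlxr	w_tmp3, tmp2, [reg]	// store-release exclusive
 *	cbnz	w_tmp3, 1b		// retry if the exclusive store failed
 *	dmb	ish			// full barrier, matching BPF's ordering
 */
static unsigned long model_fetch_add(unsigned long *addr, unsigned long val)
{
	unsigned long old = *addr;

	/* A failed compare-exchange refreshes 'old' with the current value. */
	while (!__atomic_compare_exchange_n(addr, &old, old + val, false,
					    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		;
	return old;
}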
 static void build_epilogue(struct jit_ctx *ctx)
 {
 	const u8 r0 = bpf2a64[BPF_REG_0];
@@ -434,29 +609,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	const u8 src = bpf2a64[insn->src_reg];
 	const u8 tmp = bpf2a64[TMP_REG_1];
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
-	const u8 tmp3 = bpf2a64[TMP_REG_3];
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const int i = insn - ctx->prog->insnsi;
 	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
 			  BPF_CLASS(code) == BPF_JMP;
-	const bool isdw = BPF_SIZE(code) == BPF_DW;
-	u8 jmp_cond, reg;
+	u8 jmp_cond;
 	s32 jmp_offset;
 	u32 a64_insn;
 	int ret;
 
-#define check_imm(bits, imm) do {				\
-	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
-	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
-		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
-			i, imm, imm);				\
-		return -EINVAL;					\
-	}							\
-} while (0)
-#define check_imm19(imm) check_imm(19, imm)
-#define check_imm26(imm) check_imm(26, imm)
-
 	switch (code) {
 	/* dst = src */
 	case BPF_ALU | BPF_MOV | BPF_X:
@@ -891,33 +1053,12 @@ emit_cond_jmp:
 
 	case BPF_STX | BPF_ATOMIC | BPF_W:
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
-		if (insn->imm != BPF_ADD) {
-			pr_err_once("unknown atomic op code %02x\n", insn->imm);
-			return -EINVAL;
-		}
-
-		/* STX XADD: lock *(u32 *)(dst + off) += src
-		 * and
-		 * STX XADD: lock *(u64 *)(dst + off) += src
-		 */
-
-		if (!off) {
-			reg = dst;
-		} else {
-			emit_a64_mov_i(1, tmp, off, ctx);
-			emit(A64_ADD(1, tmp, tmp, dst), ctx);
-			reg = tmp;
-		}
-		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
-			emit(A64_STADD(isdw, reg, src), ctx);
-		} else {
-			emit(A64_LDXR(isdw, tmp2, reg), ctx);
-			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-			emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
-			jmp_offset = -3;
-			check_imm19(jmp_offset);
-			emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
-		}
+		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+			ret = emit_lse_atomic(insn, ctx);
+		else
+			ret = emit_ll_sc_atomic(insn, ctx);
+		if (ret)
+			return ret;
 		break;
 
 	default: