2021-05-14 03:36:04 +03:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
|
|
/* A pointer that can point to either kernel or userspace memory. */
|
|
|
|
#ifndef _LINUX_BPFPTR_H
|
|
|
|
#define _LINUX_BPFPTR_H
|
|
|
|
|
2021-10-02 04:17:51 +03:00
|
|
|
#include <linux/mm.h>
|
2021-05-14 03:36:04 +03:00
|
|
|
#include <linux/sockptr.h>
|
|
|
|
|
|
|
|
typedef sockptr_t bpfptr_t;
|
|
|
|
|
|
|
|
static inline bool bpfptr_is_kernel(bpfptr_t bpfptr)
|
|
|
|
{
|
|
|
|
return bpfptr.is_kernel;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bpfptr_t KERNEL_BPFPTR(void *p)
|
|
|
|
{
|
|
|
|
return (bpfptr_t) { .kernel = p, .is_kernel = true };
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bpfptr_t USER_BPFPTR(void __user *p)
|
|
|
|
{
|
|
|
|
return (bpfptr_t) { .user = p };
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bpfptr_t make_bpfptr(u64 addr, bool is_kernel)
|
|
|
|
{
|
|
|
|
if (is_kernel)
|
|
|
|
return KERNEL_BPFPTR((void*) (uintptr_t) addr);
|
|
|
|
else
|
|
|
|
return USER_BPFPTR(u64_to_user_ptr(addr));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool bpfptr_is_null(bpfptr_t bpfptr)
|
|
|
|
{
|
|
|
|
if (bpfptr_is_kernel(bpfptr))
|
|
|
|
return !bpfptr.kernel;
|
|
|
|
return !bpfptr.user;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val)
|
|
|
|
{
|
|
|
|
if (bpfptr_is_kernel(*bpfptr))
|
|
|
|
bpfptr->kernel += val;
|
|
|
|
else
|
|
|
|
bpfptr->user += val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src,
|
|
|
|
size_t offset, size_t size)
|
|
|
|
{
|
BPF: Fix potential bad pointer dereference in bpf_sys_bpf()
The bpf_sys_bpf() helper function allows an eBPF program to load another
eBPF program from within the kernel. In this case the argument union
bpf_attr pointer (as well as the insns and license pointers inside) is a
kernel address instead of a userspace address (which is the case of a
usual bpf() syscall). To make the memory copying process in the syscall
work in both cases, bpfptr_t was introduced to wrap around the pointer
and distinguish its origin. Specifically, when copying memory contents
from a bpfptr_t, a copy_from_user() is performed in case of a userspace
address and a memcpy() is performed for a kernel address.
This can lead to problems because the in-kernel pointer is never checked
for validity. The problem happens when an eBPF syscall program tries to
call bpf_sys_bpf() to load a program but provides a bad insns pointer --
say 0xdeadbeef -- in the bpf_attr union. The helper calls __sys_bpf()
which would then call bpf_prog_load() to load the program.
bpf_prog_load() is responsible for copying the eBPF instructions to the
newly allocated memory for the program; it creates a kernel bpfptr_t for
insns and invokes copy_from_bpfptr(). Internally, all bpfptr_t
operations are backed by the corresponding sockptr_t operations, which
performs direct memcpy() on kernel pointers for copy_from/strncpy_from
operations. Therefore, the code is always happy to dereference the bad
pointer to trigger a un-handle-able page fault and in turn an oops.
However, this is not supposed to happen because at that point the eBPF
program is already verified and should not cause a memory error.
Sample KASAN trace:
[ 25.685056][ T228] ==================================================================
[ 25.685680][ T228] BUG: KASAN: user-memory-access in copy_from_bpfptr+0x21/0x30
[ 25.686210][ T228] Read of size 80 at addr 00000000deadbeef by task poc/228
[ 25.686732][ T228]
[ 25.686893][ T228] CPU: 3 PID: 228 Comm: poc Not tainted 5.19.0-rc7 #7
[ 25.687375][ T228] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS d55cb5a 04/01/2014
[ 25.687991][ T228] Call Trace:
[ 25.688223][ T228] <TASK>
[ 25.688429][ T228] dump_stack_lvl+0x73/0x9e
[ 25.688747][ T228] print_report+0xea/0x200
[ 25.689061][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.689401][ T228] ? _printk+0x54/0x6e
[ 25.689693][ T228] ? _raw_spin_lock_irqsave+0x70/0xd0
[ 25.690071][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.690412][ T228] kasan_report+0xb5/0xe0
[ 25.690716][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.691059][ T228] kasan_check_range+0x2bd/0x2e0
[ 25.691405][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.691734][ T228] memcpy+0x25/0x60
[ 25.692000][ T228] copy_from_bpfptr+0x21/0x30
[ 25.692328][ T228] bpf_prog_load+0x604/0x9e0
[ 25.692653][ T228] ? cap_capable+0xb4/0xe0
[ 25.692956][ T228] ? security_capable+0x4f/0x70
[ 25.693324][ T228] __sys_bpf+0x3af/0x580
[ 25.693635][ T228] bpf_sys_bpf+0x45/0x240
[ 25.693937][ T228] bpf_prog_f0ec79a5a3caca46_bpf_func1+0xa2/0xbd
[ 25.694394][ T228] bpf_prog_run_pin_on_cpu+0x2f/0xb0
[ 25.694756][ T228] bpf_prog_test_run_syscall+0x146/0x1c0
[ 25.695144][ T228] bpf_prog_test_run+0x172/0x190
[ 25.695487][ T228] __sys_bpf+0x2c5/0x580
[ 25.695776][ T228] __x64_sys_bpf+0x3a/0x50
[ 25.696084][ T228] do_syscall_64+0x60/0x90
[ 25.696393][ T228] ? fpregs_assert_state_consistent+0x50/0x60
[ 25.696815][ T228] ? exit_to_user_mode_prepare+0x36/0xa0
[ 25.697202][ T228] ? syscall_exit_to_user_mode+0x20/0x40
[ 25.697586][ T228] ? do_syscall_64+0x6e/0x90
[ 25.697899][ T228] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 25.698312][ T228] RIP: 0033:0x7f6d543fb759
[ 25.698624][ T228] Code: 08 5b 89 e8 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 97 a6 0e 00 f7 d8 64 89 01 48
[ 25.699946][ T228] RSP: 002b:00007ffc3df78468 EFLAGS: 00000287 ORIG_RAX: 0000000000000141
[ 25.700526][ T228] RAX: ffffffffffffffda RBX: 00007ffc3df78628 RCX: 00007f6d543fb759
[ 25.701071][ T228] RDX: 0000000000000090 RSI: 00007ffc3df78478 RDI: 000000000000000a
[ 25.701636][ T228] RBP: 00007ffc3df78510 R08: 0000000000000000 R09: 0000000000300000
[ 25.702191][ T228] R10: 0000000000000005 R11: 0000000000000287 R12: 0000000000000000
[ 25.702736][ T228] R13: 00007ffc3df78638 R14: 000055a1584aca68 R15: 00007f6d5456a000
[ 25.703282][ T228] </TASK>
[ 25.703490][ T228] ==================================================================
[ 25.704050][ T228] Disabling lock debugging due to kernel taint
Update copy_from_bpfptr() and strncpy_from_bpfptr() so that:
- for a kernel pointer, it uses the safe copy_from_kernel_nofault() and
strncpy_from_kernel_nofault() functions.
- for a userspace pointer, it performs copy_from_user() and
strncpy_from_user().
Fixes: af2ac3e13e45 ("bpf: Prepare bpf syscall to be used from kernel and user space.")
Link: https://lore.kernel.org/bpf/20220727132905.45166-1-jinghao@linux.ibm.com/
Signed-off-by: Jinghao Jia <jinghao@linux.ibm.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220729201713.88688-1-jinghao@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-07-29 23:17:13 +03:00
|
|
|
if (!bpfptr_is_kernel(src))
|
|
|
|
return copy_from_user(dst, src.user + offset, size);
|
|
|
|
return copy_from_kernel_nofault(dst, src.kernel + offset, size);
|
2021-05-14 03:36:04 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size)
|
|
|
|
{
|
|
|
|
return copy_from_bpfptr_offset(dst, src, 0, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
|
|
|
|
const void *src, size_t size)
|
|
|
|
{
|
|
|
|
return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
|
|
|
|
}
|
|
|
|
|
2021-08-19 02:52:16 +03:00
|
|
|
static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len)
|
2021-05-14 03:36:04 +03:00
|
|
|
{
|
2021-08-19 02:52:16 +03:00
|
|
|
void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
|
|
|
|
|
|
|
|
if (!p)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (copy_from_bpfptr(p, src, len)) {
|
|
|
|
kvfree(p);
|
|
|
|
return ERR_PTR(-EFAULT);
|
|
|
|
}
|
|
|
|
return p;
|
2021-05-14 03:36:04 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
|
|
|
|
{
|
BPF: Fix potential bad pointer dereference in bpf_sys_bpf()
The bpf_sys_bpf() helper function allows an eBPF program to load another
eBPF program from within the kernel. In this case the argument union
bpf_attr pointer (as well as the insns and license pointers inside) is a
kernel address instead of a userspace address (which is the case of a
usual bpf() syscall). To make the memory copying process in the syscall
work in both cases, bpfptr_t was introduced to wrap around the pointer
and distinguish its origin. Specifically, when copying memory contents
from a bpfptr_t, a copy_from_user() is performed in case of a userspace
address and a memcpy() is performed for a kernel address.
This can lead to problems because the in-kernel pointer is never checked
for validity. The problem happens when an eBPF syscall program tries to
call bpf_sys_bpf() to load a program but provides a bad insns pointer --
say 0xdeadbeef -- in the bpf_attr union. The helper calls __sys_bpf()
which would then call bpf_prog_load() to load the program.
bpf_prog_load() is responsible for copying the eBPF instructions to the
newly allocated memory for the program; it creates a kernel bpfptr_t for
insns and invokes copy_from_bpfptr(). Internally, all bpfptr_t
operations are backed by the corresponding sockptr_t operations, which
performs direct memcpy() on kernel pointers for copy_from/strncpy_from
operations. Therefore, the code is always happy to dereference the bad
pointer to trigger a un-handle-able page fault and in turn an oops.
However, this is not supposed to happen because at that point the eBPF
program is already verified and should not cause a memory error.
Sample KASAN trace:
[ 25.685056][ T228] ==================================================================
[ 25.685680][ T228] BUG: KASAN: user-memory-access in copy_from_bpfptr+0x21/0x30
[ 25.686210][ T228] Read of size 80 at addr 00000000deadbeef by task poc/228
[ 25.686732][ T228]
[ 25.686893][ T228] CPU: 3 PID: 228 Comm: poc Not tainted 5.19.0-rc7 #7
[ 25.687375][ T228] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS d55cb5a 04/01/2014
[ 25.687991][ T228] Call Trace:
[ 25.688223][ T228] <TASK>
[ 25.688429][ T228] dump_stack_lvl+0x73/0x9e
[ 25.688747][ T228] print_report+0xea/0x200
[ 25.689061][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.689401][ T228] ? _printk+0x54/0x6e
[ 25.689693][ T228] ? _raw_spin_lock_irqsave+0x70/0xd0
[ 25.690071][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.690412][ T228] kasan_report+0xb5/0xe0
[ 25.690716][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.691059][ T228] kasan_check_range+0x2bd/0x2e0
[ 25.691405][ T228] ? copy_from_bpfptr+0x21/0x30
[ 25.691734][ T228] memcpy+0x25/0x60
[ 25.692000][ T228] copy_from_bpfptr+0x21/0x30
[ 25.692328][ T228] bpf_prog_load+0x604/0x9e0
[ 25.692653][ T228] ? cap_capable+0xb4/0xe0
[ 25.692956][ T228] ? security_capable+0x4f/0x70
[ 25.693324][ T228] __sys_bpf+0x3af/0x580
[ 25.693635][ T228] bpf_sys_bpf+0x45/0x240
[ 25.693937][ T228] bpf_prog_f0ec79a5a3caca46_bpf_func1+0xa2/0xbd
[ 25.694394][ T228] bpf_prog_run_pin_on_cpu+0x2f/0xb0
[ 25.694756][ T228] bpf_prog_test_run_syscall+0x146/0x1c0
[ 25.695144][ T228] bpf_prog_test_run+0x172/0x190
[ 25.695487][ T228] __sys_bpf+0x2c5/0x580
[ 25.695776][ T228] __x64_sys_bpf+0x3a/0x50
[ 25.696084][ T228] do_syscall_64+0x60/0x90
[ 25.696393][ T228] ? fpregs_assert_state_consistent+0x50/0x60
[ 25.696815][ T228] ? exit_to_user_mode_prepare+0x36/0xa0
[ 25.697202][ T228] ? syscall_exit_to_user_mode+0x20/0x40
[ 25.697586][ T228] ? do_syscall_64+0x6e/0x90
[ 25.697899][ T228] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 25.698312][ T228] RIP: 0033:0x7f6d543fb759
[ 25.698624][ T228] Code: 08 5b 89 e8 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 97 a6 0e 00 f7 d8 64 89 01 48
[ 25.699946][ T228] RSP: 002b:00007ffc3df78468 EFLAGS: 00000287 ORIG_RAX: 0000000000000141
[ 25.700526][ T228] RAX: ffffffffffffffda RBX: 00007ffc3df78628 RCX: 00007f6d543fb759
[ 25.701071][ T228] RDX: 0000000000000090 RSI: 00007ffc3df78478 RDI: 000000000000000a
[ 25.701636][ T228] RBP: 00007ffc3df78510 R08: 0000000000000000 R09: 0000000000300000
[ 25.702191][ T228] R10: 0000000000000005 R11: 0000000000000287 R12: 0000000000000000
[ 25.702736][ T228] R13: 00007ffc3df78638 R14: 000055a1584aca68 R15: 00007f6d5456a000
[ 25.703282][ T228] </TASK>
[ 25.703490][ T228] ==================================================================
[ 25.704050][ T228] Disabling lock debugging due to kernel taint
Update copy_from_bpfptr() and strncpy_from_bpfptr() so that:
- for a kernel pointer, it uses the safe copy_from_kernel_nofault() and
strncpy_from_kernel_nofault() functions.
- for a userspace pointer, it performs copy_from_user() and
strncpy_from_user().
Fixes: af2ac3e13e45 ("bpf: Prepare bpf syscall to be used from kernel and user space.")
Link: https://lore.kernel.org/bpf/20220727132905.45166-1-jinghao@linux.ibm.com/
Signed-off-by: Jinghao Jia <jinghao@linux.ibm.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220729201713.88688-1-jinghao@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-07-29 23:17:13 +03:00
|
|
|
if (bpfptr_is_kernel(src))
|
|
|
|
return strncpy_from_kernel_nofault(dst, src.kernel, count);
|
|
|
|
return strncpy_from_user(dst, src.user, count);
|
2021-05-14 03:36:04 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* _LINUX_BPFPTR_H */
|