- A fix for fanotify_mark() missing the conversion of x86_32 native
syscalls which take 64-bit arguments to the compat handlers due to former having a general compat handler. (Brian Gerst) - Add a forgotten pmd page destructor call to pud_free_pmd_page() where a pmd page is freed. (Dan Williams) - Make IN/OUT insns with an u8 immediate port operand handling for SEV-ES guests more precise by using only the single port byte and not the whole s32 value of the insn decoder. (Peter Gonda) - Correct a straddling end range check before returning the proper MTRR type, when the end address is the same as top of memory. (Ying-Tsun Huang) - Change PQR_ASSOC MSR update scheme when moving a task to a resctrl resource group to avoid significant performance overhead with some resctrl workloads. (Fenghua Yu) - Avoid the actual task move overhead when the task is already in the resource group. (Fenghua Yu) -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl/61xYACgkQEsHwGGHe VUrcvQ//dAWAteCC/BXVHpgcWrBOgPrkwv7aAo70bIO50fUj4pHPYbfhOJU1ey7j 5o4FrqdsOVhGfZjQzvT/juLsr9mQHsfszxKpDTLyK3wVtUtIODYXzgiXRc/qfZDO ozXCVUsUSKJgrIcKTBQbmugK36iZZk+ER+qzUaqd0aq8mocdtSSO8b14uaRJw3MR vumqmEmEEcyM9XK0UgTLPcf6Uhu+Mlg3YSNkV5Qhu0yiCTJaqeEySsytUcRsnnF/ z8AkxZP03Q65o3aoRoSGZihHNKTkNucbavYp70LkcqopoHlC+XERvya9ANRibLPi /+s9GQUm4QPg7XRHLB8dXFZ9RY3YGUeE60BUxVZa4vI3pwciPQD5tbvUF3F/jEN0 PYLy/zVlAkDfI6Z8wTl8DNmd8nd/rE0F4p5zayjpQUWsjjfZDrh+GzBl/YsMuYRp G8dk3tEUc8KREBEccv/YzuVcE0AhX4t1tkn3l2Le5v+4PbwRWBm2uNOiRfn4OM31 iB4E4yCHBnBhTyBA0TkWuHV1TJX6Tb2+0g+D49ZoMGFVoBd8NL6f+dBr0psjX/U+ RsZucit0FcJG2VhJNXEPD+rwNZ6XPfDmIU9GNTAmXUuoKR/kqT8D/NWYkqmKh/Vw +F2EIgOZVhQVOvLKWRut+4qmQRStm6B3UBJimEDySUJPT72O+dU= =2/Eq -----END PGP SIGNATURE----- Merge tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fixes from Borislav Petkov: "As expected, fixes started trickling in after the holidays so here is the accumulated pile of x86 fixes for 5.11: - A fix for fanotify_mark() missing the conversion of x86_32 native syscalls which 
take 64-bit arguments to the compat handlers due to the former having a general compat handler. (Brian Gerst) - Add a forgotten pmd page destructor call to pud_free_pmd_page() where a pmd page is freed. (Dan Williams) - Make the handling of IN/OUT insns with a u8 immediate port operand more precise for SEV-ES guests by using only the single port byte and not the whole s32 value of the insn decoder. (Peter Gonda) - Correct a straddling end range check before returning the proper MTRR type, when the end address is the same as the top of memory. (Ying-Tsun Huang) - Change the PQR_ASSOC MSR update scheme when moving a task to a resctrl resource group to avoid significant performance overhead with some resctrl workloads. (Fenghua Yu) - Avoid the actual task move overhead when the task is already in the resource group. (Fenghua Yu)" * tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/resctrl: Don't move a task to the same resource group x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR x86/mtrr: Correct the range check before performing MTRR type lookups x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling x86/mm: Fix leak of pmd ptlock fanotify: Fix sys_fanotify_mark() on native x86-32
This commit is contained in:
Коммит
a440e4d761
|
@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID
|
|||
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
||||
bool
|
||||
|
||||
config ARCH_SPLIT_ARG64
|
||||
bool
|
||||
help
|
||||
If a 32-bit architecture requires 64-bit arguments to be split into
|
||||
pairs of 32-bit arguments, select this option.
|
||||
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
||||
source "scripts/gcc-plugins/Kconfig"
|
||||
|
|
|
@ -19,6 +19,7 @@ config X86_32
|
|||
select KMAP_LOCAL
|
||||
select MODULES_USE_ELF_REL
|
||||
select OLD_SIGACTION
|
||||
select ARCH_SPLIT_ARG64
|
||||
|
||||
config X86_64
|
||||
def_bool y
|
||||
|
|
|
@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
|
|||
*repeat = 0;
|
||||
*uniform = 1;
|
||||
|
||||
/* Make end inclusive instead of exclusive */
|
||||
end--;
|
||||
|
||||
prev_match = MTRR_TYPE_INVALID;
|
||||
for (i = 0; i < num_var_ranges; ++i) {
|
||||
unsigned short start_state, end_state, inclusive;
|
||||
|
@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
|
|||
int repeat;
|
||||
u64 partial_end;
|
||||
|
||||
/* Make end inclusive instead of exclusive */
|
||||
end--;
|
||||
|
||||
if (!mtrr_state_set)
|
||||
return MTRR_TYPE_INVALID;
|
||||
|
||||
|
|
|
@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
|
|||
kfree(rdtgrp);
|
||||
}
|
||||
|
||||
struct task_move_callback {
|
||||
struct callback_head work;
|
||||
struct rdtgroup *rdtgrp;
|
||||
};
|
||||
|
||||
static void move_myself(struct callback_head *head)
|
||||
static void _update_task_closid_rmid(void *task)
|
||||
{
|
||||
struct task_move_callback *callback;
|
||||
struct rdtgroup *rdtgrp;
|
||||
|
||||
callback = container_of(head, struct task_move_callback, work);
|
||||
rdtgrp = callback->rdtgrp;
|
||||
|
||||
/*
|
||||
* If resource group was deleted before this task work callback
|
||||
* was invoked, then assign the task to root group and free the
|
||||
* resource group.
|
||||
* If the task is still current on this CPU, update PQR_ASSOC MSR.
|
||||
* Otherwise, the MSR is updated when the task is scheduled in.
|
||||
*/
|
||||
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
|
||||
(rdtgrp->flags & RDT_DELETED)) {
|
||||
current->closid = 0;
|
||||
current->rmid = 0;
|
||||
rdtgroup_remove(rdtgrp);
|
||||
}
|
||||
if (task == current)
|
||||
resctrl_sched_in();
|
||||
}
|
||||
|
||||
if (unlikely(current->flags & PF_EXITING))
|
||||
goto out;
|
||||
|
||||
preempt_disable();
|
||||
/* update PQR_ASSOC MSR to make resource group go into effect */
|
||||
resctrl_sched_in();
|
||||
preempt_enable();
|
||||
|
||||
out:
|
||||
kfree(callback);
|
||||
static void update_task_closid_rmid(struct task_struct *t)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
|
||||
smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
|
||||
else
|
||||
_update_task_closid_rmid(t);
|
||||
}
|
||||
|
||||
static int __rdtgroup_move_task(struct task_struct *tsk,
|
||||
struct rdtgroup *rdtgrp)
|
||||
{
|
||||
struct task_move_callback *callback;
|
||||
int ret;
|
||||
|
||||
callback = kzalloc(sizeof(*callback), GFP_KERNEL);
|
||||
if (!callback)
|
||||
return -ENOMEM;
|
||||
callback->work.func = move_myself;
|
||||
callback->rdtgrp = rdtgrp;
|
||||
/* If the task is already in rdtgrp, no need to move the task. */
|
||||
if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
|
||||
tsk->rmid == rdtgrp->mon.rmid) ||
|
||||
(rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
|
||||
tsk->closid == rdtgrp->mon.parent->closid))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Take a refcount, so rdtgrp cannot be freed before the
|
||||
* callback has been invoked.
|
||||
* Set the task's closid/rmid before the PQR_ASSOC MSR can be
|
||||
* updated from them.
|
||||
*
|
||||
* For ctrl_mon groups, move both closid and rmid.
|
||||
* For monitor groups, can move the tasks only from
|
||||
* their parent CTRL group.
|
||||
*/
|
||||
atomic_inc(&rdtgrp->waitcount);
|
||||
ret = task_work_add(tsk, &callback->work, TWA_RESUME);
|
||||
if (ret) {
|
||||
/*
|
||||
* Task is exiting. Drop the refcount and free the callback.
|
||||
* No need to check the refcount as the group cannot be
|
||||
* deleted before the write function unlocks rdtgroup_mutex.
|
||||
*/
|
||||
atomic_dec(&rdtgrp->waitcount);
|
||||
kfree(callback);
|
||||
rdt_last_cmd_puts("Task exited\n");
|
||||
} else {
|
||||
/*
|
||||
* For ctrl_mon groups move both closid and rmid.
|
||||
* For monitor groups, can move the tasks only from
|
||||
* their parent CTRL group.
|
||||
*/
|
||||
if (rdtgrp->type == RDTCTRL_GROUP) {
|
||||
tsk->closid = rdtgrp->closid;
|
||||
|
||||
if (rdtgrp->type == RDTCTRL_GROUP) {
|
||||
tsk->closid = rdtgrp->closid;
|
||||
tsk->rmid = rdtgrp->mon.rmid;
|
||||
} else if (rdtgrp->type == RDTMON_GROUP) {
|
||||
if (rdtgrp->mon.parent->closid == tsk->closid) {
|
||||
tsk->rmid = rdtgrp->mon.rmid;
|
||||
} else if (rdtgrp->type == RDTMON_GROUP) {
|
||||
if (rdtgrp->mon.parent->closid == tsk->closid) {
|
||||
tsk->rmid = rdtgrp->mon.rmid;
|
||||
} else {
|
||||
rdt_last_cmd_puts("Can't move task to different control group\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
} else {
|
||||
rdt_last_cmd_puts("Can't move task to different control group\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Ensure the task's closid and rmid are written before checking whether
|
||||
* the task is current, which decides whether it will be interrupted.
|
||||
*/
|
||||
barrier();
|
||||
|
||||
/*
|
||||
* By now, the task's closid and rmid are set. If the task is current
|
||||
* on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
|
||||
* group go into effect. If the task is not current, the MSR will be
|
||||
* updated when the task is scheduled in.
|
||||
*/
|
||||
update_task_closid_rmid(tsk);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
|
||||
|
|
|
@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
|
|||
case 0xe4:
|
||||
case 0xe5:
|
||||
*exitinfo |= IOIO_TYPE_IN;
|
||||
*exitinfo |= (u64)insn->immediate.value << 16;
|
||||
*exitinfo |= (u8)insn->immediate.value << 16;
|
||||
break;
|
||||
|
||||
/* OUT immediate opcodes */
|
||||
case 0xe6:
|
||||
case 0xe7:
|
||||
*exitinfo |= IOIO_TYPE_OUT;
|
||||
*exitinfo |= (u64)insn->immediate.value << 16;
|
||||
*exitinfo |= (u8)insn->immediate.value << 16;
|
||||
break;
|
||||
|
||||
/* IN register opcodes */
|
||||
|
|
|
@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
|
|||
}
|
||||
|
||||
free_page((unsigned long)pmd_sv);
|
||||
|
||||
pgtable_pmd_page_dtor(virt_to_page(pmd));
|
||||
free_page((unsigned long)pmd);
|
||||
|
||||
return 1;
|
||||
|
|
|
@ -1285,26 +1285,23 @@ fput_and_out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_ARCH_SPLIT_ARG64
|
||||
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
|
||||
__u64, mask, int, dfd,
|
||||
const char __user *, pathname)
|
||||
{
|
||||
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
|
||||
#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
|
||||
SYSCALL32_DEFINE6(fanotify_mark,
|
||||
int, fanotify_fd, unsigned int, flags,
|
||||
__u32, mask0, __u32, mask1, int, dfd,
|
||||
SC_ARG64(mask), int, dfd,
|
||||
const char __user *, pathname)
|
||||
{
|
||||
return do_fanotify_mark(fanotify_fd, flags,
|
||||
#ifdef __BIG_ENDIAN
|
||||
((__u64)mask0 << 32) | mask1,
|
||||
#else
|
||||
((__u64)mask1 << 32) | mask0,
|
||||
#endif
|
||||
dfd, pathname);
|
||||
return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
|
||||
dfd, pathname);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
|
|||
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
|
||||
#endif /* __SYSCALL_DEFINEx */
|
||||
|
||||
/* For split 64-bit arguments on 32-bit architectures */
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
|
||||
#else
|
||||
#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
|
||||
#endif
|
||||
#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
|
||||
#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
|
||||
#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
|
||||
#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
|
||||
#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
|
||||
#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
|
||||
#else
|
||||
#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
|
||||
#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
|
||||
#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
|
||||
#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
|
||||
#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
|
||||
#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Called before coming back to user-mode. Returning to user-mode with an
|
||||
* address limit different from USER_DS can allow kernel memory to be overwritten.
|
||||
|
|
Загрузка…
Ссылка в новой задаче