From 54dff232142e255ff644d73de1c8d80122f5ad7b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 4 Jan 2022 09:50:18 +0000 Subject: [PATCH 01/14] MAINTAINERS: add myself as reviewer for atomics As I've fiddled about with the atomic infrastructure a fair bit now, Peter suggested I should add myself as a reviewer or maintainer to make sure I'm Cc'd on anything I might have an opinion on. For now, add myself as a reviewer. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Boqun Feng Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220104095018.1990058-1-mark.rutland@arm.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..bccca3e5e2cd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3193,6 +3193,7 @@ ATOMIC INFRASTRUCTURE M: Will Deacon M: Peter Zijlstra R: Boqun Feng +R: Mark Rutland L: linux-kernel@vger.kernel.org S: Maintained F: arch/*/include/asm/atomic*.h From e204193b138af347fbbbe026e68cb3385112f387 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Mon, 13 Dec 2021 21:26:18 +0800 Subject: [PATCH 02/14] lockdep: Use memset_startat() helper in reinit_class() use memset_startat() helper to simplify the code, there is no functional change in this patch. Signed-off-by: Xiu Jianfeng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211213132618.105737-1-xiujianfeng@huawei.com --- kernel/locking/lockdep.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4a882f83aeb9..89b3df51fd98 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6011,13 +6011,10 @@ static void zap_class(struct pending_free *pf, struct lock_class *class) static void reinit_class(struct lock_class *class) { - void *const p = class; - const unsigned int offset = offsetof(struct lock_class, key); - WARN_ON_ONCE(!class->lock_entry.next); WARN_ON_ONCE(!list_empty(&class->locks_after)); WARN_ON_ONCE(!list_empty(&class->locks_before)); - memset(p + offset, 0, sizeof(*class) - offset); + memset_startat(class, 0, key); WARN_ON_ONCE(!class->lock_entry.next); WARN_ON_ONCE(!list_empty(&class->locks_after)); WARN_ON_ONCE(!list_empty(&class->locks_before)); From 61cc4534b6550997c97a03759ab46b29d44c0017 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 2 Jan 2022 21:35:58 -0500 Subject: [PATCH 03/14] locking/lockdep: Avoid potential access of invalid memory in lock_class It was found that reading /proc/lockdep after a lockdep splat may potentially cause an access to freed memory if lockdep_unregister_key() is called after the splat but before access to /proc/lockdep [1]. This is due to the fact that graph_lock() call in lockdep_unregister_key() fails after the clearing of debug_locks by the splat process. After lockdep_unregister_key() is called, the lock_name may be freed but the corresponding lock_class structure still have a reference to it. That invalid memory pointer will then be accessed when /proc/lockdep is read by a user and a use-after-free (UAF) error will be reported if KASAN is enabled. To fix this problem, lockdep_unregister_key() is now modified to always search for a matching key irrespective of the debug_locks state and zap the corresponding lock class if a matching one is found. [1] https://lore.kernel.org/lkml/77f05c15-81b6-bddd-9650-80d5f23fe330@i-love.sakura.ne.jp/ Fixes: 8b39adbee805 ("locking/lockdep: Make lockdep_unregister_key() honor 'debug_locks' again") Reported-by: Tetsuo Handa Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Bart Van Assche Link: https://lkml.kernel.org/r/20220103023558.1377055-1-longman@redhat.com --- kernel/locking/lockdep.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 89b3df51fd98..2e6892ec3756 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6287,7 +6287,13 @@ void lockdep_reset_lock(struct lockdep_map *lock) lockdep_reset_lock_reg(lock); } -/* Unregister a dynamically allocated key. */ +/* + * Unregister a dynamically allocated key. + * + * Unlike lockdep_register_key(), a search is always done to find a matching + * key irrespective of debug_locks to avoid potential invalid access to freed + * memory in lock_class entry. + */ void lockdep_unregister_key(struct lock_class_key *key) { struct hlist_head *hash_head = keyhashentry(key); @@ -6302,10 +6308,8 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - if (!graph_lock()) - goto out_irq; + lockdep_lock(); - pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { hlist_del_rcu(&k->hash_entry); @@ -6313,11 +6317,13 @@ void lockdep_unregister_key(struct lock_class_key *key) break; } } - WARN_ON_ONCE(!found); - __lockdep_free_key_range(pf, key, 1); - call_rcu_zapped(pf); - graph_unlock(); -out_irq: + WARN_ON_ONCE(!found && debug_locks); + if (found) { + pf = get_pending_free(); + __lockdep_free_key_range(pf, key, 1); + call_rcu_zapped(pf); + } + lockdep_unlock(); raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ From acb13ea0baf8db8d05a3910c06e997c90825faad Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 13 Jan 2022 16:53:56 +0100 Subject: [PATCH 04/14] asm-generic/bitops: Always inline all bit manipulation helpers Make it consistent with the atomic/atomic-instrumented.h helpers. And defconfig size is actually going down! text data bss dec hex filename 22352096 8213152 1917164 32482412 1efa46c vmlinux.x86-64.defconfig.before 22350551 8213184 1917164 32480899 1ef9e83 vmlinux.x86-64.defconfig.after Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://lore.kernel.org/r/20220113155357.4706-2-bp@alien8.de --- include/asm-generic/bitops/instrumented-atomic.h | 12 ++++++------ .../asm-generic/bitops/instrumented-non-atomic.h | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h index c90192b1c755..4225a8ca9c1a 100644 --- a/include/asm-generic/bitops/instrumented-atomic.h +++ b/include/asm-generic/bitops/instrumented-atomic.h @@ -23,7 +23,7 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(long nr, volatile unsigned long *addr) +static __always_inline void set_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_set_bit(nr, addr); @@ -36,7 +36,7 @@ static inline void set_bit(long nr, volatile unsigned long *addr) * * This is a relaxed atomic operation (no implied memory barriers). */ -static inline void clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void clear_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_clear_bit(nr, addr); @@ -52,7 +52,7 @@ static inline void clear_bit(long nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void change_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_change_bit(nr, addr); @@ -65,7 +65,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) * * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); @@ -79,7 +79,7 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) * * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); @@ -93,7 +93,7 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) * * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline bool test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h index 37363d570b9b..7ab1ecc37782 100644 --- a/include/asm-generic/bitops/instrumented-non-atomic.h +++ b/include/asm-generic/bitops/instrumented-non-atomic.h @@ -22,7 +22,7 @@ * region of memory concurrently, the effect may be that only one operation * succeeds. */ -static inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___set_bit(nr, addr); @@ -37,7 +37,7 @@ static inline void __set_bit(long nr, volatile unsigned long *addr) * region of memory concurrently, the effect may be that only one operation * succeeds. */ -static inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___clear_bit(nr, addr); @@ -52,13 +52,13 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr) * region of memory concurrently, the effect may be that only one operation * succeeds. */ -static inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___change_bit(nr, addr); } -static inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr) +static __always_inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr) { if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC)) { /* @@ -90,7 +90,7 @@ static inline void __instrument_read_write_bitop(long nr, volatile unsigned long * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ -static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_set_bit(nr, addr); @@ -104,7 +104,7 @@ static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ -static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_clear_bit(nr, addr); @@ -118,7 +118,7 @@ static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ -static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_change_bit(nr, addr); @@ -129,7 +129,7 @@ static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) * @nr: bit number to test * @addr: Address to start counting from */ -static inline bool test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool test_bit(long nr, const volatile unsigned long *addr) { instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); return arch_test_bit(nr, addr); From 1dc01abad6544cb9d884071b626b706e37aa9601 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 13 Jan 2022 16:53:57 +0100 Subject: [PATCH 05/14] cpumask: Always inline helpers which use bit manipulation functions Former are always inlined so do that for the latter too, for consistency. Size impact is a whopping 5 bytes increase! :-) text data bss dec hex filename 22350551 8213184 1917164 32480899 1ef9e83 vmlinux.x86-64.defconfig.before 22350556 8213152 1917164 32480872 1ef9e68 vmlinux.x86-64.defconfig.after Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://lore.kernel.org/r/20220113155357.4706-3-bp@alien8.de --- include/linux/cpumask.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 64dae70d31f5..6b06c698cd2a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -341,12 +341,12 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -357,12 +357,12 @@ static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) +static __always_inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) { clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) +static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) { __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -374,7 +374,7 @@ static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * * Returns 1 if @cpu is set in @cpumask, else returns 0 */ -static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -388,7 +388,7 @@ static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) * * test_and_set_bit wrapper for cpumasks. */ -static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -402,7 +402,7 @@ static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) * * test_and_clear_bit wrapper for cpumasks. */ -static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } From c441e934b604a3b5f350a9104124cf6a3ba07a34 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 15 Jan 2022 15:16:57 -0800 Subject: [PATCH 06/14] locking: Add missing __sched attributes This patch adds __sched attributes to a few missing places to show blocked function rather than locking function in get_wchan. Signed-off-by: Minchan Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220115231657.84828-1-minchan@kernel.org --- kernel/locking/percpu-rwsem.c | 5 +++-- kernel/locking/rwsem.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 70a32a576f3f..c9fdae94e098 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -7,6 +7,7 @@ #include #include #include +#include #include int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, @@ -162,7 +163,7 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader) __set_current_state(TASK_RUNNING); } -bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) +bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) { if (__percpu_down_read_trylock(sem)) return true; @@ -211,7 +212,7 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem) return true; } -void percpu_down_write(struct percpu_rw_semaphore *sem) +void __sched percpu_down_write(struct percpu_rw_semaphore *sem) { might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 69aba4abe104..acde5d6f1254 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1048,7 +1048,7 @@ out_nolock: /* * Wait until we successfully acquire the write lock */ -static struct rw_semaphore * +static struct rw_semaphore __sched * rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) { long count; From dc1b4df09acdca7a89806b28f235cd6d8dcd3d24 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Feb 2022 10:19:43 +0000 Subject: [PATCH 07/14] atomics: Fix atomic64_{read_acquire,set_release} fallbacks Arnd reports that on 32-bit architectures, the fallbacks for atomic64_read_acquire() and atomic64_set_release() are broken as they use smp_load_acquire() and smp_store_release() respectively, which do not work on types larger than the native word size. Since those contain compiletime_assert_atomic_type(), any attempt to use those fallbacks will result in a build-time error. e.g. with the following added to arch/arm/kernel/setup.c: | void test_atomic64(atomic64_t *v) | { | atomic64_set_release(v, 5); | atomic64_read_acquire(v); | } The compiler will complain as follows: | In file included from : | In function 'arch_atomic64_set_release', | inlined from 'test_atomic64' at ./include/linux/atomic/atomic-instrumented.h:669:2: | ././include/linux/compiler_types.h:346:38: error: call to '__compiletime_assert_9' declared with attribute error: Need native word sized stores/loads for atomicity. | 346 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | | ^ | ././include/linux/compiler_types.h:327:4: note: in definition of macro '__compiletime_assert' | 327 | prefix ## suffix(); \ | | ^~~~~~ | ././include/linux/compiler_types.h:346:2: note: in expansion of macro '_compiletime_assert' | 346 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | | ^~~~~~~~~~~~~~~~~~~ | ././include/linux/compiler_types.h:349:2: note: in expansion of macro 'compiletime_assert' | 349 | compiletime_assert(__native_word(t), \ | | ^~~~~~~~~~~~~~~~~~ | ./include/asm-generic/barrier.h:133:2: note: in expansion of macro 'compiletime_assert_atomic_type' | 133 | compiletime_assert_atomic_type(*p); \ | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ./include/asm-generic/barrier.h:164:55: note: in expansion of macro '__smp_store_release' | 164 | #define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0) | | ^~~~~~~~~~~~~~~~~~~ | ./include/linux/atomic/atomic-arch-fallback.h:1270:2: note: in expansion of macro 'smp_store_release' | 1270 | smp_store_release(&(v)->counter, i); | | ^~~~~~~~~~~~~~~~~ | make[2]: *** [scripts/Makefile.build:288: arch/arm/kernel/setup.o] Error 1 | make[1]: *** [scripts/Makefile.build:550: arch/arm/kernel] Error 2 | make: *** [Makefile:1831: arch/arm] Error 2 Fix this by only using smp_load_acquire() and smp_store_release() for native atomic types, and otherwise falling back to the regular barriers necessary for acquire/release semantics, as we do in the more generic acquire and release fallbacks. Since the fallback templates are used to generate the atomic64_*() and atomic_*() operations, the __native_word() check is added to both. For the atomic_*() operations, which are always 32-bit, the __native_word() check is redundant but not harmful, as it is always true. For the example above this works as expected on 32-bit, e.g. for arm multi_v7_defconfig: | : | push {r4, r5} | dmb ish | pldw [r0] | mov r2, #5 | mov r3, #0 | ldrexd r4, [r0] | strexd r4, r2, [r0] | teq r4, #0 | bne 484 | ldrexd r2, [r0] | dmb ish | pop {r4, r5} | bx lr ... and also on 64-bit, e.g. for arm64 defconfig: | : | bti c | paciasp | mov x1, #0x5 | stlr x1, [x0] | ldar x0, [x0] | autiasp | ret Reported-by: Arnd Bergmann Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ard Biesheuvel Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/20220207101943.439825-1-mark.rutland@arm.com --- include/linux/atomic/atomic-arch-fallback.h | 38 ++++++++++++++++++--- scripts/atomic/fallbacks/read_acquire | 11 +++++- scripts/atomic/fallbacks/set_release | 7 +++- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a3dba31df01e..6db58d180866 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -151,7 +151,16 @@ static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { - return smp_load_acquire(&(v)->counter); + int ret; + + if (__native_word(atomic_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif @@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic_set(v, i); + } } #define arch_atomic_set_release arch_atomic_set_release #endif @@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { - return smp_load_acquire(&(v)->counter); + s64 ret; + + if (__native_word(atomic64_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic64_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic64_read_acquire arch_atomic64_read_acquire #endif @@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic64_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic64_set(v, i); + } } #define arch_atomic64_set_release arch_atomic64_set_release #endif @@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 +// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire index 803ba7561076..a0ea1d26e6b2 100755 --- a/scripts/atomic/fallbacks/read_acquire +++ b/scripts/atomic/fallbacks/read_acquire @@ -2,6 +2,15 @@ cat <counter); + ${int} ret; + + if (__native_word(${atomic}_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_${atomic}_read(v); + __atomic_acquire_fence(); + } + + return ret; } EOF diff --git a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release index 86ede759f24e..05cdb7f42477 100755 --- a/scripts/atomic/fallbacks/set_release +++ b/scripts/atomic/fallbacks/set_release @@ -2,6 +2,11 @@ cat <counter, i); + if (__native_word(${atomic}_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_${atomic}_set(v, i); + } } EOF From 9983a9d577db415c41099a20a5637ab25dd3c240 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 8 Feb 2022 18:08:02 +0100 Subject: [PATCH 08/14] locking/local_lock: Make the empty local_lock_*() function a macro. It has been said that local_lock() does not add any overhead compared to preempt_disable() in a !LOCKDEP configuration. A micro benchmark showed an unexpected result which can be reduced to the fact that local_lock() was not entirely optimized away. In the !LOCKDEP configuration local_lock_acquire() is an empty static inline function. On x86 the this_cpu_ptr() argument of that function is fully evaluated leading to an additional mov+add instructions which are not needed and not used. Replace the static inline function with a macro. The typecheck() macro ensures that the argument is of proper type while the resulting disassembly shows no traces of this_cpu_ptr(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Waiman Long Link: https://lkml.kernel.org/r/YgKjciR60fZft2l4@linutronix.de --- include/linux/local_lock_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 975e33b793a7..6d635e8306d6 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) -static inline void local_lock_acquire(local_lock_t *l) { } -static inline void local_lock_release(local_lock_t *l) { } -static inline void local_lock_debug_init(local_lock_t *l) { } +# define local_lock_acquire(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +# define local_lock_release(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +# define local_lock_debug_init(__ll) do { typecheck(local_lock_t *, __ll); } while (0) #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } From 1c6f9ec0095459b5120a1cb059d442f56d3cb6e7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 8 Feb 2022 18:21:10 +0100 Subject: [PATCH 09/14] locking: Enable RT_MUTEXES by default on PREEMPT_RT. The CONFIG_RT_MUTEXES option is enabled by CONFIG_FUTEX and CONFIG_I2C. If both are disabled then a CONFIG_PREEMPT_RT build fails to compile. It is not possible to have a PREEMPT_RT kernel without RT_MUTEX support because RT_MUTEX based locking is always used. Enable CONFIG_RT_MUTEXES by default on PREEMPT_RT builds. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YgKmhjkcuqWXdUjQ@linutronix.de --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index e9119bf54b1f..beb5b866c318 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2054,6 +2054,7 @@ source "arch/Kconfig" config RT_MUTEXES bool + default y if PREEMPT_RT config BASE_SMALL int From f5c54f77b07b278cfde4a654e111c39996ac8b5b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 4 Feb 2022 09:30:13 +0100 Subject: [PATCH 10/14] cpumask: Add a x86-specific cpumask_clear_cpu() helper Add a x86-specific cpumask_clear_cpu() helper which will be used in places where the explicit KASAN-instrumentation in the *_bit() helpers is unwanted. Also, always inline two more cpumask generic helpers. allyesconfig: text data bss dec hex filename 190553143 159425889 32076404 382055436 16c5b40c vmlinux.before 190551812 159424945 32076404 382053161 16c5ab29 vmlinux.after Signed-off-by: Borislav Petkov Acked-by: Marco Elver Link: https://lore.kernel.org/r/20220204083015.17317-2-bp@alien8.de --- arch/x86/include/asm/cpumask.h | 10 ++++++++++ include/linux/cpumask.h | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 3afa990d756b..c5aed9e9226c 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -20,11 +20,21 @@ static __always_inline bool arch_cpu_online(int cpu) { return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); } + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} #else static __always_inline bool arch_cpu_online(int cpu) { return cpu == 0; } + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + return; +} #endif #define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 6b06c698cd2a..fe29ac7cc469 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -102,7 +102,7 @@ extern atomic_t __num_online_cpus; extern cpumask_t cpus_booted_once_mask; -static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) +static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) { #ifdef CONFIG_DEBUG_PER_CPU_MAPS WARN_ON_ONCE(cpu >= bits); @@ -110,7 +110,7 @@ static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) } /* verify cpu argument to cpumask_* operators */ -static inline unsigned int cpumask_check(unsigned int cpu) +static __always_inline unsigned int cpumask_check(unsigned int cpu) { cpu_max_bits_warn(cpu, nr_cpumask_bits); return cpu; From b008893b08dcc8c30d756db05c229a1491bcb992 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 4 Feb 2022 09:30:14 +0100 Subject: [PATCH 11/14] x86/ptrace: Always inline v8086_mode() for instrumentation Instrumentation glue like KASAN causes the following warning: vmlinux.o: warning: objtool: mce_gather_info()+0x5f: call to v8086_mode.constprop.0() leaves .noinstr.text section due to gcc creating a function call for that oneliner. Force-inline it and even save some vmlinux bytes (.config is close to an allmodconfig): text data bss dec hex filename 209431677 208257651 34411048 452100376 1af28118 vmlinux.before 209431519 208257615 34411048 452100182 1af28056 vmlinux.after Signed-off-by: Borislav Petkov Acked-by: Marco Elver Link: https://lore.kernel.org/r/20220204083015.17317-3-bp@alien8.de --- arch/x86/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 703663175a5a..4357e0f2cd5f 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -137,7 +137,7 @@ static __always_inline int user_mode(struct pt_regs *regs) #endif } -static inline int v8086_mode(struct pt_regs *regs) +static __always_inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->flags & X86_VM_MASK); From fb7275acd6fb988313dddd8d3d19efa70d9015ad Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 10 Feb 2022 22:55:26 -0500 Subject: [PATCH 12/14] locking/lockdep: Iterate lock_classes directly when reading lockdep files When dumping lock_classes information via /proc/lockdep, we can't take the lockdep lock as the lock hold time is indeterminate. Iterating over all_lock_classes without holding lock can be dangerous as there is a slight chance that it may branch off to other lists leading to infinite loop or even access invalid memory if changes are made to all_lock_classes list in parallel. To avoid this problem, iteration of lock classes is now done directly on the lock_classes array itself. The lock_classes_in_use bitmap is checked to see if the lock class is being used. To avoid iterating the full array all the times, a new max_lock_class_idx value is added to track the maximum lock_class index that is currently being used. We can theoretically take the lockdep lock for iterating all_lock_classes when other lockdep files (lockdep_stats and lock_stat) are accessed as the lock hold time will be shorter for them. For consistency, they are also modified to iterate the lock_classes array directly. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220211035526.1329503-2-longman@redhat.com --- kernel/locking/lockdep.c | 14 +++++--- kernel/locking/lockdep_internals.h | 6 ++-- kernel/locking/lockdep_proc.c | 51 +++++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2e6892ec3756..50036c10b518 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -183,11 +183,9 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES); static struct hlist_head lock_keys_hash[KEYHASH_SIZE]; unsigned long nr_lock_classes; unsigned long nr_zapped_classes; -#ifndef CONFIG_DEBUG_LOCKDEP -static -#endif +unsigned long max_lock_class_idx; struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; -static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); +DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); static inline struct lock_class *hlock_class(struct held_lock *hlock) { @@ -338,7 +336,7 @@ static inline void lock_release_holdtime(struct held_lock *hlock) * elements. These elements are linked together by the lock_entry member in * struct lock_class. */ -LIST_HEAD(all_lock_classes); +static LIST_HEAD(all_lock_classes); static LIST_HEAD(free_lock_classes); /** @@ -1252,6 +1250,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct hlist_head *hash_head; struct lock_class *class; + int idx; DEBUG_LOCKS_WARN_ON(!irqs_disabled()); @@ -1317,6 +1316,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * of classes. */ list_move_tail(&class->lock_entry, &all_lock_classes); + idx = class - lock_classes; + if (idx > max_lock_class_idx) + max_lock_class_idx = idx; if (verbose(class)) { graph_unlock(); @@ -6000,6 +6002,8 @@ static void zap_class(struct pending_free *pf, struct lock_class *class) WRITE_ONCE(class->name, NULL); nr_lock_classes--; __clear_bit(class - lock_classes, lock_classes_in_use); + if (class - lock_classes == max_lock_class_idx) + max_lock_class_idx--; } else { WARN_ONCE(true, "%s() failed for class %s\n", __func__, class->name); diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index ecb8662e7a4e..bbe9000260d0 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -121,7 +121,6 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) -extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; #define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1) @@ -151,6 +150,10 @@ extern unsigned int nr_large_chain_blocks; extern unsigned int max_lockdep_depth; extern unsigned int max_bfs_queue_depth; +extern unsigned long max_lock_class_idx; + +extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; +extern unsigned long lock_classes_in_use[]; #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); @@ -205,7 +208,6 @@ struct lockdep_stats { }; DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); -extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; #define __debug_atomic_inc(ptr) \ this_cpu_inc(lockdep_stats.ptr); diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index b8d9a050c337..15fdc7fa5c68 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -24,14 +24,33 @@ #include "lockdep_internals.h" +/* + * Since iteration of lock_classes is done without holding the lockdep lock, + * it is not safe to iterate all_lock_classes list directly as the iteration + * may branch off to free_lock_classes or the zapped list. Iteration is done + * directly on the lock_classes array by checking the lock_classes_in_use + * bitmap and max_lock_class_idx. + */ +#define iterate_lock_classes(idx, class) \ + for (idx = 0, class = lock_classes; idx <= max_lock_class_idx; \ + idx++, class++) + static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &all_lock_classes, pos); + struct lock_class *class = v; + + ++class; + *pos = class - lock_classes; + return (*pos > max_lock_class_idx) ? NULL : class; } static void *l_start(struct seq_file *m, loff_t *pos) { - return seq_list_start_head(&all_lock_classes, *pos); + unsigned long idx = *pos; + + if (idx > max_lock_class_idx) + return NULL; + return lock_classes + idx; } static void l_stop(struct seq_file *m, void *v) @@ -57,14 +76,16 @@ static void print_name(struct seq_file *m, struct lock_class *class) static int l_show(struct seq_file *m, void *v) { - struct lock_class *class = list_entry(v, struct lock_class, lock_entry); + struct lock_class *class = v; struct lock_list *entry; char usage[LOCK_USAGE_CHARS]; + int idx = class - lock_classes; - if (v == &all_lock_classes) { + if (v == lock_classes) seq_printf(m, "all lock classes:\n"); + + if (!test_bit(idx, lock_classes_in_use)) return 0; - } seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP @@ -220,8 +241,11 @@ static int lockdep_stats_show(struct seq_file *m, void *v) #ifdef CONFIG_PROVE_LOCKING struct lock_class *class; + unsigned long idx; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; if (class->usage_mask == 0) nr_unused++; @@ -254,6 +278,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) sum_forward_deps += lockdep_count_forward_deps(class); } + #ifdef CONFIG_DEBUG_LOCKDEP DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); #endif @@ -345,6 +370,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v) seq_printf(m, " max bfs queue depth: %11u\n", max_bfs_queue_depth); #endif + seq_printf(m, " max lock class index: %11lu\n", + max_lock_class_idx); lockdep_stats_debug_show(m); seq_printf(m, " debug_locks: %11u\n", debug_locks); @@ -622,12 +649,16 @@ static int lock_stat_open(struct inode *inode, struct file *file) if (!res) { struct lock_stat_data *iter = data->stats; struct seq_file *m = file->private_data; + unsigned long idx; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; iter->class = class; iter->stats = lock_stats(class); iter++; } + data->iter_end = iter; sort(data->stats, data->iter_end - data->stats, @@ -645,6 +676,7 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct lock_class *class; + unsigned long idx; char c; if (count) { @@ -654,8 +686,11 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, if (c != '0') return count; - list_for_each_entry(class, &all_lock_classes, lock_entry) + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; clear_lock_stats(class); + } } return count; } From fe65deb56e552a8c9bf7f27860dbdeac12a36116 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 14 Feb 2022 01:57:16 +0900 Subject: [PATCH 13/14] jump_label: Avoid unneeded casts in STATIC_KEY_INIT_{TRUE,FALSE} Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") introduced the union to struct static_key. It is more natual to set JUMP_TYPE_* to the .type field without casting. Signed-off-by: Masahiro Yamada Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220213165717.2354046-1-masahiroy@kernel.org --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 48b9b2a82767..6924e6837e6d 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -251,10 +251,10 @@ extern void static_key_disable_cpuslocked(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - { .entries = (void *)JUMP_TYPE_TRUE } } + { .type = JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - { .entries = (void *)JUMP_TYPE_FALSE } } + { .type = JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ From cd27ccfc727e99352321c0c75012ab9c5a90321e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 14 Feb 2022 01:57:17 +0900 Subject: [PATCH 14/14] jump_label: Refactor #ifdef of struct static_key Move #ifdef CONFIG_JUMP_LABEL inside the struct static_key. Signed-off-by: Masahiro Yamada Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220213165717.2354046-2-masahiroy@kernel.org --- include/linux/jump_label.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 6924e6837e6d..107751cc047b 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -82,10 +82,9 @@ extern bool static_key_initialized; "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) -#ifdef CONFIG_JUMP_LABEL - struct static_key { atomic_t enabled; +#ifdef CONFIG_JUMP_LABEL /* * Note: * To make anonymous unions work with old compilers, the static @@ -104,13 +103,9 @@ struct static_key { struct jump_entry *entries; struct static_key_mod *next; }; +#endif /* CONFIG_JUMP_LABEL */ }; -#else -struct static_key { - atomic_t enabled; -}; -#endif /* CONFIG_JUMP_LABEL */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL