2019-05-27 09:55:01 +03:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2006-12-08 13:37:49 +03:00
|
|
|
/* Integer base 2 logarithm calculation
|
|
|
|
*
|
|
|
|
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
|
|
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _LINUX_LOG2_H
|
|
|
|
#define _LINUX_LOG2_H
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/bitops.h>
|
|
|
|
|
|
|
|
/*
|
|
|
|
* non-constant log of base 2 calculators
|
|
|
|
* - the arch may override these in asm/bitops.h if they can be implemented
|
|
|
|
* more efficiently than using fls() and fls64()
|
|
|
|
* - the arch is not required to handle n==0 if implementing the fallback
|
|
|
|
*/
|
|
|
|
#ifndef CONFIG_ARCH_HAS_ILOG2_U32
|
ilog2: force inlining of __ilog2_u32() and __ilog2_u64()
Building a kernel with CONFIG_CC_OPTIMISE_FOR_SIZE leads to
__ilog2_u32() being duplicated 50 times and __ilog2_u64() 3 times in
vmlinux on a tiny powerpc32 config.
__ilog2_u32() being 2 instructions it is not worth being kept out of
line, so force inlining. Although the u64 version is a bit bigger,
there is still a small benefit in keeping it inlined. On a 64-bit
config there's a real benefit.
With this change the size of vmlinux text is reduced by 1 kbytes, which
is approx 50% more than the size of the removed functions.
Before the patch there is for instance:
c00d2a94 <__ilog2_u32>:
c00d2a94: 7c 63 00 34 cntlzw r3,r3
c00d2a98: 20 63 00 1f subfic r3,r3,31
c00d2a9c: 4e 80 00 20 blr
c00d36d8 <__order_base_2>:
c00d36d8: 28 03 00 01 cmplwi r3,1
c00d36dc: 40 81 00 2c ble c00d3708 <__order_base_2+0x30>
c00d36e0: 94 21 ff f0 stwu r1,-16(r1)
c00d36e4: 7c 08 02 a6 mflr r0
c00d36e8: 38 63 ff ff addi r3,r3,-1
c00d36ec: 90 01 00 14 stw r0,20(r1)
c00d36f0: 4b ff f3 a5 bl c00d2a94 <__ilog2_u32>
c00d36f4: 80 01 00 14 lwz r0,20(r1)
c00d36f8: 38 63 00 01 addi r3,r3,1
c00d36fc: 7c 08 03 a6 mtlr r0
c00d3700: 38 21 00 10 addi r1,r1,16
c00d3704: 4e 80 00 20 blr
c00d3708: 38 60 00 00 li r3,0
c00d370c: 4e 80 00 20 blr
With the patch it has become:
c00d356c <__order_base_2>:
c00d356c: 28 03 00 01 cmplwi r3,1
c00d3570: 40 81 00 14 ble c00d3584 <__order_base_2+0x18>
c00d3574: 38 63 ff ff addi r3,r3,-1
c00d3578: 7c 63 00 34 cntlzw r3,r3
c00d357c: 20 63 00 20 subfic r3,r3,32
c00d3580: 4e 80 00 20 blr
c00d3584: 38 60 00 00 li r3,0
c00d3588: 4e 80 00 20 blr
There is no longer any need for __order_base_2() to set up a stack frame
and save/restore the caller address. And the following 'add 1' is
merged into the subtract.
Another typical use of it:
c080ff28 <hugepagesz_setup>:
...
c080fff8: 7f c3 f3 78 mr r3,r30
c080fffc: 4b 8f 81 f1 bl c01081ec <__ilog2_u32>
c0810000: 38 63 ff f2 addi r3,r3,-14
...
Becomes
c080ff1c <hugepagesz_setup>:
...
c080ffec: 7f c3 00 34 cntlzw r3,r30
c080fff0: 20 63 00 11 subfic r3,r3,17
...
Here there is no need to move the r30 argument to r3 and then subtract 14
from the result. Just work on r30 and merge the 'sub 14' with the 'sub from 31'.
Link: https://lkml.kernel.org/r/803a2ac3d923ebcfd0dd40f5886b05cae7bb0aba.1644243860.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-03-24 02:05:44 +03:00
|
|
|
static __always_inline __attribute__((const))
|
2006-12-08 13:37:49 +03:00
|
|
|
int __ilog2_u32(u32 n)
|
|
|
|
{
|
|
|
|
return fls(n) - 1;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef CONFIG_ARCH_HAS_ILOG2_U64
|
ilog2: force inlining of __ilog2_u32() and __ilog2_u64()
Building a kernel with CONFIG_CC_OPTIMISE_FOR_SIZE leads to
__ilog2_u32() being duplicated 50 times and __ilog2_u64() 3 times in
vmlinux on a tiny powerpc32 config.
__ilog2_u32() being 2 instructions it is not worth being kept out of
line, so force inlining. Although the u64 version is a bit bigger,
there is still a small benefit in keeping it inlined. On a 64-bit
config there's a real benefit.
With this change the size of vmlinux text is reduced by 1 kbytes, which
is approx 50% more than the size of the removed functions.
Before the patch there is for instance:
c00d2a94 <__ilog2_u32>:
c00d2a94: 7c 63 00 34 cntlzw r3,r3
c00d2a98: 20 63 00 1f subfic r3,r3,31
c00d2a9c: 4e 80 00 20 blr
c00d36d8 <__order_base_2>:
c00d36d8: 28 03 00 01 cmplwi r3,1
c00d36dc: 40 81 00 2c ble c00d3708 <__order_base_2+0x30>
c00d36e0: 94 21 ff f0 stwu r1,-16(r1)
c00d36e4: 7c 08 02 a6 mflr r0
c00d36e8: 38 63 ff ff addi r3,r3,-1
c00d36ec: 90 01 00 14 stw r0,20(r1)
c00d36f0: 4b ff f3 a5 bl c00d2a94 <__ilog2_u32>
c00d36f4: 80 01 00 14 lwz r0,20(r1)
c00d36f8: 38 63 00 01 addi r3,r3,1
c00d36fc: 7c 08 03 a6 mtlr r0
c00d3700: 38 21 00 10 addi r1,r1,16
c00d3704: 4e 80 00 20 blr
c00d3708: 38 60 00 00 li r3,0
c00d370c: 4e 80 00 20 blr
With the patch it has become:
c00d356c <__order_base_2>:
c00d356c: 28 03 00 01 cmplwi r3,1
c00d3570: 40 81 00 14 ble c00d3584 <__order_base_2+0x18>
c00d3574: 38 63 ff ff addi r3,r3,-1
c00d3578: 7c 63 00 34 cntlzw r3,r3
c00d357c: 20 63 00 20 subfic r3,r3,32
c00d3580: 4e 80 00 20 blr
c00d3584: 38 60 00 00 li r3,0
c00d3588: 4e 80 00 20 blr
There is no longer any need for __order_base_2() to set up a stack frame
and save/restore the caller address. And the following 'add 1' is
merged into the subtract.
Another typical use of it:
c080ff28 <hugepagesz_setup>:
...
c080fff8: 7f c3 f3 78 mr r3,r30
c080fffc: 4b 8f 81 f1 bl c01081ec <__ilog2_u32>
c0810000: 38 63 ff f2 addi r3,r3,-14
...
Becomes
c080ff1c <hugepagesz_setup>:
...
c080ffec: 7f c3 00 34 cntlzw r3,r30
c080fff0: 20 63 00 11 subfic r3,r3,17
...
Here there is no need to move the r30 argument to r3 and then subtract 14
from the result. Just work on r30 and merge the 'sub 14' with the 'sub from 31'.
Link: https://lkml.kernel.org/r/803a2ac3d923ebcfd0dd40f5886b05cae7bb0aba.1644243860.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-03-24 02:05:44 +03:00
|
|
|
static __always_inline __attribute__((const))
|
2006-12-08 13:37:49 +03:00
|
|
|
int __ilog2_u64(u64 n)
|
|
|
|
{
|
|
|
|
return fls64(n) - 1;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2017-09-30 18:43:38 +03:00
|
|
|
/**
 * is_power_of_2() - test whether a value is an exact power of two
 * @n: the value to test
 *
 * Zero is deliberately *not* treated as a power of two.
 *
 * Return: true if @n is non-zero and has exactly one bit set, otherwise
 * false.
 */
static inline __attribute__((const)) bool is_power_of_2(unsigned long n)
{
	/* n & (n - 1) clears the lowest set bit of n. */
	unsigned long without_lowest_bit = n & (n - 1);

	return n != 0 && without_lowest_bit == 0;
}
|
|
|
|
|
2017-09-30 18:43:38 +03:00
|
|
|
/**
 * __roundup_pow_of_two() - round up to nearest power of two
 * @n: value to round up
 *
 * Non-constant helper: the result is 1UL shifted left by fls_long(@n - 1).
 * @n == 0 is not special-cased; @n - 1 then wraps around in unsigned
 * arithmetic.
 */
static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
	unsigned long below = n - 1;

	return 1UL << fls_long(below);
}
|
|
|
|
|
2017-09-30 18:43:38 +03:00
|
|
|
/**
 * __rounddown_pow_of_two() - round down to nearest power of two
 * @n: value to round down
 *
 * Non-constant helper: the result is 1UL shifted left by
 * fls_long(@n) - 1. @n == 0 is not special-cased.
 */
static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
	const unsigned long one = 1UL;

	return one << (fls_long(n) - 1);
}
|
|
|
|
|
2006-12-08 13:37:49 +03:00
|
|
|
/**
 * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
 * @n: parameter
 *
 * Intended for places where sparse expects a true constant expression,
 * e.g. array indices.
 *
 * For a compile-time constant @n this evaluates to 0 when @n < 2 and
 * otherwise to the index of the highest set bit (63 down to 1). When @n
 * is not a compile-time constant it evaluates to -1.
 */
#define const_ilog2(n) \
( \
	__builtin_constant_p(n) ? ( \
		((n) < 2) ? 0 : \
		((n) & (1ULL << 63)) ? 63 : \
		((n) & (1ULL << 62)) ? 62 : \
		((n) & (1ULL << 61)) ? 61 : \
		((n) & (1ULL << 60)) ? 60 : \
		((n) & (1ULL << 59)) ? 59 : \
		((n) & (1ULL << 58)) ? 58 : \
		((n) & (1ULL << 57)) ? 57 : \
		((n) & (1ULL << 56)) ? 56 : \
		((n) & (1ULL << 55)) ? 55 : \
		((n) & (1ULL << 54)) ? 54 : \
		((n) & (1ULL << 53)) ? 53 : \
		((n) & (1ULL << 52)) ? 52 : \
		((n) & (1ULL << 51)) ? 51 : \
		((n) & (1ULL << 50)) ? 50 : \
		((n) & (1ULL << 49)) ? 49 : \
		((n) & (1ULL << 48)) ? 48 : \
		((n) & (1ULL << 47)) ? 47 : \
		((n) & (1ULL << 46)) ? 46 : \
		((n) & (1ULL << 45)) ? 45 : \
		((n) & (1ULL << 44)) ? 44 : \
		((n) & (1ULL << 43)) ? 43 : \
		((n) & (1ULL << 42)) ? 42 : \
		((n) & (1ULL << 41)) ? 41 : \
		((n) & (1ULL << 40)) ? 40 : \
		((n) & (1ULL << 39)) ? 39 : \
		((n) & (1ULL << 38)) ? 38 : \
		((n) & (1ULL << 37)) ? 37 : \
		((n) & (1ULL << 36)) ? 36 : \
		((n) & (1ULL << 35)) ? 35 : \
		((n) & (1ULL << 34)) ? 34 : \
		((n) & (1ULL << 33)) ? 33 : \
		((n) & (1ULL << 32)) ? 32 : \
		((n) & (1ULL << 31)) ? 31 : \
		((n) & (1ULL << 30)) ? 30 : \
		((n) & (1ULL << 29)) ? 29 : \
		((n) & (1ULL << 28)) ? 28 : \
		((n) & (1ULL << 27)) ? 27 : \
		((n) & (1ULL << 26)) ? 26 : \
		((n) & (1ULL << 25)) ? 25 : \
		((n) & (1ULL << 24)) ? 24 : \
		((n) & (1ULL << 23)) ? 23 : \
		((n) & (1ULL << 22)) ? 22 : \
		((n) & (1ULL << 21)) ? 21 : \
		((n) & (1ULL << 20)) ? 20 : \
		((n) & (1ULL << 19)) ? 19 : \
		((n) & (1ULL << 18)) ? 18 : \
		((n) & (1ULL << 17)) ? 17 : \
		((n) & (1ULL << 16)) ? 16 : \
		((n) & (1ULL << 15)) ? 15 : \
		((n) & (1ULL << 14)) ? 14 : \
		((n) & (1ULL << 13)) ? 13 : \
		((n) & (1ULL << 12)) ? 12 : \
		((n) & (1ULL << 11)) ? 11 : \
		((n) & (1ULL << 10)) ? 10 : \
		((n) & (1ULL <<  9)) ?  9 : \
		((n) & (1ULL <<  8)) ?  8 : \
		((n) & (1ULL <<  7)) ?  7 : \
		((n) & (1ULL <<  6)) ?  6 : \
		((n) & (1ULL <<  5)) ?  5 : \
		((n) & (1ULL <<  4)) ?  4 : \
		((n) & (1ULL <<  3)) ?  3 : \
		((n) & (1ULL <<  2)) ?  2 : \
		1 \
	) : \
	-1 \
)
|
|
|
|
|
|
|
|
/**
 * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
 * @n: parameter
 *
 * Constant-capable base-2 logarithm:
 *  - for a compile-time constant @n the result is 0 when @n < 2 and
 *    63 - __builtin_clzll(@n) otherwise, so it can be used to initialise
 *    global variables from constant data;
 *  - for any other @n the appropriately-sized runtime helper is selected
 *    from sizeof(@n): __ilog2_u32() for up to 4 bytes, __ilog2_u64()
 *    otherwise.
 */
#define ilog2(n) \
( \
	__builtin_constant_p(n) ? \
		(((n) < 2) ? 0 : (63 - __builtin_clzll(n))) : \
		((sizeof(n) > 4) ? __ilog2_u64(n) : __ilog2_u32(n)) \
)
|
|
|
|
|
2006-12-08 13:37:51 +03:00
|
|
|
/**
 * roundup_pow_of_two - round the given value up to nearest power of two
 * @n: parameter
 *
 * Rounds @n up to the nearest power of two.
 *  - the result is undefined when n == 0
 *  - usable to initialise global variables from constant data: a
 *    compile-time constant @n is handled with ilog2(), anything else is
 *    passed to __roundup_pow_of_two()
 */
#define roundup_pow_of_two(n) \
( \
	__builtin_constant_p(n) ? \
		(((n) != 1) ? (1UL << (ilog2((n) - 1) + 1)) : 1) : \
		__roundup_pow_of_two(n) \
)
|
|
|
|
|
2007-10-17 10:29:32 +04:00
|
|
|
/**
 * rounddown_pow_of_two - round the given value down to nearest power of two
 * @n: parameter
 *
 * Rounds @n down to the nearest power of two.
 *  - the result is undefined when n == 0
 *  - usable to initialise global variables from constant data: a
 *    compile-time constant @n yields 1UL << ilog2(@n), anything else is
 *    passed to __rounddown_pow_of_two()
 */
#define rounddown_pow_of_two(n) \
( \
	__builtin_constant_p(n) ? \
		(1UL << ilog2(n)) : \
		__rounddown_pow_of_two(n) \
)
|
|
|
|
|
2017-09-30 18:43:38 +03:00
|
|
|
/*
 * Non-constant helper for order_base_2(): returns 0 when @n is 0 or 1,
 * otherwise ilog2(@n - 1) + 1.
 *
 * The const attribute is spelled out as __attribute__((const)), the same
 * way as for the other helpers in this header.
 */
static inline __attribute__((const))
int __order_base_2(unsigned long n)
{
	return n > 1 ? ilog2(n - 1) + 1 : 0;
}
|
|
|
|
|
2008-02-06 12:36:54 +03:00
|
|
|
/**
 * order_base_2 - calculate the (rounded up) base 2 order of the argument
 * @n: parameter
 *
 * A compile-time constant @n is evaluated in place (0 for @n equal to 0
 * or 1, otherwise ilog2(@n - 1) + 1); any other @n is passed to
 * __order_base_2().
 *
 * The first few values calculated by this routine:
 *  ob2(0) = 0
 *  ob2(1) = 0
 *  ob2(2) = 1
 *  ob2(3) = 2
 *  ob2(4) = 2
 *  ob2(5) = 3
 *  ... and so on.
 */
#define order_base_2(n) \
( \
	__builtin_constant_p(n) ? \
		(((n) != 0 && (n) != 1) ? (ilog2((n) - 1) + 1) : 0) : \
		__order_base_2(n) \
)
|
2019-06-21 11:42:02 +03:00
|
|
|
|
|
|
|
/*
 * Non-constant helper for bits_per(): returns 1 when @n < 2. Otherwise it
 * returns order_base_2(@n), plus one more when @n is an exact power of
 * two.
 */
static inline __attribute__((const))
int __bits_per(unsigned long n)
{
	int order;

	if (n < 2)
		return 1;

	order = order_base_2(n);

	return is_power_of_2(n) ? order + 1 : order;
}
|
|
|
|
|
|
|
|
/**
 * bits_per - calculate the number of bits required for the argument
 * @n: parameter
 *
 * This is constant-capable and can be used for compile time
 * initializations, e.g. bitfields. A compile-time constant @n is
 * evaluated in place (1 for @n equal to 0 or 1, otherwise ilog2(@n) + 1);
 * any other @n is passed to __bits_per().
 *
 * The first few values calculated by this routine:
 *  bf(0) = 1
 *  bf(1) = 1
 *  bf(2) = 2
 *  bf(3) = 2
 *  bf(4) = 3
 *  ... and so on.
 */
#define bits_per(n) \
( \
	__builtin_constant_p(n) ? \
		(((n) != 0 && (n) != 1) ? (ilog2(n) + 1) : 1) : \
		__bits_per(n) \
)
|
2006-12-08 13:37:49 +03:00
|
|
|
#endif /* _LINUX_LOG2_H */
|