percpu-refcount: Use normal RCU instead of RCU-sched

This is a revert of commit
   a4244454df ("percpu-refcount: use RCU-sched insted of normal RCU")

which claims the only reason for using RCU-sched is
   "rcu_read_[un]lock() … are slightly more expensive than preempt_disable/enable()"

and
    "As the RCU critical sections are extremely short, using sched-RCU
    shouldn't have any latency implications."

The problem with using RCU-sched here is that it disables preemption, so
the release callback (called from percpu_ref_put_many()) must not
acquire any sleeping locks like spinlock_t. This breaks PREEMPT_RT
because some of the users acquire spinlock_t locks in their callbacks.

Using rcu_read_lock() on PREEMPTION=n kernels is not any different
compared to rcu_read_lock_sched(). On PREEMPTION=y kernels there are
already performance issues due to additional preemption points.
Looking at the code, rcu_read_lock() is just an increment and
rcu_read_unlock() is almost just a decrement unless there is something
special to do. Both are function calls, while disabling preemption is inlined.
In a small benchmark, the minimum amount of time required was mostly
the same. The average time required was higher due to a higher MAX
value (which could be caused by preemption). With DEBUG_PREEMPT=y it is
rcu_read_lock_sched() that takes a little longer due to the additional
debug code.

Convert back to normal RCU.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
This commit is contained in:
Sebastian Andrzej Siewior 2019-11-08 18:35:53 +01:00 коммит произвёл Dennis Zhou
Родитель 825dbc6ff7
Коммит 9e8d42a0f7
1 изменённых файлов: 8 добавлений и 8 удалений

Просмотреть файл

@ -186,14 +186,14 @@ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
{ {
unsigned long __percpu *percpu_count; unsigned long __percpu *percpu_count;
rcu_read_lock_sched(); rcu_read_lock();
if (__ref_is_percpu(ref, &percpu_count)) if (__ref_is_percpu(ref, &percpu_count))
this_cpu_add(*percpu_count, nr); this_cpu_add(*percpu_count, nr);
else else
atomic_long_add(nr, &ref->count); atomic_long_add(nr, &ref->count);
rcu_read_unlock_sched(); rcu_read_unlock();
} }
/** /**
@ -223,7 +223,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
unsigned long __percpu *percpu_count; unsigned long __percpu *percpu_count;
bool ret; bool ret;
rcu_read_lock_sched(); rcu_read_lock();
if (__ref_is_percpu(ref, &percpu_count)) { if (__ref_is_percpu(ref, &percpu_count)) {
this_cpu_inc(*percpu_count); this_cpu_inc(*percpu_count);
@ -232,7 +232,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
ret = atomic_long_inc_not_zero(&ref->count); ret = atomic_long_inc_not_zero(&ref->count);
} }
rcu_read_unlock_sched(); rcu_read_unlock();
return ret; return ret;
} }
@ -257,7 +257,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
unsigned long __percpu *percpu_count; unsigned long __percpu *percpu_count;
bool ret = false; bool ret = false;
rcu_read_lock_sched(); rcu_read_lock();
if (__ref_is_percpu(ref, &percpu_count)) { if (__ref_is_percpu(ref, &percpu_count)) {
this_cpu_inc(*percpu_count); this_cpu_inc(*percpu_count);
@ -266,7 +266,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
ret = atomic_long_inc_not_zero(&ref->count); ret = atomic_long_inc_not_zero(&ref->count);
} }
rcu_read_unlock_sched(); rcu_read_unlock();
return ret; return ret;
} }
@ -285,14 +285,14 @@ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
{ {
unsigned long __percpu *percpu_count; unsigned long __percpu *percpu_count;
rcu_read_lock_sched(); rcu_read_lock();
if (__ref_is_percpu(ref, &percpu_count)) if (__ref_is_percpu(ref, &percpu_count))
this_cpu_sub(*percpu_count, nr); this_cpu_sub(*percpu_count, nr);
else if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
ref->release(ref); ref->release(ref);
rcu_read_unlock_sched(); rcu_read_unlock();
} }
/** /**