From 91a967fd6934abc0c7e4b0d26728e38674278707 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 28 Jul 2022 15:37:05 -0700
Subject: [PATCH] rcu: Add full-sized polling for get_completed*() and poll_state*()

The get_completed_synchronize_rcu() and poll_state_synchronize_rcu()
APIs compress the combined expedited and normal grace-period states into
a single unsigned long, which conserves storage, but can miss grace
periods in certain cases involving overlapping normal and expedited
grace periods.  Missing the occasional grace period is usually not a
problem, but there are use cases that care about each and every grace
period.

This commit therefore adds the first members of the full-state RCU
grace-period polling API, namely the get_completed_synchronize_rcu_full()
and poll_state_synchronize_rcu_full() functions.  These use up to three
times the storage (rcu_gp_oldstate structure instead of unsigned long),
but are guaranteed not to miss grace periods, at least in situations
where the single-CPU grace-period optimization does not apply.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h |  3 ++
 include/linux/rcutiny.h  |  9 +++++
 include/linux/rcutree.h  |  8 +++++
 kernel/rcu/rcutorture.c  |  9 +++++
 kernel/rcu/tiny.c        | 10 ++++++
 kernel/rcu/tree.c        | 76 +++++++++++++++++++++++++++++++++++++---
 6 files changed, 111 insertions(+), 4 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f527f27e6438..faaa174dfb27 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -42,7 +42,10 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
 void rcu_barrier_tasks_rude(void);
 void synchronize_rcu(void);
+
+struct rcu_gp_oldstate;
 unsigned long get_completed_synchronize_rcu(void);
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
 
 #ifdef CONFIG_PREEMPT_RCU
 
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 62815c0a2dce..1fbff8600d92 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -14,10 +14,19 @@
 
 #include <asm/param.h> /* for HZ */
 
+struct rcu_gp_oldstate {
+	unsigned long rgos_norm;
+};
+
 unsigned long get_state_synchronize_rcu(void);
 unsigned long start_poll_synchronize_rcu(void);
 bool poll_state_synchronize_rcu(unsigned long oldstate);
 
+static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+	return poll_state_synchronize_rcu(rgosp->rgos_norm);
+}
+
 static inline void cond_synchronize_rcu(unsigned long oldstate)
 {
 	might_sleep();
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 47eaa4cb0df7..4ccbc3aa9dc2 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -40,11 +40,19 @@ bool rcu_eqs_special_set(int cpu);
 void rcu_momentary_dyntick_idle(void);
 void kfree_rcu_scheduler_running(void);
 bool rcu_gp_might_be_stalled(void);
+
+struct rcu_gp_oldstate {
+	unsigned long rgos_norm;
+	unsigned long rgos_exp;
+	unsigned long rgos_polled;
+};
+
 unsigned long start_poll_synchronize_rcu_expedited(void);
 void cond_synchronize_rcu_expedited(unsigned long oldstate);
 unsigned long get_state_synchronize_rcu(void);
 unsigned long start_poll_synchronize_rcu(void);
 bool poll_state_synchronize_rcu(unsigned long oldstate);
+bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
 void cond_synchronize_rcu(unsigned long oldstate);
 
 bool rcu_is_idle_cpu(int cpu);
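[ Note: the headers above give Tiny RCU a one-field rcu_gp_oldstate and
  Tree RCU a three-field version, but callers treat the structure as
  opaque either way.  A minimal usage sketch, not part of this patch,
  with the my_obj names being hypothetical:

	#include <linux/rcupdate.h>

	struct my_obj {
		struct rcu_gp_oldstate rgos;	/* Opaque full GP cookie. */
		/* ... payload ... */
	};

	/* Start life with a cookie that always reads as "GP complete". */
	static void my_obj_init(struct my_obj *op)
	{
		get_completed_synchronize_rcu_full(&op->rgos);
	}

	/* True iff a full grace period has elapsed since the snapshot. */
	static bool my_obj_gp_done(struct my_obj *op)
	{
		return poll_state_synchronize_rcu_full(&op->rgos);
	}

  Initializing to the pre-completed state avoids a special "no snapshot
  taken yet" case in the caller. ]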
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d8e1b270a065..b31e6ed64d1b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -336,8 +336,10 @@ struct rcu_torture_ops {
 	void (*cond_sync_exp)(unsigned long oldstate);
 	unsigned long (*get_gp_state)(void);
 	unsigned long (*get_gp_completed)(void);
+	void (*get_gp_completed_full)(struct rcu_gp_oldstate *rgosp);
 	unsigned long (*start_gp_poll)(void);
 	bool (*poll_gp_state)(unsigned long oldstate);
+	bool (*poll_gp_state_full)(struct rcu_gp_oldstate *rgosp);
 	void (*cond_sync)(unsigned long oldstate);
 	call_rcu_func_t call;
 	void (*cb_barrier)(void);
@@ -503,8 +505,10 @@ static struct rcu_torture_ops rcu_ops = {
 	.exp_sync		= synchronize_rcu_expedited,
 	.get_gp_state		= get_state_synchronize_rcu,
 	.get_gp_completed	= get_completed_synchronize_rcu,
+	.get_gp_completed_full	= get_completed_synchronize_rcu_full,
 	.start_gp_poll		= start_poll_synchronize_rcu,
 	.poll_gp_state		= poll_state_synchronize_rcu,
+	.poll_gp_state_full	= poll_state_synchronize_rcu_full,
 	.cond_sync		= cond_synchronize_rcu,
 	.get_gp_state_exp	= get_state_synchronize_rcu,
 	.start_gp_poll_exp	= start_poll_synchronize_rcu_expedited,
@@ -1212,6 +1216,7 @@ rcu_torture_writer(void *arg)
 	bool boot_ended;
 	bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
 	unsigned long cookie;
+	struct rcu_gp_oldstate cookie_full;
 	int expediting = 0;
 	unsigned long gp_snap;
 	int i;
@@ -1277,6 +1282,10 @@ rcu_torture_writer(void *arg)
 			}
 			cur_ops->readunlock(idx);
 		}
+		if (cur_ops->get_gp_completed_full && cur_ops->poll_gp_state_full) {
+			cur_ops->get_gp_completed_full(&cookie_full);
+			WARN_ON_ONCE(!cur_ops->poll_gp_state_full(&cookie_full));
+		}
 		switch (synctype[torture_random(&rand) % nsynctypes]) {
 		case RTWS_DEF_FREE:
 			rcu_torture_writer_state = RTWS_DEF_FREE;
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index f0561ee16b9c..435edc785412 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -183,6 +183,16 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/*
+ * Store a grace-period-counter "cookie".  For more information,
+ * see the Tree RCU header comment.
+ */
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+	rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
+}
+EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
+
 /*
  * Return a grace-period-counter "cookie".  For more information,
  * see the Tree RCU header comment.
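[ Note: the rcutorture hook above enforces the invariant that a cookie
  written by get_completed_synchronize_rcu_full() must always poll as
  completed, on both Tiny and Tree RCU.  Open-coded outside of
  rcutorture, the check would amount to this hypothetical helper:

	static void check_precompleted_cookie(void)
	{
		struct rcu_gp_oldstate rgos;

		get_completed_synchronize_rcu_full(&rgos);

		/* Must hold on both Tiny and Tree RCU. */
		WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&rgos));
	} ]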
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 79aea7df4345..d47c9b6d8106 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3522,6 +3522,22 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
+/**
+ * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie
+ * @rgosp: Place to put state cookie
+ *
+ * Stores into @rgosp a value that will always be treated by functions
+ * like poll_state_synchronize_rcu_full() as a cookie whose grace period
+ * has already completed.
+ */
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+	rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
+	rgosp->rgos_exp = RCU_GET_STATE_COMPLETED;
+	rgosp->rgos_polled = RCU_GET_STATE_COMPLETED;
+}
+EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
+
 /**
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
@@ -3580,7 +3596,7 @@ unsigned long start_poll_synchronize_rcu(void)
 EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
 
 /**
- * poll_state_synchronize_rcu - Conditionally wait for an RCU grace period
+ * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
  *
  * @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
  *
@@ -3595,9 +3611,10 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
 * But counter wrap is harmless.  If the counter wraps, we have waited for
 * more than a billion grace periods (and way more on a 64-bit system!).
 * Those needing to keep oldstate values for very long time periods
- * (many hours even on 32-bit systems) should check them occasionally
- * and either refresh them or set a flag indicating that the grace period
- * has completed.
+ * (many hours even on 32-bit systems) should check them occasionally and
+ * either refresh them or set a flag indicating that the grace period has
+ * completed.  Alternatively, they can use get_completed_synchronize_rcu()
+ * to get a guaranteed-completed grace-period state.
 *
 * This function provides the same memory-ordering guarantees that
 * would be provided by a synchronize_rcu() that was invoked at the call
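[ Note: the "check occasionally and refresh" advice in the kernel-doc
  above, sketched as code.  The my_refresh_cookie() helper is
  hypothetical, not part of this patch:

	static void my_refresh_cookie(unsigned long *cookiep)
	{
		/*
		 * Once the grace period has elapsed, pin the cookie to
		 * the guaranteed-completed value so that counter wrap
		 * cannot later make it appear to be still in progress.
		 */
		if (poll_state_synchronize_rcu(*cookiep))
			*cookiep = get_completed_synchronize_rcu();
	} ]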
@@ -3615,6 +3632,57 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
 
+/**
+ * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed?
+ * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full()
+ *
+ * If a full RCU grace period has elapsed since the earlier call from
+ * which *rgosp was obtained, return @true, otherwise return @false.
+ * If @false is returned, it is the caller's responsibility to invoke this
+ * function later on until it does return @true.  Alternatively, the caller
+ * can explicitly wait for a grace period, for example, by passing @rgosp
+ * to cond_synchronize_rcu() or by directly invoking synchronize_rcu().
+ *
+ * Yes, this function does not take counter wrap into account.
+ * But counter wrap is harmless.  If the counter wraps, we have waited
+ * for more than a billion grace periods (and way more on a 64-bit
+ * system!).  Those needing to keep rcu_gp_oldstate values for very
+ * long time periods (many hours even on 32-bit systems) should check
+ * them occasionally and either refresh them or set a flag indicating
+ * that the grace period has completed.  Alternatively, they can use
+ * get_completed_synchronize_rcu_full() to get a guaranteed-completed
+ * grace-period state.
+ *
+ * This function provides the same memory-ordering guarantees that would
+ * be provided by a synchronize_rcu() that was invoked at the call to
+ * the function that provided @rgosp, and that returned at the end of this
+ * function.  And this guarantee requires that the root rcu_node structure's
+ * ->gp_seq field be checked instead of that of the rcu_state structure.
+ * The problem is that the just-ended grace-period's callbacks can be
+ * invoked between the time that the root rcu_node structure's ->gp_seq
+ * field is updated and the time that the rcu_state structure's ->gp_seq
+ * field is updated.  Therefore, if a single synchronize_rcu() is to
+ * cause a subsequent poll_state_synchronize_rcu_full() to return @true,
+ * then the root rcu_node structure is the one that needs to be polled.
+ */
+bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+	struct rcu_node *rnp = rcu_get_root();
+
+	smp_mb(); // Order against root rcu_node structure grace-period cleanup.
+	if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED ||
+	    rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) ||
+	    rgosp->rgos_exp == RCU_GET_STATE_COMPLETED ||
+	    rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp) ||
+	    rgosp->rgos_polled == RCU_GET_STATE_COMPLETED ||
+	    rcu_seq_done_exact(&rcu_state.gp_seq_polled, rgosp->rgos_polled)) {
+		smp_mb(); /* Ensure GP ends before subsequent accesses. */
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu_full);
+
 /**
  * cond_synchronize_rcu - Conditionally wait for an RCU grace period
  *
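[ Note: a caller-side sketch of full-state polling for deferred
  reclamation.  This assumes the get_state_synchronize_rcu_full()
  counterpart named in the kernel-doc above, which this patch does not
  yet add; the my_retire()/my_try_free() helpers are hypothetical:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_deferred {
		struct rcu_gp_oldstate rgos;
		void *data;
	};

	/* Snapshot full grace-period state when retiring the object. */
	static void my_retire(struct my_deferred *dp)
	{
		get_state_synchronize_rcu_full(&dp->rgos);
	}

	/* Free the payload only once a full grace period has elapsed. */
	static bool my_try_free(struct my_deferred *dp)
	{
		if (!poll_state_synchronize_rcu_full(&dp->rgos))
			return false;	/* Not yet, try again later. */
		kfree(dp->data);
		return true;
	}

  Per the commit log, the full-state cookie trades up to three times
  the storage for a guarantee of not missing grace periods when normal
  and expedited grace periods overlap. ]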