Stack tracing and RCU has been having issues with each other and lockdep

has been pointing out constant problems. The changes have been going into the stack tracer, but it has been discovered that the problem isn't with the stack tracer itself, but it is with calling save_stack_trace() from within the internals of RCU. The stack tracer is the one that can trigger the issue the easiest, but examining the problem further, it could also happen from a WARN() in the wrong place, or even if an NMI happened in this area and it did an rcu_read_lock(). The critical area is where RCU is not watching. Which can happen while going to and from idle, or bringing up or taking down a CPU. The final fix was to put the protection in kernel_text_address() as it is the one that requires RCU to be watching while doing the stack trace. To make this work properly, Paul had to allow rcu_irq_enter() happen after rcu_nmi_enter(). This should have been done anyway, since an NMI can page fault (reading vmalloc area), and a page fault triggers rcu_irq_enter(). One patch is just a consolidation of code so that the fix only needed to be done in one location. -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEEQEw9Eu0DdyUUkuUUybkF8mrZjcsFAlnGyXoUHHJvc3RlZHRA Z29vZG1pcy5vcmcACgkQybkF8mrZjctKtwf8CeKGqOdlqkZEafIpWaIASXmAVMO/ WE+hQK+rCydWFvzADgb/rOmsR0ou8WGEXcuUPxVxmvMyqhKhZ6AU1hE/7Y8P0pMq F4bev+j3lAJC65ezFAh+ZQcIjaRIH4MFVPsUTaibSPSN7xziMNIpbf9VOVfpUm8A jf9p6YAmyhFVi6DstCc29SWnywEVwC2ZWRVKRPXKry8/dPxjfVcLclGX680Eqi9I EnYaOdC/mGbtvHPOUSs/P0cfxExHmyEErQHeOV8FPymj6KJ6+KoYIiELNlTHUBj/ eeKzrHc/b3j+lz0RPlA8WxYmpmEm4SE5cV3vRebdBNUBrABSN1RxeOozyQ== =1KkS -----END PGP SIGNATURE----- Merge tag 'trace-v4.14-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace Pull tracing fixes from Steven Rostedt: "Stack tracing and RCU has been having issues with each other and lockdep has been pointing out constant problems. The changes have been going into the stack tracer, but it has been discovered that the problem isn't with the stack tracer itself, but it is with calling save_stack_trace() from within the internals of RCU. The stack tracer is the one that can trigger the issue the easiest, but examining the problem further, it could also happen from a WARN() in the wrong place, or even if an NMI happened in this area and it did an rcu_read_lock(). The critical area is where RCU is not watching. Which can happen while going to and from idle, or bringing up or taking down a CPU. The final fix was to put the protection in kernel_text_address() as it is the one that requires RCU to be watching while doing the stack trace. To make this work properly, Paul had to allow rcu_irq_enter() happen after rcu_nmi_enter(). This should have been done anyway, since an NMI can page fault (reading vmalloc area), and a page fault triggers rcu_irq_enter(). One patch is just a consolidation of code so that the fix only needed to be done in one location" * tag 'trace-v4.14-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: tracing: Remove RCU work arounds from stack tracer extable: Enable RCU if it is not watching in kernel_text_address() extable: Consolidate *kernel_text_address() functions rcu: Allow for page faults in NMI handlers
2017-09-25 15:22:31 -07:00 · 2017-09-25 15:22:31 -07:00 · ac0a36461f
--- a/kernel/extable.c
+++ b/kernel/extable.c
@ -102,15 +102,7 @@ int core_kernel_data(unsigned long addr)

 int __kernel_text_address(unsigned long addr)
 {
-	if (core_kernel_text(addr))
-		return 1;
-	if (is_module_text_address(addr))
-		return 1;
-	if (is_ftrace_trampoline(addr))
-		return 1;
-	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
-		return 1;
-	if (is_bpf_text_address(addr))
+	if (kernel_text_address(addr))
 		return 1;
 	/*
 	 * There might be init symbols in saved stacktraces.
@ -127,17 +119,42 @@ int __kernel_text_address(unsigned long addr)

 int kernel_text_address(unsigned long addr)
 {
+	bool no_rcu;
+	int ret = 1;
+
 	if (core_kernel_text(addr))
 		return 1;
+
+	/*
+	 * If a stack dump happens while RCU is not watching, then
+	 * RCU needs to be notified that it requires to start
+	 * watching again. This can happen either by tracing that
+	 * triggers a stack trace, or a WARN() that happens during
+	 * coming back from idle, or cpu on or offlining.
+	 *
+	 * is_module_text_address() as well as the kprobe slots
+	 * and is_bpf_text_address() require RCU to be watching.
+	 */
+	no_rcu = !rcu_is_watching();
+
+	/* Treat this like an NMI as it can happen anywhere */
+	if (no_rcu)
+		rcu_nmi_enter();
+
 	if (is_module_text_address(addr))
-		return 1;
+		goto out;
 	if (is_ftrace_trampoline(addr))
-		return 1;
+		goto out;
 	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
-		return 1;
+		goto out;
 	if (is_bpf_text_address(addr))
-		return 1;
-	return 0;
+		goto out;
+	ret = 0;
+out:
+	if (no_rcu)
+		rcu_nmi_exit();
+
+	return ret;
 }

 /*
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@ -882,6 +882,11 @@ void rcu_irq_exit(void)

 	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
+
+	/* Page faults can happen in NMI handlers, so check... */
+	if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+		return;
+
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 		     rdtp->dynticks_nesting < 1);
 	if (rdtp->dynticks_nesting <= 1) {
@ -1015,6 +1020,11 @@ void rcu_irq_enter(void)

 	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
+
+	/* Page faults can happen in NMI handlers, so check... */
+	if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+		return;
+
 	oldval = rdtp->dynticks_nesting;
 	rdtp->dynticks_nesting++;
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@ -96,23 +96,9 @@ check_stack(unsigned long ip, unsigned long *stack)
 	if (in_nmi())
 		return;

-	/*
-	 * There's a slight chance that we are tracing inside the
-	 * RCU infrastructure, and rcu_irq_enter() will not work
-	 * as expected.
-	 */
-	if (unlikely(rcu_irq_enter_disabled()))
-		return;
-
 	local_irq_save(flags);
 	arch_spin_lock(&stack_trace_max_lock);

-	/*
-	 * RCU may not be watching, make it see us.
-	 * The stack trace code uses rcu_sched.
-	 */
-	rcu_irq_enter();
-
 	/* In case another CPU set the tracer_frame on us */
 	if (unlikely(!frame_size))
 		this_size -= tracer_frame;
@ -205,7 +191,6 @@ check_stack(unsigned long ip, unsigned long *stack)
 	}

 out:
-	rcu_irq_exit();
 	arch_spin_unlock(&stack_trace_max_lock);
 	local_irq_restore(flags);
 }