Conflicts:

	arch/x86/kernel/io_apic.c
	kernel/sched.c
	kernel/sched_stats.h
This commit is contained in:
Rusty Russell 2008-12-13 21:55:51 +10:30
Родитель 7be7585393 8299608f14
Коммит 968ea6d80e
215 изменённых файлов: 10629 добавлений и 3965 удалений

Просмотреть файл

@ -0,0 +1,32 @@
CPU Accounting Controller
-------------------------
The CPU accounting controller is used to group tasks using cgroups and
account the CPU usage of these groups of tasks.
The CPU accounting controller supports multi-hierarchy groups. An accounting
group accumulates the CPU usage of all of its child groups and the tasks
directly present in its group.
Accounting groups can be created by first mounting the cgroup filesystem.
# mkdir /cgroups
# mount -t cgroup -ocpuacct none /cgroups
With the above step, the initial or the parent accounting group
becomes visible at /cgroups. At bootup, this group includes all the
tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
this group which is essentially the CPU time obtained by all the tasks
in the system.
New accounting groups can be created under the parent group /cgroups.
# cd /cgroups
# mkdir g1
# echo $$ > g1/tasks
The above steps create a new group g1 and move the current shell
process (bash) into it. CPU time consumed by this bash and its children
can be obtained from g1/cpuacct.usage and the same is accumulated in
/cgroups/cpuacct.usage also.

Просмотреть файл

@ -82,7 +82,7 @@ of ftrace. Here is a list of some of the key files:
tracer is not adding more data, they will display
the same information every time they are read.
iter_ctrl: This file lets the user control the amount of data
trace_options: This file lets the user control the amount of data
that is displayed in one of the above output
files.
@ -94,10 +94,10 @@ of ftrace. Here is a list of some of the key files:
only be recorded if the latency is greater than
the value in this file. (in microseconds)
trace_entries: This sets or displays the number of bytes each CPU
buffer_size_kb: This sets or displays the number of kilobytes each CPU
buffer can hold. The tracer buffers are the same size
for each CPU. The displayed number is the size of the
CPU buffer and not total size of all buffers. The
CPU buffer and not total size of all buffers. The
trace buffers are allocated in pages (blocks of memory
that the kernel uses for allocation, usually 4 KB in size).
If the last page allocated has room for more bytes
@ -127,6 +127,8 @@ of ftrace. Here is a list of some of the key files:
be traced. If a function exists in both set_ftrace_filter
and set_ftrace_notrace, the function will _not_ be traced.
set_ftrace_pid: Have the function tracer only trace a single thread.
available_filter_functions: This lists the functions that ftrace
has processed and can trace. These are the function
names that you can pass to "set_ftrace_filter" or
@ -316,23 +318,23 @@ The above is mostly meaningful for kernel developers.
The rest is the same as the 'trace' file.
iter_ctrl
---------
trace_options
-------------
The iter_ctrl file is used to control what gets printed in the trace
The trace_options file is used to control what gets printed in the trace
output. To see what is available, simply cat the file:
cat /debug/tracing/iter_ctrl
cat /debug/tracing/trace_options
print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
noblock nostacktrace nosched-tree
noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
To disable one of the options, echo in the option prepended with "no".
echo noprint-parent > /debug/tracing/iter_ctrl
echo noprint-parent > /debug/tracing/trace_options
To enable an option, leave off the "no".
echo sym-offset > /debug/tracing/iter_ctrl
echo sym-offset > /debug/tracing/trace_options
Here are the available options:
@ -378,6 +380,20 @@ Here are the available options:
When a trace is recorded, so is the stack of functions.
This allows for back traces of trace sites.
userstacktrace - This option changes the trace.
It records a stacktrace of the current userspace thread.
sym-userobj - when user stacktraces are enabled, look up which object the
address belongs to, and print a relative address.
This is especially useful when ASLR is on; otherwise you don't
get a chance to resolve the address to object/file/line after the app is no
longer running.
The lookup is performed when you read trace,trace_pipe,latency_trace. Example:
a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
sched-tree - TBD (any users??)
@ -1059,6 +1075,83 @@ For simple one time traces, the above is sufficent. For anything else,
a search through /proc/mounts may be needed to find where the debugfs
file-system is mounted.
Single thread tracing
---------------------
By writing into /debug/tracing/set_ftrace_pid you can trace a
single thread. For example:
# cat /debug/tracing/set_ftrace_pid
no pid
# echo 3111 > /debug/tracing/set_ftrace_pid
# cat /debug/tracing/set_ftrace_pid
3111
# echo function > /debug/tracing/current_tracer
# cat /debug/tracing/trace | head
# tracer: function
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return
yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range
yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel
yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel
yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll
yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll
# echo -1 > /debug/tracing/set_ftrace_pid
# cat /debug/tracing/trace |head
# tracer: function
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
##### CPU 3 buffer started ####
yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait
yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry
yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry
yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit
yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit
If you want to trace a function when executing, you could use
something like this simple program:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Trace a single command with the function tracer.
 *
 * Usage: <prog> <command> [args...]
 *
 * In the process where fork() returns a positive value, the tracer is
 * reset to "nop", this process's pid is written to set_ftrace_pid, the
 * "function" tracer is selected, and the command is exec'ed so that it
 * runs under the traced pid. The other process simply returns 0.
 */
int main (int argc, char **argv)
{
	/* argv[1] is the command to exec, so a bare invocation is an error */
	if (argc < 2)
		exit(-1);

	if (fork() > 0) {
		int fd, ffd;
		char line[64];
		int s;

		ffd = open("/debug/tracing/current_tracer", O_WRONLY);
		if (ffd < 0)
			exit(-1);
		/* stop tracing while the pid filter is being changed */
		write(ffd, "nop", 3);

		fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY);
		if (fd < 0)
			exit(-1);
		s = sprintf(line, "%d\n", getpid());
		write(fd, line, s);

		/* pid filter is in place: start the function tracer */
		write(ffd, "function", 8);

		close(fd);
		close(ffd);

		/* exec keeps the pid, so the command is what gets traced */
		execvp(argv[1], argv+1);
	}

	return 0;
}
dynamic ftrace
--------------
@ -1158,7 +1251,11 @@ These are the only wild cards which are supported.
<match>*<match> will not work.
# echo hrtimer_* > /debug/tracing/set_ftrace_filter
Note: It is better to use quotes to enclose the wild cards, otherwise
the shell may expand the parameters into names of files in the local
directory.
# echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
Produces:
@ -1213,7 +1310,7 @@ Again, now we want to append.
# echo sys_nanosleep > /debug/tracing/set_ftrace_filter
# cat /debug/tracing/set_ftrace_filter
sys_nanosleep
# echo hrtimer_* >> /debug/tracing/set_ftrace_filter
# echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter
# cat /debug/tracing/set_ftrace_filter
hrtimer_run_queues
hrtimer_run_pending
@ -1299,41 +1396,29 @@ trace entries
-------------
Having too much or not enough data can be troublesome in diagnosing
an issue in the kernel. The file trace_entries is used to modify
an issue in the kernel. The file buffer_size_kb is used to modify
the size of the internal trace buffers. The number listed
is the number of kilobytes that each CPU buffer can hold. To know
the full size, multiply the number of possible CPUs by the
number of kilobytes.
# cat /debug/tracing/trace_entries
65620
# cat /debug/tracing/buffer_size_kb
1408 (units kilobytes)
Note, to modify this, you must have tracing completely disabled. To do that,
echo "nop" into the current_tracer. If the current_tracer is not set
to "nop", an EINVAL error will be returned.
# echo nop > /debug/tracing/current_tracer
# echo 100000 > /debug/tracing/trace_entries
# cat /debug/tracing/trace_entries
100045
Notice that we echoed in 100,000 but the size is 100,045. The entries
are held in individual pages. It allocates the number of pages it takes
to fulfill the request. If more entries may fit on the last page
then they will be added.
# echo 1 > /debug/tracing/trace_entries
# cat /debug/tracing/trace_entries
85
This shows us that 85 entries can fit in a single page.
# echo 10000 > /debug/tracing/buffer_size_kb
# cat /debug/tracing/buffer_size_kb
10000 (units kilobytes)
The number of pages which will be allocated is limited to a percentage
of available memory. Allocating too much will produce an error.
# echo 1000000000000 > /debug/tracing/trace_entries
# echo 1000000000000 > /debug/tracing/buffer_size_kb
-bash: echo: write error: Cannot allocate memory
# cat /debug/tracing/trace_entries
# cat /debug/tracing/buffer_size_kb
85

Просмотреть файл

@ -750,6 +750,14 @@ and is between 256 and 4096 characters. It is defined in the file
parameter will force ia64_sal_cache_flush to call
ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
ftrace=[tracer]
[ftrace] will set and start the specified tracer
as early as possible in order to facilitate early
boot debugging.
ftrace_dump_on_oops
[ftrace] will dump the trace buffers on oops.
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
support via parallel port (up to 5 devices per port)

Просмотреть файл

@ -71,35 +71,50 @@ Look at the current lock statistics:
# less /proc/lock_stat
01 lock_stat version 0.2
01 lock_stat version 0.3
02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total
04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
05
06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60
07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38
08 --------------------------
09 &inode->i_data.tree_lock 0 [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190
10
11 ...............................................................................................................................................................................................
12
13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24
14 -----------
15 dcache_lock 180 [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230
16 dcache_lock 165 [<ffffffff802c002a>] d_alloc+0x15a/0x210
17 dcache_lock 33 [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70
18 dcache_lock 1 [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130
06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34
07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88
08 ---------------
09 &mm->mmap_sem 487 [<ffffffff8053491f>] do_page_fault+0x466/0x928
10 &mm->mmap_sem 179 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
11 &mm->mmap_sem 279 [<ffffffff80210a57>] sys_mmap+0x75/0xce
12 &mm->mmap_sem 76 [<ffffffff802a490b>] sys_munmap+0x32/0x59
13 ---------------
14 &mm->mmap_sem 270 [<ffffffff80210a57>] sys_mmap+0x75/0xce
15 &mm->mmap_sem 431 [<ffffffff8053491f>] do_page_fault+0x466/0x928
16 &mm->mmap_sem 138 [<ffffffff802a490b>] sys_munmap+0x32/0x59
17 &mm->mmap_sem 145 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
18
19 ...............................................................................................................................................................................................
20
21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41
22 -----------
23 dcache_lock 179 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
24 dcache_lock 113 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
25 dcache_lock 99 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
26 dcache_lock 104 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
27 -----------
28 dcache_lock 192 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
29 dcache_lock 98 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
30 dcache_lock 72 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
31 dcache_lock 112 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
This excerpt shows the first two lock class statistics. Line 01 shows the
output version - each time the format changes this will be updated. Line 02-04
show the header with column descriptions. Lines 05-10 and 13-18 show the actual
show the header with column descriptions. Lines 05-18 and 20-31 show the actual
statistics. These statistics come in two parts; the actual stats separated by a
short separator (line 08, 14) from the contention points.
short separator (line 08, 13) from the contention points.
The first lock (05-10) is a read/write lock, and shows two lines above the
The first lock (05-18) is a read/write lock, and shows two lines above the
short separator. The contention points don't match the column descriptors,
they have two: contentions and [<IP>] symbol.
they have two: contentions and [<IP>] symbol. The second set of contention
points are the points we're contending with.
The integer part of the time values is in us.
View the top contending locks:

Просмотреть файл

@ -51,11 +51,16 @@ to call) for the specific marker through marker_probe_register() and can be
activated by calling marker_arm(). Marker deactivation can be done by calling
marker_disarm() as many times as marker_arm() has been called. Removing a probe
is done through marker_probe_unregister(); it will disarm the probe.
marker_synchronize_unregister() must be called before the end of the module exit
function to make sure there is no caller left using the probe. This, and the
fact that preemption is disabled around the probe call, make sure that probe
removal and module unload are safe. See the "Probe example" section below for a
sample probe module.
marker_synchronize_unregister() must be called between probe unregistration and
the first occurrence of
- the end of module exit function,
to make sure there is no caller left using the probe;
- the free of any resource used by the probes,
to make sure the probes won't be accessing invalid data.
This, and the fact that preemption is disabled around the probe call, make sure
that probe removal and module unload are safe. See the "Probe example" section
below for a sample probe module.
The marker mechanism supports inserting multiple instances of the same marker.
Markers can be put in inline functions, inlined static functions, and
@ -70,6 +75,20 @@ a printk warning which identifies the inconsistency:
"Format mismatch for probe probe_name (format), marker (format)"
Another way to use markers is to simply define the marker without generating any
function call to actually call into the marker. This is useful in combination
with tracepoint probes in a scheme like this :
void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk);
DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name,
"arg1 %u pid %d");
notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk)
{
struct marker *marker = &GET_MARKER(kernel_irq_entry);
/* write data to trace buffers ... */
}
* Probe / marker example

Просмотреть файл

@ -8,7 +8,7 @@ Context switch
By default, the switch_to arch function is called with the runqueue
locked. This is usually not a problem unless switch_to may need to
take the runqueue lock. This is usually due to a wake up operation in
the context switch. See include/asm-ia64/system.h for an example.
the context switch. See arch/ia64/include/asm/system.h for an example.
To request the scheduler call switch_to with the runqueue unlocked,
you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to
introduce a significant interrupt latency by adding the line
`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
unlocked context switches. This define also implies
`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
example.

Просмотреть файл

@ -3,28 +3,30 @@
Mathieu Desnoyers
This document introduces Linux Kernel Tracepoints and their use. It provides
examples of how to insert tracepoints in the kernel and connect probe functions
to them and provides some examples of probe functions.
This document introduces Linux Kernel Tracepoints and their use. It
provides examples of how to insert tracepoints in the kernel and
connect probe functions to them and provides some examples of probe
functions.
* Purpose of tracepoints
A tracepoint placed in code provides a hook to call a function (probe) that you
can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
"off" (no probe is attached). When a tracepoint is "off" it has no effect,
except for adding a tiny time penalty (checking a condition for a branch) and
space penalty (adding a few bytes for the function call at the end of the
instrumented function and adds a data structure in a separate section). When a
tracepoint is "on", the function you provide is called each time the tracepoint
is executed, in the execution context of the caller. When the function provided
ends its execution, it returns to the caller (continuing from the tracepoint
site).
A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty
(checking a condition for a branch) and space penalty (adding a few
bytes for the function call at the end of the instrumented function
and adding a data structure in a separate section). When a tracepoint
is "on", the function you provide is called each time the tracepoint
is executed, in the execution context of the caller. When the function
provided ends its execution, it returns to the caller (continuing from
the tracepoint site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters,
which prototypes are described in a tracepoint declaration placed in a header
file.
whose prototypes are described in a tracepoint declaration placed in a
header file.
They can be used for tracing and performance accounting.
@ -42,14 +44,16 @@ In include/trace/subsys.h :
#include <linux/tracepoint.h>
DEFINE_TRACE(subsys_eventname,
TPPTOTO(int firstarg, struct task_struct *p),
DECLARE_TRACE(subsys_eventname,
TPPROTO(int firstarg, struct task_struct *p),
TPARGS(firstarg, p));
In subsys/file.c (where the tracing statement must be added) :
#include <trace/subsys.h>
DEFINE_TRACE(subsys_eventname);
void somefct(void)
{
...
@ -61,31 +65,41 @@ Where :
- subsys_eventname is an identifier unique to your event
- subsys is the name of your subsystem.
- eventname is the name of the event to trace.
- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
called by this tracepoint.
- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
Connecting a function (probe) to a tracepoint is done by providing a probe
(function to call) for the specific tracepoint through
- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
function called by this tracepoint.
- TPARGS(firstarg, p) are the parameter names, the same as found in the
prototype.
Connecting a function (probe) to a tracepoint is done by providing a
probe (function to call) for the specific tracepoint through
register_trace_subsys_eventname(). Removing a probe is done through
unregister_trace_subsys_eventname(); it will remove the probe sure there is no
caller left using the probe when it returns. Probe removal is preempt-safe
because preemption is disabled around the probe call. See the "Probe example"
section below for a sample probe module.
unregister_trace_subsys_eventname(); it will remove the probe.
The tracepoint mechanism supports inserting multiple instances of the same
tracepoint, but a single definition must be made of a given tracepoint name over
all the kernel to make sure no type conflict will occur. Name mangling of the
tracepoints is done using the prototypes to make sure typing is correct.
Verification of probe type correctness is done at the registration site by the
compiler. Tracepoints can be put in inline functions, inlined static functions,
and unrolled loops as well as regular functions.
tracepoint_synchronize_unregister() must be called before the end of
the module exit function to make sure there is no caller left using
the probe. This, and the fact that preemption is disabled around the
probe call, make sure that probe removal and module unload are safe.
See the "Probe example" section below for a sample probe module.
The naming scheme "subsys_event" is suggested here as a convention intended
to limit collisions. Tracepoint names are global to the kernel: they are
considered as being the same whether they are in the core kernel image or in
modules.
The tracepoint mechanism supports inserting multiple instances of the
same tracepoint, but a single definition must be made of a given
tracepoint name over all the kernel to make sure no type conflict will
occur. Name mangling of the tracepoints is done using the prototypes
to make sure typing is correct. Verification of probe type correctness
is done at the registration site by the compiler. Tracepoints can be
put in inline functions, inlined static functions, and unrolled loops
as well as regular functions.
The naming scheme "subsys_event" is suggested here as a convention
intended to limit collisions. Tracepoint names are global to the
kernel: they are considered as being the same whether they are in the
core kernel image or in modules.
If the tracepoint has to be used in kernel modules, an
EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
used to export the defined tracepoints.
* Probe / tracepoint example

Просмотреть файл

@ -99,7 +99,7 @@ config GENERIC_IOMAP
bool
default y
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y

Просмотреть файл

@ -55,7 +55,6 @@
void build_cpu_to_node_map(void);
#define SD_CPU_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
@ -80,7 +79,6 @@ void build_cpu_to_node_map(void);
/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \

Просмотреть файл

@ -274,7 +274,7 @@ config GENERIC_CALIBRATE_DELAY
bool
default y
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y

Просмотреть файл

@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
bool
default y
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y

Просмотреть файл

@ -37,7 +37,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
/* sched_domains SD_NODE_INIT for SGI IP27 machines */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \

Просмотреть файл

@ -141,7 +141,7 @@ config GENERIC_NVRAM
bool
default y if PPC32
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y

Просмотреть файл

@ -7,7 +7,19 @@
#ifndef __ASSEMBLY__
extern void _mcount(void);
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
/*
 * Adjust the address of an mcount call site.
 *
 * The relocation of the mcount call site is the same as the address,
 * so @addr is handed back unchanged.
 */
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
	unsigned long call_site = addr;

	return call_site;
}
/*
 * Architecture-specific part of a dynamic ftrace record (only defined
 * when CONFIG_DYNAMIC_FTRACE is set). It holds a single module pointer.
 */
struct dyn_arch_ftrace {
/* NOTE(review): presumably the module that contains the call site - confirm against users */
struct module *mod;
};
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif

Просмотреть файл

@ -34,11 +34,19 @@ struct mod_arch_specific {
#ifdef __powerpc64__
unsigned int stubs_section; /* Index of stubs section in module */
unsigned int toc_section; /* What section is the TOC? */
#else
#ifdef CONFIG_DYNAMIC_FTRACE
unsigned long toc;
unsigned long tramp;
#endif
#else /* powerpc64 */
/* Indices of PLT sections within module. */
unsigned int core_plt_section;
unsigned int init_plt_section;
#ifdef CONFIG_DYNAMIC_FTRACE
unsigned long tramp;
#endif
#endif /* powerpc64 */
/* List of BUG addresses, source line numbers and filenames */
struct list_head bug_list;
@ -68,6 +76,12 @@ struct mod_arch_specific {
# endif /* MODULE */
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
# ifdef MODULE
asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
# endif /* MODULE */
#endif
struct exception_table_entry;
void sort_ex_table(struct exception_table_entry *start,

Просмотреть файл

@ -48,7 +48,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
/* sched_domains SD_NODE_INIT for PPC64 machines */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \

Просмотреть файл

@ -17,6 +17,7 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
ifdef CONFIG_DYNAMIC_FTRACE
# dynamic ftrace setup.

Просмотреть файл

@ -1162,39 +1162,17 @@ machine_check_in_rtas:
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
stwu r1,-48(r1)
stw r3, 12(r1)
stw r4, 16(r1)
stw r5, 20(r1)
stw r6, 24(r1)
mflr r3
stw r7, 28(r1)
mfcr r5
stw r8, 32(r1)
stw r9, 36(r1)
stw r10,40(r1)
stw r3, 44(r1)
stw r5, 8(r1)
subi r3, r3, MCOUNT_INSN_SIZE
.globl mcount_call
mcount_call:
bl ftrace_stub
nop
lwz r6, 8(r1)
lwz r0, 44(r1)
lwz r3, 12(r1)
/*
* It is required that _mcount on PPC32 must preserve the
* link register. But we have r0 to play with. We use r0
* to push the return address back to the caller of mcount
* into the ctr register, restore the link register and
* then jump back using the ctr register.
*/
mflr r0
mtctr r0
lwz r4, 16(r1)
mtcr r6
lwz r5, 20(r1)
lwz r6, 24(r1)
lwz r0, 52(r1)
lwz r7, 28(r1)
lwz r8, 32(r1)
lwz r0, 4(r1)
mtlr r0
lwz r9, 36(r1)
lwz r10,40(r1)
addi r1, r1, 48
bctr
_GLOBAL(ftrace_caller)

Просмотреть файл

@ -894,18 +894,6 @@ _GLOBAL(enter_prom)
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
/* Taken from output of objdump from lib64/glibc */
mflr r3
stdu r1, -112(r1)
std r3, 128(r1)
subi r3, r3, MCOUNT_INSN_SIZE
.globl mcount_call
mcount_call:
bl ftrace_stub
nop
ld r0, 128(r1)
mtlr r0
addi r1, r1, 112
blr
_GLOBAL(ftrace_caller)

Просмотреть файл

@ -9,22 +9,30 @@
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/ftrace.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(fmt , ...) do { } while (0)
#endif
static unsigned int ftrace_nop = 0x60000000;
static unsigned int ftrace_nop = PPC_NOP_INSTR;
#ifdef CONFIG_PPC32
# define GET_ADDR(addr) addr
#else
/* PowerPC64's functions are data that points to the functions */
# define GET_ADDR(addr) *(unsigned long *)addr
# define GET_ADDR(addr) (*(unsigned long *)addr)
#endif
@ -33,12 +41,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr)
return (int)(addr - ip);
}
unsigned char *ftrace_nop_replace(void)
static unsigned char *ftrace_nop_replace(void)
{
return (char *)&ftrace_nop;
}
unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static unsigned int op;
@ -68,49 +76,422 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
# define _ASM_PTR " .long "
#endif
int
static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
unsigned replaced;
unsigned old = *(unsigned *)old_code;
unsigned new = *(unsigned *)new_code;
int faulted = 0;
unsigned char replaced[MCOUNT_INSN_SIZE];
/*
* Note: Due to modules and __init, code can
* disappear and change, we need to protect against faulting
* as well as code changing.
* as well as code changing. We do this by using the
* probe_kernel_* functions.
*
* No real locking needed, this code is run through
* kstop_machine.
* kstop_machine, or before SMP starts.
*/
asm volatile (
"1: lwz %1, 0(%2)\n"
" cmpw %1, %5\n"
" bne 2f\n"
" stwu %3, 0(%2)\n"
"2:\n"
".section .fixup, \"ax\"\n"
"3: li %0, 1\n"
" b 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
_ASM_ALIGN "\n"
_ASM_PTR "1b, 3b\n"
".previous"
: "=r"(faulted), "=r"(replaced)
: "r"(ip), "r"(new),
"0"(faulted), "r"(old)
: "memory");
if (replaced != old && replaced != new)
faulted = 2;
/* read the text we want to modify */
if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
if (!faulted)
flush_icache_range(ip, ip + 8);
/* Make sure it is what we expect it to be */
if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
return -EINVAL;
return faulted;
/* replace the text with the new text */
if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
return -EPERM;
flush_icache_range(ip, ip + 8);
return 0;
}
/*
* Helper functions that are the same for both PPC64 and PPC32.
*/
/*
 * Ask create_branch() whether a branch located at @ip can target @addr.
 *
 * Returns whatever create_branch() returns for that pair (with flags 0).
 * NOTE(review): a zero result presumably means the offset cannot be
 * encoded - confirm against create_branch().
 */
static int test_24bit_addr(unsigned long ip, unsigned long addr)
{
	unsigned int *site = (unsigned int *)ip;

	/* use the create_branch to verify that this offset can be branched */
	return create_branch(site, addr, 0);
}
/*
 * Return 1 if @op has the fixed bits of a relative "bl" instruction,
 * 0 otherwise.
 *
 * Only the top six bits and the low two bits are compared; the 24-bit
 * displacement field in between is ignored.
 */
static int is_bl_op(unsigned int op)
{
	const unsigned int fixed_bits = op & 0xfc000003;

	if (fixed_bits != 0x48000001)
		return 0;

	return 1;
}
/*
 * Compute the destination of the relative branch instruction @op that
 * is located at address @ip.
 *
 * The displacement is taken from the bits selected by mask 0x03fffffc,
 * sign-extended from bit 25, and added to @ip.
 */
static unsigned long find_bl_target(unsigned long ip, unsigned int op)
{
	/* plain automatic variable: nothing needs to persist across calls */
	int offset = (int)(op & 0x03fffffc);

	/* make it signed: a set bit 25 means the 26-bit value is negative */
	if (offset & 0x02000000)
		offset -= 0x04000000;

	return ip + (long)offset;
}
#ifdef CONFIG_PPC64
/*
 * Disable an out-of-range (module) mcount call site on PPC64.
 *
 * The instruction at rec->ip must be a "bl" to a module trampoline.
 * The trampoline is decoded to find the TOC slot it loads its target
 * from, and the pointer stored there must equal GET_ADDR(addr).  The
 * instruction following the call must be "ld r2,40(r1)".  If all of
 * that holds, the "bl" is replaced with "b +8" so that both the call
 * and the TOC restore are skipped.
 *
 * @mod:  module owning the call site (its arch.toc is used)
 * @rec:  record describing the call site
 * @addr: function the call site is expected to reach
 *
 * Returns 0 on success, -EFAULT if text could not be read, -EINVAL if
 * anything did not look as expected, -EPERM if the write failed.
 */
static int
__ftrace_make_nop(struct module *mod,
		  struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned int op;
	unsigned int jmp[5];
	unsigned long ptr;
	unsigned long ip = rec->ip;
	unsigned long tramp;
	int offset;

	/* read where this goes */
	if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
		return -EFAULT;

	/* Make sure that this is still a 24bit jump */
	if (!is_bl_op(op)) {
		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
		return -EINVAL;
	}

	/* lets find where the pointer goes */
	tramp = find_bl_target(ip, op);

	/*
	 * On PPC64 the trampoline looks like:
	 * 0x3d, 0x82, 0x00, 0x00,    addis   r12,r2, <high>
	 * 0x39, 0x8c, 0x00, 0x00,    addi    r12,r12, <low>
	 *   Where the bytes 2,3,6 and 7 make up the 32bit offset
	 *   to the TOC that holds the pointer to jump to.
	 * 0xf8, 0x41, 0x00, 0x28,    std     r2,40(r1)
	 * 0xe9, 0x6c, 0x00, 0x20,    ld      r11,32(r12)
	 *   The actual address is 32 bytes from the offset
	 *   into the TOC.
	 * 0xe8, 0x4c, 0x00, 0x28,    ld      r2,40(r12)
	 */

	DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);

	/* Find where the trampoline jumps to */
	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
		printk(KERN_ERR "Failed to read %lx\n", tramp);
		return -EFAULT;
	}

	DEBUGP(" %08x %08x", jmp[0], jmp[1]);

	/* verify that this is what we expect it to be */
	if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
	    ((jmp[1] & 0xffff0000) != 0x398c0000) ||
	    (jmp[2] != 0xf8410028) ||
	    (jmp[3] != 0xe96c0020) ||
	    (jmp[4] != 0xe84c0028)) {
		printk(KERN_ERR "Not a trampoline\n");
		return -EINVAL;
	}

	/*
	 * addi adds a sign-extended 16-bit immediate, so the low half
	 * has to be treated as signed and added to the shifted high
	 * half.  OR-ing in the zero-extended low half gives an offset
	 * that is 0x10000 too large whenever bit 15 of <low> is set.
	 */
	offset = ((unsigned)((unsigned short)jmp[0]) << 16) +
		(int)((short)jmp[1]);

	DEBUGP(" %x ", offset);

	/* get the address this jumps to */
	tramp = mod->arch.toc + offset + 32;
	DEBUGP("toc: %lx", tramp);

	if (probe_kernel_read(jmp, (void *)tramp, 8)) {
		printk(KERN_ERR "Failed to read %lx\n", tramp);
		return -EFAULT;
	}

	DEBUGP(" %08x %08x\n", jmp[0], jmp[1]);

	/* assemble the 64-bit pointer from the two words just read */
	ptr = ((unsigned long)jmp[0] << 32) + jmp[1];

	/* This should match what was called */
	if (ptr != GET_ADDR(addr)) {
		printk(KERN_ERR "addr does not match %lx\n", ptr);
		return -EINVAL;
	}

	/*
	 * We want to nop the line, but the next line is
	 *  0xe8, 0x41, 0x00, 0x28   ld r2,40(r1)
	 * This needs to be turned to a nop too.
	 */
	if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (op != 0xe8410028) {
		printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
		return -EINVAL;
	}

	/*
	 * Milton Miller pointed out that we can not blindly do nops.
	 * If a task was preempted when calling a trace function,
	 * the nops will remove the way to restore the TOC in r2
	 * and the r2 TOC will get corrupted.
	 */

	/*
	 * Replace:
	 *   bl <tramp>  <==== will be replaced with "b 1f"
	 *   ld r2,40(r1)
	 *  1:
	 */
	op = 0x48000008;	/* b +8 */

	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
		return -EPERM;

	flush_icache_range(ip, ip + 8);

	return 0;
}
#else /* !PPC64 */
/*
 * Disable an out-of-range (module) mcount call site on PPC32.
 *
 * The instruction at rec->ip must be a "bl" to a module trampoline of
 * the lis/addi/mtctr/bctr form.  The address the trampoline loads is
 * reconstructed from the two immediates and must equal @addr; if so,
 * the "bl" is overwritten with PPC_NOP_INSTR.
 *
 * @mod:  module owning the call site (not otherwise used here)
 * @rec:  record describing the call site
 * @addr: function the call site is expected to reach
 *
 * Returns 0 on success, -EFAULT if text could not be read, -EINVAL if
 * anything did not look as expected, -EPERM if the write failed.
 */
static int
__ftrace_make_nop(struct module *mod,
		  struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned int op;
	unsigned int jmp[4];
	unsigned long ip = rec->ip;
	unsigned long tramp;

	if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure that this is still a 24bit jump */
	if (!is_bl_op(op)) {
		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
		return -EINVAL;
	}

	/* lets find where the pointer goes */
	tramp = find_bl_target(ip, op);

	/*
	 * On PPC32 the trampoline looks like:
	 *  0x3d, 0x60, 0x00, 0x00  lis r11,sym@ha
	 *  0x39, 0x6b, 0x00, 0x00  addi r11,r11,sym@l
	 *  0x7d, 0x69, 0x03, 0xa6  mtctr r11
	 *  0x4e, 0x80, 0x04, 0x20  bctr
	 */

	DEBUGP("ip:%lx jumps to %lx", ip, tramp);

	/* Find where the trampoline jumps to */
	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
		printk(KERN_ERR "Failed to read %lx\n", tramp);
		return -EFAULT;
	}

	DEBUGP(" %08x %08x ", jmp[0], jmp[1]);

	/* verify that this is what we expect it to be */
	if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
	    ((jmp[1] & 0xffff0000) != 0x396b0000) ||
	    (jmp[2] != 0x7d6903a6) ||
	    (jmp[3] != 0x4e800420)) {
		printk(KERN_ERR "Not a trampoline\n");
		return -EINVAL;
	}

	/*
	 * Rebuild sym from the @ha/@l pair: when the low half has bit 15
	 * set, addi subtracts, so the high half was biased up by one and
	 * 0x10000 has to be taken off again.
	 */
	tramp = (jmp[1] & 0xffff) |
		((jmp[0] & 0xffff) << 16);
	if (tramp & 0x8000)
		tramp -= 0x10000;

	/* tramp is an unsigned long, so it must be printed with %lx */
	DEBUGP(" %lx ", tramp);

	if (tramp != addr) {
		printk(KERN_ERR
		       "Trampoline location %08lx does not match addr\n",
		       tramp);
		return -EINVAL;
	}

	op = PPC_NOP_INSTR;

	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
		return -EPERM;

	flush_icache_range(ip, ip + 8);

	return 0;
}
#endif /* PPC64 */
/*
 * Turn the mcount call at rec->ip into a no-op.
 *
 * @mod:  module the call site lives in, or NULL
 * @rec:  record describing the call site; rec->arch.mod is filled in
 *        from @mod the first time an out-of-range site is handled
 * @addr: function the call site currently calls
 *
 * Returns the result of ftrace_modify_code() for sites that can reach
 * @addr directly, otherwise the result of __ftrace_make_nop().
 * -EFAULT is returned when an out-of-range site has no module at all,
 * -EINVAL when @mod disagrees with the module already recorded.
 */
int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;

	/*
	 * A target within 24-bit branch range was called directly, so
	 * only the call site itself has to be rewritten.  Anything
	 * further away went through a trampoline.
	 */
	if (test_24bit_addr(ip, addr)) {
		unsigned char *call_insn = ftrace_call_replace(ip, addr);
		unsigned char *nop_insn = ftrace_nop_replace();

		return ftrace_modify_code(ip, call_insn, nop_insn);
	}

	/*
	 * Only modules make out of range calls.  Settle on one module
	 * pointer: either the one cached in the record or the one
	 * handed in, and they must not contradict each other.
	 */
	if (!rec->arch.mod) {
		if (!mod) {
			printk(KERN_ERR "No module loaded addr=%lx\n",
			       addr);
			return -EFAULT;
		}
		/* first time through: remember the module */
		rec->arch.mod = mod;
	} else if (!mod) {
		/* nothing passed in: fall back to the cached module */
		mod = rec->arch.mod;
	} else if (mod != rec->arch.mod) {
		printk(KERN_ERR
		       "Record mod %p not equal to passed in mod %p\n",
		       rec->arch.mod, mod);
		return -EINVAL;
	}

	return __ftrace_make_nop(mod, rec, addr);
}
#ifdef CONFIG_PPC64
/*
 * Enable an out-of-range (module) mcount call site on PPC64.
 *
 * The two instructions at rec->ip must be either two nops or the
 * "b +8; ld r2,40(r1)" pair.  They are rewritten to a "bl" to the
 * module's ftrace trampoline followed by "ld r2,40(r1)".
 *
 * Returns 0 on success, -EFAULT if the site could not be read,
 * -EINVAL on unexpected contents, a missing trampoline or an
 * unencodable branch, -EPERM if the write failed.
 */
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long site = rec->ip;
	unsigned int insns[2];
	int is_skip_pair;
	int is_nop_pair;

	/* fetch both instructions of the call site */
	if (probe_kernel_read(insns, (void *)site, MCOUNT_INSN_SIZE * 2))
		return -EFAULT;

	/* "b +8; ld r2,40(r1)" */
	is_skip_pair = (insns[0] == 0x48000008) && (insns[1] == 0xe8410028);
	/* two nops */
	is_nop_pair = (insns[0] == PPC_NOP_INSTR) &&
		      (insns[1] == PPC_NOP_INSTR);

	if (!is_skip_pair && !is_nop_pair) {
		printk(KERN_ERR "Expected NOPs but have %x %x\n", insns[0], insns[1]);
		return -EINVAL;
	}

	/* without a trampoline to ftrace_caller there is nothing to call */
	if (!rec->arch.mod->arch.tramp) {
		printk(KERN_ERR "No ftrace trampoline\n");
		return -EINVAL;
	}

	/* first slot: branch-and-link to the trampoline */
	insns[0] = create_branch((unsigned int *)site,
				 rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
	if (!insns[0]) {
		printk(KERN_ERR "REL24 out of range!\n");
		return -EINVAL;
	}

	/* second slot: ld r2,40(r1) */
	insns[1] = 0xe8410028;

	DEBUGP("write to %lx\n", rec->ip);

	if (probe_kernel_write((void *)site, insns, MCOUNT_INSN_SIZE * 2))
		return -EPERM;

	flush_icache_range(site, site + 8);

	return 0;
}
#else
/*
 * Enable an out-of-range (module) mcount call site on PPC32.
 *
 * The instruction at rec->ip must be a nop; it is replaced by a "bl"
 * to the module's ftrace trampoline.
 *
 * Returns 0 on success, -EFAULT if the site could not be read,
 * -EINVAL on unexpected contents, a missing trampoline or an
 * unencodable branch, -EPERM if the write failed.
 */
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long site = rec->ip;
	unsigned int insn;

	/* fetch the instruction currently at the call site */
	if (probe_kernel_read(&insn, (void *)site, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* only a nop may be turned into a call */
	if (insn != PPC_NOP_INSTR) {
		printk(KERN_ERR "Expected NOP but have %x\n", insn);
		return -EINVAL;
	}

	/* without a trampoline to ftrace_caller there is nothing to call */
	if (!rec->arch.mod->arch.tramp) {
		printk(KERN_ERR "No ftrace trampoline\n");
		return -EINVAL;
	}

	/* build the branch-and-link to the trampoline */
	insn = create_branch((unsigned int *)site,
			     rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
	if (!insn) {
		printk(KERN_ERR "REL24 out of range!\n");
		return -EINVAL;
	}

	DEBUGP("write to %lx\n", rec->ip);

	if (probe_kernel_write((void *)site, &insn, MCOUNT_INSN_SIZE))
		return -EPERM;

	flush_icache_range(site, site + 8);

	return 0;
}
#endif /* CONFIG_PPC64 */
/*
 * Turn the nop at rec->ip back into a call to @addr.
 *
 * Sites that can reach @addr with a direct relative branch are patched
 * through ftrace_modify_code(); everything else is a module call site
 * and goes through __ftrace_make_call(), which needs rec->arch.mod to
 * have been set already.
 *
 * Returns the result of whichever helper was used, or -EINVAL when an
 * out-of-range site has no module recorded.
 */
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long site = rec->ip;
	unsigned char *nop_insn;
	unsigned char *call_insn;

	if (!test_24bit_addr(site, addr)) {
		/*
		 * More than 24 bits away: the call must go through a
		 * module trampoline, and since we are converting from
		 * a nop the module had better be known by now.
		 */
		if (!rec->arch.mod) {
			printk(KERN_ERR "No module loaded\n");
			return -EINVAL;
		}

		return __ftrace_make_call(rec, addr);
	}

	/* within range: just update the call site */
	nop_insn = ftrace_nop_replace();
	call_insn = ftrace_call_replace(site, addr);

	return ftrace_modify_code(site, nop_insn, call_insn);
}
int ftrace_update_ftrace_func(ftrace_func_t func)
@ -128,10 +509,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
/*
 * Architecture hook for dynamic ftrace initialisation.
 *
 * @data: pointer to an unsigned long owned by the caller; 0 is stored
 *        through it before returning.
 *
 * Always returns 0.
 *
 * NOTE(review): this body contains both a ftrace_mcount_set(data) call
 * and an explicit "*p = 0" store.  They look like the removed and added
 * sides of the same change - confirm which of the two is meant to stay.
 */
int __init ftrace_dyn_arch_init(void *data)
{
/* This is running in kstop_machine */
/* caller expects data to be zero */
unsigned long *p = data;
ftrace_mcount_set(data);
*p = 0;
return 0;
}

Просмотреть файл

@ -69,10 +69,15 @@ void cpu_idle(void)
smp_mb();
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
/* check again after disabling irqs */
if (!need_resched() && !cpu_should_die())
ppc_md.power_save();
start_critical_timings();
local_irq_enable();
set_thread_flag(TIF_POLLING_NRFLAG);

Просмотреть файл

@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/cache.h>
#include <linux/bug.h>
#include <linux/sort.h>
@ -53,6 +54,9 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
r_addend = rela[i].r_addend;
}
#ifdef CONFIG_DYNAMIC_FTRACE
_count_relocs++; /* add one for ftrace_caller */
#endif
return _count_relocs;
}
@ -306,5 +310,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
return -ENOEXEC;
}
}
#ifdef CONFIG_DYNAMIC_FTRACE
module->arch.tramp =
do_plt_call(module->module_core,
(unsigned long)ftrace_caller,
sechdrs, module);
#endif
return 0;
}

Просмотреть файл

@ -20,6 +20,7 @@
#include <linux/moduleloader.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/bug.h>
#include <asm/module.h>
#include <asm/firmware.h>
@ -163,6 +164,11 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
}
}
#ifdef CONFIG_DYNAMIC_FTRACE
/* make the trampoline to the ftrace_caller */
relocs++;
#endif
DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
return relocs * sizeof(struct ppc64_stub_entry);
}
@ -441,5 +447,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
}
#ifdef CONFIG_DYNAMIC_FTRACE
me->arch.toc = my_r2(sechdrs, me);
me->arch.tramp = stub_for_addr(sechdrs,
(unsigned long)ftrace_caller,
me);
#endif
return 0;
}

Просмотреть файл

@ -6,6 +6,9 @@ ifeq ($(CONFIG_PPC64),y)
EXTRA_CFLAGS += -mno-minimal-toc
endif
CFLAGS_REMOVE_code-patching.o = -pg
CFLAGS_REMOVE_feature-fixups.o = -pg
obj-y := string.o alloc.o \
checksum_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o

Просмотреть файл

@ -212,7 +212,7 @@ static void update_cpu_core_map(void)
cpu_core_map[cpu] = cpu_coregroup_map(cpu);
}
void arch_update_cpu_topology(void)
int arch_update_cpu_topology(void)
{
struct tl_info *info = tl_info;
struct sys_device *sysdev;
@ -221,7 +221,7 @@ void arch_update_cpu_topology(void)
if (!machine_has_topology) {
update_cpu_core_map();
topology_update_polarization_simple();
return;
return 0;
}
stsi(info, 15, 1, 2);
tl_to_cores(info);
@ -230,6 +230,7 @@ void arch_update_cpu_topology(void)
sysdev = get_cpu_sysdev(cpu);
kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
}
return 1;
}
static void topology_work_fn(struct work_struct *work)

Просмотреть файл

@ -5,7 +5,6 @@
/* sched_domains SD_NODE_INIT for sh machines */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \

Просмотреть файл

@ -11,21 +11,21 @@ extern int get_signals(void);
extern void block_signals(void);
extern void unblock_signals(void);
#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
(flags) = get_signals(); } while(0)
#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
set_signals(flags); } while(0)
#define local_irq_save(flags) do { local_save_flags(flags); \
local_irq_disable(); } while(0)
#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
raw_local_irq_disable(); } while(0)
#define local_irq_enable() unblock_signals()
#define local_irq_disable() block_signals()
#define raw_local_irq_enable() unblock_signals()
#define raw_local_irq_disable() block_signals()
#define irqs_disabled() \
({ \
unsigned long flags; \
local_save_flags(flags); \
raw_local_save_flags(flags); \
(flags == 0); \
})

Просмотреть файл

@ -29,11 +29,14 @@ config X86
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select USER_STACKTRACE_SUPPORT
config ARCH_DEFCONFIG
string
@ -238,6 +241,16 @@ config X86_HAS_BOOT_CPU_ID
def_bool y
depends on X86_VOYAGER
config SPARSE_IRQ
bool "Support sparse irq numbering"
depends on (PCI_MSI || HT_IRQ) && SMP
default y
help
This enables support for sparse irq numbering, especially for MSI/MSI-X.
You may need it if you have many cards that support MSI-X installed.
If you don't know what to do here, say Y.
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
@ -367,10 +380,10 @@ config X86_RDC321X
as R-8610-(G).
If you don't have one of these chips, you should say N here.
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
def_bool y
prompt "Single-depth WCHAN output"
depends on X86_32
depends on X86
help
Calculate simpler /proc/<PID>/wchan values. If this option
is disabled then wchan values will recurse back to the
@ -465,10 +478,6 @@ config X86_CYCLONE_TIMER
def_bool y
depends on X86_GENERICARCH
config ES7000_CLUSTERED_APIC
def_bool y
depends on SMP && X86_ES7000 && MPENTIUMIII
source "arch/x86/Kconfig.cpu"
config HPET_TIMER
@ -1632,13 +1641,6 @@ config APM_ALLOW_INTS
many of the newer IBM Thinkpads. If you experience hangs when you
suspend, try setting this to Y. Otherwise, say N.
config APM_REAL_MODE_POWER_OFF
bool "Use real mode APM BIOS call to power off"
help
Use real mode APM BIOS calls to switch off the computer. This is
a work-around for a number of buggy BIOSes. Switch this option on if
your computer crashes instead of powering off properly.
endif # APM
source "arch/x86/kernel/cpu/cpufreq/Kconfig"

Просмотреть файл

@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
config X86_DS
def_bool X86_PTRACE_BTS
depends on X86_DEBUGCTLMSR
select HAVE_HW_BRANCH_TRACER
config X86_PTRACE_BTS
bool "Branch Trace Store"

Просмотреть файл

@ -186,14 +186,10 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config MMIOTRACE_HOOKS
bool
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on DEBUG_KERNEL && PCI
select TRACING
select MMIOTRACE_HOOKS
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap

Просмотреть файл

@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
static inline void init_apic_mappings(void) { }
static inline void disable_local_APIC(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */

Просмотреть файл

@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
#define INT_DELIVERY_MODE (dest_Fixed)
#define INT_DEST_MODE (0) /* phys delivery to target proc */
#define NO_BALANCE_IRQ (0)
#define WAKE_SECONDARY_VIA_INIT
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{

Просмотреть файл

@ -7,13 +7,12 @@
*
* It manages:
* - per-thread and per-cpu allocation of BTS and PEBS
* - buffer memory allocation (optional)
* - buffer overflow handling
* - buffer overflow handling (to be done)
* - buffer access
*
* It assumes:
* - get_task_struct on all parameter tasks
* - current is allowed to trace parameter tasks
* - get_task_struct on all traced tasks
* - current is allowed to trace tasks
*
*
* Copyright (C) 2007-2008 Intel Corporation.
@ -26,11 +25,18 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/err.h>
#ifdef CONFIG_X86_DS
struct task_struct;
struct ds_tracer;
struct bts_tracer;
struct pebs_tracer;
typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
/*
* Request BTS or PEBS
@ -38,60 +44,62 @@ struct task_struct;
* Due to alignement constraints, the actual buffer may be slightly
* smaller than the requested or provided buffer.
*
* Returns 0 on success; -Eerrno otherwise
* Returns a pointer to a tracer structure on success, or
* ERR_PTR(errcode) on failure.
*
* The interrupt threshold is independent from the overflow callback
* to allow users to use their own overflow interrupt handling mechanism.
*
* task: the task to request recording for;
* NULL for per-cpu recording on the current cpu
* base: the base pointer for the (non-pageable) buffer;
* NULL if buffer allocation requested
* size: the size of the requested or provided buffer
* size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow;
* NULL if cyclic buffer requested
* th: the interrupt threshold in records from the end of the buffer;
* -1 if no interrupt threshold is requested.
*/
typedef void (*ds_ovfl_callback_t)(struct task_struct *);
extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
ds_ovfl_callback_t ovfl);
extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
ds_ovfl_callback_t ovfl);
extern struct bts_tracer *ds_request_bts(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl, size_t th);
extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th);
/*
* Release BTS or PEBS resources
*
* Frees buffers allocated on ds_request.
*
* Returns 0 on success; -Eerrno otherwise
*
* task: the task to release resources for;
* NULL to release resources for the current cpu
* tracer: the tracer handle returned from ds_request_~()
*/
extern int ds_release_bts(struct task_struct *task);
extern int ds_release_pebs(struct task_struct *task);
extern int ds_release_bts(struct bts_tracer *tracer);
extern int ds_release_pebs(struct pebs_tracer *tracer);
/*
* Return the (array) index of the write pointer.
* Get the (array) index of the write pointer.
* (assuming an array of BTS/PEBS records)
*
* Returns -Eerrno on error
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* pos (out): if not NULL, will hold the result
* tracer: the tracer handle returned from ds_request_~()
* pos (out): will hold the result
*/
extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
/*
* Return the (array) index one record beyond the end of the array.
* Get the (array) index one record beyond the end of the array.
* (assuming an array of BTS/PEBS records)
*
* Returns -Eerrno on error
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* pos (out): if not NULL, will hold the result
* tracer: the tracer handle returned from ds_request_~()
* pos (out): will hold the result
*/
extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
/*
* Provide a pointer to the BTS/PEBS record at parameter index.
@ -102,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
*
* Returns the size of a single record on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* tracer: the tracer handle returned from ds_request_~()
* index: the index of the requested record
* record (out): pointer to the requested record
*/
extern int ds_access_bts(struct task_struct *task,
extern int ds_access_bts(struct bts_tracer *tracer,
size_t index, const void **record);
extern int ds_access_pebs(struct task_struct *task,
extern int ds_access_pebs(struct pebs_tracer *tracer,
size_t index, const void **record);
/*
@ -129,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
*
* Returns the number of bytes written or -Eerrno.
*
* task: the task to access;
* NULL to access the current cpu
* tracer: the tracer handle returned from ds_request_~()
* buffer: the buffer to write
* size: the size of the buffer
*/
extern int ds_write_bts(struct task_struct *task,
extern int ds_write_bts(struct bts_tracer *tracer,
const void *buffer, size_t size);
extern int ds_write_pebs(struct task_struct *task,
extern int ds_write_pebs(struct pebs_tracer *tracer,
const void *buffer, size_t size);
/*
* Same as ds_write_bts/pebs, but omit ownership checks.
*
* This is needed to have some other task than the owner of the
* BTS/PEBS buffer or the parameter task itself write into the
* respective buffer.
*/
extern int ds_unchecked_write_bts(struct task_struct *task,
const void *buffer, size_t size);
extern int ds_unchecked_write_pebs(struct task_struct *task,
const void *buffer, size_t size);
/*
* Reset the write pointer of the BTS/PEBS buffer.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* tracer: the tracer handle returned from ds_request_~()
*/
extern int ds_reset_bts(struct task_struct *task);
extern int ds_reset_pebs(struct task_struct *task);
extern int ds_reset_bts(struct bts_tracer *tracer);
extern int ds_reset_pebs(struct pebs_tracer *tracer);
/*
* Clear the BTS/PEBS buffer and reset the write pointer.
@ -168,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* tracer: the tracer handle returned from ds_request_~()
*/
extern int ds_clear_bts(struct task_struct *task);
extern int ds_clear_pebs(struct task_struct *task);
extern int ds_clear_bts(struct bts_tracer *tracer);
extern int ds_clear_pebs(struct pebs_tracer *tracer);
/*
* Provide the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* tracer: the tracer handle returned from ds_request_pebs()
* value (out): the counter reset value
*/
extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
/*
* Set the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* tracer: the tracer handle returned from ds_request_pebs()
* value: the new counter reset value
*/
extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
/*
* Initialization
@ -207,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
/*
* The DS context - part of struct thread_struct.
*/
#define MAX_SIZEOF_DS (12 * 8)
struct ds_context {
/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
unsigned char *ds;
unsigned char ds[MAX_SIZEOF_DS];
/* the owner of the BTS and PEBS configuration, respectively */
struct task_struct *owner[2];
/* buffer overflow notification function for BTS and PEBS */
ds_ovfl_callback_t callback[2];
/* the original buffer address */
void *buffer[2];
/* the number of allocated pages for on-request allocated buffers */
unsigned int pages[2];
struct ds_tracer *owner[2];
/* use count */
unsigned long count;
/* a pointer to the context location inside the thread_struct

Просмотреть файл

@ -8,7 +8,9 @@ enum reboot_type {
BOOT_BIOS = 'b',
#endif
BOOT_ACPI = 'a',
BOOT_EFI = 'e'
BOOT_EFI = 'e',
BOOT_CF9 = 'p',
BOOT_CF9_COND = 'q',
};
extern enum reboot_type reboot_type;

Просмотреть файл

@ -9,31 +9,27 @@ static inline int apic_id_registered(void)
return (1);
}
static inline cpumask_t target_cpus(void)
static inline cpumask_t target_cpus_cluster(void)
{
#if defined CONFIG_ES7000_CLUSTERED_APIC
return CPU_MASK_ALL;
#else
return cpumask_of_cpu(smp_processor_id());
#endif
}
#if defined CONFIG_ES7000_CLUSTERED_APIC
#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
#define INT_DELIVERY_MODE (dest_LowestPrio)
#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */
#define NO_BALANCE_IRQ (1)
#undef WAKE_SECONDARY_VIA_INIT
#define WAKE_SECONDARY_VIA_MIP
#else
static inline cpumask_t target_cpus(void)
{
return cpumask_of_cpu(smp_processor_id());
}
#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
#define NO_BALANCE_IRQ_CLUSTER (1)
#define APIC_DFR_VALUE (APIC_DFR_FLAT)
#define INT_DELIVERY_MODE (dest_Fixed)
#define INT_DEST_MODE (0) /* phys delivery to target procs */
#define NO_BALANCE_IRQ (0)
#undef APIC_DEST_LOGICAL
#define APIC_DEST_LOGICAL 0x0
#define WAKE_SECONDARY_VIA_INIT
#endif
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
/*
 * Cluster-mode counterpart of init_apic_ldr(): write
 * APIC_DFR_VALUE_CLUSTER to APIC_DFR, then write the value returned by
 * calculate_ldr() for the current CPU to APIC_LDR.
 */
static inline void init_apic_ldr_cluster(void)
{
	const int this_cpu = smp_processor_id();

	/* destination format first, logical destination second */
	apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
	apic_write(APIC_LDR, calculate_ldr(this_cpu));
}
static inline void init_apic_ldr(void)
{
unsigned long val;
@ -70,10 +76,6 @@ static inline void init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
#ifndef CONFIG_X86_GENERICARCH
extern void enable_apic_mode(void);
#endif
extern int apic_version [MAX_APICS];
static inline void setup_apic_routing(void)
{
@ -144,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
return (1);
}
static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
{
int num_bits_set;
int cpus_found = 0;
@ -154,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
num_bits_set = cpus_weight(cpumask);
/* Return id to all */
if (num_bits_set == NR_CPUS)
#if defined CONFIG_ES7000_CLUSTERED_APIC
return 0xFF;
#else
return cpu_to_logical_apicid(0);
#endif
/*
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
@ -171,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
printk ("%s: Not a valid mask!\n", __func__);
#if defined CONFIG_ES7000_CLUSTERED_APIC
return 0xFF;
#else
}
apicid = new_apicid;
cpus_found++;
}
cpu++;
}
return apicid;
}
static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
{
int num_bits_set;
int cpus_found = 0;
int cpu;
int apicid;
num_bits_set = cpus_weight(cpumask);
/* Return id to all */
if (num_bits_set == NR_CPUS)
return cpu_to_logical_apicid(0);
/*
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
*/
cpu = first_cpu(cpumask);
apicid = cpu_to_logical_apicid(cpu);
while (cpus_found < num_bits_set) {
if (cpu_isset(cpu, cpumask)) {
int new_apicid = cpu_to_logical_apicid(cpu);
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
printk ("%s: Not a valid mask!\n", __func__);
return cpu_to_logical_apicid(0);
#endif
}
apicid = new_apicid;
cpus_found++;

Просмотреть файл

@ -1,36 +1,12 @@
#ifndef __ASM_ES7000_WAKECPU_H
#define __ASM_ES7000_WAKECPU_H
/*
* This file copes with machines that wakeup secondary CPUs by the
* INIT, INIT, STARTUP sequence.
*/
#ifdef CONFIG_ES7000_CLUSTERED_APIC
#define WAKE_SECONDARY_VIA_MIP
#else
#define WAKE_SECONDARY_VIA_INIT
#endif
#ifdef WAKE_SECONDARY_VIA_MIP
extern int es7000_start_cpu(int cpu, unsigned long eip);
static inline int
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
int boot_error = 0;
boot_error = es7000_start_cpu(phys_apicid, start_eip);
return boot_error;
}
#endif
#define TRAMPOLINE_LOW phys_to_virt(0x467)
#define TRAMPOLINE_HIGH phys_to_virt(0x469)
#define boot_cpu_apicid boot_cpu_physical_apicid
#define TRAMPOLINE_PHYS_LOW 0x467
#define TRAMPOLINE_PHYS_HIGH 0x469
static inline void wait_for_init_deassert(atomic_t *deassert)
{
#ifdef WAKE_SECONDARY_VIA_INIT
#ifndef CONFIG_ES7000_CLUSTERED_APIC
while (!atomic_read(deassert))
cpu_relax();
#endif
@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
#define inquire_remote_apic(apicid) do { \
if (apic_verbosity >= APIC_DEBUG) \
__inquire_remote_apic(apicid); \
} while (0)
extern void __inquire_remote_apic(int apicid);
/*
 * Forward to __inquire_remote_apic() for @apicid, but only when
 * apic_verbosity is at least APIC_DEBUG; otherwise do nothing.
 */
static inline void inquire_remote_apic(int apicid)
{
	/* below debug verbosity: stay quiet */
	if (apic_verbosity < APIC_DEBUG)
		return;

	__inquire_remote_apic(apicid);
}
#endif /* __ASM_MACH_WAKECPU_H */

Просмотреть файл

@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
*/
return addr - 1;
}
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
struct dyn_arch_ftrace {
/* No extra data needed for x86 */
};
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#ifndef __ASSEMBLY__
/*
* Stack of return addresses for functions
* of a thread.
* Used in struct thread_info
*/
/* One entry of the per-thread stack of function return addresses. */
struct ftrace_ret_stack {
/* presumably the return address saved for this call - TODO confirm */
unsigned long ret;
/* presumably the address of the traced function - TODO confirm */
unsigned long func;
/* presumably a timestamp taken when the function was entered; units not visible here */
unsigned long long calltime;
};
/*
* Primary handler of a function return.
* It relies on ftrace_return_to_handler.
* Defined in entry32.S
*/
extern void return_to_handler(void);
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#endif /* _ASM_X86_FTRACE_H */

Просмотреть файл

@ -2,6 +2,7 @@
#define _ASM_X86_GENAPIC_32_H
#include <asm/mpspec.h>
#include <asm/atomic.h>
/*
* Generic APIC driver interface.
@ -65,6 +66,14 @@ struct genapic {
void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector);
#endif
int (*wakeup_cpu)(int apicid, unsigned long start_eip);
int trampoline_phys_low;
int trampoline_phys_high;
void (*wait_for_init_deassert)(atomic_t *deassert);
void (*smp_callin_clear_local_apic)(void);
void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
void (*inquire_remote_apic)(int apicid);
};
#define APICFUNC(x) .x = x,
@ -105,16 +114,24 @@ struct genapic {
APICFUNC(get_apic_id) \
.apic_id_mask = APIC_ID_MASK, \
APICFUNC(cpu_mask_to_apicid) \
APICFUNC(vector_allocation_domain) \
APICFUNC(vector_allocation_domain) \
APICFUNC(acpi_madt_oem_check) \
IPIFUNC(send_IPI_mask) \
IPIFUNC(send_IPI_allbutself) \
IPIFUNC(send_IPI_all) \
APICFUNC(enable_apic_mode) \
APICFUNC(phys_pkg_id) \
.trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
.trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
APICFUNC(wait_for_init_deassert) \
APICFUNC(smp_callin_clear_local_apic) \
APICFUNC(store_NMI_vector) \
APICFUNC(restore_NMI_vector) \
APICFUNC(inquire_remote_apic) \
}
extern struct genapic *genapic;
extern void es7000_update_genapic_to_cluster(void);
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
#define get_uv_system_type() UV_NONE

Просмотреть файл

@ -32,6 +32,8 @@ struct genapic {
unsigned int (*get_apic_id)(unsigned long x);
unsigned long (*set_apic_id)(unsigned int id);
unsigned long apic_id_mask;
/* wakeup_secondary_cpu */
int (*wakeup_cpu)(int apicid, unsigned long start_eip);
};
extern struct genapic *genapic;

Просмотреть файл

@ -188,17 +188,14 @@ extern void restore_IO_APIC_setup(void);
extern void reinit_intr_remapped_IO_APIC(int);
#endif
extern int probe_nr_irqs(void);
extern void probe_nr_irqs_gsi(void);
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
static const int timer_through_8259 = 0;
static inline void ioapic_init_mappings(void) { }
static inline void ioapic_init_mappings(void) { }
static inline int probe_nr_irqs(void)
{
return NR_IRQS;
}
static inline void probe_nr_irqs_gsi(void) { }
#endif
#endif /* _ASM_X86_IO_APIC_H */

Просмотреть файл

@ -101,12 +101,23 @@
#define LAST_VM86_IRQ 15
#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
#define NR_IRQS_LEGACY 16
#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
#ifndef CONFIG_SPARSE_IRQ
# if NR_CPUS < MAX_IO_APICS
# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
# else
# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
# endif
#else
# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
# else
# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
# endif
#endif
#elif defined(CONFIG_X86_VOYAGER)

Просмотреть файл

@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void)
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
#define send_IPI_self (genapic->send_IPI_self)
#define wakeup_secondary_cpu (genapic->wakeup_cpu)
extern void setup_apic_routing(void);
#else
#define INT_DELIVERY_MODE dest_LowestPrio
#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
#define TARGET_CPUS (target_cpus())
#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
/*
* Set up the logical destination ID.
*

Просмотреть файл

@ -1,17 +1,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
/*
* This file copes with machines that wakeup secondary CPUs by the
* INIT, INIT, STARTUP sequence.
*/
#define WAKE_SECONDARY_VIA_INIT
#define TRAMPOLINE_LOW phys_to_virt(0x467)
#define TRAMPOLINE_HIGH phys_to_virt(0x469)
#define boot_cpu_apicid boot_cpu_physical_apicid
#define TRAMPOLINE_PHYS_LOW (0x467)
#define TRAMPOLINE_PHYS_HIGH (0x469)
static inline void wait_for_init_deassert(atomic_t *deassert)
{
@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
#define inquire_remote_apic(apicid) do { \
if (apic_verbosity >= APIC_DEBUG) \
__inquire_remote_apic(apicid); \
} while (0)
extern void __inquire_remote_apic(int apicid);
/*
 * Dump the state of a remote APIC, but only when APIC debugging output
 * has been requested (apic_verbosity at APIC_DEBUG or above).
 */
static inline void inquire_remote_apic(int apicid)
{
	/* nothing to report unless APIC debug verbosity is enabled */
	if (apic_verbosity < APIC_DEBUG)
		return;

	__inquire_remote_apic(apicid);
}
#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */

Просмотреть файл

@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
CMOS_WRITE(0xa, 0xf);
local_flush_tlb();
pr_debug("1.\n");
*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
start_eip >> 4;
pr_debug("2.\n");
*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
start_eip & 0xf;
pr_debug("3.\n");
}
@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
CMOS_WRITE(0, 0xf);
*((volatile long *) phys_to_virt(0x467)) = 0;
*((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
static inline void __init smpboot_setup_io_apic(void)

Просмотреть файл

@ -27,6 +27,7 @@
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define enable_apic_mode (genapic->enable_apic_mode)
#define phys_pkg_id (genapic->phys_pkg_id)
#define wakeup_secondary_cpu (genapic->wakeup_cpu)
extern void generic_bigsmp_probe(void);

Просмотреть файл

@ -0,0 +1,12 @@
#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
#define wait_for_init_deassert (genapic->wait_for_init_deassert)
#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
#define store_NMI_vector (genapic->store_NMI_vector)
#define restore_NMI_vector (genapic->restore_NMI_vector)
#define inquire_remote_apic (genapic->inquire_remote_apic)
#endif /* _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H */

Просмотреть файл

@ -3,12 +3,8 @@
/* This file copes with machines that wakeup secondary CPUs by NMIs */
#define WAKE_SECONDARY_VIA_NMI
#define TRAMPOLINE_LOW phys_to_virt(0x8)
#define TRAMPOLINE_HIGH phys_to_virt(0xa)
#define boot_cpu_apicid boot_cpu_logical_apicid
#define TRAMPOLINE_PHYS_LOW (0x8)
#define TRAMPOLINE_PHYS_HIGH (0xa)
/* We don't do anything here because we use NMI's to boot instead */
static inline void wait_for_init_deassert(atomic_t *deassert)
@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
{
printk("Storing NMI vector\n");
*high = *((volatile unsigned short *) TRAMPOLINE_HIGH);
*low = *((volatile unsigned short *) TRAMPOLINE_LOW);
*high =
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
*low =
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
}
static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
printk("Restoring NMI vector\n");
*((volatile unsigned short *) TRAMPOLINE_HIGH) = *high;
*((volatile unsigned short *) TRAMPOLINE_LOW) = *low;
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
*high;
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
*low;
}
#define inquire_remote_apic(apicid) {}
/*
 * Intentionally empty: this sub-architecture provides no remote APIC
 * inquiry. The function exists so callers can invoke
 * inquire_remote_apic() unconditionally.
 */
static inline void inquire_remote_apic(int apicid)
{
}
#endif /* __ASM_NUMAQ_WAKECPU_H */

Просмотреть файл

@ -16,6 +16,8 @@ static inline void visws_early_detect(void) { }
static inline int is_visws_box(void) { return 0; }
#endif
extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
/*
* Any setup quirks to be performed?
*/
@ -39,6 +41,7 @@ struct x86_quirks {
void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
unsigned short oemsize);
int (*setup_ioapic_ids)(void);
int (*update_genapic)(void);
};
extern struct x86_quirks *x86_quirks;

Просмотреть файл

@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
void default_idle(void);
void stop_this_cpu(void *dummy);
/*
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking

Просмотреть файл

@ -20,6 +20,8 @@
struct task_struct;
struct exec_domain;
#include <asm/processor.h>
#include <asm/ftrace.h>
#include <asm/atomic.h>
struct thread_info {
struct task_struct *task; /* main task structure */

Просмотреть файл

@ -157,6 +157,7 @@ extern int __get_user_bad(void);
int __ret_gu; \
unsigned long __val_gu; \
__chk_user_ptr(ptr); \
might_fault(); \
switch (sizeof(*(ptr))) { \
case 1: \
__get_user_x(1, __ret_gu, __val_gu, ptr); \
@ -241,6 +242,7 @@ extern void __put_user_8(void);
int __ret_pu; \
__typeof__(*(ptr)) __pu_val; \
__chk_user_ptr(ptr); \
might_fault(); \
__pu_val = x; \
switch (sizeof(*(ptr))) { \
case 1: \

Просмотреть файл

@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long __must_check
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_sleep();
return __copy_to_user_inatomic(to, from, n);
might_fault();
return __copy_to_user_inatomic(to, from, n);
}
static __always_inline unsigned long
@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_sleep();
might_fault();
if (__builtin_constant_p(n)) {
unsigned long ret;
@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long __copy_from_user_nocache(void *to,
const void __user *from, unsigned long n)
{
might_sleep();
might_fault();
if (__builtin_constant_p(n)) {
unsigned long ret;

Просмотреть файл

@ -29,6 +29,8 @@ static __always_inline __must_check
int __copy_from_user(void *dst, const void __user *src, unsigned size)
{
int ret = 0;
might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
@ -71,6 +73,8 @@ static __always_inline __must_check
int __copy_to_user(void __user *dst, const void *src, unsigned size)
{
int ret = 0;
might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
@ -113,6 +117,8 @@ static __always_inline __must_check
int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
{
int ret = 0;
might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst,
(__force void *)src, size);

Просмотреть файл

@ -25,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o
obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
@ -65,6 +65,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o

Просмотреть файл

@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void)
disable_acpi();
}
}
/*
* ACPI supports both logical (e.g. Hyper-Threading) and physical
* processors, where MPS only supports physical.
*/
if (acpi_lapic && acpi_ioapic)
printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
"information\n");
else if (acpi_lapic)
printk(KERN_INFO "Using ACPI for processor (LAPIC) "
"configuration information\n");
#endif
return;
}

Просмотреть файл

@ -391,11 +391,7 @@ static int power_off;
#else
static int power_off = 1;
#endif
#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
static int realmode_power_off = 1;
#else
static int realmode_power_off;
#endif
#ifdef CONFIG_APM_ALLOW_INTS
static int allow_ints = 1;
#else

Просмотреть файл

@ -33,6 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/ftrace.h>
#include <linux/acpi.h>
#include <acpi/processor.h>
@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
int result = 0;
struct power_trace it;
dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
trace_power_mark(&it, POWER_PSTATE, next_perf_state);
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;

Просмотреть файл

@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_P4);
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_P3);
#endif
if (cpu_has_bts)
ptrace_bts_init_intel(c);
#endif
detect_extended_topology(c);
if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
/*

Просмотреть файл

@ -7,13 +7,12 @@
*
* It manages:
* - per-thread and per-cpu allocation of BTS and PEBS
* - buffer memory allocation (optional)
* - buffer overflow handling
* - buffer overflow handling (to be done)
* - buffer access
*
* It assumes:
* - get_task_struct on all parameter tasks
* - current is allowed to trace parameter tasks
* - get_task_struct on all traced tasks
* - current is allowed to trace tasks
*
*
* Copyright (C) 2007-2008 Intel Corporation.
@ -28,6 +27,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel.h>
/*
@ -44,6 +44,33 @@ struct ds_configuration {
};
static struct ds_configuration ds_cfg;
/*
* A BTS or PEBS tracer.
*
* This holds the configuration of the tracer and serves as a handle
* to identify tracers.
*/
struct ds_tracer {
/* the DS context (partially) owned by this tracer; set by ds_request() */
struct ds_context *context;
/*
 * the buffer provided on ds_request() and its size in bytes, recorded
 * as passed in
 */
void *buffer;
size_t size;
};
/*
 * A BTS tracer handle, as returned by ds_request_bts() and consumed by
 * the other ds_*_bts() functions.
 */
struct bts_tracer {
/*
 * the common DS part; ds_overflow() uses container_of() on this member
 * to get back to the enclosing bts_tracer
 */
struct ds_tracer ds;
/*
 * buffer overflow notification function; may be NULL.
 * ds_request_bts() currently rejects a non-NULL callback with
 * -EOPNOTSUPP.
 */
bts_ovfl_callback_t ovfl;
};
/*
 * A PEBS tracer handle, as returned by ds_request_pebs() and consumed by
 * the other ds_*_pebs() functions.
 */
struct pebs_tracer {
/*
 * the common DS part; ds_overflow() uses container_of() on this member
 * to get back to the enclosing pebs_tracer
 */
struct ds_tracer ds;
/*
 * buffer overflow notification function; may be NULL.
 * ds_request_pebs() currently rejects a non-NULL callback with
 * -EOPNOTSUPP.
 */
pebs_ovfl_callback_t ovfl;
};
/*
* Debug Store (DS) save area configuration (see Intel64 and IA32
@ -107,35 +134,14 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
(*(unsigned long *)base) = value;
}
#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
/*
* Locking is done only for allocating BTS or PEBS resources and for
* guarding context and buffer memory allocation.
*
* Most functions require the current task to own the ds context part
* they are going to access. All the locking is done when validating
* access to the context.
* Locking is done only for allocating BTS or PEBS resources.
*/
static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
* Validate that the current task is allowed to access the BTS/PEBS
* buffer of the parameter task.
*
* Returns 0, if access is granted; -Eerrno, otherwise.
*/
static inline int ds_validate_access(struct ds_context *context,
enum ds_qualifier qual)
{
if (!context)
return -EPERM;
if (context->owner[qual] == current)
return 0;
return -EPERM;
}
/*
* We either support (system-wide) per-cpu or per-thread allocation.
@ -183,50 +189,12 @@ static inline int check_tracer(struct task_struct *task)
*
* Contexts are use-counted. They are allocated on first access and
* deallocated when the last user puts the context.
*
* We distinguish between an allocating and a non-allocating get of a
* context:
* - the allocating get is used for requesting BTS/PEBS resources. It
* requires the caller to hold the global ds_lock.
* - the non-allocating get is used for all other cases. A
* non-existing context indicates an error. It acquires and releases
* the ds_lock itself for obtaining the context.
*
* A context and its DS configuration are allocated and deallocated
* together. A context always has a DS configuration of the
* appropriate size.
*/
static DEFINE_PER_CPU(struct ds_context *, system_context);
#define this_system_context per_cpu(system_context, smp_processor_id())
/*
* Returns the pointer to the parameter task's context or to the
* system-wide context, if task is NULL.
*
* Increases the use count of the returned context, if not NULL.
*/
static inline struct ds_context *ds_get_context(struct task_struct *task)
{
struct ds_context *context;
unsigned long irq;
spin_lock_irqsave(&ds_lock, irq);
context = (task ? task->thread.ds_ctx : this_system_context);
if (context)
context->count++;
spin_unlock_irqrestore(&ds_lock, irq);
return context;
}
/*
* Same as ds_get_context, but allocates the context and it's DS
* structure, if necessary; returns NULL; if out of memory.
*/
static inline struct ds_context *ds_alloc_context(struct task_struct *task)
{
struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &this_system_context);
@ -238,16 +206,9 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
if (!context)
return NULL;
context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
if (!context->ds) {
kfree(context);
return NULL;
}
spin_lock_irqsave(&ds_lock, irq);
if (*p_context) {
kfree(context->ds);
kfree(context);
context = *p_context;
@ -272,10 +233,6 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
return context;
}
/*
* Decreases the use count of the parameter context, if not NULL.
* Deallocates the context, if the use count reaches zero.
*/
static inline void ds_put_context(struct ds_context *context)
{
unsigned long irq;
@ -296,13 +253,6 @@ static inline void ds_put_context(struct ds_context *context)
if (!context->task || (context->task == current))
wrmsrl(MSR_IA32_DS_AREA, 0);
put_tracer(context->task);
/* free any leftover buffers from tracers that did not
* deallocate them properly. */
kfree(context->buffer[ds_bts]);
kfree(context->buffer[ds_pebs]);
kfree(context->ds);
kfree(context);
out:
spin_unlock_irqrestore(&ds_lock, irq);
@ -312,119 +262,37 @@ static inline void ds_put_context(struct ds_context *context)
/*
* Handle a buffer overflow
*
* task: the task whose buffers are overflowing;
* NULL for a buffer overflow on the current cpu
* context: the ds context
* qual: the buffer type
*/
static void ds_overflow(struct task_struct *task, struct ds_context *context,
enum ds_qualifier qual)
static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
{
if (!context)
return;
if (context->callback[qual])
(*context->callback[qual])(task);
/* todo: do some more overflow handling */
}
/*
* Allocate a non-pageable buffer of the parameter size.
* Checks the memory and the locked memory rlimit.
*
* Returns the buffer, if successful;
* NULL, if out of memory or rlimit exceeded.
*
* size: the requested buffer size in bytes
* pages (out): if not NULL, contains the number of pages reserved
*/
static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
{
unsigned long rlim, vm, pgsz;
void *buffer;
pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
vm = current->mm->total_vm + pgsz;
if (rlim < vm)
return NULL;
rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
vm = current->mm->locked_vm + pgsz;
if (rlim < vm)
return NULL;
buffer = kzalloc(size, GFP_KERNEL);
if (!buffer)
return NULL;
current->mm->total_vm += pgsz;
current->mm->locked_vm += pgsz;
if (pages)
*pages = pgsz;
return buffer;
}
static int ds_request(struct task_struct *task, void *base, size_t size,
ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
{
struct ds_context *context;
unsigned long buffer, adj;
const unsigned long alignment = (1 << 3);
unsigned long irq;
int error = 0;
if (!ds_cfg.sizeof_ds)
return -EOPNOTSUPP;
/* we require some space to do alignment adjustments below */
if (size < (alignment + ds_cfg.sizeof_rec[qual]))
return -EINVAL;
/* buffer overflow notification is not yet implemented */
if (ovfl)
return -EOPNOTSUPP;
context = ds_alloc_context(task);
if (!context)
return -ENOMEM;
spin_lock_irqsave(&ds_lock, irq);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
get_tracer(task);
error = -EALREADY;
if (context->owner[qual] == current)
goto out_put_tracer;
error = -EPERM;
if (context->owner[qual] != NULL)
goto out_put_tracer;
context->owner[qual] = current;
spin_unlock_irqrestore(&ds_lock, irq);
error = -ENOMEM;
if (!base) {
base = ds_allocate_buffer(size, &context->pages[qual]);
if (!base)
goto out_release;
context->buffer[qual] = base;
switch (qual) {
case ds_bts: {
struct bts_tracer *tracer =
container_of(context->owner[qual],
struct bts_tracer, ds);
if (tracer->ovfl)
tracer->ovfl(tracer);
}
error = 0;
break;
case ds_pebs: {
struct pebs_tracer *tracer =
container_of(context->owner[qual],
struct pebs_tracer, ds);
if (tracer->ovfl)
tracer->ovfl(tracer);
}
break;
}
}
context->callback[qual] = ovfl;
static void ds_install_ds_config(struct ds_context *context,
enum ds_qualifier qual,
void *base, size_t size, size_t ith)
{
unsigned long buffer, adj;
/* adjust the buffer address and size to meet alignment
* constraints:
@ -436,7 +304,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
*/
buffer = (unsigned long)base;
adj = ALIGN(buffer, alignment) - buffer;
adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
buffer += adj;
size -= adj;
@ -447,210 +315,289 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
ds_set(context->ds, qual, ds_index, buffer);
ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
if (ovfl) {
/* todo: select a suitable interrupt threshold */
} else
ds_set(context->ds, qual,
ds_interrupt_threshold, buffer + size + 1);
/* The value for 'no threshold' is -1, which will set the
* threshold outside of the buffer, just like we want it.
*/
ds_set(context->ds, qual,
ds_interrupt_threshold, buffer + size - ith);
}
/* we keep the context until ds_release */
return error;
static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
struct task_struct *task,
void *base, size_t size, size_t th)
{
struct ds_context *context;
unsigned long irq;
int error;
out_release:
context->owner[qual] = NULL;
ds_put_context(context);
put_tracer(task);
return error;
error = -EOPNOTSUPP;
if (!ds_cfg.sizeof_ds)
goto out;
error = -EINVAL;
if (!base)
goto out;
/* we require some space to do alignment adjustments below */
error = -EINVAL;
if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
goto out;
if (th != (size_t)-1) {
th *= ds_cfg.sizeof_rec[qual];
error = -EINVAL;
if (size <= th)
goto out;
}
tracer->buffer = base;
tracer->size = size;
error = -ENOMEM;
context = ds_get_context(task);
if (!context)
goto out;
tracer->context = context;
spin_lock_irqsave(&ds_lock, irq);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
get_tracer(task);
error = -EPERM;
if (context->owner[qual])
goto out_put_tracer;
context->owner[qual] = tracer;
spin_unlock_irqrestore(&ds_lock, irq);
ds_install_ds_config(context, qual, base, size, th);
return 0;
out_put_tracer:
spin_unlock_irqrestore(&ds_lock, irq);
ds_put_context(context);
put_tracer(task);
return error;
out_unlock:
spin_unlock_irqrestore(&ds_lock, irq);
ds_put_context(context);
return error;
}
int ds_request_bts(struct task_struct *task, void *base, size_t size,
ds_ovfl_callback_t ovfl)
{
return ds_request(task, base, size, ovfl, ds_bts);
}
int ds_request_pebs(struct task_struct *task, void *base, size_t size,
ds_ovfl_callback_t ovfl)
{
return ds_request(task, base, size, ovfl, ds_pebs);
}
static int ds_release(struct task_struct *task, enum ds_qualifier qual)
{
struct ds_context *context;
int error;
context = ds_get_context(task);
error = ds_validate_access(context, qual);
if (error < 0)
goto out;
kfree(context->buffer[qual]);
context->buffer[qual] = NULL;
current->mm->total_vm -= context->pages[qual];
current->mm->locked_vm -= context->pages[qual];
context->pages[qual] = 0;
context->owner[qual] = NULL;
/*
* we put the context twice:
* once for the ds_get_context
* once for the corresponding ds_request
*/
ds_put_context(context);
tracer->context = NULL;
out:
ds_put_context(context);
return error;
}
int ds_release_bts(struct task_struct *task)
struct bts_tracer *ds_request_bts(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl, size_t th)
{
return ds_release(task, ds_bts);
}
int ds_release_pebs(struct task_struct *task)
{
return ds_release(task, ds_pebs);
}
static int ds_get_index(struct task_struct *task, size_t *pos,
enum ds_qualifier qual)
{
struct ds_context *context;
unsigned long base, index;
struct bts_tracer *tracer;
int error;
context = ds_get_context(task);
error = ds_validate_access(context, qual);
if (error < 0)
/* buffer overflow notification is not yet implemented */
error = -EOPNOTSUPP;
if (ovfl)
goto out;
error = -ENOMEM;
tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer)
goto out;
tracer->ovfl = ovfl;
error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
if (error < 0)
goto out_tracer;
return tracer;
out_tracer:
kfree(tracer);
out:
return ERR_PTR(error);
}
/*
 * Request PEBS tracing for a task.
 *
 * Allocates a pebs_tracer handle and passes its DS part, together with
 * the caller's buffer, to ds_request().
 *
 * task: forwarded unchanged to ds_request()
 * base: the buffer to trace into
 * size: the size of that buffer in bytes
 * ovfl: overflow callback; must be NULL for now
 * th:   threshold value, forwarded unchanged to ds_request()
 *
 * Returns the new tracer on success; otherwise an ERR_PTR() carrying
 * -EOPNOTSUPP (callback given), -ENOMEM (handle allocation failed), or
 * the negative error reported by ds_request().
 */
struct pebs_tracer *ds_request_pebs(struct task_struct *task,
				    void *base, size_t size,
				    pebs_ovfl_callback_t ovfl, size_t th)
{
	struct pebs_tracer *pebs;
	int rc;

	/* buffer overflow notification is not yet implemented */
	if (ovfl)
		return ERR_PTR(-EOPNOTSUPP);

	pebs = kzalloc(sizeof(*pebs), GFP_KERNEL);
	if (!pebs)
		return ERR_PTR(-ENOMEM);

	pebs->ovfl = ovfl;

	rc = ds_request(&pebs->ds, ds_pebs, task, base, size, th);
	if (rc < 0) {
		/* the request failed, so the handle was never published */
		kfree(pebs);
		return ERR_PTR(rc);
	}

	return pebs;
}
static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
{
BUG_ON(tracer->context->owner[qual] != tracer);
tracer->context->owner[qual] = NULL;
put_tracer(tracer->context->task);
ds_put_context(tracer->context);
}
/*
 * Release a BTS tracer and free its handle.
 *
 * tracer: the tracer handle to release
 *
 * Returns 0 on success; -EINVAL if tracer is NULL.
 */
int ds_release_bts(struct bts_tracer *tracer)
{
	if (tracer) {
		ds_release(&tracer->ds, ds_bts);
		kfree(tracer);
		return 0;
	}

	return -EINVAL;
}
/*
 * Release a PEBS tracer and free its handle.
 *
 * tracer: the tracer handle to release
 *
 * Returns 0 on success; -EINVAL if tracer is NULL.
 */
int ds_release_pebs(struct pebs_tracer *tracer)
{
	if (tracer) {
		ds_release(&tracer->ds, ds_pebs);
		kfree(tracer);
		return 0;
	}

	return -EINVAL;
}
static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
{
unsigned long base, index;
base = ds_get(context->ds, qual, ds_buffer_base);
index = ds_get(context->ds, qual, ds_index);
error = ((index - base) / ds_cfg.sizeof_rec[qual]);
if (pos)
*pos = error;
out:
ds_put_context(context);
return error;
return (index - base) / ds_cfg.sizeof_rec[qual];
}
int ds_get_bts_index(struct task_struct *task, size_t *pos)
int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
{
return ds_get_index(task, pos, ds_bts);
if (!tracer)
return -EINVAL;
if (!pos)
return -EINVAL;
*pos = ds_get_index(tracer->ds.context, ds_bts);
return 0;
}
int ds_get_pebs_index(struct task_struct *task, size_t *pos)
int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
{
return ds_get_index(task, pos, ds_pebs);
if (!tracer)
return -EINVAL;
if (!pos)
return -EINVAL;
*pos = ds_get_index(tracer->ds.context, ds_pebs);
return 0;
}
static int ds_get_end(struct task_struct *task, size_t *pos,
enum ds_qualifier qual)
static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
{
struct ds_context *context;
unsigned long base, end;
int error;
context = ds_get_context(task);
error = ds_validate_access(context, qual);
if (error < 0)
goto out;
unsigned long base, max;
base = ds_get(context->ds, qual, ds_buffer_base);
end = ds_get(context->ds, qual, ds_absolute_maximum);
max = ds_get(context->ds, qual, ds_absolute_maximum);
error = ((end - base) / ds_cfg.sizeof_rec[qual]);
if (pos)
*pos = error;
out:
ds_put_context(context);
return error;
return (max - base) / ds_cfg.sizeof_rec[qual];
}
int ds_get_bts_end(struct task_struct *task, size_t *pos)
int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
{
return ds_get_end(task, pos, ds_bts);
if (!tracer)
return -EINVAL;
if (!pos)
return -EINVAL;
*pos = ds_get_end(tracer->ds.context, ds_bts);
return 0;
}
int ds_get_pebs_end(struct task_struct *task, size_t *pos)
int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
{
return ds_get_end(task, pos, ds_pebs);
if (!tracer)
return -EINVAL;
if (!pos)
return -EINVAL;
*pos = ds_get_end(tracer->ds.context, ds_pebs);
return 0;
}
static int ds_access(struct task_struct *task, size_t index,
const void **record, enum ds_qualifier qual)
static int ds_access(struct ds_context *context, enum ds_qualifier qual,
size_t index, const void **record)
{
struct ds_context *context;
unsigned long base, idx;
int error;
if (!record)
return -EINVAL;
context = ds_get_context(task);
error = ds_validate_access(context, qual);
if (error < 0)
goto out;
base = ds_get(context->ds, qual, ds_buffer_base);
idx = base + (index * ds_cfg.sizeof_rec[qual]);
error = -EINVAL;
if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
goto out;
return -EINVAL;
*record = (const void *)idx;
error = ds_cfg.sizeof_rec[qual];
out:
ds_put_context(context);
return error;
return ds_cfg.sizeof_rec[qual];
}
int ds_access_bts(struct task_struct *task, size_t index, const void **record)
int ds_access_bts(struct bts_tracer *tracer, size_t index,
const void **record)
{
return ds_access(task, index, record, ds_bts);
if (!tracer)
return -EINVAL;
return ds_access(tracer->ds.context, ds_bts, index, record);
}
int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
const void **record)
{
return ds_access(task, index, record, ds_pebs);
if (!tracer)
return -EINVAL;
return ds_access(tracer->ds.context, ds_pebs, index, record);
}
static int ds_write(struct task_struct *task, const void *record, size_t size,
enum ds_qualifier qual, int force)
static int ds_write(struct ds_context *context, enum ds_qualifier qual,
const void *record, size_t size)
{
struct ds_context *context;
int error;
int bytes_written = 0;
if (!record)
return -EINVAL;
error = -EPERM;
context = ds_get_context(task);
if (!context)
goto out;
if (!force) {
error = ds_validate_access(context, qual);
if (error < 0)
goto out;
}
error = 0;
while (size) {
unsigned long base, index, end, write_end, int_th;
unsigned long write_size, adj_write_size;
@ -678,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
write_end = end;
if (write_end <= index)
goto out;
break;
write_size = min((unsigned long) size, write_end - index);
memcpy((void *)index, record, write_size);
record = (const char *)record + write_size;
size -= write_size;
error += write_size;
size -= write_size;
bytes_written += write_size;
adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
adj_write_size *= ds_cfg.sizeof_rec[qual];
@ -700,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
ds_set(context->ds, qual, ds_index, index);
if (index >= int_th)
ds_overflow(task, context, qual);
ds_overflow(context, qual);
}
out:
ds_put_context(context);
return error;
return bytes_written;
}
int ds_write_bts(struct task_struct *task, const void *record, size_t size)
int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
{
return ds_write(task, record, size, ds_bts, /* force = */ 0);
if (!tracer)
return -EINVAL;
return ds_write(tracer->ds.context, ds_bts, record, size);
}
int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
{
return ds_write(task, record, size, ds_pebs, /* force = */ 0);
if (!tracer)
return -EINVAL;
return ds_write(tracer->ds.context, ds_pebs, record, size);
}
int ds_unchecked_write_bts(struct task_struct *task,
const void *record, size_t size)
static void ds_reset_or_clear(struct ds_context *context,
enum ds_qualifier qual, int clear)
{
return ds_write(task, record, size, ds_bts, /* force = */ 1);
}
int ds_unchecked_write_pebs(struct task_struct *task,
const void *record, size_t size)
{
return ds_write(task, record, size, ds_pebs, /* force = */ 1);
}
static int ds_reset_or_clear(struct task_struct *task,
enum ds_qualifier qual, int clear)
{
struct ds_context *context;
unsigned long base, end;
int error;
context = ds_get_context(task);
error = ds_validate_access(context, qual);
if (error < 0)
goto out;
base = ds_get(context->ds, qual, ds_buffer_base);
end = ds_get(context->ds, qual, ds_absolute_maximum);
@ -749,70 +681,69 @@ static int ds_reset_or_clear(struct task_struct *task,
memset((void *)base, 0, end - base);
ds_set(context->ds, qual, ds_index, base);
error = 0;
out:
ds_put_context(context);
return error;
}
int ds_reset_bts(struct task_struct *task)
int ds_reset_bts(struct bts_tracer *tracer)
{
return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
if (!tracer)
return -EINVAL;
ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
return 0;
}
int ds_reset_pebs(struct task_struct *task)
int ds_reset_pebs(struct pebs_tracer *tracer)
{
return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
if (!tracer)
return -EINVAL;
ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
return 0;
}
int ds_clear_bts(struct task_struct *task)
int ds_clear_bts(struct bts_tracer *tracer)
{
return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
if (!tracer)
return -EINVAL;
ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
return 0;
}
int ds_clear_pebs(struct task_struct *task)
int ds_clear_pebs(struct pebs_tracer *tracer)
{
return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
if (!tracer)
return -EINVAL;
ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
return 0;
}
int ds_get_pebs_reset(struct task_struct *task, u64 *value)
int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
{
struct ds_context *context;
int error;
if (!tracer)
return -EINVAL;
if (!value)
return -EINVAL;
context = ds_get_context(task);
error = ds_validate_access(context, ds_pebs);
if (error < 0)
goto out;
*value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
*value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
error = 0;
out:
ds_put_context(context);
return error;
return 0;
}
int ds_set_pebs_reset(struct task_struct *task, u64 value)
int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
{
struct ds_context *context;
int error;
if (!tracer)
return -EINVAL;
context = ds_get_context(task);
error = ds_validate_access(context, ds_pebs);
if (error < 0)
goto out;
*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
*(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
error = 0;
out:
ds_put_context(context);
return error;
return 0;
}
static const struct ds_configuration ds_cfg_var = {
@ -840,6 +771,10 @@ static inline void
ds_configure(const struct ds_configuration *cfg)
{
ds_cfg = *cfg;
printk(KERN_INFO "DS available\n");
BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
}
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@ -847,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
case 0 ... 0xC:
/* sorry, don't know about them */
break;
case 0xD:
case 0xE: /* Pentium M */
ds_configure(&ds_cfg_var);
break;
case 0xF: /* Core2 */
case 0x1C: /* Atom */
default: /* Core2, Atom, ... */
ds_configure(&ds_cfg_64);
break;
default:
/* sorry, don't know about them */
break;
}
break;
case 0xF:
@ -884,6 +818,8 @@ void ds_free(struct ds_context *context)
* is dying. There should not be any user of that context left
* to disturb us, anymore. */
unsigned long leftovers = context->count;
while (leftovers--)
while (leftovers--) {
put_tracer(context->task);
ds_put_context(context);
}
}

351
arch/x86/kernel/dumpstack.c Normal file
Просмотреть файл

@ -0,0 +1,351 @@
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
*/
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
#include <asm/stacktrace.h>
#include "dumpstack.h"
/* NOTE(review): not referenced in the visible part of this file; presumably read by the NMI handling code - confirm. */
int panic_on_unrecovered_nmi;
/* Set from the "code_bytes=" boot option by code_bytes_setup(), which caps it at 8192. */
unsigned int code_bytes = 64;
/* Overridden by the "kstack" early parameter; the default is three lines' worth of stack slots. */
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
/* Incremented by __die() and printed in the oops banner as "[#N]". */
static int die_counter;
/*
 * Print a single trace entry: the raw address in brackets followed by
 * its %pS (symbolic) rendering.  Entries the caller flags as not
 * reliable are prefixed with "? ".
 */
void printk_address(unsigned long address, int reliable)
{
	void *ptr = (void *)address;
	const char *mark;

	if (reliable)
		mark = "";
	else
		mark = "? ";

	printk(" [<%p>] %s%pS\n", ptr, mark, ptr);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * If @addr is the return_to_handler trampoline, report the return address
 * recorded in the task's ret_stack instead.
 *
 * @graph counts how many trampoline entries have already been resolved
 * during this walk; it selects the ret_stack slot (counted down from
 * curr_ret_stack) and is advanced on every hit.
 */
static void
print_ftrace_graph_addr(unsigned long addr, void *data,
			const struct stacktrace_ops *ops,
			struct thread_info *tinfo, int *graph)
{
	struct task_struct *tsk = tinfo->task;
	int top = tsk->curr_ret_stack;
	int slot;

	if (addr == (unsigned long)return_to_handler &&
	    tsk->ret_stack && top >= *graph) {
		slot = top - *graph;
		ops->address(data, tsk->ret_stack[slot].ret, 1);
		(*graph)++;
	}
}
#else
/* Function graph tracer not configured: nothing to resolve. */
static inline void
print_ftrace_graph_addr(unsigned long addr, void *data,
			const struct stacktrace_ops *ops,
			struct thread_info *tinfo, int *graph)
{ }
#endif
/*
* x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
/*
 * Decide whether @p may be dereferenced as a stack slot of @size bytes.
 *
 * With an explicit @end, @p must lie in the THREAD_SIZE bytes immediately
 * below @end.  Without one, @p must lie strictly above @tinfo and leave
 * room for @size bytes before @tinfo + THREAD_SIZE.
 *
 * Returns 1 when the pointer is acceptable, 0 otherwise.
 */
static inline int valid_stack_ptr(struct thread_info *tinfo,
			void *p, unsigned int size, void *end)
{
	void *base = tinfo;

	if (!end)
		return p > base && p < base + THREAD_SIZE - size;

	return p < end && p >= (end - THREAD_SIZE);
}
/*
 * Scan one stack from @stack upwards and report every word that looks
 * like a kernel text address through @ops->address().
 *
 * A word found exactly one long above the current frame pointer @bp is
 * reported as reliable and the frame chain is followed; any other hit is
 * reported as reliable only when @bp is 0.  Each hit is also offered to
 * print_ftrace_graph_addr().
 *
 * Returns the frame pointer value reached at the end of the scan.
 */
unsigned long
print_context_stack(struct thread_info *tinfo,
		unsigned long *stack, unsigned long bp,
		const struct stacktrace_ops *ops, void *data,
		unsigned long *end, int *graph)
{
	struct stack_frame *frame = (struct stack_frame *)bp;
	unsigned long *slot;

	for (slot = stack;
	     valid_stack_ptr(tinfo, slot, sizeof(*slot), end);
	     slot++) {
		unsigned long word = *slot;

		if (!__kernel_text_address(word))
			continue;

		if ((unsigned long)slot != bp + sizeof(long)) {
			ops->address(data, word, bp == 0);
		} else {
			ops->address(data, word, 1);
			frame = frame->next_frame;
			bp = (unsigned long)frame;
		}
		print_ftrace_graph_addr(word, data, ops, tinfo, graph);
	}

	return bp;
}
/*
 * stacktrace_ops.warning_symbol callback: print the prefix carried in
 * @data, then @msg with @symbol resolved by print_symbol(), then a
 * newline.
 *
 * @data is the log-level string handed to dump_trace() by
 * show_trace_log_lvl().  It is printed through "%s" rather than used as
 * the format string itself, so a '%' in it can never be interpreted as
 * a conversion specifier.
 */
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	printk("%s", (char *)data);
	print_symbol(msg, symbol);
	printk("\n");
}
/*
 * stacktrace_ops.warning callback: print @msg on its own line, preceded
 * by the prefix string carried in @data.
 */
static void print_trace_warning(void *data, char *msg)
{
	char *prefix = data;

	printk("%s%s\n", prefix, msg);
}
/*
 * stacktrace_ops.stack callback: announce a switch to the stack called
 * @name as " <name> ", preceded by the prefix string carried in @data.
 * Always returns 0.
 */
static int print_trace_stack(void *data, char *name)
{
	char *prefix = data;

	printk("%s <%s> ", prefix, name);

	return 0;
}
/*
 * stacktrace_ops.address callback: print one address/symbol entry per
 * line.
 *
 * @data is the log-level string handed to dump_trace() by
 * show_trace_log_lvl().  It is printed through "%s" rather than used as
 * the format string itself, so a '%' in it can never be interpreted as
 * a conversion specifier.
 */
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
	/* Called once per printed entry, before any output. */
	touch_nmi_watchdog();
	printk("%s", (char *)data);
	printk_address(addr, reliable);
}
/* Callback table that makes dump_trace() print the trace via printk(). */
static const struct stacktrace_ops print_trace_ops = {
	.address	= print_trace_address,
	.stack		= print_trace_stack,
	.warning	= print_trace_warning,
	.warning_symbol	= print_trace_warning_symbol,
};
/*
 * Print a "Call Trace:" header followed by the trace produced by
 * dump_trace().  @log_lvl prefixes the header and is passed on as the
 * callback data, so the print_trace_* callbacks prefix their output
 * with it as well.
 */
void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
		   unsigned long *stack, unsigned long bp, char *log_lvl)
{
	void *cb_data = log_lvl;

	printk("%sCall Trace:\n", log_lvl);
	dump_trace(task, regs, stack, bp, &print_trace_ops, cb_data);
}
/* show_trace_log_lvl() with an empty log-level prefix. */
void show_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp)
{
	char *no_prefix = "";

	show_trace_log_lvl(task, regs, stack, bp, no_prefix);
}
/*
 * show_stack_log_lvl() with no register state, a zero frame pointer and
 * an empty log-level prefix.
 */
void show_stack(struct task_struct *task, unsigned long *sp)
{
	char *no_prefix = "";

	show_stack_log_lvl(task, NULL, sp, 0, no_prefix);
}
/*
 * The architecture-independent dump_stack generator.
 *
 * Prints a one-line summary (pid, comm, taint flags, kernel release and
 * the first word of the version string) and then the call trace of the
 * current context.
 */
void dump_stack(void)
{
	/* Never read; its address only marks where the trace starts. */
	unsigned long stack;
	unsigned long bp = 0;
	int version_len;

#ifdef CONFIG_FRAME_POINTER
	get_bp(bp);
#endif

	/* Print the version string only up to its first space. */
	version_len = (int)strcspn(init_utsname()->version, " ");

	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		version_len,
		init_utsname()->version);
	show_trace(NULL, NULL, &stack, bp);
}
EXPORT_SYMBOL(dump_stack);
/* Taken in oops_begin(); released by oops_end() once die_nest_count drops to zero. */
static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
/* CPU recorded by oops_begin() as the current reporter; reset to -1 by oops_end(). */
static int die_owner = -1;
/* Incremented by every oops_begin(), decremented by every oops_end(). */
static unsigned int die_nest_count;
/*
 * Enter oops reporting: disable local interrupts, take die_lock (unless
 * this CPU is already recorded as its owner), and make the console
 * verbose.
 *
 * Returns the saved interrupt flags, to be handed back to oops_end().
 */
unsigned __kprobes long oops_begin(void)
{
	unsigned long flags;
	int this_cpu;

	oops_enter();

	/* racy, but better than risking deadlock. */
	raw_local_irq_save(flags);
	this_cpu = smp_processor_id();

	/*
	 * A failed trylock on the CPU recorded as owner is a nested oops
	 * (it should stop eventually), so carry on without the lock.
	 * Any other CPU waits for it.
	 */
	if (!__raw_spin_trylock(&die_lock) && this_cpu != die_owner)
		__raw_spin_lock(&die_lock);

	die_nest_count++;
	die_owner = this_cpu;
	console_verbose();
	bust_spinlocks(1);

	return flags;
}
/*
 * Leave oops reporting.
 *
 * @flags: interrupt flags returned by the matching oops_begin()
 * @regs:  register state; when non-NULL and kexec_should_crash() agrees,
 *         crash_kexec() is invoked first
 * @signr: signal to exit the current task with; 0 means just return
 *
 * With a non-zero @signr the function panics when in interrupt context
 * or when panic_on_oops is set, and otherwise calls do_exit(@signr).
 */
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
	if (regs && kexec_should_crash(current))
		crash_kexec(regs);

	bust_spinlocks(0);
	die_owner = -1;
	add_taint(TAINT_DIE);

	/* Nest count reaches zero, release the lock. */
	if (--die_nest_count == 0)
		__raw_spin_unlock(&die_lock);

	raw_local_irq_restore(flags);
	oops_exit();

	if (signr) {
		if (in_interrupt())
			panic("Fatal exception in interrupt");
		if (panic_on_oops)
			panic("Fatal exception");
		do_exit(signr);
	}
}
/*
 * Print the oops banner, the register dump and a short summary line.
 *
 * @str:  short description printed at the start of the banner
 * @regs: register state at the time of the fault
 * @err:  error code; only its low 16 bits are printed
 *
 * Returns 1 when a DIE_OOPS notifier answers NOTIFY_STOP (nothing after
 * the banner is printed in that case), 0 otherwise.
 */
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
#ifdef CONFIG_X86_32
unsigned short ss;
unsigned long sp;
#endif
/* Banner: "<str>: <err> [#<count>] ", then one tag per enabled config option. */
printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
#ifdef CONFIG_SMP
printk("SMP ");
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
sysfs_printk_last_file();
/* Give registered die notifiers the chance to stop the report. */
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
return 1;
show_registers(regs);
#ifdef CONFIG_X86_32
/*
 * Start from the address of the regs->sp slot and the segment obtained
 * by savesegment(); for a user-mode fault use the sp/ss values saved in
 * @regs instead.
 */
sp = (unsigned long) (&regs->sp);
savesegment(ss, ss);
if (user_mode(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
}
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
print_symbol("%s", regs->ip);
printk(" SS:ESP %04x:%08lx\n", ss, sp);
#else
/* Executive summary in case the oops scrolled away */
printk(KERN_ALERT "RIP ");
printk_address(regs->ip, 1);
printk(" RSP <%016lx>\n", regs->sp);
#endif
return 0;
}
/*
 * This is gone through when something in the kernel has done something bad
 * and is about to be terminated.
 *
 * The fault is reported via __die(); the task is then finished off with
 * SIGSEGV by oops_end(), unless __die() returned non-zero, in which case
 * oops_end() is told not to signal (signr 0).
 */
void die(const char *str, struct pt_regs *regs, long err)
{
	unsigned long flags = oops_begin();
	int sig;

	/* report_bug() is only consulted for faults outside user mode. */
	if (!user_mode_vm(regs))
		report_bug(regs->ip, regs);

	sig = __die(str, regs, err) ? 0 : SIGSEGV;

	oops_end(flags, regs, sig);
}
/*
 * Report a fatal NMI condition.
 *
 * @str:      message printed ahead of the register dump
 * @regs:     register state at the time of the NMI
 * @do_panic: non-zero forces a panic after the dump has been printed
 *
 * Returns early, printing nothing, when a DIE_NMIWATCHDOG notifier
 * answers NOTIFY_STOP.  Otherwise it panics when @do_panic or
 * panic_on_oops is set, and else finishes with do_exit(SIGBUS).
 */
void notrace __kprobes
die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
unsigned long flags;
if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
return;
/*
* We are in trouble anyway, lets at least try
* to get a message out.
*/
flags = oops_begin();
printk(KERN_EMERG "%s", str);
printk(" on CPU%d, ip %08lx, registers:\n",
smp_processor_id(), regs->ip);
show_registers(regs);
/* signr 0: oops_end() only unwinds the oops state and returns here. */
oops_end(flags, regs, 0);
if (do_panic || panic_on_oops)
panic("Non maskable interrupt");
/* Leave NMI context and re-enable interrupts before exiting the task. */
nmi_exit();
local_irq_enable();
do_exit(SIGBUS);
}
/*
 * Early parameter "oops": the value "panic" sets panic_on_oops.
 * Returns -EINVAL when no value was supplied, 0 otherwise (any other
 * value is silently accepted).
 */
static int __init oops_setup(char *s)
{
	if (s == NULL)
		return -EINVAL;

	if (strcmp(s, "panic") == 0)
		panic_on_oops = 1;

	return 0;
}
early_param("oops", oops_setup);
/*
 * Early parameter "kstack": sets kstack_depth_to_print from the numeric
 * value (base auto-detected by simple_strtoul()).
 * Returns -EINVAL when no value was supplied, 0 otherwise.
 */
static int __init kstack_setup(char *s)
{
	if (s == NULL)
		return -EINVAL;

	kstack_depth_to_print = simple_strtoul(s, NULL, 0);

	return 0;
}
early_param("kstack", kstack_setup);
/*
 * Boot option "code_bytes=": sets code_bytes from the numeric value,
 * capped at 8192.  Always returns 1.
 */
static int __init code_bytes_setup(char *s)
{
	unsigned int requested = simple_strtoul(s, NULL, 0);

	code_bytes = requested > 8192 ? 8192 : requested;

	return 1;
}
__setup("code_bytes=", code_bytes_setup);

Просмотреть файл

@ -0,0 +1,39 @@
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
*/
#ifndef DUMPSTACK_H
#define DUMPSTACK_H
#ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8
#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
#else
#define STACKSLOTS_PER_LINE 4
#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
#endif
extern unsigned long
print_context_stack(struct thread_info *tinfo,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data,
unsigned long *end, int *graph);
extern void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl);
extern void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl);
extern unsigned int code_bytes;
extern int kstack_depth_to_print;
/* The form of the top of the frame on the stack */
struct stack_frame {
struct stack_frame *next_frame;
unsigned long return_address;
};
#endif

Просмотреть файл

@ -17,69 +17,14 @@
#include <asm/stacktrace.h>
#define STACKSLOTS_PER_LINE 8
#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
int panic_on_unrecovered_nmi;
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static unsigned int code_bytes = 64;
static int die_counter;
void printk_address(unsigned long address, int reliable)
{
printk(" [<%p>] %s%pS\n", (void *) address,
reliable ? "" : "? ", (void *) address);
}
static inline int valid_stack_ptr(struct thread_info *tinfo,
void *p, unsigned int size, void *end)
{
void *t = tinfo;
if (end) {
if (p < end && p >= (end-THREAD_SIZE))
return 1;
else
return 0;
}
return p > t && p < t + THREAD_SIZE - size;
}
/* The form of the top of the frame on the stack */
struct stack_frame {
struct stack_frame *next_frame;
unsigned long return_address;
};
static inline unsigned long
print_context_stack(struct thread_info *tinfo,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data,
unsigned long *end)
{
struct stack_frame *frame = (struct stack_frame *)bp;
while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
unsigned long addr;
addr = *stack;
if (__kernel_text_address(addr)) {
if ((unsigned long) stack == bp + sizeof(long)) {
ops->address(data, addr, 1);
frame = frame->next_frame;
bp = (unsigned long) frame;
} else {
ops->address(data, addr, bp == 0);
}
}
stack++;
}
return bp;
}
#include "dumpstack.h"
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
{
int graph = 0;
if (!task)
task = current;
@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
bp = print_context_stack(context, stack, bp, ops, data, NULL);
bp = print_context_stack(context, stack, bp, ops,
data, NULL, &graph);
stack = (unsigned long *)context->previous_esp;
if (!stack)
@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
}
EXPORT_SYMBOL(dump_trace);
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
printk(data);
print_symbol(msg, symbol);
printk("\n");
}
static void print_trace_warning(void *data, char *msg)
{
printk("%s%s\n", (char *)data, msg);
}
static int print_trace_stack(void *data, char *name)
{
printk("%s <%s> ", (char *)data, name);
return 0;
}
/*
* Print one address/symbol entries per line.
*/
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
touch_nmi_watchdog();
printk(data);
printk_address(addr, reliable);
}
static const struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
.address = print_trace_address,
};
static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl)
{
printk("%sCall Trace:\n", log_lvl);
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
}
void show_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp)
{
show_trace_log_lvl(task, regs, stack, bp, "");
}
static void
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *sp)
{
show_stack_log_lvl(task, NULL, sp, 0, "");
}
/*
* The architecture-independent dump_stack generator
*/
void dump_stack(void)
{
unsigned long bp = 0;
unsigned long stack;
#ifdef CONFIG_FRAME_POINTER
if (!bp)
get_bp(bp);
#endif
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
show_trace(NULL, NULL, &stack, bp);
}
EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
unsigned __kprobes long oops_begin(void)
{
unsigned long flags;
oops_enter();
if (die_owner != raw_smp_processor_id()) {
console_verbose();
raw_local_irq_save(flags);
__raw_spin_lock(&die_lock);
die_owner = smp_processor_id();
die_nest_count = 0;
bust_spinlocks(1);
} else {
raw_local_irq_save(flags);
}
die_nest_count++;
return flags;
}
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
bust_spinlocks(0);
die_owner = -1;
add_taint(TAINT_DIE);
__raw_spin_unlock(&die_lock);
raw_local_irq_restore(flags);
if (!regs)
return;
if (kexec_should_crash(current))
crash_kexec(regs);
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
oops_exit();
do_exit(signr);
}
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
unsigned short ss;
unsigned long sp;
printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
#ifdef CONFIG_SMP
printk("SMP ");
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
sysfs_printk_last_file();
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
return 1;
show_registers(regs);
/* Executive summary in case the oops scrolled away */
sp = (unsigned long) (&regs->sp);
savesegment(ss, ss);
if (user_mode(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
}
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
print_symbol("%s", regs->ip);
printk(" SS:ESP %04x:%08lx\n", ss, sp);
return 0;
}
/*
* This is gone through when something in the kernel has done something bad
* and is about to be terminated:
*/
void die(const char *str, struct pt_regs *regs, long err)
{
unsigned long flags = oops_begin();
if (die_nest_count < 3) {
report_bug(regs->ip, regs);
if (__die(str, regs, err))
regs = NULL;
} else {
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
}
oops_end(flags, regs, SIGSEGV);
}
static DEFINE_SPINLOCK(nmi_print_lock);
void notrace __kprobes
die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
return;
spin_lock(&nmi_print_lock);
/*
* We are in trouble anyway, lets at least try
* to get a message out:
*/
bust_spinlocks(1);
printk(KERN_EMERG "%s", str);
printk(" on CPU%d, ip %08lx, registers:\n",
smp_processor_id(), regs->ip);
show_registers(regs);
if (do_panic)
panic("Non maskable interrupt");
console_silent();
spin_unlock(&nmi_print_lock);
/*
* If we are in kernel we are probably nested up pretty bad
* and might aswell get out now while we still can:
*/
if (!user_mode_vm(regs)) {
current->thread.trap_no = 2;
crash_kexec(regs);
}
bust_spinlocks(0);
do_exit(SIGSEGV);
}
static int __init oops_setup(char *s)
{
if (!s)
return -EINVAL;
if (!strcmp(s, "panic"))
panic_on_oops = 1;
return 0;
}
early_param("oops", oops_setup);
static int __init kstack_setup(char *s)
{
if (!s)
return -EINVAL;
kstack_depth_to_print = simple_strtoul(s, NULL, 0);
return 0;
}
early_param("kstack", kstack_setup);
static int __init code_bytes_setup(char *s)
{
code_bytes = simple_strtoul(s, NULL, 0);
if (code_bytes > 8192)
code_bytes = 8192;
return 1;
}
__setup("code_bytes=", code_bytes_setup);

Просмотреть файл

@ -17,19 +17,7 @@
#include <asm/stacktrace.h>
#define STACKSLOTS_PER_LINE 4
#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
int panic_on_unrecovered_nmi;
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static unsigned int code_bytes = 64;
static int die_counter;
void printk_address(unsigned long address, int reliable)
{
printk(" [<%p>] %s%pS\n", (void *) address,
reliable ? "" : "? ", (void *) address);
}
#include "dumpstack.h"
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
static inline int valid_stack_ptr(struct thread_info *tinfo,
void *p, unsigned int size, void *end)
{
void *t = tinfo;
if (end) {
if (p < end && p >= (end-THREAD_SIZE))
return 1;
else
return 0;
}
return p > t && p < t + THREAD_SIZE - size;
}
/* The form of the top of the frame on the stack */
struct stack_frame {
struct stack_frame *next_frame;
unsigned long return_address;
};
static inline unsigned long
print_context_stack(struct thread_info *tinfo,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data,
unsigned long *end)
{
struct stack_frame *frame = (struct stack_frame *)bp;
while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
unsigned long addr;
addr = *stack;
if (__kernel_text_address(addr)) {
if ((unsigned long) stack == bp + sizeof(long)) {
ops->address(data, addr, 1);
frame = frame->next_frame;
bp = (unsigned long) frame;
} else {
ops->address(data, addr, bp == 0);
}
}
stack++;
}
return bp;
}
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
unsigned used = 0;
struct thread_info *tinfo;
int graph = 0;
if (!task)
task = current;
@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
break;
bp = print_context_stack(tinfo, stack, bp, ops,
data, estack_end);
data, estack_end, &graph);
ops->stack(data, "<EOE>");
/*
* We link to the next stack via the
@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
ops, data, irqstack_end);
ops, data, irqstack_end, &graph);
/*
* We link to the next stack (which would be
* the process stack normally) the last
@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
/*
* This handles the process stack:
*/
bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
put_cpu();
}
EXPORT_SYMBOL(dump_trace);
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
printk(data);
print_symbol(msg, symbol);
printk("\n");
}
static void print_trace_warning(void *data, char *msg)
{
printk("%s%s\n", (char *)data, msg);
}
static int print_trace_stack(void *data, char *name)
{
printk("%s <%s> ", (char *)data, name);
return 0;
}
/*
* Print one address/symbol entries per line.
*/
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
touch_nmi_watchdog();
printk(data);
printk_address(addr, reliable);
}
static const struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
.address = print_trace_address,
};
static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl)
{
printk("%sCall Trace:\n", log_lvl);
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
}
void show_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp)
{
show_trace_log_lvl(task, regs, stack, bp, "");
}
static void
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *sp)
{
show_stack_log_lvl(task, NULL, sp, 0, "");
}
/*
* The architecture-independent dump_stack generator
*/
void dump_stack(void)
{
unsigned long bp = 0;
unsigned long stack;
#ifdef CONFIG_FRAME_POINTER
if (!bp)
get_bp(bp);
#endif
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
show_trace(NULL, NULL, &stack, bp);
}
EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
int i;
@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
unsigned __kprobes long oops_begin(void)
{
int cpu;
unsigned long flags;
oops_enter();
/* racy, but better than risking deadlock. */
raw_local_irq_save(flags);
cpu = smp_processor_id();
if (!__raw_spin_trylock(&die_lock)) {
if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
__raw_spin_lock(&die_lock);
}
die_nest_count++;
die_owner = cpu;
console_verbose();
bust_spinlocks(1);
return flags;
}
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
die_owner = -1;
bust_spinlocks(0);
die_nest_count--;
if (!die_nest_count)
/* Nest count reaches zero, release the lock. */
__raw_spin_unlock(&die_lock);
raw_local_irq_restore(flags);
if (!regs) {
oops_exit();
return;
}
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
oops_exit();
do_exit(signr);
}
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
#ifdef CONFIG_SMP
printk("SMP ");
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
sysfs_printk_last_file();
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
return 1;
show_registers(regs);
add_taint(TAINT_DIE);
/* Executive summary in case the oops scrolled away */
printk(KERN_ALERT "RIP ");
printk_address(regs->ip, 1);
printk(" RSP <%016lx>\n", regs->sp);
if (kexec_should_crash(current))
crash_kexec(regs);
return 0;
}
void die(const char *str, struct pt_regs *regs, long err)
{
unsigned long flags = oops_begin();
if (!user_mode(regs))
report_bug(regs->ip, regs);
if (__die(str, regs, err))
regs = NULL;
oops_end(flags, regs, SIGSEGV);
}
notrace __kprobes void
die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
unsigned long flags;
if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
return;
flags = oops_begin();
/*
* We are in trouble anyway, lets at least try
* to get a message out.
*/
printk(KERN_EMERG "%s", str);
printk(" on CPU%d, ip %08lx, registers:\n",
smp_processor_id(), regs->ip);
show_registers(regs);
if (kexec_should_crash(current))
crash_kexec(regs);
if (do_panic || panic_on_oops)
panic("Non maskable interrupt");
oops_end(flags, NULL, SIGBUS);
nmi_exit();
local_irq_enable();
do_exit(SIGBUS);
}
static int __init oops_setup(char *s)
{
if (!s)
return -EINVAL;
if (!strcmp(s, "panic"))
panic_on_oops = 1;
return 0;
}
early_param("oops", oops_setup);
static int __init kstack_setup(char *s)
{
if (!s)
return -EINVAL;
kstack_depth_to_print = simple_strtoul(s, NULL, 0);
return 0;
}
early_param("kstack", kstack_setup);
static int __init code_bytes_setup(char *s)
{
code_bytes = simple_strtoul(s, NULL, 0);
if (code_bytes > 8192)
code_bytes = 8192;
return 1;
}
__setup("code_bytes=", code_bytes_setup);

Просмотреть файл

@ -1157,6 +1157,9 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
cmpl $0, function_trace_stop
jne ftrace_stub
pushl %eax
pushl %ecx
pushl %edx
@ -1171,6 +1174,11 @@ ftrace_call:
popl %edx
popl %ecx
popl %eax
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
jmp ftrace_stub
#endif
.globl ftrace_stub
ftrace_stub:
@ -1180,8 +1188,18 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
cmpl $0, function_trace_stop
jne ftrace_stub
cmpl $ftrace_stub, ftrace_trace_function
jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
cmpl $ftrace_stub, ftrace_graph_return
jnz ftrace_graph_caller
cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
ret
@ -1200,12 +1218,43 @@ trace:
popl %edx
popl %ecx
popl %eax
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * 32-bit entry hook for the function graph tracer.  Saves %eax, %ecx
 * and %edx around a call to prepare_ftrace_return() and returns to the
 * caller.  Does nothing when function_trace_stop is non-zero.
 */
ENTRY(ftrace_graph_caller)
cmpl $0, function_trace_stop
jne ftrace_stub
pushl %eax
pushl %ecx
pushl %edx
/* 0xc(%esp): the word that was on top of the stack on entry (above the three pushes). */
movl 0xc(%esp), %edx
/* %eax = address of the stack slot at 4(%ebp). */
lea 0x4(%ebp), %eax
/* Step %edx back by the size of the mcount call instruction. */
subl $MCOUNT_INSN_SIZE, %edx
/* NOTE(review): arguments are passed in %eax/%edx, presumably the kernel's regparm convention - confirm. */
call prepare_ftrace_return
popl %edx
popl %ecx
popl %eax
ret
END(ftrace_graph_caller)
/*
 * 32-bit return trampoline.  Calls ftrace_return_to_handler() and then
 * returns to the address that call hands back in %eax, with %eax, %ecx
 * and %edx preserved.
 */
.globl return_to_handler
return_to_handler:
/* Reserve the stack slot that will hold the address to return to. */
pushl $0
pushl %eax
pushl %ecx
pushl %edx
call ftrace_return_to_handler
/* Store the result in the reserved slot (above the three saved registers). */
movl %eax, 0xc(%esp)
popl %edx
popl %ecx
popl %eax
/* Pops the slot filled in above. */
ret
#endif
.section .rodata,"a"
#include "syscall_table_32.S"

Просмотреть файл

@ -68,6 +68,8 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
cmpl $0, function_trace_stop
jne ftrace_stub
/* taken from glibc */
subq $0x38, %rsp
@ -96,6 +98,12 @@ ftrace_call:
movq (%rsp), %rax
addq $0x38, %rsp
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
jmp ftrace_stub
#endif
.globl ftrace_stub
ftrace_stub:
retq
@ -103,8 +111,20 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
cmpl $0, function_trace_stop
jne ftrace_stub
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
cmpq $ftrace_stub, ftrace_graph_return
jnz ftrace_graph_caller
cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
retq
@ -140,6 +160,69 @@ END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * 64-bit entry hook for the function graph tracer.  Saves %rax, %rcx,
 * %rdx, %rsi, %rdi, %r8 and %r9 in a 0x38-byte stack area around a call
 * to prepare_ftrace_return().  Does nothing when function_trace_stop is
 * non-zero.
 */
ENTRY(ftrace_graph_caller)
cmpl $0, function_trace_stop
jne ftrace_stub
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
/* First argument: address of the stack slot at 8(%rbp). */
leaq 8(%rbp), %rdi
/* Second argument: the word that was on top of the stack on entry ... */
movq 0x38(%rsp), %rsi
/* ... stepped back by the size of the mcount call instruction. */
subq $MCOUNT_INSN_SIZE, %rsi
call prepare_ftrace_return
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
retq
END(ftrace_graph_caller)
/*
 * 64-bit return trampoline.  Calls ftrace_return_to_handler() and then
 * returns to the address that call hands back in %rax, with the nine
 * registers saved below preserved.
 */
.globl return_to_handler
return_to_handler:
/* 80 bytes: nine saved registers at 0..64 plus one slot at 72 for the return address. */
subq $80, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq %r10, 56(%rsp)
movq %r11, 64(%rsp)
call ftrace_return_to_handler
/* Store the result in the slot at 72(%rsp). */
movq %rax, 72(%rsp)
movq 64(%rsp), %r11
movq 56(%rsp), %r10
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
/* Only 72 of the 80 bytes are released: %rsp now points at the stored address, which retq pops. */
addq $72, %rsp
retq
#endif
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

Просмотреть файл

@ -38,8 +38,11 @@
#include <asm/io.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/atomic.h>
#include <asm/apicdef.h>
#include <mach_mpparse.h>
#include <asm/genapic.h>
#include <asm/setup.h>
/*
* ES7000 chipsets
@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
return gsi;
}
static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
{
unsigned long vect = 0, psaival = 0;
if (psai == NULL)
return -1;
vect = ((unsigned long)__pa(eip)/0x1000) << 16;
psaival = (0x1000000 | vect | cpu);
while (*psai & 0x1000000)
;
*psai = psaival;
return 0;
}
/*
 * Deliberately empty replacement for genapic->wait_for_init_deassert;
 * installed by es7000_update_genapic().  The argument is ignored.
 */
static void noop_wait_for_deassert(atomic_t *deassert_not_used)
{
}
/*
 * Install the ES7000-specific genapic hooks.
 *
 * CPU wake-up always goes through wakeup_secondary_cpu_via_mip().  On
 * family 6, models 7 through 11 (the "MPENTIUMIII" case) the genapic is
 * additionally switched to cluster mode and waiting for INIT deassert
 * becomes a no-op.
 *
 * The model test must be a range check (&&).  Written with ||, it was
 * true for every model, so every family-6 CPU took the cluster branch.
 *
 * Always returns 0.
 */
static int __init es7000_update_genapic(void)
{
	genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;

	/* MPENTIUMIII */
	if (boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model >= 7 &&
	    boot_cpu_data.x86_model <= 11) {
		es7000_update_genapic_to_cluster();
		genapic->wait_for_init_deassert = noop_wait_for_deassert;
		/*
		 * NOTE(review): repeated from above; presumably kept in
		 * case the cluster switch replaces the hook - confirm.
		 */
		genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
	}

	return 0;
}
void __init
setup_unisys(void)
{
@ -176,6 +216,8 @@ setup_unisys(void)
else
es7000_plat = ES7000_CLASSIC;
ioapic_renumber_irq = es7000_rename_gsi;
x86_quirks->update_genapic = es7000_update_genapic;
}
/*
@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
return status;
}
int
es7000_start_cpu(int cpu, unsigned long eip)
{
unsigned long vect = 0, psaival = 0;
if (psai == NULL)
return -1;
vect = ((unsigned long)__pa(eip)/0x1000) << 16;
psaival = (0x1000000 | vect | cpu);
while (*psai & 0x1000000)
;
*psai = psaival;
return 0;
}
void __init
es7000_sw_apic(void)
{

Просмотреть файл

@ -14,14 +14,17 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/ftrace.h>
#include <linux/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>
static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
#ifdef CONFIG_DYNAMIC_FTRACE
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
@ -31,18 +34,12 @@ union ftrace_code_union {
} __attribute__((packed));
};
/* Signed distance from @ip to @addr, truncated to int. */
static int ftrace_calc_offset(long ip, long addr)
{
	long delta = addr - ip;

	return (int)delta;
}
/* Return a pointer to the MCOUNT_INSN_SIZE-byte ftrace_nop buffer. */
unsigned char *ftrace_nop_replace(void)
{
	return &ftrace_nop[0];
}
unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
int
/*
* Modifying code must take extra care. On an SMP machine, if
* the code being modified is also being executed on another CPU
* that CPU will have undefined results and possibly take a GPF.
* We use kstop_machine to stop other CPUs from executing code.
* But this does not stop NMIs from happening. We still need
* to protect against that. We separate out the modification of
* the code to take care of this.
*
* Two buffers are added: An IP buffer and a "code" buffer.
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
* 2) Set a flag that says we are modifying code
* 3) Wait for any running NMIs to finish.
* 4) Write the code
* 5) clear the flag.
* 6) Wait for any running NMIs to finish.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
* and if it is, it will write what is in the IP and "code" buffers.
*
* The trick is, it does not matter if everyone is writing the same
* content to the code location. Also, if a CPU is executing code
* it is OK to write to that code location if the contents being written
* are the same as what exists.
*/
static atomic_t in_nmi = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);
/*
 * Format the NMI statistics into @buf (at most @size bytes): the number of
 * wait_for_nmi() calls that had to spin, followed by the number of code
 * writes done from ftrace_nmi_enter().
 *
 * Returns the value returned by snprintf().
 */
int ftrace_arch_read_dyn_info(char *buf, int size)
{
	return snprintf(buf, size, "%u %u",
			nmi_wait_count,
			atomic_read(&nmi_update_count));
}
/*
 * Write MCOUNT_INSN_SIZE bytes from mod_code_newcode to mod_code_ip and
 * store the probe_kernel_write() result in mod_code_status.
 */
static void ftrace_mod_code(void)
{
/*
* Yes, more than one CPU process can be writing to mod_code_status.
* (and the code itself)
* But if one were to fail, then they all should, and if one were
* to succeed, then they all should.
*/
mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
MCOUNT_INSN_SIZE);
}
/*
 * NMI entry hook: bump in_nmi and, when a code modification is pending
 * (mod_code_write is set), perform the same write from NMI context.
 */
void ftrace_nmi_enter(void)
{
atomic_inc(&in_nmi);
/* Must have in_nmi seen before reading write flag */
smp_mb();
if (mod_code_write) {
ftrace_mod_code();
/* statistic reported by ftrace_arch_read_dyn_info() */
atomic_inc(&nmi_update_count);
}
}
/* NMI exit hook: drop the in_nmi count taken by ftrace_nmi_enter(). */
void ftrace_nmi_exit(void)
{
/* Finish all executions before clearing in_nmi */
/* NOTE(review): smp_wmb() orders stores only - confirm a full smp_mb() is not required here */
smp_wmb();
atomic_dec(&in_nmi);
}
/*
 * Spin until in_nmi reads as zero.  nmi_wait_count is incremented once
 * for every call that actually had to spin.
 */
static void wait_for_nmi(void)
{
	if (!atomic_read(&in_nmi))
		return;

	do {
		cpu_relax();
	} while (atomic_read(&in_nmi));

	nmi_wait_count++;
}
/*
 * Replace the instruction at @ip with @new_code (MCOUNT_INSN_SIZE bytes).
 *
 * The target and the new bytes are published in mod_code_ip and
 * mod_code_newcode, then mod_code_write is raised so that NMI handlers
 * perform the very same write.  The code is written once no NMI is
 * running; afterwards the flag is cleared and NMIs are waited for again.
 *
 * Returns mod_code_status, i.e. the result of probe_kernel_write().
 */
static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
mod_code_ip = (void *)ip;
mod_code_newcode = new_code;
/* The buffers need to be visible before we let NMIs write them */
smp_wmb();
mod_code_write = 1;
/* Make sure write bit is visible before we wait on NMIs */
smp_mb();
wait_for_nmi();
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
smp_wmb();
mod_code_write = 0;
/* make sure NMIs see the cleared bit */
smp_mb();
wait_for_nmi();
return mod_code_status;
}
static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
/* Return the start of the ftrace_nop[] buffer (MCOUNT_INSN_SIZE bytes). */
static unsigned char *ftrace_nop_replace(void)
{
	return &ftrace_nop[0];
}
static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return -EINVAL;
/* replace the text with the new text */
if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
if (do_ftrace_mod_code(ip, new_code))
return -EPERM;
sync_core();
@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return 0;
}
/*
 * Replace the call to @addr at rec->ip with the nop sequence.
 * @mod is not used here.  Returns the ftrace_modify_code() result.
 */
int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned char *call_insn = ftrace_call_replace(ip, addr);
	unsigned char *nop_insn = ftrace_nop_replace();

	return ftrace_modify_code(ip, call_insn, nop_insn);
}
/*
 * Replace the nop sequence at rec->ip with a call to @addr.
 * Returns the ftrace_modify_code() result.
 */
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned char *nop_insn = ftrace_nop_replace();
	unsigned char *call_insn = ftrace_call_replace(ip, addr);

	return ftrace_modify_code(ip, nop_insn, call_insn);
}
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);
/*
 * Rewrite the 32-bit offset of the jump instruction at @ip.
 *
 * The instruction must start with opcode byte 0xe9 and currently carry
 * @old_offset; the offset is then replaced by @new_offset.
 *
 * Returns 0 on success, -EFAULT if the instruction cannot be read,
 * -EINVAL if it is not the expected jump, -EPERM if the write fails.
 */
static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	unsigned char insn[MCOUNT_INSN_SIZE];
	int *rel32 = (int *)(&insn[1]);

	if (probe_kernel_read(insn, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (insn[0] != 0xe9)
		return -EINVAL;
	if (*rel32 != old_offset)
		return -EINVAL;

	*rel32 = new_offset;

	if (do_ftrace_mod_code(ip, insn))
		return -EPERM;

	return 0;
}
/*
 * Retarget the jump at ftrace_graph_call from ftrace_stub to
 * ftrace_graph_caller.  Returns the ftrace_mod_jmp() result.
 */
int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	/* offsets are relative to the end of the instruction */
	unsigned long next_ip = ip + MCOUNT_INSN_SIZE;
	int old_offset = (unsigned long)(&ftrace_stub) - next_ip;
	int new_offset = (unsigned long)(&ftrace_graph_caller) - next_ip;

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}
/*
 * Retarget the jump at ftrace_graph_call from ftrace_graph_caller back to
 * ftrace_stub.  Returns the ftrace_mod_jmp() result.
 */
int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	/* offsets are relative to the end of the instruction */
	unsigned long next_ip = ip + MCOUNT_INSN_SIZE;
	int old_offset = (unsigned long)(&ftrace_graph_caller) - next_ip;
	int new_offset = (unsigned long)(&ftrace_stub) - next_ip;

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}
#else /* CONFIG_DYNAMIC_FTRACE */
/*
* These functions are picked from those used on
* this page for dynamic ftrace. They have been
* simplified to ignore all traces in NMI context.
*/
static atomic_t in_nmi;
/*
 * NMI entry hook: bump in_nmi, which prepare_ftrace_return() checks in
 * order to skip tracing while an NMI is being handled.
 */
void ftrace_nmi_enter(void)
{
atomic_inc(&in_nmi);
}
/* NMI exit hook: drop the in_nmi count taken by ftrace_nmi_enter(). */
void ftrace_nmi_exit(void)
{
atomic_dec(&in_nmi);
}
#endif /* !CONFIG_DYNAMIC_FTRACE */
/*
 * Add a function return address to the return stack of the current task
 * (current->ret_stack).
 *
 * @ret:   original return address to remember
 * @time:  call timestamp stored with the entry
 * @func:  address of the traced function
 * @depth: receives the index of the new entry on success
 *
 * Returns 0 on success, -EBUSY when the task has no return stack or the
 * stack is full (current->trace_overrun is incremented in that case).
 */
static int push_return_trace(unsigned long ret, unsigned long long time,
unsigned long func, int *depth)
{
int index;
if (!current->ret_stack)
return -EBUSY;
/* The return trace stack is full */
if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
atomic_inc(&current->trace_overrun);
return -EBUSY;
}
index = ++current->curr_ret_stack;
/* keep the index update ahead of the entry stores below */
barrier();
current->ret_stack[index].ret = ret;
current->ret_stack[index].func = func;
current->ret_stack[index].calltime = time;
*depth = index;
return 0;
}
/*
 * Pop the top entry of the current task's return stack.
 *
 * @trace: filled with func, calltime, overrun and depth of the entry
 * @ret:   receives the saved return address; when the stack is empty
 *         tracing is stopped and the address of panic() is stored instead
 */
static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
{
int index;
index = current->curr_ret_stack;
if (unlikely(index < 0)) {
ftrace_graph_stop();
WARN_ON(1);
/* Might as well panic, otherwise we have no where to go */
*ret = (unsigned long)panic;
return;
}
*ret = current->ret_stack[index].ret;
trace->func = current->ret_stack[index].func;
trace->calltime = current->ret_stack[index].calltime;
trace->overrun = atomic_read(&current->trace_overrun);
trace->depth = index;
/* finish reading the entry before the slot is released */
barrier();
current->curr_ret_stack--;
}
/*
* Send the trace to the ring-buffer.
* @return the original return address.
*/
unsigned long ftrace_return_to_handler(void)
{
	struct ftrace_graph_ret trace;
	unsigned long orig_ret;

	/* fetch the saved entry, stamp the return time and report it */
	pop_return_trace(&trace, &orig_ret);
	trace.rettime = cpu_clock(raw_smp_processor_id());
	ftrace_graph_return(&trace);

	if (unlikely(!orig_ret)) {
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic. What else to do? */
		return (unsigned long)panic;
	}

	return orig_ret;
}
/*
 * Hook the return address and push it in the stack of return addrs
 * of the current task.
 *
 * @parent:    location of the caller's return address; it is read and then
 *             overwritten with the address of return_to_handler
 * @self_addr: address of the traced function, recorded as trace.func
 *
 * Nothing is hooked in NMI context or while graph tracing is paused for
 * the current task.  If accessing *parent faults, tracing is stopped.  If
 * the original return address is not kernel text, tracing is stopped and
 * the address is restored.  The original return address is also restored
 * when the return stack is full or the entry callback declines the trace.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
	unsigned long old;
	unsigned long long calltime;
	int faulted;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	/* Nmi's are currently unsupported */
	if (unlikely(atomic_read(&in_nmi)))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against fault, even if it shouldn't
	 * happen. This tool is too much intrusive to
	 * ignore such a protection.
	 *
	 * The fixup code is emitted into a separate section, so it must
	 * jump back to label 3 explicitly after setting the faulted flag.
	 * "old" is written before return_hooker is consumed, hence the
	 * early-clobber: the two must never share a register.
	 */
	asm volatile(
		"1: " _ASM_MOV " (%[parent_old]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [parent_replaced] "=r" (parent), [old] "=&r" (old),
		  [faulted] "=r" (faulted)
		: [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	if (unlikely(!__kernel_text_address(old))) {
		ftrace_graph_stop();
		*parent = old;
		WARN_ON(1);
		return;
	}

	calltime = cpu_clock(raw_smp_processor_id());

	if (push_return_trace(old, calltime,
				self_addr, &trace.depth) == -EBUSY) {
		*parent = old;
		return;
	}

	trace.func = self_addr;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		/* undo the push and give the caller its return address back */
		current->curr_ret_stack--;
		*parent = old;
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

Просмотреть файл

@ -21,6 +21,7 @@
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
#include <asm/setup.h>
extern struct genapic apic_flat;
extern struct genapic apic_physflat;
@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
genapic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
}
if (x86_quirks->update_genapic)
x86_quirks->update_genapic();
}
/* Same for both flat and physical. */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
}
desc = irq_to_desc(i);
if (!desc)
return 0;
spin_lock_irqsave(&desc->lock, flags);
#ifndef CONFIG_SMP
any_count = kstat_irqs(i);

Просмотреть файл

@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
for_each_irq_desc(irq, desc) {
cpumask_t mask;
if (!desc)
continue;
if (irq == 2)
continue;

Просмотреть файл

@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map)
int break_affinity = 0;
int set_affinity = 1;
if (!desc)
continue;
if (irq == 2)
continue;

Просмотреть файл

@ -68,8 +68,7 @@ void __init init_ISA_irqs (void)
/*
* 16 old-style INTA-cycle interrupts:
*/
for (i = 0; i < 16; i++) {
/* first time call this irq_desc */
for (i = 0; i < NR_IRQS_LEGACY; i++) {
struct irq_desc *desc = irq_to_desc(i);
desc->status = IRQ_DISABLED;

Просмотреть файл

@ -142,8 +142,7 @@ void __init init_ISA_irqs(void)
init_bsp_APIC();
init_8259A(0);
for (i = 0; i < 16; i++) {
/* first time call this irq_desc */
for (i = 0; i < NR_IRQS_LEGACY; i++) {
struct irq_desc *desc = irq_to_desc(i);
desc->status = IRQ_DISABLED;

Просмотреть файл

@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early)
{
struct intel_mp_floating *mpf = mpf_found;
if (!mpf)
return;
if (acpi_lapic && early)
return;
/*
* MPS doesn't support hyperthreading, aka only have
* thread 0 apic id in MPS table
*/
if (acpi_lapic && acpi_ioapic)
return;
if (x86_quirks->mach_get_smp_config) {
if (x86_quirks->mach_get_smp_config(early))
return;
}
if (acpi_lapic && early)
return;
/*
* ACPI supports both logical (e.g. Hyper-Threading) and physical
* processors, where MPS only supports physical.
*/
if (acpi_lapic && acpi_ioapic) {
printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
"information\n");
return;
} else if (acpi_lapic)
printk(KERN_INFO "Using ACPI for processor (LAPIC) "
"configuration information\n");
if (!mpf)
return;
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);

Просмотреть файл

@ -31,7 +31,7 @@
#include <asm/numaq.h>
#include <asm/topology.h>
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/e820.h>
#include <asm/setup.h>
@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
return 1;
}
/*
 * x86_quirks->update_genapic hook for NUMA-Q: select
 * wakeup_secondary_cpu_via_nmi() as the genapic wakeup method.
 * Always returns 0.
 */
static int __init numaq_update_genapic(void)
{
genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
return 0;
}
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
.setup_ioapic_ids = numaq_setup_ioapic_ids,
.update_genapic = numaq_update_genapic,
};
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,

Просмотреть файл

@ -7,7 +7,9 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/ftrace.h>
#include <asm/system.h>
#include <asm/apic.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@ -100,6 +102,9 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
struct power_trace it;
trace_power_start(&it, POWER_CSTATE, 1);
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@ -112,6 +117,7 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
trace_power_end(&it);
} else {
local_irq_enable();
/* loop is done by the caller */
@ -122,6 +128,21 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
/*
 * Take the calling CPU out of service and never return.
 * @dummy is unused.
 */
void stop_this_cpu(void *dummy)
{
	local_irq_disable();

	/* Remove this CPU from the online map and disable its local APIC. */
	cpu_clear(smp_processor_id(), cpu_online_map);
	disable_local_APIC();

	/* halt when hlt_works() says so, otherwise keep spinning */
	while (1) {
		if (!hlt_works(smp_processor_id()))
			continue;
		halt();
	}
}
/* Empty callback; the argument is ignored. */
static void do_nothing(void *unused)
{
}
@ -154,24 +175,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
struct power_trace it;
/* the state number passed to the power tracer is derived from ax */
trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
/* arm MONITOR on the thread flags word, then re-check before MWAIT */
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
__mwait(ax, cx);
}
trace_power_end(&it);
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	struct power_trace it;

	/* work is already pending: just re-enable interrupts and leave */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	trace_power_start(&it, POWER_CSTATE, 1);
	__monitor((void *)&current_thread_info()->flags, 0, 0);
	smp_mb();
	/* re-check after arming the monitor */
	if (need_resched())
		local_irq_enable();
	else
		__sti_mwait(0, 0);
	trace_power_end(&it);
}
@ -183,9 +211,13 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
	struct power_trace it;

	trace_power_start(&it, POWER_CSTATE, 0);
	local_irq_enable();

	/* busy-wait until a reschedule is requested */
	for (;;) {
		if (need_resched())
			break;
		cpu_relax();
	}

	trace_power_end(&it);
}
/*

Просмотреть файл

@ -38,6 +38,7 @@
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/dmi.h>
#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
* the task-switch, and shows up in ret_from_fork in entry.S,
* for example.
*/
struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;

Просмотреть файл

@ -39,6 +39,7 @@
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
* - could test fs/gs bitsliced
*
* Kprobes not supported here. Set the probe on schedule instead.
* Function graph tracer not supported too.
*/
struct task_struct *
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;

Просмотреть файл

@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
size_t bts_index, bts_end;
int error;
error = ds_get_bts_end(child, &bts_end);
error = ds_get_bts_end(child->bts, &bts_end);
if (error < 0)
return error;
if (bts_end <= index)
return -EINVAL;
error = ds_get_bts_index(child, &bts_index);
error = ds_get_bts_index(child->bts, &bts_index);
if (error < 0)
return error;
@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
if (bts_end <= bts_index)
bts_index -= bts_end;
error = ds_access_bts(child, bts_index, &bts_record);
error = ds_access_bts(child->bts, bts_index, &bts_record);
if (error < 0)
return error;
@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
size_t end, i;
int error;
error = ds_get_bts_index(child, &end);
error = ds_get_bts_index(child->bts, &end);
if (error < 0)
return error;
if (size < (end * sizeof(struct bts_struct)))
return -EIO;
error = ds_access_bts(child, 0, (const void **)&raw);
error = ds_access_bts(child->bts, 0, (const void **)&raw);
if (error < 0)
return error;
@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
return -EFAULT;
}
error = ds_clear_bts(child);
error = ds_clear_bts(child->bts);
if (error < 0)
return error;
return end;
}
/*
 * Send child->thread.bts_ovfl_signal to @child; installed as the overflow
 * callback by ptrace_bts_config() when PTRACE_BTS_O_SIGNAL is requested.
 */
static void ptrace_bts_ovfl(struct task_struct *child)
{
send_sig(child->thread.bts_ovfl_signal, child, 0);
}
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
@ -760,24 +755,46 @@ static int ptrace_bts_config(struct task_struct *child,
goto errout;
if (cfg.flags & PTRACE_BTS_O_ALLOC) {
ds_ovfl_callback_t ovfl = NULL;
bts_ovfl_callback_t ovfl = NULL;
unsigned int sig = 0;
/* we ignore the error in case we were not tracing child */
(void)ds_release_bts(child);
error = -EINVAL;
if (cfg.size < (10 * bts_cfg.sizeof_bts))
goto errout;
if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
if (!cfg.signal)
goto errout;
error = -EOPNOTSUPP;
goto errout;
sig = cfg.signal;
ovfl = ptrace_bts_ovfl;
}
error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
if (error < 0)
if (child->bts) {
(void)ds_release_bts(child->bts);
kfree(child->bts_buffer);
child->bts = NULL;
child->bts_buffer = NULL;
}
error = -ENOMEM;
child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
if (!child->bts_buffer)
goto errout;
child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
ovfl, /* th = */ (size_t)-1);
if (IS_ERR(child->bts)) {
error = PTR_ERR(child->bts);
kfree(child->bts_buffer);
child->bts = NULL;
child->bts_buffer = NULL;
goto errout;
}
child->thread.bts_ovfl_signal = sig;
}
@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
if (cfg_size < sizeof(cfg))
return -EIO;
error = ds_get_bts_end(child, &end);
error = ds_get_bts_end(child->bts, &end);
if (error < 0)
return error;
error = ds_access_bts(child, /* index = */ 0, &base);
error = ds_access_bts(child->bts, /* index = */ 0, &base);
if (error < 0)
return error;
error = ds_access_bts(child, /* index = */ end, &max);
error = ds_access_bts(child->bts, /* index = */ end, &max);
if (error < 0)
return error;
@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
return -EINVAL;
}
/* The writing task will be the switched-to task on a context
* switch. It needs to write into the switched-from task's BTS
* buffer. */
return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
}
void ptrace_bts_take_timestamp(struct task_struct *tsk,
@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
case 0 ... 0xC:
/* sorry, don't know about them */
break;
case 0xD:
case 0xE: /* Pentium M */
bts_configure(&bts_cfg_pentium_m);
break;
case 0xF: /* Core2 */
case 0x1C: /* Atom */
default: /* Core2, Atom, ... */
bts_configure(&bts_cfg_core2);
break;
default:
/* sorry, don't know about them */
break;
}
break;
case 0xF:
@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
#ifdef CONFIG_X86_PTRACE_BTS
(void)ds_release_bts(child);
if (child->bts) {
(void)ds_release_bts(child->bts);
kfree(child->bts_buffer);
child->bts_buffer = NULL;
child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
if (!child->thread.debugctlmsr)
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
if (!child->thread.debugctlmsr)
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
}
#endif /* CONFIG_X86_PTRACE_BTS */
}
@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
(child, data, (struct ptrace_bts_config __user *)addr);
break;
case PTRACE_BTS_SIZE:
ret = ds_get_bts_index(child, /* pos = */ NULL);
case PTRACE_BTS_SIZE: {
size_t size;
ret = ds_get_bts_index(child->bts, &size);
if (ret == 0) {
BUG_ON(size != (int) size);
ret = (int) size;
}
break;
}
case PTRACE_BTS_GET:
ret = ptrace_bts_read_record
@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_CLEAR:
ret = ds_clear_bts(child);
ret = ds_clear_bts(child->bts);
break;
case PTRACE_BTS_DRAIN:

Просмотреть файл

@ -36,7 +36,10 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
bool port_cf9_safe = false;
/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
warm Don't set the cold reboot flag
cold Set the cold reboot flag
bios Reboot by jumping through the BIOS (only for X86_32)
@ -45,6 +48,7 @@ static int reboot_cpu = -1;
kbd Use the keyboard controller. cold reset (default)
acpi Use the RESET_REG in the FADT
efi Use efi reset_system runtime service
pci Use the so-called "PCI reset register", CF9
force Avoid anything that could hang.
*/
static int __init reboot_setup(char *str)
@ -79,6 +83,7 @@ static int __init reboot_setup(char *str)
case 'k':
case 't':
case 'e':
case 'p':
reboot_type = *str;
break;
@ -404,12 +409,27 @@ static void native_machine_emergency_restart(void)
reboot_type = BOOT_KBD;
break;
case BOOT_EFI:
if (efi_enabled)
efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
efi.reset_system(reboot_mode ?
EFI_RESET_WARM :
EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
reboot_type = BOOT_KBD;
break;
case BOOT_CF9:
port_cf9_safe = true;
/* fall through */
case BOOT_CF9_COND:
if (port_cf9_safe) {
u8 cf9 = inb(0xcf9) & ~6;
outb(cf9|2, 0xcf9); /* Request hard reset */
udelay(50);
outb(cf9|6, 0xcf9); /* Actually do the reset */
udelay(50);
}
reboot_type = BOOT_KBD;
break;
}
@ -470,6 +490,11 @@ static void native_machine_restart(char *__unused)
/*
 * Halt the machine: run machine_shutdown(), then park the calling CPU
 * with stop_this_cpu().
 */
static void native_machine_halt(void)
{
/* stop other cpus and apics */
machine_shutdown();
/* stop this cpu */
stop_this_cpu(NULL);
}
static void native_machine_power_off(void)

Просмотреть файл

@ -583,7 +583,20 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
static struct x86_quirks default_x86_quirks __initdata;
/*
 * Default x86_quirks->update_genapic hook: on SMP builds that are either
 * GENERICARCH or 64-bit, select wakeup_secondary_cpu_via_init() as the
 * genapic wakeup method.  Always returns 0.
 */
static int __init default_update_genapic(void)
{
#if defined(CONFIG_X86_SMP) && \
	(defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64))
	genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
#endif
	return 0;
}
static struct x86_quirks default_x86_quirks __initdata = {
.update_genapic = default_update_genapic,
};
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
@ -1082,7 +1095,7 @@ void __init setup_arch(char **cmdline_p)
ioapic_init_mappings();
/* need to wait for io_apic is mapped */
nr_irqs = probe_nr_irqs();
probe_nr_irqs_gsi();
kvm_guest_init();

Просмотреть файл

@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask)
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
}
/*
 * Take the calling CPU out of service and never return.
 * @dummy is unused.
 */
static void stop_this_cpu(void *dummy)
{
	local_irq_disable();

	/* Remove this CPU from the online map and disable its local APIC. */
	cpu_clear(smp_processor_id(), cpu_online_map);
	disable_local_APIC();

	if (hlt_works(smp_processor_id())) {
		while (1)
			halt();
	}

	/* HLT not usable: spin forever */
	while (1)
		;
}
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/

Просмотреть файл

@ -62,6 +62,7 @@
#include <asm/mtrr.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
#include <asm/setup.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
@ -530,7 +531,7 @@ static void impress_friends(void)
pr_debug("Before bogocount - setting activated=1.\n");
}
static inline void __inquire_remote_apic(int apicid)
void __inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
@ -569,14 +570,13 @@ static inline void __inquire_remote_apic(int apicid)
}
}
#ifdef WAKE_SECONDARY_VIA_NMI
/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
static int __devinit
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
int __devinit
wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@ -593,7 +593,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
if (APIC_INTEGRATED(apic_version[phys_apicid])) {
if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
@ -608,11 +608,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
#endif /* WAKE_SECONDARY_VIA_NMI */
#ifdef WAKE_SECONDARY_VIA_INIT
static int __devinit
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
int __devinit
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
@ -731,7 +729,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
#endif /* WAKE_SECONDARY_VIA_INIT */
struct create_idle {
struct work_struct work;

Просмотреть файл

@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/stacktrace.h>
static void save_stack_warning(void *data, char *msg)
@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
/* One frame record as copied from the user stack by copy_stack_frame(). */
struct stack_frame {
/* followed as the next frame pointer by __save_stack_trace_user() */
const void __user *next_fp;
/* stored as a trace entry when non-zero */
unsigned long ret_addr;
};
/*
 * Copy one struct stack_frame from user address @fp into @frame, with
 * page faults disabled around the copy.
 *
 * Returns 1 on success, 0 if @fp fails access_ok() or the copy is
 * incomplete.
 */
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
	int copied;

	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
		return 0;

	pagefault_disable();
	copied = !__copy_from_user_inatomic(frame, fp, sizeof(*frame));
	pagefault_enable();

	return copied;
}
static inline void __save_stack_trace_user(struct stack_trace *trace)
{
const struct pt_regs *regs = task_pt_regs(current);
const void __user *fp = (const void __user *)regs->bp;
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = regs->ip;
while (trace->nr_entries < trace->max_entries) {
struct stack_frame frame;
frame.next_fp = NULL;
frame.ret_addr = 0;
if (!copy_stack_frame(fp, &frame))
break;
if ((unsigned long)fp < regs->sp)
break;
if (frame.ret_addr) {
trace->entries[trace->nr_entries++] =
frame.ret_addr;
}
if (fp == frame.next_fp)
break;
fp = frame.next_fp;
}
}
/*
 * Save the user-space stack trace of the current task into @trace and
 * terminate it with ULONG_MAX when there is room left.
 */
void save_stack_trace_user(struct stack_trace *trace)
{
	/* Trace user stack if we are not a kernel thread */
	if (current->mm)
		__save_stack_trace_user(trace);

	if (trace->nr_entries >= trace->max_entries)
		return;

	trace->entries[trace->nr_entries++] = ULONG_MAX;
}

Просмотреть файл

@ -17,6 +17,9 @@
* want per guest time just set the kernel.vsyscall64 sysctl to 0.
*/
/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>

Просмотреть файл

@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
#define __do_strncpy_from_user(dst, src, count, res) \
do { \
int __d0, __d1, __d2; \
might_sleep(); \
might_fault(); \
__asm__ __volatile__( \
" testl %1,%1\n" \
" jz 2f\n" \
@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
#define __do_clear_user(addr,size) \
do { \
int __d0; \
might_sleep(); \
might_fault(); \
__asm__ __volatile__( \
"0: rep; stosl\n" \
" movl %2,%0\n" \
@ -155,7 +155,7 @@ do { \
unsigned long
clear_user(void __user *to, unsigned long n)
{
might_sleep();
might_fault();
if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n);
return n;
@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
unsigned long mask = -__addr_ok(s);
unsigned long res, tmp;
might_sleep();
might_fault();
__asm__ __volatile__(
" testl %0, %0\n"

Просмотреть файл

@ -15,7 +15,7 @@
#define __do_strncpy_from_user(dst,src,count,res) \
do { \
long __d0, __d1, __d2; \
might_sleep(); \
might_fault(); \
__asm__ __volatile__( \
" testq %1,%1\n" \
" jz 2f\n" \
@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
unsigned long __clear_user(void __user *addr, unsigned long size)
{
long __d0;
might_sleep();
might_fault();
/* no memory constraint because it doesn't change any memory gcc knows
about */
asm volatile(

Просмотреть файл

@ -17,6 +17,7 @@
#include <asm/bigsmp/apic.h>
#include <asm/bigsmp/ipi.h>
#include <asm/mach-default/mach_mpparse.h>
#include <asm/mach-default/mach_wakecpu.h>
static int dmi_bigsmp; /* can be set by dmi scanners */

Просмотреть файл

@ -16,6 +16,7 @@
#include <asm/mach-default/mach_apic.h>
#include <asm/mach-default/mach_ipi.h>
#include <asm/mach-default/mach_mpparse.h>
#include <asm/mach-default/mach_wakecpu.h>
/* should be called last. */
static int probe_default(void)

Просмотреть файл

@ -16,7 +16,19 @@
#include <asm/es7000/apic.h>
#include <asm/es7000/ipi.h>
#include <asm/es7000/mpparse.h>
#include <asm/es7000/wakecpu.h>
#include <asm/mach-default/mach_wakecpu.h>
/* Switch the current genapic over to the *_cluster / *_CLUSTER settings. */
void __init es7000_update_genapic_to_cluster(void)
{
	/* delivery and destination modes */
	genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
	genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
	genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;

	/* callbacks */
	genapic->target_cpus = target_cpus_cluster;
	genapic->init_apic_ldr = init_apic_ldr_cluster;
	genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
}
static int probe_es7000(void)
{

Просмотреть файл

@ -15,6 +15,7 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/genapic.h>
#include <asm/setup.h>
extern struct genapic apic_numaq;
extern struct genapic apic_summit;
@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
}
}
if (x86_quirks->update_genapic)
x86_quirks->update_genapic();
/* Parsed again by __setup for debug/verbose */
return 0;
}
@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void)
* - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
*/
if (!cmdline_apic && genapic == &apic_default)
if (!cmdline_apic && genapic == &apic_default) {
if (apic_bigsmp.probe()) {
genapic = &apic_bigsmp;
if (x86_quirks->update_genapic)
x86_quirks->update_genapic();
printk(KERN_INFO "Overriding APIC driver with %s\n",
genapic->name);
}
}
#endif
}
@ -94,6 +101,9 @@ void __init generic_apic_probe(void)
/* Not visible without early console */
if (!apic_probe[i])
panic("Didn't find an APIC driver");
if (x86_quirks->update_genapic)
x86_quirks->update_genapic();
}
printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
}
@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
if (x86_quirks->update_genapic)
x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
if (x86_quirks->update_genapic)
x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}

Просмотреть файл

@ -16,6 +16,7 @@
#include <asm/summit/apic.h>
#include <asm/summit/ipi.h>
#include <asm/summit/mpparse.h>
#include <asm/mach-default/mach_wakecpu.h>
static int probe_summit(void)
{

Просмотреть файл

@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
mmiotrace-y := pf_in.o mmio-mod.o
mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa_$(BITS).o

Просмотреть файл

@ -53,7 +53,7 @@
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
#ifdef CONFIG_MMIOTRACE_HOOKS
#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
{
unsigned long flags = oops_begin();
int sig = SIGKILL;
struct task_struct *tsk;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
if (__die("Bad pagetable", regs, error_code))
regs = NULL;
oops_end(flags, regs, SIGKILL);
sig = 0;
oops_end(flags, regs, sig);
}
#endif
@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
int fault;
#ifdef CONFIG_X86_64
unsigned long flags;
int sig;
#endif
tsk = current;
@ -849,11 +851,12 @@ no_context:
bust_spinlocks(0);
do_exit(SIGKILL);
#else
sig = SIGKILL;
if (__die("Oops", regs, error_code))
regs = NULL;
sig = 0;
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
oops_end(flags, regs, SIGKILL);
oops_end(flags, regs, sig);
#endif
/*

Просмотреть файл

@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
#undef PCI_CONF2_ADDRESS
static struct pci_raw_ops pci_direct_conf2 = {
struct pci_raw_ops pci_direct_conf2 = {
.read = pci_conf2_read,
.write = pci_conf2_write,
};
@ -289,6 +289,7 @@ int __init pci_direct_probe(void)
if (pci_check_type1()) {
raw_pci_ops = &pci_direct_conf1;
port_cf9_safe = true;
return 1;
}
release_resource(region);
@ -305,6 +306,7 @@ int __init pci_direct_probe(void)
if (pci_check_type2()) {
raw_pci_ops = &pci_direct_conf2;
port_cf9_safe = true;
return 2;
}

Просмотреть файл

@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops;
extern struct pci_raw_ops *raw_pci_ext_ops;
extern struct pci_raw_ops pci_direct_conf1;
extern bool port_cf9_safe;
/* arch_initcall level */
extern int pci_direct_probe(void);

Просмотреть файл

@ -9,6 +9,9 @@
* Also alternative() doesn't work.
*/
/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>

Просмотреть файл

@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE
depends on SYSFS
select RELAY
select DEBUG_FS
select TRACEPOINTS
help
Say Y here if you want to be able to trace the block layer actions
on a given queue. Tracing allows you to see any traffic happening

Просмотреть файл

@ -28,9 +28,23 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
#include <trace/block.h>
#include "blk.h"
/*
 * Tracepoint definitions for the block-layer events raised from this file
 * through the matching trace_block_*() calls (plug/unplug, request
 * allocation and sleep, requeue, bio merges, bio queueing, remap and
 * request completion).
 */
DEFINE_TRACE(block_plug);
DEFINE_TRACE(block_unplug_io);
DEFINE_TRACE(block_unplug_timer);
DEFINE_TRACE(block_getrq);
DEFINE_TRACE(block_sleeprq);
DEFINE_TRACE(block_rq_requeue);
DEFINE_TRACE(block_bio_backmerge);
DEFINE_TRACE(block_bio_frontmerge);
DEFINE_TRACE(block_bio_queue);
DEFINE_TRACE(block_rq_complete);
DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
/* Exported (GPL-only) because block_remap has the out-of-file user noted above. */
EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
static int __make_request(struct request_queue *q, struct bio *bio);
/*
@ -205,7 +219,7 @@ void blk_plug_device(struct request_queue *q)
if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
trace_block_plug(q);
}
}
EXPORT_SYMBOL(blk_plug_device);
@ -292,9 +306,7 @@ void blk_unplug_work(struct work_struct *work)
struct request_queue *q =
container_of(work, struct request_queue, unplug_work);
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);
trace_block_unplug_io(q);
q->unplug_fn(q);
}
@ -302,9 +314,7 @@ void blk_unplug_timeout(unsigned long data)
{
struct request_queue *q = (struct request_queue *)data;
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);
trace_block_unplug_timer(q);
kblockd_schedule_work(q, &q->unplug_work);
}
@ -314,9 +324,7 @@ void blk_unplug(struct request_queue *q)
* devices don't necessarily have an ->unplug_fn defined
*/
if (q->unplug_fn) {
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);
trace_block_unplug_io(q);
q->unplug_fn(q);
}
}
@ -822,7 +830,7 @@ rq_starved:
if (ioc_batching(q, ioc))
ioc->nr_batch_requests--;
blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
trace_block_getrq(q, bio, rw);
out:
return rq;
}
@ -848,7 +856,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
prepare_to_wait_exclusive(&rl->wait[rw], &wait,
TASK_UNINTERRUPTIBLE);
blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
trace_block_sleeprq(q, bio, rw);
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
@ -928,7 +936,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
{
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
trace_block_rq_requeue(q, rq);
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
@ -1167,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
if (!ll_back_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
trace_block_bio_backmerge(q, bio);
req->biotail->bi_next = bio;
req->biotail = bio;
@ -1186,7 +1194,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
if (!ll_front_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
trace_block_bio_frontmerge(q, bio);
bio->bi_next = req->bio;
req->bio = bio;
@ -1269,7 +1277,7 @@ static inline void blk_partition_remap(struct bio *bio)
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
bdev->bd_dev, bio->bi_sector,
bio->bi_sector - p->start_sect);
}
@ -1441,10 +1449,10 @@ end_io:
goto end_io;
if (old_sector != -1)
blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
trace_block_remap(q, bio, old_dev, bio->bi_sector,
old_sector);
blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
trace_block_bio_queue(q, bio);
old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
@ -1678,7 +1686,7 @@ static int __end_that_request_first(struct request *req, int error,
int total_bytes, bio_nbytes, next_idx = 0;
struct bio *bio;
blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
trace_block_rq_complete(req->q, req);
/*
* for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше