From 8b7d89d02ef3c6a7c73d6596f28cea7632850af4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: [PATCH 01/30] x86: mmiotrace - trace memory mapped IO Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers. Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. Access to the IO memory triggers a page fault, which will be handled by mmiotrace's custom page fault handler. This will single-step the faulted instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debug_fs. This page fault approach is necessary, because binary drivers have readl/writel etc. calls inlined and therefore extremely difficult to trap with with e.g. kprobes. This patch depends on the custom page fault handlers patch. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 27 ++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/init_task.c | 1 + arch/x86/kernel/mmiotrace/Makefile | 4 + arch/x86/kernel/mmiotrace/kmmio.c | 391 ++++++++++++++++ arch/x86/kernel/mmiotrace/kmmio.h | 58 +++ arch/x86/kernel/mmiotrace/mmio-mod.c | 527 ++++++++++++++++++++++ arch/x86/kernel/mmiotrace/pf_in.c | 489 ++++++++++++++++++++ arch/x86/kernel/mmiotrace/pf_in.h | 39 ++ arch/x86/kernel/mmiotrace/testmmiotrace.c | 77 ++++ include/linux/mmiotrace.h | 62 +++ 11 files changed, 1677 insertions(+) create mode 100644 arch/x86/kernel/mmiotrace/Makefile create mode 100644 arch/x86/kernel/mmiotrace/kmmio.c create mode 100644 arch/x86/kernel/mmiotrace/kmmio.h create mode 100644 arch/x86/kernel/mmiotrace/mmio-mod.c create mode 100644 arch/x86/kernel/mmiotrace/pf_in.c create mode 100644 arch/x86/kernel/mmiotrace/pf_in.h create mode 100644 arch/x86/kernel/mmiotrace/testmmiotrace.c create mode 100644 include/linux/mmiotrace.h diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9431a8399844..7c6496e2225e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -176,6 +176,33 @@ config PAGE_FAULT_HANDLERS register a function that is called on every page fault. Custom handlers are used by some debugging and reverse engineering tools. +config MMIOTRACE + tristate "Memory mapped IO tracing" + depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS + default n + help + This will build a kernel module called mmiotrace. + + Mmiotrace traces Memory Mapped I/O access and is meant for debugging + and reverse engineering. The kernel module offers wrapped + versions of the ioremap family of functions. The driver to be traced + must be modified to call these wrappers. A user space program is + required to collect the MMIO data. + + See http://nouveau.freedesktop.org/wiki/MmioTrace + If you are not helping to develop drivers, say N. + +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + default n + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + # # IO delay types: # diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 739d49acd2f1..a51ac153685e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -79,6 +79,8 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace/ + obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index a4f93b4120c1..027a5b6a12b2 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -15,6 +15,7 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ +EXPORT_SYMBOL_GPL(init_mm); /* * Initial thread structure. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile new file mode 100644 index 000000000000..d6905f7f981b --- /dev/null +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o + +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c new file mode 100644 index 000000000000..8ba48f9c91b4 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -0,0 +1,391 @@ +/* Support for MMIO probes. + * Benfit many code from kprobes + * (C) 2002 Louis Zhuang . + * 2007 Alexander Eichner + * 2008 Pekka Paalanen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kmmio.h" + +#define KMMIO_HASH_BITS 6 +#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS) +#define KMMIO_PAGE_HASH_BITS 4 +#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) + +struct kmmio_context { + struct kmmio_fault_page *fpage; + struct kmmio_probe *probe; + unsigned long saved_flags; + int active; +}; + +static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address); +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args); + +static DEFINE_SPINLOCK(kmmio_lock); + +/* These are protected by kmmio_lock */ +unsigned int kmmio_count; +static unsigned int handler_registered; +static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; +static LIST_HEAD(kmmio_probes); + +static struct kmmio_context kmmio_ctx[NR_CPUS]; + +static struct pf_handler kmmio_pf_hook = { + .handler = kmmio_page_fault +}; + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +int init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + + register_die_notifier(&nb_die); + return 0; +} + +void cleanup_kmmio(void) +{ + /* + * Assume the following have been already cleaned by calling + * unregister_kmmio_probe() appropriately: + * kmmio_page_table, kmmio_probes + */ + if (handler_registered) { + unregister_page_fault_handler(&kmmio_pf_hook); + synchronize_rcu(); + } + unregister_die_notifier(&nb_die); +} + +/* + * this is basically a dynamic stabbing problem: + * Could use the existing prio tree code or + * Possible better implementations: + * The Interval Skip List: A Data Structure for Finding All Intervals That + * Overlap a Point (might be simple) + * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup + */ +/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */ +static struct kmmio_probe *get_kmmio_probe(unsigned long addr) +{ + struct kmmio_probe *p; + list_for_each_entry(p, &kmmio_probes, list) { + if (addr >= p->addr && addr <= (p->addr + p->len)) + return p; + } + return NULL; +} + +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +{ + struct list_head *head, *tmp; + + page &= PAGE_MASK; + head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + list_for_each(tmp, head) { + struct kmmio_fault_page *p + = list_entry(tmp, struct kmmio_fault_page, list); + if (p->page == page) + return p; + } + + return NULL; +} + +static void arm_kmmio_fault_page(unsigned long page, int *large) +{ + unsigned long address = page & PAGE_MASK; + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + pte_t *pte = pte_offset_kernel(pmd, address); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); + if (large) + *large = 1; + } else { + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + } + + __flush_tlb_one(page); +} + +static void disarm_kmmio_fault_page(unsigned long page, int *large) +{ + unsigned long address = page & PAGE_MASK; + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + pte_t *pte = pte_offset_kernel(pmd, address); + + if (large && *large) { + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); + *large = 0; + } else { + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + } + + __flush_tlb_one(page); +} + +/* + * Interrupts are disabled on entry as trap3 is an interrupt gate + * and they remain disabled thorough out this function. + */ +static int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + struct kmmio_context *ctx; + int cpu; + + /* + * Preemption is now disabled to prevent process switch during + * single stepping. We can only handle one active kmmio trace + * per cpu, so ensure that we finish it before something else + * gets to run. + * + * XXX what if an interrupt occurs between returning from + * do_page_fault() and entering the single-step exception handler? + * And that interrupt triggers a kmmio trap? + */ + preempt_disable(); + cpu = smp_processor_id(); + ctx = &kmmio_ctx[cpu]; + + /* interrupts disabled and CPU-local data => atomicity guaranteed. */ + if (ctx->active) { + /* + * This avoids a deadlock with kmmio_lock. + * If this page fault really was due to kmmio trap, + * all hell breaks loose. + */ + printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " + "for address %lu. Ignoring.\n", + cpu, addr); + goto no_kmmio; + } + ctx->active++; + + /* + * Acquire the kmmio lock to prevent changes affecting + * get_kmmio_fault_page() and get_kmmio_probe(), since we save their + * returned pointers. + * The lock is released in post_kmmio_handler(). + * XXX: could/should get_kmmio_*() be using RCU instead of spinlock? + */ + spin_lock(&kmmio_lock); + + ctx->fpage = get_kmmio_fault_page(addr); + if (!ctx->fpage) { + /* this page fault is not caused by kmmio */ + goto no_kmmio_locked; + } + + ctx->probe = get_kmmio_probe(addr); + ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + + if (ctx->probe && ctx->probe->pre_handler) + ctx->probe->pre_handler(ctx->probe, regs, addr); + + regs->flags |= TF_MASK; + regs->flags &= ~IF_MASK; + + /* We hold lock, now we set present bit in PTE and single step. */ + disarm_kmmio_fault_page(ctx->fpage->page, NULL); + + return 1; + +no_kmmio_locked: + spin_unlock(&kmmio_lock); + ctx->active--; +no_kmmio: + preempt_enable_no_resched(); + /* page fault not handled by kmmio */ + return 0; +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate + * and they remain disabled thorough out this function. + * And we hold kmmio lock. + */ +static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + struct kmmio_context *ctx = &kmmio_ctx[cpu]; + + if (!ctx->active) + return 0; + + if (ctx->probe && ctx->probe->post_handler) + ctx->probe->post_handler(ctx->probe, condition, regs); + + arm_kmmio_fault_page(ctx->fpage->page, NULL); + + regs->flags &= ~TF_MASK; + regs->flags |= ctx->saved_flags; + + /* These were acquired in kmmio_handler(). */ + ctx->active--; + spin_unlock(&kmmio_lock); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, flags + * will have TF set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->flags & TF_MASK) + return 0; + + return 1; +} + +static int add_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (f) { + f->count++; + return 0; + } + + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return -1; + + f->count = 1; + f->page = page; + list_add(&f->list, + &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]); + + arm_kmmio_fault_page(f->page, NULL); + + return 0; +} + +static void release_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (!f) + return; + + f->count--; + if (!f->count) { + disarm_kmmio_fault_page(f->page, NULL); + list_del(&f->list); + } +} + +int register_kmmio_probe(struct kmmio_probe *p) +{ + int ret = 0; + unsigned long size = 0; + + spin_lock_irq(&kmmio_lock); + kmmio_count++; + if (get_kmmio_probe(p->addr)) { + ret = -EEXIST; + goto out; + } + list_add(&p->list, &kmmio_probes); + /*printk("adding fault pages...\n");*/ + while (size < p->len) { + if (add_kmmio_fault_page(p->addr + size)) + printk(KERN_ERR "mmio: Unable to set page fault.\n"); + size += PAGE_SIZE; + } + + if (!handler_registered) { + register_page_fault_handler(&kmmio_pf_hook); + handler_registered++; + } + +out: + spin_unlock_irq(&kmmio_lock); + /* + * XXX: What should I do here? + * Here was a call to global_flush_tlb(), but it does not exist + * anymore. + */ + return ret; +} + +void unregister_kmmio_probe(struct kmmio_probe *p) +{ + unsigned long size = 0; + + spin_lock_irq(&kmmio_lock); + while (size < p->len) { + release_kmmio_fault_page(p->addr + size); + size += PAGE_SIZE; + } + list_del(&p->list); + kmmio_count--; + spin_unlock_irq(&kmmio_lock); +} + +/* + * According to 2.6.20, mainly x86_64 arch: + * This is being called from do_page_fault(), via the page fault notifier + * chain. The chain is called for both user space faults and kernel space + * faults (address >= TASK_SIZE64), except not on faults serviced by + * vmalloc_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * The page fault hook functionality has put us inside RCU read lock. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ +static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + if (is_kmmio_active()) + if (kmmio_handler(regs, address) == 1) + return -1; + return 0; +} + +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *arg = args; + + if (val == DIE_DEBUG) + if (post_kmmio_handler(arg->err, arg->regs) == 1) + return NOTIFY_STOP; + + return NOTIFY_DONE; +} diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h new file mode 100644 index 000000000000..85b7f68a3b8a --- /dev/null +++ b/arch/x86/kernel/mmiotrace/kmmio.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_KMMIO_H +#define _LINUX_KMMIO_H + +#include +#include +#include +#include +#include +#include +#include + +struct kmmio_probe; +struct kmmio_fault_page; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + + /* start location of the probe point */ + unsigned long addr; + + /* length of the probe region */ + unsigned long len; + + /* Called before addr is executed. */ + kmmio_pre_handler_t pre_handler; + + /* Called after addr is executed, unless... */ + kmmio_post_handler_t post_handler; +}; + +struct kmmio_fault_page { + struct list_head list; + + /* location of the fault page */ + unsigned long page; + + int count; +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +int init_kmmio(void); +void cleanup_kmmio(void); +int register_kmmio_probe(struct kmmio_probe *p); +void unregister_kmmio_probe(struct kmmio_probe *p); + +#endif /* _LINUX_KMMIO_H */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c new file mode 100644 index 000000000000..73561fe85f03 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -0,0 +1,527 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2005 + * Jeff Muizelaar, 2006, 2007 + * Pekka Paalanen, 2008 + * + * Derived from the read-mod example from relay-examples by Tom Zanussi. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for ISA_START_ADDRESS */ + +#include "kmmio.h" +#include "pf_in.h" + +/* This app's relay channel files will appear in /debug/mmio-trace */ +#define APP_DIR "mmio-trace" +/* the marker injection file in /proc */ +#define MARKER_FILE "mmio-marker" + +#define MODULE_NAME "mmiotrace" + +struct trap_reason { + unsigned long addr; + unsigned long ip; + enum reason_type type; + int active_traces; +}; + +static struct trap_reason pf_reason[NR_CPUS]; +static struct mm_io_header_rw cpu_trace[NR_CPUS]; + +static struct file_operations mmio_fops = { + .owner = THIS_MODULE, +}; + +static const size_t subbuf_size = 256*1024; +static struct rchan *chan; +static struct dentry *dir; +static int suspended; /* XXX should this be per cpu? */ +static struct proc_dir_entry *proc_marker_file; + +/* module parameters */ +static unsigned int n_subbufs = 32*4; +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; + +module_param(n_subbufs, uint, 0); +module_param(filter_offset, ulong, 0); +module_param(nommiotrace, bool, 0); +module_param(ISA_trace, bool, 0); +module_param(trace_pc, bool, 0); + +MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); +MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); +MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); +MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); +MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); + +static void record_timestamp(struct mm_io_header *header) +{ + struct timespec now; + + getnstimeofday(&now); + header->sec = now.tv_sec; + header->nsec = now.tv_nsec; +} + +/* + * Write callback for the /proc entry: + * Read a marker and write it to the mmio trace log + */ +static int write_marker(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char *event = NULL; + struct mm_io_header *headp; + int len = (count > 65535) ? 65535 : count; + + event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); + if (!event) + return -ENOMEM; + + headp = (struct mm_io_header *)event; + headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); + headp->data_len = len; + record_timestamp(headp); + + if (copy_from_user(event + sizeof(*headp), buffer, len)) { + kfree(event); + return -EFAULT; + } + + relay_write(chan, event, sizeof(*headp) + len); + kfree(event); + return len; +} + +static void print_pte(unsigned long address) +{ + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + if (pmd_large(*pmd)) { + printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " + "currently supported: %lx\n", + address); + BUG(); + } + printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", + address, + pte_val(*pte_offset_kernel(pmd, address)), + pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT); +} + +/* + * For some reason the pre/post pairs have been called in an + * unmatched order. Report and die. + */ +static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) +{ + const unsigned long cpu = smp_processor_id(); + printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " + "last fault for address: %lx\n", + addr, pf_reason[cpu].addr); + print_pte(addr); +#ifdef __i386__ + print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting EIP was at %s\n", + pf_reason[cpu].ip); + printk(KERN_EMERG + "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->ax, regs->bx, regs->cx, regs->dx); + printk(KERN_EMERG + "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#else + print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting RIP was at %s\n", + pf_reason[cpu].ip); + printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", + regs->ax, regs->cx, regs->dx); + printk(KERN_EMERG "rsi: %016lx rdi: %016lx " + "rbp: %016lx rsp: %016lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#endif + BUG(); +} + +static void pre(struct kmmio_probe *p, struct pt_regs *regs, + unsigned long addr) +{ + const unsigned long cpu = smp_processor_id(); + const unsigned long instptr = instruction_pointer(regs); + const enum reason_type type = get_ins_type(instptr); + + /* it doesn't make sense to have more than one active trace per cpu */ + if (pf_reason[cpu].active_traces) + die_kmmio_nesting_error(regs, addr); + else + pf_reason[cpu].active_traces++; + + pf_reason[cpu].type = type; + pf_reason[cpu].addr = addr; + pf_reason[cpu].ip = instptr; + + cpu_trace[cpu].header.type = MMIO_MAGIC; + cpu_trace[cpu].header.pid = 0; + cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw); + cpu_trace[cpu].rw.address = addr; + + /* + * Only record the program counter when requested. + * It may taint clean-room reverse engineering. + */ + if (trace_pc) + cpu_trace[cpu].rw.pc = instptr; + else + cpu_trace[cpu].rw.pc = 0; + + record_timestamp(&cpu_trace[cpu].header); + + switch (type) { + case REG_READ: + cpu_trace[cpu].header.type |= + (MMIO_READ << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + break; + case REG_WRITE: + cpu_trace[cpu].header.type |= + (MMIO_WRITE << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs); + break; + case IMM_WRITE: + cpu_trace[cpu].header.type |= + (MMIO_WRITE << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + cpu_trace[cpu].rw.value = get_ins_imm_val(instptr); + break; + default: + { + unsigned char *ip = (unsigned char *)instptr; + cpu_trace[cpu].header.type |= + (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); + cpu_trace[cpu].rw.value = (*ip) << 16 | + *(ip + 1) << 8 | + *(ip + 2); + } + } +} + +static void post(struct kmmio_probe *p, unsigned long condition, + struct pt_regs *regs) +{ + const unsigned long cpu = smp_processor_id(); + + /* this should always return the active_trace count to 0 */ + pf_reason[cpu].active_traces--; + if (pf_reason[cpu].active_traces) { + printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); + BUG(); + } + + switch (pf_reason[cpu].type) { + case REG_READ: + cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip, + regs); + break; + default: + break; + } + relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw)); +} + +/* + * subbuf_start() relay callback. + * + * Defined so that we know when events are dropped due to the buffer-full + * condition. + */ +static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, + void *prev_subbuf, size_t prev_padding) +{ + if (relay_buf_full(buf)) { + if (!suspended) { + suspended = 1; + printk(KERN_ERR MODULE_NAME + ": cpu %d buffer full!!!\n", + smp_processor_id()); + } + return 0; + } else if (suspended) { + suspended = 0; + printk(KERN_ERR MODULE_NAME + ": cpu %d buffer no longer full.\n", + smp_processor_id()); + } + + return 1; +} + +/* file_create() callback. Creates relay file in debugfs. */ +static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *buf_file; + + mmio_fops.read = relay_file_operations.read; + mmio_fops.open = relay_file_operations.open; + mmio_fops.poll = relay_file_operations.poll; + mmio_fops.mmap = relay_file_operations.mmap; + mmio_fops.release = relay_file_operations.release; + mmio_fops.splice_read = relay_file_operations.splice_read; + + buf_file = debugfs_create_file(filename, mode, parent, buf, + &mmio_fops); + + return buf_file; +} + +/* file_remove() default callback. Removes relay file in debugfs. */ +static int remove_buf_file_handler(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_handler, + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, +}; + +/* + * create_channel - creates channel /debug/APP_DIR/cpuXXX + * Returns channel on success, NULL otherwise + */ +static struct rchan *create_channel(unsigned size, unsigned n) +{ + return relay_open("cpu", dir, size, n, &relay_callbacks, NULL); +} + +/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */ +static void destroy_channel(void) +{ + if (chan) { + relay_close(chan); + chan = NULL; + } +} + +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; +}; +static LIST_HEAD(trace_list); +static DEFINE_SPINLOCK(trace_list_lock); + +static void do_ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + struct mm_io_header_map event = { + .header = { + .type = MMIO_MAGIC | + (MMIO_PROBE << MMIO_OPCODE_SHIFT), + .sec = 0, + .nsec = 0, + .pid = 0, + .data_len = sizeof(struct mm_io_map) + }, + .map = { + .phys = offset, + .addr = (unsigned long)addr, + .len = size, + .pc = 0 + } + }; + record_timestamp(&event.header); + + *trace = (struct remap_trace) { + .probe = { + .addr = (unsigned long)addr, + .len = size, + .pre_handler = pre, + .post_handler = post, + } + }; + + relay_write(chan, &event, sizeof(event)); + spin_lock(&trace_list_lock); + list_add_tail(&trace->list, &trace_list); + spin_unlock(&trace_list_lock); + if (!nommiotrace) + register_kmmio_probe(&trace->probe); +} + +static void ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + if ((filter_offset) && (offset != filter_offset)) + return; + + /* Don't trace the low PCI/ISA area, it's always mapped.. */ + if (!ISA_trace && (offset < ISA_END_ADDRESS) && + (offset + size > ISA_START_ADDRESS)) { + printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " + "PCI/ISA area (0x%lx-0x%lx)\n", + offset, offset + size); + return; + } + do_ioremap_trace_core(offset, size, addr); +} + +void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) +{ + void __iomem *p = ioremap_cache(offset, size); + printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", + offset, size, p); + ioremap_trace_core(offset, size, p); + return p; +} +EXPORT_SYMBOL(ioremap_cache_trace); + +void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) +{ + void __iomem *p = ioremap_nocache(offset, size); + printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", + offset, size, p); + ioremap_trace_core(offset, size, p); + return p; +} +EXPORT_SYMBOL(ioremap_nocache_trace); + +void iounmap_trace(volatile void __iomem *addr) +{ + struct mm_io_header_map event = { + .header = { + .type = MMIO_MAGIC | + (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), + .sec = 0, + .nsec = 0, + .pid = 0, + .data_len = sizeof(struct mm_io_map) + }, + .map = { + .phys = 0, + .addr = (unsigned long)addr, + .len = 0, + .pc = 0 + } + }; + struct remap_trace *trace; + struct remap_trace *tmp; + printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); + record_timestamp(&event.header); + + spin_lock(&trace_list_lock); + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + if ((unsigned long)addr == trace->probe.addr) { + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + kfree(trace); + break; + } + } + spin_unlock(&trace_list_lock); + relay_write(chan, &event, sizeof(event)); + iounmap(addr); +} +EXPORT_SYMBOL(iounmap_trace); + +static void clear_trace_list(void) +{ + struct remap_trace *trace; + struct remap_trace *tmp; + + spin_lock(&trace_list_lock); + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " + "trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + kfree(trace); + break; + } + spin_unlock(&trace_list_lock); +} + +static int __init init(void) +{ + if (n_subbufs < 2) + return -EINVAL; + + dir = debugfs_create_dir(APP_DIR, NULL); + if (!dir) { + printk(KERN_ERR MODULE_NAME + ": Couldn't create relay app directory.\n"); + return -ENOMEM; + } + + chan = create_channel(subbuf_size, n_subbufs); + if (!chan) { + debugfs_remove(dir); + printk(KERN_ERR MODULE_NAME + ": relay app channel creation failed\n"); + return -ENOMEM; + } + + init_kmmio(); + + proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); + if (proc_marker_file) + proc_marker_file->write_proc = write_marker; + + printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); + if (nommiotrace) + printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); + if (ISA_trace) + printk(KERN_WARNING MODULE_NAME + ": Warning! low ISA range will be traced.\n"); + return 0; +} + +static void __exit cleanup(void) +{ + printk(KERN_DEBUG MODULE_NAME ": unload...\n"); + clear_trace_list(); + cleanup_kmmio(); + remove_proc_entry(MARKER_FILE, NULL); + destroy_channel(); + if (dir) + debugfs_remove(dir); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c new file mode 100644 index 000000000000..67ea520dde62 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/pf_in.c @@ -0,0 +1,489 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $ + * Copyright by Intel Crop., 2002 + * Louis Zhuang (louis.zhuang@intel.com) + * + * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 + */ + +#include +#include /* struct pt_regs */ +#include "pf_in.h" + +#ifdef __i386__ +/* IA32 Manual 3, 2-1 */ +static unsigned char prefix_codes[] = { + 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, + 0x65, 0x2E, 0x3E, 0x66, 0x67 +}; +/* IA32 Manual 3, 3-432*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +/* IA32 Manual 3, 3-432*/ +static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; +static unsigned int rw32[] = { + 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; +static unsigned int mw64[] = {}; +#else /* not __i386__ */ +static unsigned char prefix_codes[] = { + 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, + 0xF0, 0xF3, 0xF2, + /* REX Prefixes */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f +}; +/* AMD64 Manual 3, Appendix A*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; +static unsigned int rw32[] = { + 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +/* 8 bit only */ +static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; +/* 16 bit only */ +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +/* 16 or 32 bit */ +static unsigned int mw32[] = { 0xC7 }; +/* 16, 32 or 64 bit */ +static unsigned int mw64[] = { 0x89, 0x8B }; +#endif /* not __i386__ */ + +static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, + int *rexr) +{ + int i; + unsigned char *p = addr; + *shorted = 0; + *enlarged = 0; + *rexr = 0; + +restart: + for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { + if (*p == prefix_codes[i]) { + if (*p == 0x66) + *shorted = 1; +#ifdef __amd64__ + if ((*p & 0xf8) == 0x48) + *enlarged = 1; + if ((*p & 0xf4) == 0x44) + *rexr = 1; +#endif + p++; + goto restart; + } + } + + return (p - addr); +} + +static int get_opcode(unsigned char *addr, unsigned int *opcode) +{ + int len; + + if (*addr == 0x0F) { + /* 0x0F is extension instruction */ + *opcode = *(unsigned short *)addr; + len = 2; + } else { + *opcode = *addr; + len = 1; + } + + return len; +} + +#define CHECK_OP_TYPE(opcode, array, type) \ + for (i = 0; i < ARRAY_SIZE(array); i++) { \ + if (array[i] == opcode) { \ + rv = type; \ + goto exit; \ + } \ + } + +enum reason_type get_ins_type(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int shorted, enlarged, rexr; + int i; + enum reason_type rv = OTHERS; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + CHECK_OP_TYPE(opcode, reg_rop, REG_READ); + CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); + CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); + +exit: + return rv; +} +#undef CHECK_OP_TYPE + +static unsigned int get_ins_reg_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(rw8); i++) + if (rw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(rw32); i++) + if (rw32[i] == opcode) + return (shorted ? 2 : (enlarged ? 8 : 4)); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +unsigned int get_ins_mem_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(mw8); i++) + if (mw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(mw16); i++) + if (mw16[i] == opcode) + return 2; + + for (i = 0; i < ARRAY_SIZE(mw32); i++) + if (mw32[i] == opcode) + return shorted ? 2 : 4; + + for (i = 0; i < ARRAY_SIZE(mw64); i++) + if (mw64[i] == opcode) + return shorted ? 2 : (enlarged ? 8 : 4); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +/* + * Define register ident in mod/rm byte. + * Note: these are NOT the same as in ptrace-abi.h. + */ +enum { + arg_AL = 0, + arg_CL = 1, + arg_DL = 2, + arg_BL = 3, + arg_AH = 4, + arg_CH = 5, + arg_DH = 6, + arg_BH = 7, + + arg_AX = 0, + arg_CX = 1, + arg_DX = 2, + arg_BX = 3, + arg_SP = 4, + arg_BP = 5, + arg_SI = 6, + arg_DI = 7, +#ifdef __amd64__ + arg_R8 = 8, + arg_R9 = 9, + arg_R10 = 10, + arg_R11 = 11, + arg_R12 = 12, + arg_R13 = 13, + arg_R14 = 14, + arg_R15 = 15 +#endif +}; + +static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +{ + unsigned char *rv = NULL; + + switch (no) { + case arg_AL: + rv = (unsigned char *)®s->ax; + break; + case arg_BL: + rv = (unsigned char *)®s->bx; + break; + case arg_CL: + rv = (unsigned char *)®s->cx; + break; + case arg_DL: + rv = (unsigned char *)®s->dx; + break; + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; +#ifdef __amd64__ + case arg_R8: + rv = (unsigned char *)®s->r8; + break; + case arg_R9: + rv = (unsigned char *)®s->r9; + break; + case arg_R10: + rv = (unsigned char *)®s->r10; + break; + case arg_R11: + rv = (unsigned char *)®s->r11; + break; + case arg_R12: + rv = (unsigned char *)®s->r12; + break; + case arg_R13: + rv = (unsigned char *)®s->r13; + break; + case arg_R14: + rv = (unsigned char *)®s->r14; + break; + case arg_R15: + rv = (unsigned char *)®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + break; + } + return rv; +} + +static unsigned long *get_reg_w32(int no, struct pt_regs *regs) +{ + unsigned long *rv = NULL; + + switch (no) { + case arg_AX: + rv = ®s->ax; + break; + case arg_BX: + rv = ®s->bx; + break; + case arg_CX: + rv = ®s->cx; + break; + case arg_DX: + rv = ®s->dx; + break; + case arg_SP: + rv = ®s->sp; + break; + case arg_BP: + rv = ®s->bp; + break; + case arg_SI: + rv = ®s->si; + break; + case arg_DI: + rv = ®s->di; + break; +#ifdef __amd64__ + case arg_R8: + rv = ®s->r8; + break; + case arg_R9: + rv = ®s->r9; + break; + case arg_R10: + rv = ®s->r10; + break; + case arg_R11: + rv = ®s->r11; + break; + case arg_R12: + rv = ®s->r12; + break; + case arg_R13: + rv = ®s->r13; + break; + case arg_R14: + rv = ®s->r14; + break; + case arg_R15: + rv = ®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + } + + return rv; +} + +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) +{ + unsigned int opcode; + unsigned char mod_rm; + int reg; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(reg_rop); i++) + if (reg_rop[i] == opcode) { + rv = REG_READ; + goto do_work; + } + + for (i = 0; i < ARRAY_SIZE(reg_wop); i++) + if (reg_wop[i] == opcode) { + rv = REG_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *get_reg_w8(reg, regs); + + case 2: + return *(unsigned short *)get_reg_w32(reg, regs); + + case 4: + return *(unsigned int *)get_reg_w32(reg, regs); + +#ifdef __amd64__ + case 8: + return *(unsigned long *)get_reg_w32(reg, regs); +#endif + + default: + printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); + } + +err: + return 0; +} + +unsigned long get_ins_imm_val(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char mod_rm; + unsigned char mod; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(imm_wop); i++) + if (imm_wop[i] == opcode) { + rv = IMM_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + mod = mod_rm >> 6; + p++; + switch (mod) { + case 0: + /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ + /* AMD64: XXX Check for address size prefix? */ + if ((mod_rm & 0x7) == 0x5) + p += 4; + break; + + case 1: + p += 1; + break; + + case 2: + p += 4; + break; + + case 3: + default: + printk(KERN_ERR "mmiotrace: not a memory access instruction " + "at 0x%lx, rm_mod=0x%02x\n", + ins_addr, mod_rm); + } + + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *(unsigned char *)p; + + case 2: + return *(unsigned short *)p; + + case 4: + return *(unsigned int *)p; + +#ifdef __amd64__ + case 8: + return *(unsigned long *)p; +#endif + + default: + printk(KERN_ERR "mmiotrace: Error: width.\n"); + } + +err: + return 0; +} diff --git a/arch/x86/kernel/mmiotrace/pf_in.h b/arch/x86/kernel/mmiotrace/pf_in.h new file mode 100644 index 000000000000..e05341a51a27 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/pf_in.h @@ -0,0 +1,39 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +#ifndef __PF_H_ +#define __PF_H_ + +enum reason_type { + NOT_ME, /* page fault is not in regions */ + NOTHING, /* access others point in regions */ + REG_READ, /* read from addr to reg */ + REG_WRITE, /* write from reg to addr */ + IMM_WRITE, /* write from imm to addr */ + OTHERS /* Other instructions can not intercept */ +}; + +enum reason_type get_ins_type(unsigned long ins_addr); +unsigned int get_ins_mem_width(unsigned long ins_addr); +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); +unsigned long get_ins_imm_val(unsigned long ins_addr); + +#endif /* __PF_H_ */ diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c new file mode 100644 index 000000000000..40e66b0e6480 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -0,0 +1,77 @@ +/* + * Written by Pekka Paalanen, 2008 + */ +#include +#include + +extern void __iomem *ioremap_nocache_trace(unsigned long offset, + unsigned long size); +extern void iounmap_trace(volatile void __iomem *addr); + +#define MODULE_NAME "testmmiotrace" + +static unsigned long mmio_address; +module_param(mmio_address, ulong, 0); +MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); + +static void do_write_test(void __iomem *p) +{ + unsigned int i; + for (i = 0; i < 256; i++) + iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) + iowrite16(i * 12 + 7, p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + iowrite32(i * 212371 + 13, p + i); +} + +static void do_read_test(void __iomem *p) +{ + unsigned int i; + volatile unsigned int v; + for (i = 0; i < 256; i++) + v = ioread8(p + i); + for (i = 1024; i < (5 * 1024); i += 2) + v = ioread16(p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + v = ioread32(p + i); +} + +static void do_test(void) +{ + void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); + if (!p) { + printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, " + "aborting.\n"); + return; + } + do_write_test(p); + do_read_test(p); + iounmap_trace(p); +} + +static int __init init(void) +{ + if (mmio_address == 0) { + printk(KERN_ERR MODULE_NAME ": you have to use the module " + "argument mmio_address.\n"); + printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + return -ENXIO; + } + + printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + "in PCI address space, and writing " + "rubbish in there.\n", mmio_address); + do_test(); + return 0; +} + +static void __exit cleanup(void) +{ + printk(KERN_DEBUG MODULE_NAME ": unloaded.\n"); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h new file mode 100644 index 000000000000..cb247825f3ec --- /dev/null +++ b/include/linux/mmiotrace.h @@ -0,0 +1,62 @@ +#ifndef MMIOTRACE_H +#define MMIOTRACE_H + +#include + +#define MMIO_VERSION 0x04 + +/* mm_io_header.type */ +#define MMIO_OPCODE_MASK 0xff +#define MMIO_OPCODE_SHIFT 0 +#define MMIO_WIDTH_MASK 0xff00 +#define MMIO_WIDTH_SHIFT 8 +#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) +#define MMIO_MAGIC_MASK 0xffff0000 + +enum mm_io_opcode { /* payload type: */ + MMIO_READ = 0x1, /* struct mm_io_rw */ + MMIO_WRITE = 0x2, /* struct mm_io_rw */ + MMIO_PROBE = 0x3, /* struct mm_io_map */ + MMIO_UNPROBE = 0x4, /* struct mm_io_map */ + MMIO_MARKER = 0x5, /* raw char data */ + MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ +}; + +struct mm_io_header { + __u32 type; + __u32 sec; /* timestamp */ + __u32 nsec; + __u32 pid; /* PID of the process, or 0 for kernel core */ + __u16 data_len; /* length of the following payload */ +}; + +struct mm_io_rw { + __u64 address; /* virtual address of register */ + __u64 value; + __u64 pc; /* optional program counter */ +}; + +struct mm_io_map { + __u64 phys; /* base address in PCI space */ + __u64 addr; /* base virtual address */ + __u64 len; /* mapping size */ + __u64 pc; /* optional program counter */ +}; + + +/* + * These structures are used to allow a single relay_write() + * call to write a full packet. + */ + +struct mm_io_header_rw { + struct mm_io_header header; + struct mm_io_rw rw; +} __attribute__((packed)); + +struct mm_io_header_map { + struct mm_io_header header; + struct mm_io_map map; +} __attribute__((packed)); + +#endif /* MMIOTRACE_H */ From 75bb88350e0501b3cf5ac096a1008757844414a9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: [PATCH 02/30] x86 mmiotrace: use lookup_address() Use lookup_address() from pageattr.c instead of doing the same manually. Also had to EXPORT_SYMBOL_GPL(lookup_address) to make this work for modules. This also fixes "undefined symbol 'init_mm'" compile error for x86_32. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mmiotrace/kmmio.c | 46 +++++++++++++++++++--------- arch/x86/kernel/mmiotrace/mmio-mod.c | 19 +++++++----- arch/x86/mm/pageattr.c | 1 + 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 8ba48f9c91b4..28411dadb8b3 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "kmmio.h" @@ -117,40 +118,55 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void arm_kmmio_fault_page(unsigned long page, int *large) +static void arm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud = pud_offset(pgd, address); - pmd_t *pmd = pmd_offset(pud, address); - pte_t *pte = pte_offset_kernel(pmd, address); + int level; + pte_t *pte = lookup_address(address, &level); - if (pmd_large(*pmd)) { + if (!pte) { + printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", + __FUNCTION__, page); + return; + } + + if (level == PG_LEVEL_2M) { + pmd_t *pmd = (pmd_t *)pte; set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); - if (large) - *large = 1; } else { + /* PG_LEVEL_4K */ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); } + if (page_level) + *page_level = level; + __flush_tlb_one(page); } -static void disarm_kmmio_fault_page(unsigned long page, int *large) +static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud = pud_offset(pgd, address); - pmd_t *pmd = pmd_offset(pud, address); - pte_t *pte = pte_offset_kernel(pmd, address); + int level; + pte_t *pte = lookup_address(address, &level); - if (large && *large) { + if (!pte) { + printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", + __FUNCTION__, page); + return; + } + + if (level == PG_LEVEL_2M) { + pmd_t *pmd = (pmd_t *)pte; set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); - *large = 0; } else { + /* PG_LEVEL_4K */ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); } + if (page_level) + *page_level = level; + __flush_tlb_one(page); } diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 73561fe85f03..e43947d218a5 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -120,19 +120,24 @@ static int write_marker(struct file *file, const char __user *buffer, static void print_pte(unsigned long address) { - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud = pud_offset(pgd, address); - pmd_t *pmd = pmd_offset(pud, address); - if (pmd_large(*pmd)) { + int level; + pte_t *pte = lookup_address(address, &level); + + if (!pte) { + printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", + __FUNCTION__, address); + return; + } + + if (level == PG_LEVEL_2M) { printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " "currently supported: %lx\n", address); BUG(); } printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", - address, - pte_val(*pte_offset_kernel(pmd, address)), - pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT); + address, pte_val(*pte), + pte_val(*pte) & _PAGE_PRESENT); } /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37e..57970f2935c0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -227,6 +227,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) return pte_offset_kernel(pmd, address); } +EXPORT_SYMBOL_GPL(lookup_address); /* * Set the new pmd in all the pgds we know about: From fe1ffafa80f6673101c6560c2bacfe3df10372ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: [PATCH 03/30] x86 mmiotrace: fix relay-buffer-full flag for SMP Relay has per-cpu buffers, but mmiotrace was using only a single flag for detecting buffer full/not-full transitions. The new code makes this per-cpu and actually counts missed events. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mmiotrace/mmio-mod.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index e43947d218a5..0019dcdf6158 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -29,6 +29,7 @@ #include #include #include /* for ISA_START_ADDRESS */ +#include #include "kmmio.h" #include "pf_in.h" @@ -47,9 +48,13 @@ struct trap_reason { int active_traces; }; +/* Accessed per-cpu. */ static struct trap_reason pf_reason[NR_CPUS]; static struct mm_io_header_rw cpu_trace[NR_CPUS]; +/* Access to this is not per-cpu. */ +static atomic_t dropped[NR_CPUS]; + static struct file_operations mmio_fops = { .owner = THIS_MODULE, }; @@ -57,7 +62,6 @@ static struct file_operations mmio_fops = { static const size_t subbuf_size = 256*1024; static struct rchan *chan; static struct dentry *dir; -static int suspended; /* XXX should this be per cpu? */ static struct proc_dir_entry *proc_marker_file; /* module parameters */ @@ -269,19 +273,21 @@ static void post(struct kmmio_probe *p, unsigned long condition, static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { + unsigned int cpu = buf->cpu; + atomic_t *drop = &dropped[cpu]; + int count; if (relay_buf_full(buf)) { - if (!suspended) { - suspended = 1; - printk(KERN_ERR MODULE_NAME - ": cpu %d buffer full!!!\n", - smp_processor_id()); + if (atomic_inc_return(drop) == 1) { + printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n", + cpu); } return 0; - } else if (suspended) { - suspended = 0; + } else if ((count = atomic_read(drop))) { printk(KERN_ERR MODULE_NAME - ": cpu %d buffer no longer full.\n", - smp_processor_id()); + ": cpu %d buffer no longer full, " + "missed %d events.\n", + cpu, count); + atomic_sub(count, drop); } return 1; From 63ffa3e456c1a9884a3ebac997d91e3fdae18d78 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: [PATCH 04/30] x86 mmiotrace: comment about user space ABI Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb247825f3ec..6ec288f1fe24 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,10 @@ #include +/* + * If you change anything here, you must bump MMIO_VERSION. + * This is the relay data format for user space. + */ #define MMIO_VERSION 0x04 /* mm_io_header.type */ @@ -23,7 +27,7 @@ enum mm_io_opcode { /* payload type: */ }; struct mm_io_header { - __u32 type; + __u32 type; /* see MMIO_* macros above */ __u32 sec; /* timestamp */ __u32 nsec; __u32 pid; /* PID of the process, or 0 for kernel core */ From 10c43d2eb50c9a5ad60388b9d3c41c31150049e6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: [PATCH 05/30] x86: explicit call to mmiotrace in do_page_fault() The custom page fault handler list is replaced with a single function pointer. All related functions and variables are renamed for mmiotrace. Signed-off-by: Pekka Paalanen Cc: Christoph Hellwig Cc: Arjan van de Ven Cc: pq@iki.fi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 14 +++---- arch/x86/kernel/mmiotrace/kmmio.c | 14 +++---- arch/x86/mm/fault.c | 66 ++++++++++++++++--------------- include/asm-x86/kdebug.h | 12 +++--- 4 files changed, 52 insertions(+), 54 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7c6496e2225e..9491c0ae03a3 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -168,20 +168,18 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config PAGE_FAULT_HANDLERS - bool "Custom page fault handlers" - depends on DEBUG_KERNEL - help - Allow the use of custom page fault handlers. A kernel module may - register a function that is called on every page fault. Custom - handlers are used by some debugging and reverse engineering tools. +config MMIOTRACE_HOOKS + bool + default n config MMIOTRACE tristate "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS + depends on DEBUG_KERNEL && RELAY && DEBUG_FS + select MMIOTRACE_HOOKS default n help This will build a kernel module called mmiotrace. + Making this a built-in is heavily discouraged. Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. The kernel module offers wrapped diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 28411dadb8b3..e759f7c3878f 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -51,10 +51,6 @@ static LIST_HEAD(kmmio_probes); static struct kmmio_context kmmio_ctx[NR_CPUS]; -static struct pf_handler kmmio_pf_hook = { - .handler = kmmio_page_fault -}; - static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; @@ -77,7 +73,8 @@ void cleanup_kmmio(void) * kmmio_page_table, kmmio_probes */ if (handler_registered) { - unregister_page_fault_handler(&kmmio_pf_hook); + if (mmiotrace_unregister_pf(&kmmio_page_fault)) + BUG(); synchronize_rcu(); } unregister_die_notifier(&nb_die); @@ -343,8 +340,11 @@ int register_kmmio_probe(struct kmmio_probe *p) } if (!handler_registered) { - register_page_fault_handler(&kmmio_pf_hook); - handler_registered++; + if (mmiotrace_register_pf(&kmmio_page_fault)) + printk(KERN_ERR "mmiotrace: Cannot register page " + "fault handler.\n"); + else + handler_registered++; } out: diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 343f5c1aacc8..e9a086a1a9ff 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -49,53 +49,55 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -#ifdef CONFIG_PAGE_FAULT_HANDLERS -static HLIST_HEAD(pf_handlers); /* protected by RCU */ -static DEFINE_SPINLOCK(pf_handlers_writer); +#ifdef CONFIG_MMIOTRACE_HOOKS +static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */ +static DEFINE_SPINLOCK(mmiotrace_handler_lock); -void register_page_fault_handler(struct pf_handler *new_pfh) +int mmiotrace_register_pf(pf_handler_func new_pfh) { + int ret = 0; unsigned long flags; - spin_lock_irqsave(&pf_handlers_writer, flags); - hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers); - spin_unlock_irqrestore(&pf_handlers_writer, flags); + spin_lock_irqsave(&mmiotrace_handler_lock, flags); + if (mmiotrace_pf_handler) + ret = -EBUSY; + else + mmiotrace_pf_handler = new_pfh; + spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); + return ret; } -EXPORT_SYMBOL_GPL(register_page_fault_handler); +EXPORT_SYMBOL_GPL(mmiotrace_register_pf); /** - * unregister_page_fault_handler: + * mmiotrace_unregister_pf: * The caller must ensure @old_pfh is not in use anymore before freeing it. - * This function does not guarantee it. The list of handlers is protected by - * RCU, so you can do this by e.g. calling synchronize_rcu(). + * This function does not guarantee it. The handler function pointer is + * protected by RCU, so you can do this by e.g. calling synchronize_rcu(). */ -void unregister_page_fault_handler(struct pf_handler *old_pfh) +int mmiotrace_unregister_pf(pf_handler_func old_pfh) { + int ret = 0; unsigned long flags; - spin_lock_irqsave(&pf_handlers_writer, flags); - hlist_del_rcu(&old_pfh->hlist); - spin_unlock_irqrestore(&pf_handlers_writer, flags); + spin_lock_irqsave(&mmiotrace_handler_lock, flags); + if (mmiotrace_pf_handler != old_pfh) + ret = -EPERM; + else + mmiotrace_pf_handler = NULL; + spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); + return ret; } -EXPORT_SYMBOL_GPL(unregister_page_fault_handler); -#endif +EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf); +#endif /* CONFIG_MMIOTRACE_HOOKS */ /* returns non-zero if do_page_fault() should return */ -static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +static inline int call_mmiotrace(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) { -#ifdef CONFIG_PAGE_FAULT_HANDLERS +#ifdef CONFIG_MMIOTRACE_HOOKS int ret = 0; - struct pf_handler *cur; - struct hlist_node *ncur; - - if (hlist_empty(&pf_handlers)) - return 0; - rcu_read_lock(); - hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) { - ret = cur->handler(regs, error_code, address); - if (ret) - break; - } + if (mmiotrace_pf_handler) + ret = mmiotrace_pf_handler(regs, error_code, address); rcu_read_unlock(); return ret; #else @@ -655,7 +657,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; - if (handle_custom_pf(regs, error_code, address)) + if (call_mmiotrace(regs, error_code, address)) return; /* diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index a80f2d6cc737..7063281040da 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,13 +35,11 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -struct pf_handler { - struct hlist_node hlist; - int (*handler)(struct pt_regs *regs, unsigned long error_code, - unsigned long address); -}; +typedef int (*pf_handler_func)(struct pt_regs *regs, + unsigned long error_code, + unsigned long address); -extern void register_page_fault_handler(struct pf_handler *new_pfh); -extern void unregister_page_fault_handler(struct pf_handler *old_pfh); +extern int mmiotrace_register_pf(pf_handler_func new_pfh); +extern int mmiotrace_unregister_pf(pf_handler_func old_pfh); #endif From f513638030ca384b0bace4df64f0b82f6ae1e4c6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: [PATCH 06/30] x86 mmiotrace: Use percpu instead of arrays. Signed-off-by: Pekka Paalanen Cc: Eric Dumazet Cc: pq@iki.fi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mmiotrace/kmmio.c | 27 +++++----- arch/x86/kernel/mmiotrace/mmio-mod.c | 80 +++++++++++++++------------- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index e759f7c3878f..5e239d0b8467 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,8 @@ static unsigned int handler_registered; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); -static struct kmmio_context kmmio_ctx[NR_CPUS]; +/* Accessed per-cpu */ +static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier @@ -173,8 +175,7 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) */ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) { - struct kmmio_context *ctx; - int cpu; + struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); /* * Preemption is now disabled to prevent process switch during @@ -187,8 +188,6 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) * And that interrupt triggers a kmmio trap? */ preempt_disable(); - cpu = smp_processor_id(); - ctx = &kmmio_ctx[cpu]; /* interrupts disabled and CPU-local data => atomicity guaranteed. */ if (ctx->active) { @@ -199,7 +198,7 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) */ printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " "for address %lu. Ignoring.\n", - cpu, addr); + smp_processor_id(), addr); goto no_kmmio; } ctx->active++; @@ -231,6 +230,7 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) /* We hold lock, now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); + put_cpu_var(kmmio_ctx); return 1; no_kmmio_locked: @@ -238,6 +238,7 @@ no_kmmio_locked: ctx->active--; no_kmmio: preempt_enable_no_resched(); + put_cpu_var(kmmio_ctx); /* page fault not handled by kmmio */ return 0; } @@ -249,11 +250,11 @@ no_kmmio: */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { - int cpu = smp_processor_id(); - struct kmmio_context *ctx = &kmmio_ctx[cpu]; + int ret = 0; + struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) - return 0; + goto out; if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); @@ -273,10 +274,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (regs->flags & TF_MASK) - return 0; + if (!(regs->flags & TF_MASK)) + ret = 1; - return 1; +out: + put_cpu_var(kmmio_ctx); + return ret; } static int add_kmmio_fault_page(unsigned long page) diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 0019dcdf6158..f9c609266d83 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -30,6 +30,7 @@ #include #include /* for ISA_START_ADDRESS */ #include +#include #include "kmmio.h" #include "pf_in.h" @@ -49,11 +50,11 @@ struct trap_reason { }; /* Accessed per-cpu. */ -static struct trap_reason pf_reason[NR_CPUS]; -static struct mm_io_header_rw cpu_trace[NR_CPUS]; +static DEFINE_PER_CPU(struct trap_reason, pf_reason); +static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); /* Access to this is not per-cpu. */ -static atomic_t dropped[NR_CPUS]; +static DEFINE_PER_CPU(atomic_t, dropped); static struct file_operations mmio_fops = { .owner = THIS_MODULE, @@ -150,15 +151,15 @@ static void print_pte(unsigned long address) */ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { - const unsigned long cpu = smp_processor_id(); + const struct trap_reason *my_reason = &get_cpu_var(pf_reason); printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " "last fault for address: %lx\n", - addr, pf_reason[cpu].addr); + addr, my_reason->addr); print_pte(addr); #ifdef __i386__ print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting EIP was at %s\n", - pf_reason[cpu].ip); + my_reason->ip); printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); @@ -168,100 +169,105 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) #else print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting RIP was at %s\n", - pf_reason[cpu].ip); + my_reason->ip); printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); printk(KERN_EMERG "rsi: %016lx rdi: %016lx " "rbp: %016lx rsp: %016lx\n", regs->si, regs->di, regs->bp, regs->sp); #endif + put_cpu_var(pf_reason); BUG(); } static void pre(struct kmmio_probe *p, struct pt_regs *regs, unsigned long addr) { - const unsigned long cpu = smp_processor_id(); + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); /* it doesn't make sense to have more than one active trace per cpu */ - if (pf_reason[cpu].active_traces) + if (my_reason->active_traces) die_kmmio_nesting_error(regs, addr); else - pf_reason[cpu].active_traces++; + my_reason->active_traces++; - pf_reason[cpu].type = type; - pf_reason[cpu].addr = addr; - pf_reason[cpu].ip = instptr; + my_reason->type = type; + my_reason->addr = addr; + my_reason->ip = instptr; - cpu_trace[cpu].header.type = MMIO_MAGIC; - cpu_trace[cpu].header.pid = 0; - cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw); - cpu_trace[cpu].rw.address = addr; + my_trace->header.type = MMIO_MAGIC; + my_trace->header.pid = 0; + my_trace->header.data_len = sizeof(struct mm_io_rw); + my_trace->rw.address = addr; /* * Only record the program counter when requested. * It may taint clean-room reverse engineering. */ if (trace_pc) - cpu_trace[cpu].rw.pc = instptr; + my_trace->rw.pc = instptr; else - cpu_trace[cpu].rw.pc = 0; + my_trace->rw.pc = 0; - record_timestamp(&cpu_trace[cpu].header); + record_timestamp(&my_trace->header); switch (type) { case REG_READ: - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_READ << MMIO_OPCODE_SHIFT) | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); break; case REG_WRITE: - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_WRITE << MMIO_OPCODE_SHIFT) | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs); + my_trace->rw.value = get_ins_reg_val(instptr, regs); break; case IMM_WRITE: - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_WRITE << MMIO_OPCODE_SHIFT) | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - cpu_trace[cpu].rw.value = get_ins_imm_val(instptr); + my_trace->rw.value = get_ins_imm_val(instptr); break; default: { unsigned char *ip = (unsigned char *)instptr; - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); - cpu_trace[cpu].rw.value = (*ip) << 16 | - *(ip + 1) << 8 | - *(ip + 2); + my_trace->rw.value = (*ip) << 16 | *(ip + 1) << 8 | + *(ip + 2); } } + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); } static void post(struct kmmio_probe *p, unsigned long condition, struct pt_regs *regs) { - const unsigned long cpu = smp_processor_id(); + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); /* this should always return the active_trace count to 0 */ - pf_reason[cpu].active_traces--; - if (pf_reason[cpu].active_traces) { + my_reason->active_traces--; + if (my_reason->active_traces) { printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); BUG(); } - switch (pf_reason[cpu].type) { + switch (my_reason->type) { case REG_READ: - cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip, - regs); + my_trace->rw.value = get_ins_reg_val(my_reason->ip, regs); break; default: break; } - relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw)); + relay_write(chan, my_trace, sizeof(*my_trace)); + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); } /* @@ -274,7 +280,7 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { unsigned int cpu = buf->cpu; - atomic_t *drop = &dropped[cpu]; + atomic_t *drop = &per_cpu(dropped, cpu); int count; if (relay_buf_full(buf)) { if (atomic_inc_return(drop) == 1) { From 0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: [PATCH 07/30] x86: mmiotrace full patch, preview 1 kmmio.c handles the list of mmio probes with callbacks, list of traced pages, and attaching into the page fault handler and die notifier. It arms, traps and disarms the given pages, this is the core of mmiotrace. mmio-mod.c is a user interface, hooking into ioremap functions and registering the mmio probes. It also decodes the required information from trapped mmio accesses via the pre and post callbacks in each probe. Currently, hooking into ioremap functions works by redefining the symbols of the target (binary) kernel module, so that it calls the traced versions of the functions. The most notable changes done since the last discussion are: - kmmio.c is a built-in, not part of the module - direct call from fault.c to kmmio.c, removing all dynamic hooks - prepare for unregistering probes at any time - make kmmio re-initializable and accessible to more than one user - rewrite kmmio locking to remove all spinlocks from page fault path Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe() or is there a better way? The function called via call_rcu() itself calls call_rcu() again, will this work or break? There I need a second grace period for RCU after the first grace period for page faults. Mmiotrace itself (mmio-mod.c) is still a module, I am going to attack that next. At some point I will start looking into how to make mmiotrace a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make the user space part of mmiotracing as simple as 'cat /debug/trace/mmio > dump.txt'. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/init_task.c | 1 - arch/x86/kernel/mmiotrace/Makefile | 8 +- arch/x86/kernel/mmiotrace/kmmio.c | 361 +++++++++++++++------- arch/x86/kernel/mmiotrace/kmmio.h | 58 ---- arch/x86/kernel/mmiotrace/mmio-mod.c | 81 +++-- arch/x86/kernel/mmiotrace/pf_in.c | 2 +- arch/x86/kernel/mmiotrace/testmmiotrace.c | 13 +- arch/x86/mm/fault.c | 59 +--- include/asm-x86/kdebug.h | 7 - include/linux/mmiotrace.h | 38 +++ 10 files changed, 341 insertions(+), 287 deletions(-) delete mode 100644 arch/x86/kernel/mmiotrace/kmmio.h diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 027a5b6a12b2..a4f93b4120c1 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ -EXPORT_SYMBOL_GPL(init_mm); /* * Initial thread structure. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile index d6905f7f981b..cf1e747b463e 100644 --- a/arch/x86/kernel/mmiotrace/Makefile +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o - -obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o +obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-objs := pf_in.o mmio-mod.o +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 5e239d0b8467..539a9b19588f 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -17,70 +18,119 @@ #include #include #include +#include #include #include #include #include #include -#include "kmmio.h" +#include -#define KMMIO_HASH_BITS 6 -#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS) #define KMMIO_PAGE_HASH_BITS 4 #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) +struct kmmio_fault_page { + struct list_head list; + struct kmmio_fault_page *release_next; + unsigned long page; /* location of the fault page */ + + /* + * Number of times this page has been registered as a part + * of a probe. If zero, page is disarmed and this may be freed. + * Used only by writers (RCU). + */ + int count; +}; + +struct kmmio_delayed_release { + struct rcu_head rcu; + struct kmmio_fault_page *release_list; +}; + struct kmmio_context { struct kmmio_fault_page *fpage; struct kmmio_probe *probe; unsigned long saved_flags; + unsigned long addr; int active; }; -static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address); static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args); +static DECLARE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); /* These are protected by kmmio_lock */ +static int kmmio_initialized; unsigned int kmmio_count; -static unsigned int handler_registered; + +/* Read-protected by RCU, write-protected by kmmio_lock. */ static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); +static struct list_head *kmmio_page_list(unsigned long page) +{ + return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; +} + /* Accessed per-cpu */ static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); +/* protected by kmmio_init_mutex */ static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; -int init_kmmio(void) +/** + * Makes sure kmmio is initialized and usable. + * This must be called before any other kmmio function defined here. + * May sleep. + */ +void reference_kmmio(void) { - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - - register_die_notifier(&nb_die); - return 0; -} - -void cleanup_kmmio(void) -{ - /* - * Assume the following have been already cleaned by calling - * unregister_kmmio_probe() appropriately: - * kmmio_page_table, kmmio_probes - */ - if (handler_registered) { - if (mmiotrace_unregister_pf(&kmmio_page_fault)) + down(&kmmio_init_mutex); + spin_lock_irq(&kmmio_lock); + if (!kmmio_initialized) { + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + if (register_die_notifier(&nb_die)) BUG(); - synchronize_rcu(); } - unregister_die_notifier(&nb_die); + kmmio_initialized++; + spin_unlock_irq(&kmmio_lock); + up(&kmmio_init_mutex); } +EXPORT_SYMBOL_GPL(reference_kmmio); + +/** + * Clean up kmmio after use. This must be called for every call to + * reference_kmmio(). All probes registered after the corresponding + * reference_kmmio() must have been unregistered when calling this. + * May sleep. + */ +void unreference_kmmio(void) +{ + bool unreg = false; + + down(&kmmio_init_mutex); + spin_lock_irq(&kmmio_lock); + + if (kmmio_initialized == 1) { + BUG_ON(is_kmmio_active()); + unreg = true; + } + kmmio_initialized--; + BUG_ON(kmmio_initialized < 0); + spin_unlock_irq(&kmmio_lock); + + if (unreg) + unregister_die_notifier(&nb_die); /* calls sync_rcu() */ + up(&kmmio_init_mutex); +} +EXPORT_SYMBOL(unreference_kmmio); /* * this is basically a dynamic stabbing problem: @@ -90,33 +140,33 @@ void cleanup_kmmio(void) * Overlap a Point (might be simple) * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup */ -/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */ +/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) { struct kmmio_probe *p; - list_for_each_entry(p, &kmmio_probes, list) { + list_for_each_entry_rcu(p, &kmmio_probes, list) { if (addr >= p->addr && addr <= (p->addr + p->len)) return p; } return NULL; } +/* You must be holding RCU read lock. */ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) { - struct list_head *head, *tmp; + struct list_head *head; + struct kmmio_fault_page *p; page &= PAGE_MASK; - head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; - list_for_each(tmp, head) { - struct kmmio_fault_page *p - = list_entry(tmp, struct kmmio_fault_page, list); + head = kmmio_page_list(page); + list_for_each_entry_rcu(p, head, list) { if (p->page == page) return p; } - return NULL; } +/** Mark the given page as not present. Access to it will trigger a fault. */ static void arm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; @@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, page); + pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", + __func__, page); return; } @@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level) __flush_tlb_one(page); } +/** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; @@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, page); + pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", + __func__, page); return; } @@ -169,13 +220,25 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) __flush_tlb_one(page); } +/* + * This is being called from do_page_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * We cannot take any locks, because we could be executing especially + * within a kmmio critical section. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ /* * Interrupts are disabled on entry as trap3 is an interrupt gate * and they remain disabled thorough out this function. */ -static int kmmio_handler(struct pt_regs *regs, unsigned long addr) +int kmmio_handler(struct pt_regs *regs, unsigned long addr) { - struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); + struct kmmio_context *ctx; + struct kmmio_fault_page *faultpage; /* * Preemption is now disabled to prevent process switch during @@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) * XXX what if an interrupt occurs between returning from * do_page_fault() and entering the single-step exception handler? * And that interrupt triggers a kmmio trap? + * XXX If we tracing an interrupt service routine or whatever, is + * this enough to keep it on the current cpu? */ preempt_disable(); - /* interrupts disabled and CPU-local data => atomicity guaranteed. */ + rcu_read_lock(); + faultpage = get_kmmio_fault_page(addr); + if (!faultpage) { + /* + * Either this page fault is not caused by kmmio, or + * another CPU just pulled the kmmio probe from under + * our feet. In the latter case all hell breaks loose. + */ + goto no_kmmio; + } + + ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { /* - * This avoids a deadlock with kmmio_lock. + * Prevent overwriting already in-flight context. * If this page fault really was due to kmmio trap, * all hell breaks loose. */ - printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " - "for address %lu. Ignoring.\n", + pr_emerg("kmmio: recursive probe hit on CPU %d, " + "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - goto no_kmmio; + goto no_kmmio_ctx; } ctx->active++; - /* - * Acquire the kmmio lock to prevent changes affecting - * get_kmmio_fault_page() and get_kmmio_probe(), since we save their - * returned pointers. - * The lock is released in post_kmmio_handler(). - * XXX: could/should get_kmmio_*() be using RCU instead of spinlock? - */ - spin_lock(&kmmio_lock); - - ctx->fpage = get_kmmio_fault_page(addr); - if (!ctx->fpage) { - /* this page fault is not caused by kmmio */ - goto no_kmmio_locked; - } - + ctx->fpage = faultpage; ctx->probe = get_kmmio_probe(addr); ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + ctx->addr = addr; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags |= TF_MASK; regs->flags &= ~IF_MASK; - /* We hold lock, now we set present bit in PTE and single step. */ + /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); put_cpu_var(kmmio_ctx); + rcu_read_unlock(); return 1; -no_kmmio_locked: - spin_unlock(&kmmio_lock); - ctx->active--; -no_kmmio: - preempt_enable_no_resched(); +no_kmmio_ctx: put_cpu_var(kmmio_ctx); - /* page fault not handled by kmmio */ - return 0; +no_kmmio: + rcu_read_unlock(); + preempt_enable_no_resched(); + return 0; /* page fault not handled by kmmio */ } /* * Interrupts are disabled on entry as trap1 is an interrupt gate * and they remain disabled thorough out this function. - * And we hold kmmio lock. + * This must always get called as the pair to kmmio_handler(). */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { int ret = 0; + struct kmmio_probe *probe; + struct kmmio_fault_page *faultpage; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) goto out; + rcu_read_lock(); + + faultpage = get_kmmio_fault_page(ctx->addr); + probe = get_kmmio_probe(ctx->addr); + if (faultpage != ctx->fpage || probe != ctx->probe) { + /* + * The trace setup changed after kmmio_handler() and before + * running this respective post handler. User does not want + * the result anymore. + */ + ctx->probe = NULL; + ctx->fpage = NULL; + } + if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage->page, NULL); + if (ctx->fpage) + arm_kmmio_fault_page(ctx->fpage->page, NULL); regs->flags &= ~TF_MASK; regs->flags |= ctx->saved_flags; /* These were acquired in kmmio_handler(). */ ctx->active--; - spin_unlock(&kmmio_lock); + BUG_ON(ctx->active); preempt_enable_no_resched(); /* @@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (!(regs->flags & TF_MASK)) ret = 1; + rcu_read_unlock(); out: put_cpu_var(kmmio_ctx); return ret; } +/* You must be holding kmmio_lock. */ static int add_kmmio_fault_page(unsigned long page) { struct kmmio_fault_page *f; @@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page) page &= PAGE_MASK; f = get_kmmio_fault_page(page); if (f) { + if (!f->count) + arm_kmmio_fault_page(f->page, NULL); f->count++; return 0; } @@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page) f->count = 1; f->page = page; - list_add(&f->list, - &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]); + list_add_rcu(&f->list, kmmio_page_list(f->page)); arm_kmmio_fault_page(f->page, NULL); return 0; } -static void release_kmmio_fault_page(unsigned long page) +/* You must be holding kmmio_lock. */ +static void release_kmmio_fault_page(unsigned long page, + struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; @@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page) return; f->count--; + BUG_ON(f->count < 0); if (!f->count) { disarm_kmmio_fault_page(f->page, NULL); - list_del(&f->list); + f->release_next = *release_list; + *release_list = f; } } @@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p) ret = -EEXIST; goto out; } - list_add(&p->list, &kmmio_probes); - /*printk("adding fault pages...\n");*/ + list_add_rcu(&p->list, &kmmio_probes); while (size < p->len) { if (add_kmmio_fault_page(p->addr + size)) - printk(KERN_ERR "mmio: Unable to set page fault.\n"); + pr_err("kmmio: Unable to set page fault.\n"); size += PAGE_SIZE; } - - if (!handler_registered) { - if (mmiotrace_register_pf(&kmmio_page_fault)) - printk(KERN_ERR "mmiotrace: Cannot register page " - "fault handler.\n"); - else - handler_registered++; - } - out: spin_unlock_irq(&kmmio_lock); /* * XXX: What should I do here? * Here was a call to global_flush_tlb(), but it does not exist - * anymore. + * anymore. It seems it's not needed after all. */ return ret; } +EXPORT_SYMBOL(register_kmmio_probe); -void unregister_kmmio_probe(struct kmmio_probe *p) +static void rcu_free_kmmio_fault_pages(struct rcu_head *head) { - unsigned long size = 0; - - spin_lock_irq(&kmmio_lock); - while (size < p->len) { - release_kmmio_fault_page(p->addr + size); - size += PAGE_SIZE; + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + while (p) { + struct kmmio_fault_page *next = p->release_next; + BUG_ON(p->count); + kfree(p); + p = next; } - list_del(&p->list); - kmmio_count--; - spin_unlock_irq(&kmmio_lock); + kfree(dr); +} + +static void remove_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + struct kmmio_fault_page **prevp = &dr->release_list; + unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); + while (p) { + if (!p->count) + list_del_rcu(&p->list); + else + *prevp = p->release_next; + prevp = &p->release_next; + p = p->release_next; + } + spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ + call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } /* - * According to 2.6.20, mainly x86_64 arch: - * This is being called from do_page_fault(), via the page fault notifier - * chain. The chain is called for both user space faults and kernel space - * faults (address >= TASK_SIZE64), except not on faults serviced by - * vmalloc_fault(). + * Remove a kmmio probe. You have to synchronize_rcu() before you can be + * sure that the callbacks will not be called anymore. * - * We may be in an interrupt or a critical section. Also prefecthing may - * trigger a page fault. We may be in the middle of process switch. - * The page fault hook functionality has put us inside RCU read lock. - * - * Local interrupts are disabled, so preemption cannot happen. - * Do not enable interrupts, do not sleep, and watch out for other CPUs. + * Unregistering a kmmio fault page has three steps: + * 1. release_kmmio_fault_page() + * Disarm the page, wait a grace period to let all faults finish. + * 2. remove_kmmio_fault_pages() + * Remove the pages from kmmio_page_table. + * 3. rcu_free_kmmio_fault_pages() + * Actally free the kmmio_fault_page structs as with RCU. */ -static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +void unregister_kmmio_probe(struct kmmio_probe *p) { - if (is_kmmio_active()) - if (kmmio_handler(regs, address) == 1) - return -1; - return 0; + unsigned long size = 0; + struct kmmio_fault_page *release_list = NULL; + struct kmmio_delayed_release *drelease; + + spin_lock_irq(&kmmio_lock); + while (size < p->len) { + release_kmmio_fault_page(p->addr + size, &release_list); + size += PAGE_SIZE; + } + list_del_rcu(&p->list); + kmmio_count--; + spin_unlock_irq(&kmmio_lock); + + drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); + if (!drelease) { + pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + return; + } + drelease->release_list = release_list; + + /* + * This is not really RCU here. We have just disarmed a set of + * pages so that they cannot trigger page faults anymore. However, + * we cannot remove the pages from kmmio_page_table, + * because a probe hit might be in flight on another CPU. The + * pages are collected into a list, and they will be removed from + * kmmio_page_table when it is certain that no probe hit related to + * these pages can be in flight. RCU grace period sounds like a + * good choice. + * + * If we removed the pages too early, kmmio page fault handler might + * not find the respective kmmio_fault_page and determine it's not + * a kmmio fault, when it actually is. This would lead to madness. + */ + call_rcu(&drelease->rcu, remove_kmmio_fault_pages); } +EXPORT_SYMBOL(unregister_kmmio_probe); static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h deleted file mode 100644 index 85b7f68a3b8a..000000000000 --- a/arch/x86/kernel/mmiotrace/kmmio.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _LINUX_KMMIO_H -#define _LINUX_KMMIO_H - -#include -#include -#include -#include -#include -#include -#include - -struct kmmio_probe; -struct kmmio_fault_page; -struct pt_regs; - -typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, - struct pt_regs *, unsigned long addr); -typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, - unsigned long condition, struct pt_regs *); - -struct kmmio_probe { - struct list_head list; - - /* start location of the probe point */ - unsigned long addr; - - /* length of the probe region */ - unsigned long len; - - /* Called before addr is executed. */ - kmmio_pre_handler_t pre_handler; - - /* Called after addr is executed, unless... */ - kmmio_post_handler_t post_handler; -}; - -struct kmmio_fault_page { - struct list_head list; - - /* location of the fault page */ - unsigned long page; - - int count; -}; - -/* kmmio is active by some kmmio_probes? */ -static inline int is_kmmio_active(void) -{ - extern unsigned int kmmio_count; - return kmmio_count; -} - -int init_kmmio(void); -void cleanup_kmmio(void); -int register_kmmio_probe(struct kmmio_probe *p); -void unregister_kmmio_probe(struct kmmio_probe *p); - -#endif /* _LINUX_KMMIO_H */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index f9c609266d83..e1a508588f03 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -32,7 +32,6 @@ #include #include -#include "kmmio.h" #include "pf_in.h" /* This app's relay channel files will appear in /debug/mmio-trace */ @@ -129,18 +128,17 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, address); + pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n", + __func__, address); return; } if (level == PG_LEVEL_2M) { - printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " - "currently supported: %lx\n", - address); + pr_emerg(MODULE_NAME ": 4MB pages are not currently " + "supported: %lx\n", address); BUG(); } - printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", + pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), pte_val(*pte) & _PAGE_PRESENT); } @@ -152,7 +150,7 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " + pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, " "last fault for address: %lx\n", addr, my_reason->addr); print_pte(addr); @@ -160,20 +158,17 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting EIP was at %s\n", my_reason->ip); - printk(KERN_EMERG - "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); - printk(KERN_EMERG - "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->si, regs->di, regs->bp, regs->sp); #else print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting RIP was at %s\n", my_reason->ip); - printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", + pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); - printk(KERN_EMERG "rsi: %016lx rdi: %016lx " - "rbp: %016lx rsp: %016lx\n", + pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", regs->si, regs->di, regs->bp, regs->sp); #endif put_cpu_var(pf_reason); @@ -251,10 +246,15 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct trap_reason *my_reason = &get_cpu_var(pf_reason); struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + /* + * XXX: This might not get called, if the probe is removed while + * trace hit is on flight. + */ + /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); + pr_emerg(MODULE_NAME ": unexpected post handler"); BUG(); } @@ -283,16 +283,15 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, atomic_t *drop = &per_cpu(dropped, cpu); int count; if (relay_buf_full(buf)) { - if (atomic_inc_return(drop) == 1) { - printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n", - cpu); - } + if (atomic_inc_return(drop) == 1) + pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu); return 0; - } else if ((count = atomic_read(drop))) { - printk(KERN_ERR MODULE_NAME - ": cpu %d buffer no longer full, " - "missed %d events.\n", - cpu, count); + } + count = atomic_read(drop); + if (count) { + pr_err(MODULE_NAME ": cpu %d buffer no longer full, " + "missed %d events.\n", + cpu, count); atomic_sub(count, drop); } @@ -407,8 +406,8 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, /* Don't trace the low PCI/ISA area, it's always mapped.. */ if (!ISA_trace && (offset < ISA_END_ADDRESS) && (offset + size > ISA_START_ADDRESS)) { - printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " - "PCI/ISA area (0x%lx-0x%lx)\n", + pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area " + "(0x%lx-0x%lx)\n", offset, offset + size); return; } @@ -418,7 +417,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) { void __iomem *p = ioremap_cache(offset, size); - printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", + pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", offset, size, p); ioremap_trace_core(offset, size, p); return p; @@ -428,7 +427,7 @@ EXPORT_SYMBOL(ioremap_cache_trace); void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) { void __iomem *p = ioremap_nocache(offset, size); - printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", + pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", offset, size, p); ioremap_trace_core(offset, size, p); return p; @@ -455,7 +454,7 @@ void iounmap_trace(volatile void __iomem *addr) }; struct remap_trace *trace; struct remap_trace *tmp; - printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); + pr_debug(MODULE_NAME ": Unmapping %p.\n", addr); record_timestamp(&event.header); spin_lock(&trace_list_lock); @@ -481,7 +480,7 @@ static void clear_trace_list(void) spin_lock(&trace_list_lock); list_for_each_entry_safe(trace, tmp, &trace_list, list) { - printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " + pr_warning(MODULE_NAME ": purging non-iounmapped " "trace @0x%08lx, size 0x%lx.\n", trace->probe.addr, trace->probe.len); if (!nommiotrace) @@ -500,39 +499,37 @@ static int __init init(void) dir = debugfs_create_dir(APP_DIR, NULL); if (!dir) { - printk(KERN_ERR MODULE_NAME - ": Couldn't create relay app directory.\n"); + pr_err(MODULE_NAME ": Couldn't create relay app directory.\n"); return -ENOMEM; } chan = create_channel(subbuf_size, n_subbufs); if (!chan) { debugfs_remove(dir); - printk(KERN_ERR MODULE_NAME - ": relay app channel creation failed\n"); + pr_err(MODULE_NAME ": relay app channel creation failed\n"); return -ENOMEM; } - init_kmmio(); + reference_kmmio(); proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); if (proc_marker_file) proc_marker_file->write_proc = write_marker; - printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); + pr_debug(MODULE_NAME ": loaded.\n"); if (nommiotrace) - printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); + pr_info(MODULE_NAME ": MMIO tracing disabled.\n"); if (ISA_trace) - printk(KERN_WARNING MODULE_NAME - ": Warning! low ISA range will be traced.\n"); + pr_warning(MODULE_NAME ": Warning! low ISA range will be " + "traced.\n"); return 0; } static void __exit cleanup(void) { - printk(KERN_DEBUG MODULE_NAME ": unload...\n"); + pr_debug(MODULE_NAME ": unload...\n"); clear_trace_list(); - cleanup_kmmio(); + unreference_kmmio(); remove_proc_entry(MARKER_FILE, NULL); destroy_channel(); if (dir) diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c index 67ea520dde62..efa1911e20ca 100644 --- a/arch/x86/kernel/mmiotrace/pf_in.c +++ b/arch/x86/kernel/mmiotrace/pf_in.c @@ -19,7 +19,7 @@ * */ -/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $ +/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp * Copyright by Intel Crop., 2002 * Louis Zhuang (louis.zhuang@intel.com) * diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c index 40e66b0e6480..5ecff578672b 100644 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -41,8 +41,7 @@ static void do_test(void) { void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); if (!p) { - printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, " - "aborting.\n"); + pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } do_write_test(p); @@ -53,14 +52,14 @@ static void do_test(void) static int __init init(void) { if (mmio_address == 0) { - printk(KERN_ERR MODULE_NAME ": you have to use the module " - "argument mmio_address.\n"); - printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + pr_err(MODULE_NAME ": you have to use the module argument " + "mmio_address.\n"); + pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); return -ENXIO; } - printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " "in PCI address space, and writing " "rubbish in there.\n", mmio_address); do_test(); @@ -69,7 +68,7 @@ static int __init init(void) static void __exit cleanup(void) { - printk(KERN_DEBUG MODULE_NAME ": unloaded.\n"); + pr_debug(MODULE_NAME ": unloaded.\n"); } module_init(init); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e9a086a1a9ff..8c828a68d3b6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -49,60 +50,14 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -#ifdef CONFIG_MMIOTRACE_HOOKS -static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */ -static DEFINE_SPINLOCK(mmiotrace_handler_lock); - -int mmiotrace_register_pf(pf_handler_func new_pfh) -{ - int ret = 0; - unsigned long flags; - spin_lock_irqsave(&mmiotrace_handler_lock, flags); - if (mmiotrace_pf_handler) - ret = -EBUSY; - else - mmiotrace_pf_handler = new_pfh; - spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); - return ret; -} -EXPORT_SYMBOL_GPL(mmiotrace_register_pf); - -/** - * mmiotrace_unregister_pf: - * The caller must ensure @old_pfh is not in use anymore before freeing it. - * This function does not guarantee it. The handler function pointer is - * protected by RCU, so you can do this by e.g. calling synchronize_rcu(). - */ -int mmiotrace_unregister_pf(pf_handler_func old_pfh) -{ - int ret = 0; - unsigned long flags; - spin_lock_irqsave(&mmiotrace_handler_lock, flags); - if (mmiotrace_pf_handler != old_pfh) - ret = -EPERM; - else - mmiotrace_pf_handler = NULL; - spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); - return ret; -} -EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf); -#endif /* CONFIG_MMIOTRACE_HOOKS */ - -/* returns non-zero if do_page_fault() should return */ -static inline int call_mmiotrace(struct pt_regs *regs, - unsigned long error_code, - unsigned long address) +static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { #ifdef CONFIG_MMIOTRACE_HOOKS - int ret = 0; - rcu_read_lock(); - if (mmiotrace_pf_handler) - ret = mmiotrace_pf_handler(regs, error_code, address); - rcu_read_unlock(); - return ret; -#else - return 0; + if (unlikely(is_kmmio_active())) + if (kmmio_handler(regs, addr) == 1) + return -1; #endif + return 0; } static inline int notify_page_fault(struct pt_regs *regs) @@ -657,7 +612,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; - if (call_mmiotrace(regs, error_code, address)) + if (unlikely(kmmio_fault(regs, address))) return; /* diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index 7063281040da..96651bb59ba1 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,11 +35,4 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -typedef int (*pf_handler_func)(struct pt_regs *regs, - unsigned long error_code, - unsigned long address); - -extern int mmiotrace_register_pf(pf_handler_func new_pfh); -extern int mmiotrace_unregister_pf(pf_handler_func old_pfh); - #endif diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 6ec288f1fe24..d87a6cd8b686 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,44 @@ #include +#ifdef __KERNEL__ + +#include + +struct kmmio_probe; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + unsigned long addr; /* start location of the probe point */ + unsigned long len; /* length of the probe region */ + kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ + kmmio_post_handler_t post_handler; /* Called after addr is executed */ +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +extern void reference_kmmio(void); +extern void unreference_kmmio(void); +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +/* Called from page fault handler. */ +extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); + +#endif /* __KERNEL__ */ + + /* * If you change anything here, you must bump MMIO_VERSION. * This is the relay data format for user space. From d61fc44853f46fb002228b18aa5f30db21fcd4ac Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: [PATCH 08/30] x86: mmiotrace, preview 2 Kconfig.debug, Makefile and testmmiotrace.c style fixes. Use real mutex instead of mutex. Fix failure path in register probe func. kmmio: RCU read-locked over single stepping. Generate mapping id's. Make mmio-mod.c built-in and rewrite its locking. Add debugfs file to enable/disable mmiotracing. kmmio: use irqsave spinlocks. Lots of cleanups in mmio-mod.c Marker file moved from /proc into debugfs. Call mmiotrace entrypoints directly from ioremap.c. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 20 +- arch/x86/kernel/mmiotrace/Makefile | 2 +- arch/x86/kernel/mmiotrace/kmmio.c | 72 ++-- arch/x86/kernel/mmiotrace/mmio-mod.c | 401 ++++++++++++++-------- arch/x86/kernel/mmiotrace/testmmiotrace.c | 15 +- arch/x86/mm/ioremap.c | 9 +- include/linux/mmiotrace.h | 18 +- 7 files changed, 334 insertions(+), 203 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9491c0ae03a3..aa0d6462b1fc 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -170,22 +170,19 @@ config IOMMU_LEAK config MMIOTRACE_HOOKS bool - default n config MMIOTRACE - tristate "Memory mapped IO tracing" + bool "Memory mapped IO tracing" depends on DEBUG_KERNEL && RELAY && DEBUG_FS select MMIOTRACE_HOOKS - default n + default y help - This will build a kernel module called mmiotrace. - Making this a built-in is heavily discouraged. - - Mmiotrace traces Memory Mapped I/O access and is meant for debugging - and reverse engineering. The kernel module offers wrapped - versions of the ioremap family of functions. The driver to be traced - must be modified to call these wrappers. A user space program is - required to collect the MMIO data. + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. A user space program is + required to collect the MMIO data from debugfs files. + Tracing is disabled by default and can be enabled from a debugfs + file. See http://nouveau.freedesktop.org/wiki/MmioTrace If you are not helping to develop drivers, say N. @@ -193,7 +190,6 @@ config MMIOTRACE config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m - default n help This is a dumb module for testing mmiotrace. It is very dangerous as it will write garbage to IO memory starting at a given address. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile index cf1e747b463e..dbcd8d50fb8d 100644 --- a/arch/x86/kernel/mmiotrace/Makefile +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-objs := pf_in.o mmio-mod.o +mmiotrace-y := pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 539a9b19588f..efb467933087 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -59,7 +60,7 @@ struct kmmio_context { static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args); -static DECLARE_MUTEX(kmmio_init_mutex); +static DEFINE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); /* These are protected by kmmio_lock */ @@ -90,7 +91,7 @@ static struct notifier_block nb_die = { */ void reference_kmmio(void) { - down(&kmmio_init_mutex); + mutex_lock(&kmmio_init_mutex); spin_lock_irq(&kmmio_lock); if (!kmmio_initialized) { int i; @@ -101,7 +102,7 @@ void reference_kmmio(void) } kmmio_initialized++; spin_unlock_irq(&kmmio_lock); - up(&kmmio_init_mutex); + mutex_unlock(&kmmio_init_mutex); } EXPORT_SYMBOL_GPL(reference_kmmio); @@ -115,7 +116,7 @@ void unreference_kmmio(void) { bool unreg = false; - down(&kmmio_init_mutex); + mutex_lock(&kmmio_init_mutex); spin_lock_irq(&kmmio_lock); if (kmmio_initialized == 1) { @@ -128,7 +129,7 @@ void unreference_kmmio(void) if (unreg) unregister_die_notifier(&nb_die); /* calls sync_rcu() */ - up(&kmmio_init_mutex); + mutex_unlock(&kmmio_init_mutex); } EXPORT_SYMBOL(unreference_kmmio); @@ -244,17 +245,13 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * Preemption is now disabled to prevent process switch during * single stepping. We can only handle one active kmmio trace * per cpu, so ensure that we finish it before something else - * gets to run. - * - * XXX what if an interrupt occurs between returning from - * do_page_fault() and entering the single-step exception handler? - * And that interrupt triggers a kmmio trap? - * XXX If we tracing an interrupt service routine or whatever, is - * this enough to keep it on the current cpu? + * gets to run. We also hold the RCU read lock over single + * stepping to avoid looking up the probe and kmmio_fault_page + * again. */ preempt_disable(); - rcu_read_lock(); + faultpage = get_kmmio_fault_page(addr); if (!faultpage) { /* @@ -287,14 +284,24 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); + /* + * Enable single-stepping and disable interrupts for the faulting + * context. Local interrupts must not get enabled during stepping. + */ regs->flags |= TF_MASK; regs->flags &= ~IF_MASK; /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); + /* + * If another cpu accesses the same page while we are stepping, + * the access will not be caught. It will simply succeed and the + * only downside is we lose the event. If this becomes a problem, + * the user should drop to single cpu before tracing. + */ + put_cpu_var(kmmio_ctx); - rcu_read_unlock(); return 1; no_kmmio_ctx: @@ -313,32 +320,15 @@ no_kmmio: static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { int ret = 0; - struct kmmio_probe *probe; - struct kmmio_fault_page *faultpage; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) goto out; - rcu_read_lock(); - - faultpage = get_kmmio_fault_page(ctx->addr); - probe = get_kmmio_probe(ctx->addr); - if (faultpage != ctx->fpage || probe != ctx->probe) { - /* - * The trace setup changed after kmmio_handler() and before - * running this respective post handler. User does not want - * the result anymore. - */ - ctx->probe = NULL; - ctx->fpage = NULL; - } - if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - if (ctx->fpage) - arm_kmmio_fault_page(ctx->fpage->page, NULL); + arm_kmmio_fault_page(ctx->fpage->page, NULL); regs->flags &= ~TF_MASK; regs->flags |= ctx->saved_flags; @@ -346,6 +336,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) /* These were acquired in kmmio_handler(). */ ctx->active--; BUG_ON(ctx->active); + rcu_read_unlock(); preempt_enable_no_resched(); /* @@ -355,8 +346,6 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) */ if (!(regs->flags & TF_MASK)) ret = 1; - - rcu_read_unlock(); out: put_cpu_var(kmmio_ctx); return ret; @@ -411,15 +400,16 @@ static void release_kmmio_fault_page(unsigned long page, int register_kmmio_probe(struct kmmio_probe *p) { + unsigned long flags; int ret = 0; unsigned long size = 0; - spin_lock_irq(&kmmio_lock); - kmmio_count++; + spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < p->len) { if (add_kmmio_fault_page(p->addr + size)) @@ -427,7 +417,7 @@ int register_kmmio_probe(struct kmmio_probe *p) size += PAGE_SIZE; } out: - spin_unlock_irq(&kmmio_lock); + spin_unlock_irqrestore(&kmmio_lock, flags); /* * XXX: What should I do here? * Here was a call to global_flush_tlb(), but it does not exist @@ -478,7 +468,8 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) /* * Remove a kmmio probe. You have to synchronize_rcu() before you can be - * sure that the callbacks will not be called anymore. + * sure that the callbacks will not be called anymore. Only after that + * you may actually release your struct kmmio_probe. * * Unregistering a kmmio fault page has three steps: * 1. release_kmmio_fault_page() @@ -490,18 +481,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) */ void unregister_kmmio_probe(struct kmmio_probe *p) { + unsigned long flags; unsigned long size = 0; struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; - spin_lock_irq(&kmmio_lock); + spin_lock_irqsave(&kmmio_lock, flags); while (size < p->len) { release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } list_del_rcu(&p->list); kmmio_count--; - spin_unlock_irq(&kmmio_lock); + spin_unlock_irqrestore(&kmmio_lock, flags); drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); if (!drelease) { diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index e1a508588f03..738644061e4e 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -19,6 +19,8 @@ * * Derived from the read-mod example from relay-examples by Tom Zanussi. */ +#define DEBUG 1 + #include #include #include @@ -34,12 +36,12 @@ #include "pf_in.h" -/* This app's relay channel files will appear in /debug/mmio-trace */ -#define APP_DIR "mmio-trace" -/* the marker injection file in /proc */ -#define MARKER_FILE "mmio-marker" +#define NAME "mmiotrace: " -#define MODULE_NAME "mmiotrace" +/* This app's relay channel files will appear in /debug/mmio-trace */ +static const char APP_DIR[] = "mmio-trace"; +/* the marker injection file in /debug/APP_DIR */ +static const char MARKER_FILE[] = "mmio-marker"; struct trap_reason { unsigned long addr; @@ -48,6 +50,15 @@ struct trap_reason { int active_traces; }; +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; + unsigned long phys; + unsigned long id; +}; + +static const size_t subbuf_size = 256*1024; + /* Accessed per-cpu. */ static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); @@ -55,33 +66,53 @@ static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); -static struct file_operations mmio_fops = { - .owner = THIS_MODULE, -}; - -static const size_t subbuf_size = 256*1024; -static struct rchan *chan; static struct dentry *dir; -static struct proc_dir_entry *proc_marker_file; +static struct dentry *enabled_file; +static struct dentry *marker_file; + +static DEFINE_MUTEX(mmiotrace_mutex); +static DEFINE_SPINLOCK(trace_lock); +static atomic_t mmiotrace_enabled; +static LIST_HEAD(trace_list); /* struct remap_trace */ +static struct rchan *chan; + +/* + * Locking in this file: + * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. + * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex + * and trace_lock. + * - Routines depending on is_enabled() must take trace_lock. + * - trace_list users must hold trace_lock. + * - is_enabled() guarantees that chan is valid. + * - pre/post callbacks assume the effect of is_enabled() being true. + */ /* module parameters */ -static unsigned int n_subbufs = 32*4; -static unsigned long filter_offset; -static int nommiotrace; -static int ISA_trace; -static int trace_pc; +static unsigned int n_subbufs = 32*4; +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; +static int enable_now; module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); +module_param(enable_now, bool, 0); MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); +MODULE_PARM_DESC(enable_now, "Start mmiotracing immediately on module load."); + +static bool is_enabled(void) +{ + return atomic_read(&mmiotrace_enabled); +} static void record_timestamp(struct mm_io_header *header) { @@ -93,15 +124,15 @@ static void record_timestamp(struct mm_io_header *header) } /* - * Write callback for the /proc entry: + * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log */ -static int write_marker(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t write_marker(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { char *event = NULL; struct mm_io_header *headp; - int len = (count > 65535) ? 65535 : count; + ssize_t len = (count > 65535) ? 65535 : count; event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); if (!event) @@ -117,7 +148,12 @@ static int write_marker(struct file *file, const char __user *buffer, return -EFAULT; } - relay_write(chan, event, sizeof(*headp) + len); + spin_lock_irq(&trace_lock); + if (is_enabled()) + relay_write(chan, event, sizeof(*headp) + len); + else + len = -EINVAL; + spin_unlock_irq(&trace_lock); kfree(event); return len; } @@ -128,19 +164,18 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n", + pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", __func__, address); return; } if (level == PG_LEVEL_2M) { - pr_emerg(MODULE_NAME ": 4MB pages are not currently " - "supported: %lx\n", address); + pr_emerg(NAME "4MB pages are not currently supported: " + "0x%08lx\n", address); BUG(); } - pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", - address, pte_val(*pte), - pte_val(*pte) & _PAGE_PRESENT); + pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), + pte_val(*pte) & _PAGE_PRESENT); } /* @@ -150,22 +185,18 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, " - "last fault for address: %lx\n", + pr_emerg(NAME "unexpected fault for address: 0x%08lx, " + "last fault for address: 0x%08lx\n", addr, my_reason->addr); print_pte(addr); + print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); #ifdef __i386__ - print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting EIP was at %s\n", - my_reason->ip); pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->si, regs->di, regs->bp, regs->sp); #else - print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting RIP was at %s\n", - my_reason->ip); pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", @@ -197,6 +228,10 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, my_trace->header.pid = 0; my_trace->header.data_len = sizeof(struct mm_io_rw); my_trace->rw.address = addr; + /* + * struct remap_trace *trace = p->user_data; + * phys = addr - trace->probe.addr + trace->phys; + */ /* * Only record the program counter when requested. @@ -246,15 +281,10 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct trap_reason *my_reason = &get_cpu_var(pf_reason); struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); - /* - * XXX: This might not get called, if the probe is removed while - * trace hit is on flight. - */ - /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - pr_emerg(MODULE_NAME ": unexpected post handler"); + pr_emerg(NAME "unexpected post handler"); BUG(); } @@ -284,20 +314,23 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, int count; if (relay_buf_full(buf)) { if (atomic_inc_return(drop) == 1) - pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu); + pr_err(NAME "cpu %d buffer full!\n", cpu); return 0; } count = atomic_read(drop); if (count) { - pr_err(MODULE_NAME ": cpu %d buffer no longer full, " - "missed %d events.\n", - cpu, count); + pr_err(NAME "cpu %d buffer no longer full, missed %d events.\n", + cpu, count); atomic_sub(count, drop); } return 1; } +static struct file_operations mmio_fops = { + .owner = THIS_MODULE, +}; + /* file_create() callback. Creates relay file in debugfs. */ static struct dentry *create_buf_file_handler(const char *filename, struct dentry *parent, @@ -333,34 +366,10 @@ static struct rchan_callbacks relay_callbacks = { .remove_buf_file = remove_buf_file_handler, }; -/* - * create_channel - creates channel /debug/APP_DIR/cpuXXX - * Returns channel on success, NULL otherwise - */ -static struct rchan *create_channel(unsigned size, unsigned n) -{ - return relay_open("cpu", dir, size, n, &relay_callbacks, NULL); -} - -/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */ -static void destroy_channel(void) -{ - if (chan) { - relay_close(chan); - chan = NULL; - } -} - -struct remap_trace { - struct list_head list; - struct kmmio_probe probe; -}; -static LIST_HEAD(trace_list); -static DEFINE_SPINLOCK(trace_list_lock); - -static void do_ioremap_trace_core(unsigned long offset, unsigned long size, +static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *addr) { + static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); struct mm_io_header_map event = { .header = { @@ -380,61 +389,49 @@ static void do_ioremap_trace_core(unsigned long offset, unsigned long size, }; record_timestamp(&event.header); + if (!trace) { + pr_err(NAME "kmalloc failed in ioremap\n"); + return; + } + *trace = (struct remap_trace) { .probe = { .addr = (unsigned long)addr, .len = size, .pre_handler = pre, .post_handler = post, - } + .user_data = trace + }, + .phys = offset, + .id = atomic_inc_return(&next_id) }; + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + relay_write(chan, &event, sizeof(event)); - spin_lock(&trace_list_lock); list_add_tail(&trace->list, &trace_list); - spin_unlock(&trace_list_lock); if (!nommiotrace) register_kmmio_probe(&trace->probe); + +not_enabled: + spin_unlock_irq(&trace_lock); } -static void ioremap_trace_core(unsigned long offset, unsigned long size, - void __iomem *addr) +void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) { + if (!is_enabled()) /* recheck and proper locking in *_core() */ + return; + + pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); if ((filter_offset) && (offset != filter_offset)) return; - - /* Don't trace the low PCI/ISA area, it's always mapped.. */ - if (!ISA_trace && (offset < ISA_END_ADDRESS) && - (offset + size > ISA_START_ADDRESS)) { - pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area " - "(0x%lx-0x%lx)\n", - offset, offset + size); - return; - } - do_ioremap_trace_core(offset, size, addr); + ioremap_trace_core(offset, size, addr); } -void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) -{ - void __iomem *p = ioremap_cache(offset, size); - pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", - offset, size, p); - ioremap_trace_core(offset, size, p); - return p; -} -EXPORT_SYMBOL(ioremap_cache_trace); - -void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) -{ - void __iomem *p = ioremap_nocache(offset, size); - pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", - offset, size, p); - ioremap_trace_core(offset, size, p); - return p; -} -EXPORT_SYMBOL(ioremap_nocache_trace); - -void iounmap_trace(volatile void __iomem *addr) +static void iounmap_trace_core(volatile void __iomem *addr) { struct mm_io_header_map event = { .header = { @@ -454,84 +451,212 @@ void iounmap_trace(volatile void __iomem *addr) }; struct remap_trace *trace; struct remap_trace *tmp; - pr_debug(MODULE_NAME ": Unmapping %p.\n", addr); + struct remap_trace *found_trace = NULL; + + pr_debug(NAME "Unmapping %p.\n", addr); record_timestamp(&event.header); - spin_lock(&trace_list_lock); + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + list_for_each_entry_safe(trace, tmp, &trace_list, list) { if ((unsigned long)addr == trace->probe.addr) { if (!nommiotrace) unregister_kmmio_probe(&trace->probe); list_del(&trace->list); - kfree(trace); + found_trace = trace; break; } } - spin_unlock(&trace_list_lock); relay_write(chan, &event, sizeof(event)); - iounmap(addr); + +not_enabled: + spin_unlock_irq(&trace_lock); + if (found_trace) { + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + kfree(found_trace); + } +} + +void mmiotrace_iounmap(volatile void __iomem *addr) +{ + might_sleep(); + if (is_enabled()) /* recheck and proper locking in *_core() */ + iounmap_trace_core(addr); } -EXPORT_SYMBOL(iounmap_trace); static void clear_trace_list(void) { struct remap_trace *trace; struct remap_trace *tmp; - spin_lock(&trace_list_lock); - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - pr_warning(MODULE_NAME ": purging non-iounmapped " + /* + * No locking required, because the caller ensures we are in a + * critical section via mutex, and is_enabled() is false, + * i.e. nothing can traverse or modify this list. + * Caller also ensures is_enabled() cannot change. + */ + list_for_each_entry(trace, &trace_list, list) { + pr_notice(NAME "purging non-iounmapped " "trace @0x%08lx, size 0x%lx.\n", trace->probe.addr, trace->probe.len); if (!nommiotrace) unregister_kmmio_probe(&trace->probe); + } + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + + list_for_each_entry_safe(trace, tmp, &trace_list, list) { list_del(&trace->list); kfree(trace); + } +} + +static ssize_t read_enabled_file_bool(struct file *file, + char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + + if (is_enabled()) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static void enable_mmiotrace(void); +static void disable_mmiotrace(void); + +static ssize_t write_enabled_file_bool(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[32]; + int buf_size = min(count, (sizeof(buf)-1)); + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + switch (buf[0]) { + case 'y': + case 'Y': + case '1': + enable_mmiotrace(); + break; + case 'n': + case 'N': + case '0': + disable_mmiotrace(); break; } - spin_unlock(&trace_list_lock); + + return count; +} + +/* this ripped from kernel/kprobes.c */ +static struct file_operations fops_enabled = { + .owner = THIS_MODULE, + .read = read_enabled_file_bool, + .write = write_enabled_file_bool +}; + +static struct file_operations fops_marker = { + .owner = THIS_MODULE, + .write = write_marker +}; + +static void enable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (is_enabled()) + goto out; + + chan = relay_open("cpu", dir, subbuf_size, n_subbufs, + &relay_callbacks, NULL); + if (!chan) { + pr_err(NAME "relay app channel creation failed.\n"); + goto out; + } + + reference_kmmio(); + + marker_file = debugfs_create_file("marker", 0660, dir, NULL, + &fops_marker); + if (!marker_file) + pr_err(NAME "marker file creation failed.\n"); + + if (nommiotrace) + pr_info(NAME "MMIO tracing disabled.\n"); + if (ISA_trace) + pr_warning(NAME "Warning! low ISA range will be traced.\n"); + spin_lock_irq(&trace_lock); + atomic_inc(&mmiotrace_enabled); + spin_unlock_irq(&trace_lock); + pr_info(NAME "enabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); +} + +static void disable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (!is_enabled()) + goto out; + + spin_lock_irq(&trace_lock); + atomic_dec(&mmiotrace_enabled); + BUG_ON(is_enabled()); + spin_unlock_irq(&trace_lock); + + clear_trace_list(); /* guarantees: no more kmmio callbacks */ + unreference_kmmio(); + if (marker_file) { + debugfs_remove(marker_file); + marker_file = NULL; + } + if (chan) { + relay_close(chan); + chan = NULL; + } + + pr_info(NAME "disabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); } static int __init init(void) { + pr_debug(NAME "load...\n"); if (n_subbufs < 2) return -EINVAL; dir = debugfs_create_dir(APP_DIR, NULL); if (!dir) { - pr_err(MODULE_NAME ": Couldn't create relay app directory.\n"); + pr_err(NAME "Couldn't create relay app directory.\n"); return -ENOMEM; } - chan = create_channel(subbuf_size, n_subbufs); - if (!chan) { + enabled_file = debugfs_create_file("enabled", 0600, dir, NULL, + &fops_enabled); + if (!enabled_file) { + pr_err(NAME "Couldn't create enabled file.\n"); debugfs_remove(dir); - pr_err(MODULE_NAME ": relay app channel creation failed\n"); return -ENOMEM; } - reference_kmmio(); + if (enable_now) + enable_mmiotrace(); - proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); - if (proc_marker_file) - proc_marker_file->write_proc = write_marker; - - pr_debug(MODULE_NAME ": loaded.\n"); - if (nommiotrace) - pr_info(MODULE_NAME ": MMIO tracing disabled.\n"); - if (ISA_trace) - pr_warning(MODULE_NAME ": Warning! low ISA range will be " - "traced.\n"); return 0; } static void __exit cleanup(void) { - pr_debug(MODULE_NAME ": unload...\n"); - clear_trace_list(); - unreference_kmmio(); - remove_proc_entry(MARKER_FILE, NULL); - destroy_channel(); + pr_debug(NAME "unload...\n"); + if (enabled_file) + debugfs_remove(enabled_file); + disable_mmiotrace(); if (dir) debugfs_remove(dir); } diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c index 5ecff578672b..cfa60b227c8d 100644 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -4,10 +4,6 @@ #include #include -extern void __iomem *ioremap_nocache_trace(unsigned long offset, - unsigned long size); -extern void iounmap_trace(volatile void __iomem *addr); - #define MODULE_NAME "testmmiotrace" static unsigned long mmio_address; @@ -28,25 +24,24 @@ static void do_write_test(void __iomem *p) static void do_read_test(void __iomem *p) { unsigned int i; - volatile unsigned int v; for (i = 0; i < 256; i++) - v = ioread8(p + i); + ioread8(p + i); for (i = 1024; i < (5 * 1024); i += 2) - v = ioread16(p + i); + ioread16(p + i); for (i = (5 * 1024); i < (16 * 1024); i += 4) - v = ioread32(p + i); + ioread32(p + i); } static void do_test(void) { - void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); + void __iomem *p = ioremap_nocache(mmio_address, 0x4000); if (!p) { pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } do_write_test(p); do_read_test(p); - iounmap_trace(p); + iounmap(p); } static int __init init(void) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031a..8927c878544d 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, unsigned long new_prot_val; pgprot_t prot; int retval; + void __iomem *ret_addr; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -233,7 +235,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - return (void __iomem *) (vaddr + offset); + ret_addr = (void __iomem *) (vaddr + offset); + mmiotrace_ioremap(phys_addr, size, ret_addr); + + return ret_addr; } /** @@ -325,6 +330,8 @@ void iounmap(volatile void __iomem *addr) addr = (volatile void __iomem *) (PAGE_MASK & (unsigned long __force)addr); + mmiotrace_iounmap(addr); + /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. Reuse of the virtual address is prevented by diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index d87a6cd8b686..cb5efd0c7f51 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -16,11 +16,12 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; + struct list_head list; /* kmmio internal list */ unsigned long addr; /* start location of the probe point */ unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ + void *user_data; }; /* kmmio is active by some kmmio_probes? */ @@ -38,6 +39,21 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p); /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); +/* Called from ioremap.c */ +#ifdef CONFIG_MMIOTRACE +extern void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_iounmap(volatile void __iomem *addr); +#else +static inline void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ +} +static inline void mmiotrace_iounmap(volatile void __iomem *addr) +{ +} +#endif /* CONFIG_MMIOTRACE_HOOKS */ + #endif /* __KERNEL__ */ From f984b51e0779a6dd30feedc41404013ca54e5d05 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: [PATCH 09/30] ftrace: add mmiotrace plugin On Sat, 22 Mar 2008 13:07:47 +0100 Ingo Molnar wrote: > > > i'd suggest the following: pull x86.git and sched-devel.git into a > > > single tree [the two will combine without rejects]. Then try to add a > > > kernel/tracing/trace_mmiotrace.c ftrace plugin. The trace_sysprof.c > > > plugin might be a good example. > > > > I did this and now I have mmiotrace enabled/disabled via the tracing > > framework (what do we call this, since ftrace is one of the tracers?). > > cool! could you send the patches for that? (even if they are not fully > functional yet) Patch attached in the end. Nice to see how much code disappeared. I tried to mark all the features I had to break with XXX-comments. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 3 +- arch/x86/kernel/mmiotrace/mmio-mod.c | 208 ++++----------------------- include/linux/mmiotrace.h | 6 + kernel/trace/Makefile | 1 + kernel/trace/trace_mmiotrace.c | 84 +++++++++++ 5 files changed, 123 insertions(+), 179 deletions(-) create mode 100644 kernel/trace/trace_mmiotrace.c diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index aa0d6462b1fc..7e4b8494078e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,8 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && RELAY && DEBUG_FS + depends on DEBUG_KERNEL && RELAY + select TRACING select MMIOTRACE_HOOKS default y help diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 738644061e4e..c7a67d7e482b 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -22,9 +22,8 @@ #define DEBUG 1 #include -#include #include -#include +#include #include #include #include @@ -63,18 +62,18 @@ static const size_t subbuf_size = 256*1024; static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); +#if 0 /* XXX: no way gather this info anymore */ /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); +#endif static struct dentry *dir; -static struct dentry *enabled_file; static struct dentry *marker_file; static DEFINE_MUTEX(mmiotrace_mutex); static DEFINE_SPINLOCK(trace_lock); static atomic_t mmiotrace_enabled; static LIST_HEAD(trace_list); /* struct remap_trace */ -static struct rchan *chan; /* * Locking in this file: @@ -93,36 +92,24 @@ static unsigned long filter_offset; static int nommiotrace; static int ISA_trace; static int trace_pc; -static int enable_now; module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); -module_param(enable_now, bool, 0); MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); -MODULE_PARM_DESC(enable_now, "Start mmiotracing immediately on module load."); static bool is_enabled(void) { return atomic_read(&mmiotrace_enabled); } -static void record_timestamp(struct mm_io_header *header) -{ - struct timespec now; - - getnstimeofday(&now); - header->sec = now.tv_sec; - header->nsec = now.tv_nsec; -} - /* * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log @@ -141,7 +128,6 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, headp = (struct mm_io_header *)event; headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); headp->data_len = len; - record_timestamp(headp); if (copy_from_user(event + sizeof(*headp), buffer, len)) { kfree(event); @@ -149,9 +135,11 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, } spin_lock_irq(&trace_lock); +#if 0 /* XXX: convert this to use tracing */ if (is_enabled()) relay_write(chan, event, sizeof(*headp) + len); else +#endif len = -EINVAL; spin_unlock_irq(&trace_lock); kfree(event); @@ -242,7 +230,11 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, else my_trace->rw.pc = 0; - record_timestamp(&my_trace->header); + /* + * XXX: the timestamp recorded will be *after* the tracing has been + * done, not at the time we hit the instruction. SMP implications + * on event ordering? + */ switch (type) { case REG_READ: @@ -295,77 +287,19 @@ static void post(struct kmmio_probe *p, unsigned long condition, default: break; } - relay_write(chan, my_trace, sizeof(*my_trace)); + + /* + * XXX: Several required values are ignored: + * - mapping id + * - program counter + * Also the address should be physical, not virtual. + */ + mmio_trace_record(my_trace->header.type, my_trace->rw.address, + my_trace->rw.value); put_cpu_var(cpu_trace); put_cpu_var(pf_reason); } -/* - * subbuf_start() relay callback. - * - * Defined so that we know when events are dropped due to the buffer-full - * condition. - */ -static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) -{ - unsigned int cpu = buf->cpu; - atomic_t *drop = &per_cpu(dropped, cpu); - int count; - if (relay_buf_full(buf)) { - if (atomic_inc_return(drop) == 1) - pr_err(NAME "cpu %d buffer full!\n", cpu); - return 0; - } - count = atomic_read(drop); - if (count) { - pr_err(NAME "cpu %d buffer no longer full, missed %d events.\n", - cpu, count); - atomic_sub(count, drop); - } - - return 1; -} - -static struct file_operations mmio_fops = { - .owner = THIS_MODULE, -}; - -/* file_create() callback. Creates relay file in debugfs. */ -static struct dentry *create_buf_file_handler(const char *filename, - struct dentry *parent, - int mode, - struct rchan_buf *buf, - int *is_global) -{ - struct dentry *buf_file; - - mmio_fops.read = relay_file_operations.read; - mmio_fops.open = relay_file_operations.open; - mmio_fops.poll = relay_file_operations.poll; - mmio_fops.mmap = relay_file_operations.mmap; - mmio_fops.release = relay_file_operations.release; - mmio_fops.splice_read = relay_file_operations.splice_read; - - buf_file = debugfs_create_file(filename, mode, parent, buf, - &mmio_fops); - - return buf_file; -} - -/* file_remove() default callback. Removes relay file in debugfs. */ -static int remove_buf_file_handler(struct dentry *dentry) -{ - debugfs_remove(dentry); - return 0; -} - -static struct rchan_callbacks relay_callbacks = { - .subbuf_start = subbuf_start_handler, - .create_buf_file = create_buf_file_handler, - .remove_buf_file = remove_buf_file_handler, -}; - static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *addr) { @@ -387,7 +321,6 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, .pc = 0 } }; - record_timestamp(&event.header); if (!trace) { pr_err(NAME "kmalloc failed in ioremap\n"); @@ -410,7 +343,10 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, if (!is_enabled()) goto not_enabled; - relay_write(chan, &event, sizeof(event)); + /* + * XXX: Insufficient data recorded! + */ + mmio_trace_record(event.header.type, event.map.addr, event.map.len); list_add_tail(&trace->list, &trace_list); if (!nommiotrace) register_kmmio_probe(&trace->probe); @@ -454,7 +390,6 @@ static void iounmap_trace_core(volatile void __iomem *addr) struct remap_trace *found_trace = NULL; pr_debug(NAME "Unmapping %p.\n", addr); - record_timestamp(&event.header); spin_lock_irq(&trace_lock); if (!is_enabled()) @@ -469,7 +404,8 @@ static void iounmap_trace_core(volatile void __iomem *addr) break; } } - relay_write(chan, &event, sizeof(event)); + mmio_trace_record(event.header.type, event.map.addr, + found_trace ? found_trace->id : -1); not_enabled: spin_unlock_irq(&trace_lock); @@ -512,77 +448,23 @@ static void clear_trace_list(void) } } -static ssize_t read_enabled_file_bool(struct file *file, - char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - - if (is_enabled()) - buf[0] = '1'; - else - buf[0] = '0'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static void enable_mmiotrace(void); -static void disable_mmiotrace(void); - -static ssize_t write_enabled_file_bool(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[32]; - int buf_size = min(count, (sizeof(buf)-1)); - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - switch (buf[0]) { - case 'y': - case 'Y': - case '1': - enable_mmiotrace(); - break; - case 'n': - case 'N': - case '0': - disable_mmiotrace(); - break; - } - - return count; -} - -/* this ripped from kernel/kprobes.c */ -static struct file_operations fops_enabled = { - .owner = THIS_MODULE, - .read = read_enabled_file_bool, - .write = write_enabled_file_bool -}; - static struct file_operations fops_marker = { .owner = THIS_MODULE, .write = write_marker }; -static void enable_mmiotrace(void) +void enable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (is_enabled()) goto out; - chan = relay_open("cpu", dir, subbuf_size, n_subbufs, - &relay_callbacks, NULL); - if (!chan) { - pr_err(NAME "relay app channel creation failed.\n"); - goto out; - } - reference_kmmio(); +#if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); +#endif if (!marker_file) pr_err(NAME "marker file creation failed.\n"); @@ -598,7 +480,7 @@ out: mutex_unlock(&mmiotrace_mutex); } -static void disable_mmiotrace(void) +void disable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (!is_enabled()) @@ -615,17 +497,13 @@ static void disable_mmiotrace(void) debugfs_remove(marker_file); marker_file = NULL; } - if (chan) { - relay_close(chan); - chan = NULL; - } pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } -static int __init init(void) +int __init init_mmiotrace(void) { pr_debug(NAME "load...\n"); if (n_subbufs < 2) @@ -636,31 +514,5 @@ static int __init init(void) pr_err(NAME "Couldn't create relay app directory.\n"); return -ENOMEM; } - - enabled_file = debugfs_create_file("enabled", 0600, dir, NULL, - &fops_enabled); - if (!enabled_file) { - pr_err(NAME "Couldn't create enabled file.\n"); - debugfs_remove(dir); - return -ENOMEM; - } - - if (enable_now) - enable_mmiotrace(); - return 0; } - -static void __exit cleanup(void) -{ - pr_debug(NAME "unload...\n"); - if (enabled_file) - debugfs_remove(enabled_file); - disable_mmiotrace(); - if (dir) - debugfs_remove(dir); -} - -module_init(init); -module_exit(cleanup); -MODULE_LICENSE("GPL"); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb5efd0c7f51..579b3b06c90e 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,6 +54,12 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ +/* in kernel/trace/trace_mmiotrace.c */ +extern int __init init_mmiotrace(void); +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); + #endif /* __KERNEL__ */ diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d9efbbfa2bdf..c44a7dce9086 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -18,5 +18,6 @@ obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o +obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c new file mode 100644 index 000000000000..e4dd03cc5aa6 --- /dev/null +++ b/kernel/trace/trace_mmiotrace.c @@ -0,0 +1,84 @@ +/* + * Memory mapped I/O tracing + * + * Copyright (C) 2008 Pekka Paalanen + */ + +#define DEBUG 1 + +#include +#include + +#include "trace.h" + +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); + +static struct trace_array *mmio_trace_array; + + +static void mmio_trace_init(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + mmio_trace_array = tr; + if (tr->ctrl) + enable_mmiotrace(); +} + +static void mmio_trace_reset(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + if (tr->ctrl) + disable_mmiotrace(); +} + +static void mmio_trace_ctrl_update(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + if (tr->ctrl) + enable_mmiotrace(); + else + disable_mmiotrace(); +} + +static struct tracer mmio_tracer __read_mostly = +{ + .name = "mmiotrace", + .init = mmio_trace_init, + .reset = mmio_trace_reset, + .ctrl_update = mmio_trace_ctrl_update, +}; + +__init static int init_mmio_trace(void) +{ + int ret = init_mmiotrace(); + if (ret) + return ret; + return register_tracer(&mmio_tracer); +} +device_initcall(init_mmio_trace); + +void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg) +{ + struct trace_array *tr = mmio_trace_array; + struct trace_array_cpu *data = tr->data[smp_processor_id()]; + + if (!current || current->pid == 0) { + /* + * XXX: This is a problem. We need to able to record, no + * matter what. tracing_generic_entry_update() would crash. + */ + static unsigned limit; + if (limit++ < 12) + pr_err("Error in %s: no current.\n", __func__); + return; + } + if (!tr || !data) { + static unsigned limit; + if (limit++ < 12) + pr_err("%s: no tr or data\n", __func__); + return; + } + __trace_special(tr, data, type, addr, arg); +} From bd8ac686c73c7e925fcfe0b02dc4e7b947127864 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: [PATCH 10/30] ftrace: mmiotrace, updates here is a patch that makes mmiotrace work almost well within the tracing framework. The patch applies on top of my previous patch. I have my own output formatting in place now. Summary of changes: - fix the NULL dereference that was due to not calling tracing_reset() - add print_line() callback into struct tracer - implement print_line() for mmiotrace, producing up-to-spec text - add my output header, but that is not really called in the right place - rewrote the main structs in mmiotrace - added two new trace entry types: TRACE_MMIO_RW and TRACE_MMIO_MAP - made some functions in trace.c non-static - check current==NULL in tracing_generic_entry_update() - fix(?) comparison in trace_seq_printf() Things seem to work fine except a few issues. Markers (text lines injected into mmiotrace log) are missing, I did not feel hacking them in before we have variable length entries. My output header is printed only for 'trace' file, but not 'trace_pipe'. For some reason, despite my quick fix, iter->trace is NULL in print_trace_line() when called from 'trace_pipe' file, which means I don't get proper output formatting. I only tried by loading nouveau.ko, which just detects the card, and that is traced fine. I didn't try further. Map, two reads and unmap. Works perfectly. I am missing the information about overflows, I'd prefer to have a counter for lost events. I didn't try, but I guess currently there is no way of knowning when it overflows? So, not too far from being fully operational, it seems :-) And looking at the diffstat, there also is some 700-900 lines of user space code that just became obsolete. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 2 +- arch/x86/kernel/mmiotrace/mmio-mod.c | 140 +++++++----------------- include/linux/mmiotrace.h | 91 +++++----------- kernel/trace/trace.c | 34 ++++++ kernel/trace/trace.h | 14 +++ kernel/trace/trace_mmiotrace.c | 153 ++++++++++++++++++++++----- 6 files changed, 242 insertions(+), 192 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7e4b8494078e..1d6de0d67f99 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,7 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && RELAY + depends on DEBUG_KERNEL select TRACING select MMIOTRACE_HOOKS default y diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index c7a67d7e482b..62abc281a512 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -37,11 +37,6 @@ #define NAME "mmiotrace: " -/* This app's relay channel files will appear in /debug/mmio-trace */ -static const char APP_DIR[] = "mmio-trace"; -/* the marker injection file in /debug/APP_DIR */ -static const char MARKER_FILE[] = "mmio-marker"; - struct trap_reason { unsigned long addr; unsigned long ip; @@ -56,18 +51,15 @@ struct remap_trace { unsigned long id; }; -static const size_t subbuf_size = 256*1024; - /* Accessed per-cpu. */ static DEFINE_PER_CPU(struct trap_reason, pf_reason); -static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); +static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); #if 0 /* XXX: no way gather this info anymore */ /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); #endif -static struct dentry *dir; static struct dentry *marker_file; static DEFINE_MUTEX(mmiotrace_mutex); @@ -82,24 +74,21 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ * and trace_lock. * - Routines depending on is_enabled() must take trace_lock. * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that chan is valid. + * - is_enabled() guarantees that mmio_trace_record is allowed. * - pre/post callbacks assume the effect of is_enabled() being true. */ /* module parameters */ -static unsigned int n_subbufs = 32*4; static unsigned long filter_offset; static int nommiotrace; static int ISA_trace; static int trace_pc; -module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); -MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); @@ -110,6 +99,7 @@ static bool is_enabled(void) return atomic_read(&mmiotrace_enabled); } +#if 0 /* XXX: needs rewrite */ /* * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log @@ -145,6 +135,7 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, kfree(event); return len; } +#endif static void print_pte(unsigned long address) { @@ -198,9 +189,10 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, unsigned long addr) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); + struct remap_trace *trace = p->user_data; /* it doesn't make sense to have more than one active trace per cpu */ if (my_reason->active_traces) @@ -212,23 +204,17 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, my_reason->addr = addr; my_reason->ip = instptr; - my_trace->header.type = MMIO_MAGIC; - my_trace->header.pid = 0; - my_trace->header.data_len = sizeof(struct mm_io_rw); - my_trace->rw.address = addr; - /* - * struct remap_trace *trace = p->user_data; - * phys = addr - trace->probe.addr + trace->phys; - */ + my_trace->phys = addr - trace->probe.addr + trace->phys; + my_trace->map_id = trace->id; /* * Only record the program counter when requested. * It may taint clean-room reverse engineering. */ if (trace_pc) - my_trace->rw.pc = instptr; + my_trace->pc = instptr; else - my_trace->rw.pc = 0; + my_trace->pc = 0; /* * XXX: the timestamp recorded will be *after* the tracing has been @@ -238,28 +224,25 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, switch (type) { case REG_READ: - my_trace->header.type |= - (MMIO_READ << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + my_trace->opcode = MMIO_READ; + my_trace->width = get_ins_mem_width(instptr); break; case REG_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_reg_val(instptr, regs); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_reg_val(instptr, regs); break; case IMM_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_imm_val(instptr); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_imm_val(instptr); break; default: { unsigned char *ip = (unsigned char *)instptr; - my_trace->header.type |= - (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); - my_trace->rw.value = (*ip) << 16 | *(ip + 1) << 8 | + my_trace->opcode = MMIO_UNKNOWN_OP; + my_trace->width = 0; + my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | *(ip + 2); } } @@ -271,7 +254,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct pt_regs *regs) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); /* this should always return the active_trace count to 0 */ my_reason->active_traces--; @@ -282,20 +265,13 @@ static void post(struct kmmio_probe *p, unsigned long condition, switch (my_reason->type) { case REG_READ: - my_trace->rw.value = get_ins_reg_val(my_reason->ip, regs); + my_trace->value = get_ins_reg_val(my_reason->ip, regs); break; default: break; } - /* - * XXX: Several required values are ignored: - * - mapping id - * - program counter - * Also the address should be physical, not virtual. - */ - mmio_trace_record(my_trace->header.type, my_trace->rw.address, - my_trace->rw.value); + mmio_trace_rw(my_trace); put_cpu_var(cpu_trace); put_cpu_var(pf_reason); } @@ -305,21 +281,11 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, { static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_PROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = offset, - .addr = (unsigned long)addr, - .len = size, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = offset, + .virt = (unsigned long)addr, + .len = size, + .opcode = MMIO_PROBE }; if (!trace) { @@ -338,15 +304,13 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, .phys = offset, .id = atomic_inc_return(&next_id) }; + map.map_id = trace->id; spin_lock_irq(&trace_lock); if (!is_enabled()) goto not_enabled; - /* - * XXX: Insufficient data recorded! - */ - mmio_trace_record(event.header.type, event.map.addr, event.map.len); + mmio_trace_mapping(&map); list_add_tail(&trace->list, &trace_list); if (!nommiotrace) register_kmmio_probe(&trace->probe); @@ -369,21 +333,11 @@ mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) static void iounmap_trace_core(volatile void __iomem *addr) { - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = 0, - .addr = (unsigned long)addr, - .len = 0, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = 0, + .virt = (unsigned long)addr, + .len = 0, + .opcode = MMIO_UNPROBE }; struct remap_trace *trace; struct remap_trace *tmp; @@ -404,8 +358,8 @@ static void iounmap_trace_core(volatile void __iomem *addr) break; } } - mmio_trace_record(event.header.type, event.map.addr, - found_trace ? found_trace->id : -1); + map.map_id = (found_trace) ? found_trace->id : -1; + mmio_trace_mapping(&map); not_enabled: spin_unlock_irq(&trace_lock); @@ -448,10 +402,12 @@ static void clear_trace_list(void) } } +#if 0 /* XXX: out of order */ static struct file_operations fops_marker = { .owner = THIS_MODULE, .write = write_marker }; +#endif void enable_mmiotrace(void) { @@ -464,9 +420,9 @@ void enable_mmiotrace(void) #if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); -#endif if (!marker_file) pr_err(NAME "marker file creation failed.\n"); +#endif if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); @@ -502,17 +458,3 @@ void disable_mmiotrace(void) out: mutex_unlock(&mmiotrace_mutex); } - -int __init init_mmiotrace(void) -{ - pr_debug(NAME "load...\n"); - if (n_subbufs < 2) - return -EINVAL; - - dir = debugfs_create_dir(APP_DIR, NULL); - if (!dir) { - pr_err(NAME "Couldn't create relay app directory.\n"); - return -ENOMEM; - } - return 0; -} diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 579b3b06c90e..c88a9c197d22 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,73 +54,38 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ +enum mm_io_opcode { + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ + MMIO_MARKER = 0x5, /* raw char data */ + MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ +}; + +struct mmiotrace_rw { + unsigned long phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ +}; + +struct mmiotrace_map { + unsigned long phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ +}; + /* in kernel/trace/trace_mmiotrace.c */ -extern int __init init_mmiotrace(void); extern void enable_mmiotrace(void); extern void disable_mmiotrace(void); -extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); +extern void mmio_trace_rw(struct mmiotrace_rw *rw); +extern void mmio_trace_mapping(struct mmiotrace_map *map); #endif /* __KERNEL__ */ - -/* - * If you change anything here, you must bump MMIO_VERSION. - * This is the relay data format for user space. - */ -#define MMIO_VERSION 0x04 - -/* mm_io_header.type */ -#define MMIO_OPCODE_MASK 0xff -#define MMIO_OPCODE_SHIFT 0 -#define MMIO_WIDTH_MASK 0xff00 -#define MMIO_WIDTH_SHIFT 8 -#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) -#define MMIO_MAGIC_MASK 0xffff0000 - -enum mm_io_opcode { /* payload type: */ - MMIO_READ = 0x1, /* struct mm_io_rw */ - MMIO_WRITE = 0x2, /* struct mm_io_rw */ - MMIO_PROBE = 0x3, /* struct mm_io_map */ - MMIO_UNPROBE = 0x4, /* struct mm_io_map */ - MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ -}; - -struct mm_io_header { - __u32 type; /* see MMIO_* macros above */ - __u32 sec; /* timestamp */ - __u32 nsec; - __u32 pid; /* PID of the process, or 0 for kernel core */ - __u16 data_len; /* length of the following payload */ -}; - -struct mm_io_rw { - __u64 address; /* virtual address of register */ - __u64 value; - __u64 pc; /* optional program counter */ -}; - -struct mm_io_map { - __u64 phys; /* base address in PCI space */ - __u64 addr; /* base virtual address */ - __u64 len; /* mapping size */ - __u64 pc; /* optional program counter */ -}; - - -/* - * These structures are used to allow a single relay_write() - * call to write a full packet. - */ - -struct mm_io_header_rw { - struct mm_io_header header; - struct mm_io_rw rw; -} __attribute__((packed)); - -struct mm_io_header_map { - struct mm_io_header header; - struct mm_io_map map; -} __attribute__((packed)); - #endif /* MMIOTRACE_H */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916ff033..d14fe49e9638 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -831,6 +831,40 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } +#ifdef CONFIG_MMIOTRACE +void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_rw *rw) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_RW; + entry->mmiorw = *rw; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} + +void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_map *map) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_MAP; + entry->mmiomap = *map; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} +#endif + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c460e85e94ed..0ef9ef74c806 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -5,6 +5,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -14,6 +15,8 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_SPECIAL, + TRACE_MMIO_RW, + TRACE_MMIO_MAP, __TRACE_LAST_TYPE }; @@ -75,6 +78,8 @@ struct trace_entry { struct ctx_switch_entry ctx; struct special_entry special; struct stack_entry stack; + struct mmiotrace_rw mmiorw; + struct mmiotrace_map mmiomap; }; }; @@ -255,6 +260,15 @@ extern unsigned long ftrace_update_tot_cnt; extern int DYN_FTRACE_TEST_NAME(void); #endif +#ifdef CONFIG_MMIOTRACE +extern void __trace_mmiotrace_rw(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_rw *rw); +extern void __trace_mmiotrace_map(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_map *map); +#endif + #ifdef CONFIG_FTRACE_STARTUP_TEST #ifdef CONFIG_FTRACE extern int trace_selftest_startup_function(struct tracer *trace, diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index e4dd03cc5aa6..3a12b1ad0c63 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -11,19 +11,26 @@ #include "trace.h" -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); - static struct trace_array *mmio_trace_array; +static void mmio_reset_data(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} static void mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; - if (tr->ctrl) + if (tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); + } } static void mmio_trace_reset(struct trace_array *tr) @@ -31,15 +38,110 @@ static void mmio_trace_reset(struct trace_array *tr) pr_debug("in %s\n", __func__); if (tr->ctrl) disable_mmiotrace(); + mmio_reset_data(tr); + mmio_trace_array = NULL; } static void mmio_trace_ctrl_update(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) + if (tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); - else + } else { disable_mmiotrace(); + } +} + +/* XXX: This is not called for trace_pipe file! */ +void mmio_print_header(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + trace_seq_printf(s, "VERSION broken 20070824\n"); + /* TODO: print /proc/bus/pci/devices contents as PCIDEV lines */ +} + +static int mmio_print_rw(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_rw *rw = &entry->mmiorw; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_READ: + ret = trace_seq_printf(s, + "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_WRITE: + ret = trace_seq_printf(s, + "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_UNKNOWN_OP: + ret = trace_seq_printf(s, + "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", + secs, usec_rem, rw->map_id, rw->phys, + (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, + (rw->value >> 0) & 0xff, rw->pc, entry->pid); + break; + default: + ret = trace_seq_printf(s, "rw what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +static int mmio_print_map(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_map *m = &entry->mmiomap; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_PROBE: + ret = trace_seq_printf(s, + "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, m->phys, m->virt, m->len, + 0UL, entry->pid); + break; + case MMIO_UNPROBE: + ret = trace_seq_printf(s, + "UNMAP %lu.%06lu %d 0x%lx %d\n", + secs, usec_rem, m->map_id, 0UL, entry->pid); + break; + default: + ret = trace_seq_printf(s, "map what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +/* return 0 to abort printing without consuming current entry in pipe mode */ +static int mmio_print_line(struct trace_iterator *iter) +{ + switch (iter->ent->type) { + case TRACE_MMIO_RW: + return mmio_print_rw(iter); + case TRACE_MMIO_MAP: + return mmio_print_map(iter); + default: + return 1; /* ignore unknown entries */ + } } static struct tracer mmio_tracer __read_mostly = @@ -47,38 +149,31 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, + .open = mmio_print_header, .ctrl_update = mmio_trace_ctrl_update, + .print_line = mmio_print_line, }; __init static int init_mmio_trace(void) { - int ret = init_mmiotrace(); - if (ret) - return ret; return register_tracer(&mmio_tracer); } device_initcall(init_mmio_trace); -void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg) +void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; struct trace_array_cpu *data = tr->data[smp_processor_id()]; - - if (!current || current->pid == 0) { - /* - * XXX: This is a problem. We need to able to record, no - * matter what. tracing_generic_entry_update() would crash. - */ - static unsigned limit; - if (limit++ < 12) - pr_err("Error in %s: no current.\n", __func__); - return; - } - if (!tr || !data) { - static unsigned limit; - if (limit++ < 12) - pr_err("%s: no tr or data\n", __func__); - return; - } - __trace_special(tr, data, type, addr, arg); + __trace_mmiotrace_rw(tr, data, rw); +} + +void mmio_trace_mapping(struct mmiotrace_map *map) +{ + struct trace_array *tr = mmio_trace_array; + struct trace_array_cpu *data; + + preempt_disable(); + data = tr->data[smp_processor_id()]; + __trace_mmiotrace_map(tr, data, map); + preempt_enable(); } From 138295373ccf7625fcb0218dfea114837983bc39 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:58 +0200 Subject: [PATCH 11/30] ftrace: mmiotrace update, #2 another weekend, another patch. This should apply on top of my previous patch from March 23rd. Summary of changes: - Print PCI device list in output header - work around recursive probe hits on SMP - refactor dis/arm_kmmio_fault_page() and add check for page levels - remove un/reference_kmmio(), the die notifier hook is registered permanently into the list - explicitly check for single stepping in die notifier callback I have tested this version on my UP Athlon64 desktop with Nouveau, and SMP Core 2 Duo laptop with the proprietary nvidia driver. Both systems are 64-bit. One previously unknown bug crept into daylight: the ftrace framework's output routines print the first entry last after buffer has wrapped around. The most important regressions compared to non-ftrace mmiotrace at this time are: - failure of trace_pipe file - illegal lines in output file - unaware of losing data due to buffer full Personally I'd like to see these three solved before submitting to mainline. Other issues may come up once we know when we lose events. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mmiotrace/kmmio.c | 186 +++++++++++---------------- arch/x86/kernel/mmiotrace/mmio-mod.c | 3 - include/linux/mmiotrace.h | 2 - kernel/trace/trace_mmiotrace.c | 47 ++++++- 4 files changed, 120 insertions(+), 118 deletions(-) diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index efb467933087..cd0d95fe4fe6 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -5,15 +5,12 @@ * 2008 Pekka Paalanen */ -#include #include #include #include #include #include -#include #include -#include #include #include #include @@ -22,10 +19,9 @@ #include #include #include -#include #include -#include - +#include +#include #include #define KMMIO_PAGE_HASH_BITS 4 @@ -57,14 +53,9 @@ struct kmmio_context { int active; }; -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args); - -static DEFINE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); -/* These are protected by kmmio_lock */ -static int kmmio_initialized; +/* Protected by kmmio_lock */ unsigned int kmmio_count; /* Read-protected by RCU, write-protected by kmmio_lock. */ @@ -79,60 +70,6 @@ static struct list_head *kmmio_page_list(unsigned long page) /* Accessed per-cpu */ static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); -/* protected by kmmio_init_mutex */ -static struct notifier_block nb_die = { - .notifier_call = kmmio_die_notifier -}; - -/** - * Makes sure kmmio is initialized and usable. - * This must be called before any other kmmio function defined here. - * May sleep. - */ -void reference_kmmio(void) -{ - mutex_lock(&kmmio_init_mutex); - spin_lock_irq(&kmmio_lock); - if (!kmmio_initialized) { - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - if (register_die_notifier(&nb_die)) - BUG(); - } - kmmio_initialized++; - spin_unlock_irq(&kmmio_lock); - mutex_unlock(&kmmio_init_mutex); -} -EXPORT_SYMBOL_GPL(reference_kmmio); - -/** - * Clean up kmmio after use. This must be called for every call to - * reference_kmmio(). All probes registered after the corresponding - * reference_kmmio() must have been unregistered when calling this. - * May sleep. - */ -void unreference_kmmio(void) -{ - bool unreg = false; - - mutex_lock(&kmmio_init_mutex); - spin_lock_irq(&kmmio_lock); - - if (kmmio_initialized == 1) { - BUG_ON(is_kmmio_active()); - unreg = true; - } - kmmio_initialized--; - BUG_ON(kmmio_initialized < 0); - spin_unlock_irq(&kmmio_lock); - - if (unreg) - unregister_die_notifier(&nb_die); /* calls sync_rcu() */ - mutex_unlock(&kmmio_init_mutex); -} -EXPORT_SYMBOL(unreference_kmmio); - /* * this is basically a dynamic stabbing problem: * Could use the existing prio tree code or @@ -167,58 +104,56 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -/** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, int *page_level) +static void set_page_present(unsigned long addr, bool present, int *pglevel) { - unsigned long address = page & PAGE_MASK; + pteval_t pteval; + pmdval_t pmdval; int level; - pte_t *pte = lookup_address(address, &level); + pmd_t *pmd; + pte_t *pte = lookup_address(addr, &level); if (!pte) { - pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", - __func__, page); + pr_err("kmmio: no pte for page 0x%08lx\n", addr); return; } - if (level == PG_LEVEL_2M) { - pmd_t *pmd = (pmd_t *)pte; - set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); - } else { - /* PG_LEVEL_4K */ - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + if (pglevel) + *pglevel = level; + + switch (level) { + case PG_LEVEL_2M: + pmd = (pmd_t *)pte; + pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + if (present) + pmdval |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(pmdval)); + break; + + case PG_LEVEL_4K: + pteval = pte_val(*pte) & ~_PAGE_PRESENT; + if (present) + pteval |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(pteval)); + break; + + default: + pr_err("kmmio: unexpected page level 0x%x.\n", level); + return; } - if (page_level) - *page_level = level; + __flush_tlb_one(addr); +} - __flush_tlb_one(page); +/** Mark the given page as not present. Access to it will trigger a fault. */ +static void arm_kmmio_fault_page(unsigned long page, int *page_level) +{ + set_page_present(page & PAGE_MASK, false, page_level); } /** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { - unsigned long address = page & PAGE_MASK; - int level; - pte_t *pte = lookup_address(address, &level); - - if (!pte) { - pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", - __func__, page); - return; - } - - if (level == PG_LEVEL_2M) { - pmd_t *pmd = (pmd_t *)pte; - set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); - } else { - /* PG_LEVEL_4K */ - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - } - - if (page_level) - *page_level = level; - - __flush_tlb_one(page); + set_page_present(page & PAGE_MASK, true, page_level); } /* @@ -240,6 +175,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) { struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; + int ret = 0; /* default to fault not handled */ /* * Preemption is now disabled to prevent process switch during @@ -257,21 +193,35 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) /* * Either this page fault is not caused by kmmio, or * another CPU just pulled the kmmio probe from under - * our feet. In the latter case all hell breaks loose. + * our feet. The latter case should not be possible. */ goto no_kmmio; } ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { + disarm_kmmio_fault_page(faultpage->page, NULL); + if (addr == ctx->addr) { + /* + * On SMP we sometimes get recursive probe hits on the + * same address. Context is already saved, fall out. + */ + pr_debug("kmmio: duplicate probe hit on CPU %d, for " + "address 0x%08lx.\n", + smp_processor_id(), addr); + ret = 1; + goto no_kmmio_ctx; + } /* * Prevent overwriting already in-flight context. - * If this page fault really was due to kmmio trap, - * all hell breaks loose. + * This should not happen, let's hope disarming at least + * prevents a panic. */ pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); goto no_kmmio_ctx; } ctx->active++; @@ -302,14 +252,14 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) */ put_cpu_var(kmmio_ctx); - return 1; + return 1; /* fault handled */ no_kmmio_ctx: put_cpu_var(kmmio_ctx); no_kmmio: rcu_read_unlock(); preempt_enable_no_resched(); - return 0; /* page fault not handled by kmmio */ + return ret; } /* @@ -322,8 +272,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) int ret = 0; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); - if (!ctx->active) + if (!ctx->active) { + pr_debug("kmmio: spurious debug trap on CPU %d.\n", + smp_processor_id()); goto out; + } if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); @@ -525,9 +478,22 @@ static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, { struct die_args *arg = args; - if (val == DIE_DEBUG) + if (val == DIE_DEBUG && (arg->err & DR_STEP)) if (post_kmmio_handler(arg->err, arg->regs) == 1) return NOTIFY_STOP; return NOTIFY_DONE; } + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +static int __init init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); +} +fs_initcall(init_kmmio); /* should be before device_initcall() */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 62abc281a512..8256546d49bf 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -415,8 +415,6 @@ void enable_mmiotrace(void) if (is_enabled()) goto out; - reference_kmmio(); - #if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); @@ -448,7 +446,6 @@ void disable_mmiotrace(void) spin_unlock_irq(&trace_lock); clear_trace_list(); /* guarantees: no more kmmio callbacks */ - unreference_kmmio(); if (marker_file) { debugfs_remove(marker_file); marker_file = NULL; diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c88a9c197d22..dd6b64b160fc 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -31,8 +31,6 @@ static inline int is_kmmio_active(void) return kmmio_count; } -extern void reference_kmmio(void); -extern void unreference_kmmio(void); extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 3a12b1ad0c63..361472b5788c 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -8,6 +8,7 @@ #include #include +#include #include "trace.h" @@ -53,12 +54,52 @@ static void mmio_trace_ctrl_update(struct trace_array *tr) } } +static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) +{ + int ret = 0; + int i; + resource_size_t start, end; + const struct pci_driver *drv = pci_dev_driver(dev); + + /* XXX: incomplete checks for trace_seq_printf() return value */ + ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x", + dev->bus->number, dev->devfn, + dev->vendor, dev->device, dev->irq); + /* + * XXX: is pci_resource_to_user() appropriate, since we are + * supposed to interpret the __ioremap() phys_addr argument based on + * these printed values? + */ + for (i = 0; i < 7; i++) { + pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); + ret += trace_seq_printf(s, " %llx", + (unsigned long long)(start | + (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); + } + for (i = 0; i < 7; i++) { + pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); + ret += trace_seq_printf(s, " %llx", + dev->resource[i].start < dev->resource[i].end ? + (unsigned long long)(end - start) + 1 : 0); + } + if (drv) + ret += trace_seq_printf(s, " %s\n", drv->name); + else + ret += trace_seq_printf(s, " \n"); + return ret; +} + /* XXX: This is not called for trace_pipe file! */ -void mmio_print_header(struct trace_iterator *iter) +static void mmio_print_header(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; - trace_seq_printf(s, "VERSION broken 20070824\n"); - /* TODO: print /proc/bus/pci/devices contents as PCIDEV lines */ + struct pci_dev *dev = NULL; + + trace_seq_printf(s, "VERSION 20070824\n"); + + for_each_pci_dev(dev) + mmio_print_pcidev(s, dev); + /* XXX: return value? What if header is very long? */ } static int mmio_print_rw(struct trace_iterator *iter) From 801a175bf601f9a9d5e86e92dee9adeeb6625da8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:58 +0200 Subject: [PATCH 12/30] mmiotrace: ftrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d14fe49e9638..4dcc4e85c5d6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -838,12 +838,16 @@ void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_RW; entry->mmiorw = *rw; - spin_unlock_irqrestore(&data->lock, irq_flags); + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -854,12 +858,16 @@ void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_MAP; entry->mmiomap = *map; - spin_unlock_irqrestore(&data->lock, irq_flags); + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } From 49023168261a7f9a2fd4a1ca1adbfea922556015 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:58 +0200 Subject: [PATCH 13/30] mmiotrace: cleanup Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mmiotrace/kmmio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index cd0d95fe4fe6..3ad27b8504a5 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -228,7 +228,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx->fpage = faultpage; ctx->probe = get_kmmio_probe(addr); - ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); ctx->addr = addr; if (ctx->probe && ctx->probe->pre_handler) @@ -238,8 +238,8 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * Enable single-stepping and disable interrupts for the faulting * context. Local interrupts must not get enabled during stepping. */ - regs->flags |= TF_MASK; - regs->flags &= ~IF_MASK; + regs->flags |= X86_EFLAGS_TF; + regs->flags &= ~X86_EFLAGS_IF; /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); @@ -283,7 +283,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) arm_kmmio_fault_page(ctx->fpage->page, NULL); - regs->flags &= ~TF_MASK; + regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; /* These were acquired in kmmio_handler(). */ @@ -297,7 +297,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (!(regs->flags & TF_MASK)) + if (!(regs->flags & X86_EFLAGS_TF)) ret = 1; out: put_cpu_var(kmmio_ctx); From ff3a3e9ba5e4273a8bc10570adab4a390fb90757 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: [PATCH 14/30] x86 mmiotrace: move files into arch/x86/mm/. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 -- arch/x86/kernel/mmiotrace/Makefile | 4 ---- arch/x86/mm/Makefile | 5 +++++ arch/x86/{kernel/mmiotrace => mm}/kmmio.c | 0 arch/x86/{kernel/mmiotrace => mm}/mmio-mod.c | 0 arch/x86/{kernel/mmiotrace => mm}/pf_in.c | 0 arch/x86/{kernel/mmiotrace => mm}/pf_in.h | 0 arch/x86/{kernel/mmiotrace => mm}/testmmiotrace.c | 0 8 files changed, 5 insertions(+), 6 deletions(-) delete mode 100644 arch/x86/kernel/mmiotrace/Makefile rename arch/x86/{kernel/mmiotrace => mm}/kmmio.c (100%) rename arch/x86/{kernel/mmiotrace => mm}/mmio-mod.c (100%) rename arch/x86/{kernel/mmiotrace => mm}/pf_in.c (100%) rename arch/x86/{kernel/mmiotrace => mm}/pf_in.h (100%) rename arch/x86/{kernel/mmiotrace => mm}/testmmiotrace.c (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a51ac153685e..739d49acd2f1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -79,8 +79,6 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_MMIOTRACE) += mmiotrace/ - obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile deleted file mode 100644 index dbcd8d50fb8d..000000000000 --- a/arch/x86/kernel/mmiotrace/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o -obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := pf_in.o mmio-mod.o -obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index b7b3e4c7cfc9..07dab503c9e3 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-y := pf_in.o mmio-mod.o +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o + ifeq ($(CONFIG_X86_32),y) obj-$(CONFIG_NUMA) += discontig_32.o else diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/mm/kmmio.c similarity index 100% rename from arch/x86/kernel/mmiotrace/kmmio.c rename to arch/x86/mm/kmmio.c diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/mm/mmio-mod.c similarity index 100% rename from arch/x86/kernel/mmiotrace/mmio-mod.c rename to arch/x86/mm/mmio-mod.c diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/mm/pf_in.c similarity index 100% rename from arch/x86/kernel/mmiotrace/pf_in.c rename to arch/x86/mm/pf_in.c diff --git a/arch/x86/kernel/mmiotrace/pf_in.h b/arch/x86/mm/pf_in.h similarity index 100% rename from arch/x86/kernel/mmiotrace/pf_in.h rename to arch/x86/mm/pf_in.h diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c similarity index 100% rename from arch/x86/kernel/mmiotrace/testmmiotrace.c rename to arch/x86/mm/testmmiotrace.c From e4b37ee68609037ffcaa2fcfae47cd31a605bb9e Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: [PATCH 15/30] x86 mmiotrace: remove ISA_trace parameter. This had become a no-op. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/mmio-mod.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 8256546d49bf..ab2bb776d310 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -81,17 +81,14 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ /* module parameters */ static unsigned long filter_offset; static int nommiotrace; -static int ISA_trace; static int trace_pc; module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); -module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); -MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); static bool is_enabled(void) @@ -424,8 +421,6 @@ void enable_mmiotrace(void) if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); - if (ISA_trace) - pr_warning(NAME "Warning! low ISA range will be traced.\n"); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); spin_unlock_irq(&trace_lock); From 736ca61fa81874b3fee205a593251b1869d0bcf1 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: [PATCH 16/30] x86 mmiotrace: Do not print bogus pid Non-zero pid indicates the MMIO access originated in user space. We do not catch that kind of accesses yet, so always print zero for now. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace_mmiotrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 361472b5788c..79be4a18ec1e 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -117,20 +117,20 @@ static int mmio_print_rw(struct trace_iterator *iter) ret = trace_seq_printf(s, "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", rw->width, secs, usec_rem, rw->map_id, rw->phys, - rw->value, rw->pc, entry->pid); + rw->value, rw->pc, 0); break; case MMIO_WRITE: ret = trace_seq_printf(s, "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", rw->width, secs, usec_rem, rw->map_id, rw->phys, - rw->value, rw->pc, entry->pid); + rw->value, rw->pc, 0); break; case MMIO_UNKNOWN_OP: ret = trace_seq_printf(s, "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", secs, usec_rem, rw->map_id, rw->phys, (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, - (rw->value >> 0) & 0xff, rw->pc, entry->pid); + (rw->value >> 0) & 0xff, rw->pc, 0); break; default: ret = trace_seq_printf(s, "rw what?\n"); From c6c67c1afcce71335b18ed8769b1165c468bfb03 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: [PATCH 17/30] mmiotrace: add user documentation Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/tracers/mmiotrace.txt | 153 ++++++++++++++++++++++++++++ arch/x86/Kconfig.debug | 8 +- 2 files changed, 156 insertions(+), 5 deletions(-) create mode 100644 Documentation/tracers/mmiotrace.txt diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt new file mode 100644 index 000000000000..84246f703875 --- /dev/null +++ b/Documentation/tracers/mmiotrace.txt @@ -0,0 +1,153 @@ + In-kernel memory-mapped I/O tracing + + +Home page and links to optional user space tools: + + http://nouveau.freedesktop.org/wiki/MmioTrace + +MMIO tracing was originally developed by Intel around 2003 for their Fault +Injection Test Harness. In Dec 2006 - Jan 2007, using the code from Intel, +Jeff Muizelaar created a tool for tracing MMIO accesses with the Nouveau +project in mind. Since then many people have contributed. + +Mmiotrace was built for reverse engineering any memory-mapped IO device with +the Nouveau project as the first real user. Only x86 and x86_64 architectures +are supported. + +Out-of-tree mmiotrace was originally modified for mainline inclusion and +ftrace framework by Pekka Paalanen . + + +Preparation +----------- + +Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is +disabled by default, so it is safe to have this set to yes. SMP systems are +supported, but tracing is unreliable and may miss events if more than one CPU +is on-line, therefore mmiotrace takes all but one CPU off-line during run-time +activation [not implemented]. + + +Usage Quick Reference +--------------------- + +$ mount -t debugfs debugfs /debug +$ echo mmiotrace > /debug/tracing/current_tracer +$ cat /debug/tracing/trace_pipe > mydump.txt & +Start X or whatever. +$ echo "X is up" > /debug/tracing/marker +$ echo none > /debug/tracing/current_tracer +Check kernel log. + + +Usage +----- + +Make sure debugfs is mounted to /debug. If not, (requires root privileges) +$ mount -t debugfs debugfs /debug + +Check that the driver you are about to trace is not loaded. + +Activate mmiotrace (requires root privileges): +$ echo mmiotrace > /debug/tracing/current_tracer + +Start storing the trace: +$ cat /debug/tracing/trace_pipe > mydump.txt & +The 'cat' process should stay running (sleeping) in the background. + +Load the driver you want to trace and use it. Mmiotrace will only catch MMIO +accesses to areas that are ioremapped while mmiotrace is active. + +[Unimplemented feature:] +During tracing you can place comments (markers) into the trace by +$ echo "X is up" > /debug/tracing/marker +This makes it easier to see which part of the (huge) trace corresponds to +which action. It is recommended to place descriptive markers about what you +do. + +Shut down mmiotrace (requires root privileges): +$ echo none > /debug/tracing/current_tracer +The 'cat' process exits. If it does not, kill it by 'fg' and pressing ctrl+c. + +[This feature is not implemented yet!] +Check your kernel log. If there are messages about mmiotrace losing events, +this is due to buffer overrun, and the trace is incomplete. You should enlarge +the buffers and try again. [How?] + +If you are doing a trace for a driver project, e.g. Nouveau, you should also +do the following before sending your results: +$ lspci -vvv > lspci.txt +$ dmesg > dmesg.txt +$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt +and then send the .tar.gz file. The trace compresses considerably. Replace +"pciid" and "nick" with the PCI ID or model name of your piece of hardware +under investigation and your nick name. + + +How Mmiotrace Works +------------------- + +Access to hardware IO-memory is gained by mapping addresses from PCI bus by +calling one of the ioremap_*() functions. Mmiotrace is hooked into the +__ioremap() function and gets called whenever a mapping is created. Mapping is +an event that is recorded into the trace log. Note, that ISA range mappings +are not caught, since the mapping always exists and is returned directly. + +MMIO accesses are recorded via page faults. Just before __ioremap() returns, +the mapped pages are marked as not present. Any access to the pages causes a +fault. The page fault handler calls mmiotrace to handle the fault. Mmiotrace +marks the page present, sets TF flag to achieve single stepping and exits the +fault handler. The instruction that faulted is executed and debug trap is +entered. Here mmiotrace again marks the page as not present. The instruction +is decoded to get the type of operation (read/write), data width and the value +read or written. These are stored to the trace log. + +Setting the page present in the page fault handler has a race condition on SMP +machines. During the single stepping other CPUs may run freely on that page +and events can be missed without a notice. Re-enabling other CPUs during +tracing is discouraged. + + +Trace Log Format +---------------- + +The raw log is text and easily filtered with e.g. grep and awk. One record is +one line in the log. A record starts with a keyword, followed by keyword +dependant arguments. Arguments are separated by a space, or continue until the +end of line. The format for version 20070824 is as follows: + +Explanation Keyword Space separated arguments +--------------------------------------------------------------------------- + +read event R width, timestamp, map id, physical, value, PC, PID +write event W width, timestamp, map id, physical, value, PC, PID +ioremap event MAP timestamp, map id, physical, virtual, length, PC, PID +iounmap event UNMAP timestamp, map id, PC, PID +marker MARK timestamp, text +version VERSION the string "20070824" +info for reader LSPCI one line from lspci -v +PCI address map PCIDEV space separated /proc/bus/pci/devices data +unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID + +Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual +is a kernel virtual address. Width is the data width in bytes and value is the +data value. Map id is an arbitrary id number identifying the mapping that was +used in an operation. PC is the program counter and PID is process id. PC is +zero if it is not recorded. PID is always zero as tracing MMIO accesses +originating in user space memory is not yet supported. + +For instance, the following awk filter will pass all 32-bit writes that target +physical addresses in the range [0xfb73ce40, 0xfb800000[ + +$ awk '/W 4 / { adr=strtonum($5); if (adr >= 0xfb73ce40 && +adr < 0xfb800000) print; }' + + +Tools for Developers +-------------------- + +The user space tools include utilities for: +- replacing numeric addresses and values with hardware register names +- replaying MMIO logs, i.e., re-executing the recorded writes + + diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 1d6de0d67f99..b28ace2be1a3 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -180,12 +180,10 @@ config MMIOTRACE help Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. A user space program is - required to collect the MMIO data from debugfs files. - Tracing is disabled by default and can be enabled from a debugfs - file. + implementation and works via page faults. Tracing is disabled by + default and can be enabled run-time. - See http://nouveau.freedesktop.org/wiki/MmioTrace + See Documentation/tracers/mmiotrace.txt. If you are not helping to develop drivers, say N. config MMIOTRACE_TEST From 0663bb6cd9a457fbd8ca95c627bb762d07321a39 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 May 2008 21:20:59 +0200 Subject: [PATCH 18/30] mmiotrace: fix printk format Fix gcc printk format warnings: next-20080415/arch/x86/mm/mmio-mod.c: In function 'print_pte': next-20080415/arch/x86/mm/mmio-mod.c:154: warning: format '%lx' expects type 'long unsigned int', but argument 3 has type 'pteval_t' next-20080415/arch/x86/mm/mmio-mod.c:154: warning: format '%lx' expects type 'long unsigned int', but argument 4 has type 'pteval_t' next-20080415/arch/x86/mm/mmio-mod.c: At top level: next-20080415/arch/x86/mm/mmio-mod.c:403: warning: 'downed_cpus' defined but not used Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/mmio-mod.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index ab2bb776d310..6d6cac84c045 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -150,8 +150,9 @@ static void print_pte(unsigned long address) "0x%08lx\n", address); BUG(); } - pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), - pte_val(*pte) & _PAGE_PRESENT); + pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, + (unsigned long long)pte_val(*pte), + (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); } /* From 37b3619257d3190f47f233d7ed626d4b9916462c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 May 2008 21:20:59 +0200 Subject: [PATCH 19/30] x86/mmiotrace: uses/depends on PCI Don't try to build mmiotrace when CONFIG_PCI=n. next-20080416/kernel/trace/trace_mmiotrace.c: In function 'mmio_print_pcidev': next-20080416/kernel/trace/trace_mmiotrace.c:62: error: implicit declaration of function 'pci_dev_driver' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b28ace2be1a3..1e53df0ba08c 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,7 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && PCI select TRACING select MMIOTRACE_HOOKS default y @@ -181,7 +181,7 @@ config MMIOTRACE Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap implementation and works via page faults. Tracing is disabled by - default and can be enabled run-time. + default and can be enabled at run-time. See Documentation/tracers/mmiotrace.txt. If you are not helping to develop drivers, say N. From 7423d1115f18627666d475fccc7c62394406ff8c Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: [PATCH 20/30] x86 mmiotrace: dynamically disable non-boot CPUs From 8979ee55cb6a429c4edd72ebec2244b849f6a79a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 12 Apr 2008 00:18:57 +0300 Mmiotrace is not reliable with multiple CPUs and may miss events. Drop to single CPU when mmiotrace is activated. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 6d6cac84c045..1f77d8532037 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -32,6 +32,7 @@ #include /* for ISA_START_ADDRESS */ #include #include +#include #include "pf_in.h" @@ -400,6 +401,64 @@ static void clear_trace_list(void) } } +#ifdef CONFIG_HOTPLUG_CPU +static cpumask_t downed_cpus; + +static void enter_uniprocessor(void) +{ + int cpu; + int err; + + get_online_cpus(); + downed_cpus = cpu_online_map; + cpu_clear(first_cpu(cpu_online_map), downed_cpus); + if (num_online_cpus() > 1) + pr_notice(NAME "Disabling non-boot CPUs...\n"); + put_online_cpus(); + + for_each_cpu_mask(cpu, downed_cpus) { + err = cpu_down(cpu); + if (!err) { + pr_info(NAME "CPU%d is down.\n", cpu); + } else { + pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); + } + } + if (num_online_cpus() > 1) + pr_warning(NAME "multiple CPUs still online, " + "may miss events.\n"); +} + +static void leave_uniprocessor(void) +{ + int cpu; + int err; + + if (cpus_weight(downed_cpus) == 0) + return; + pr_notice(NAME "Re-enabling CPUs...\n"); + for_each_cpu_mask(cpu, downed_cpus) { + err = cpu_up(cpu); + if (!err) + pr_info(NAME "enabled CPU%d.\n", cpu); + else + pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); + } +} + +#else /* !CONFIG_HOTPLUG_CPU */ +static void enter_uniprocessor(void) +{ + if (num_online_cpus() > 1) + pr_warning(NAME "multiple CPUs are online, may miss events. " + "Suggest booting with maxcpus=1 kernel argument.\n"); +} + +static void leave_uniprocessor(void) +{ +} +#endif + #if 0 /* XXX: out of order */ static struct file_operations fops_marker = { .owner = THIS_MODULE, @@ -422,6 +481,7 @@ void enable_mmiotrace(void) if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); + enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); spin_unlock_irq(&trace_lock); @@ -442,6 +502,7 @@ void disable_mmiotrace(void) spin_unlock_irq(&trace_lock); clear_trace_list(); /* guarantees: no more kmmio callbacks */ + leave_uniprocessor(); if (marker_file) { debugfs_remove(marker_file); marker_file = NULL; From d0a7e8ca5b996d36219e6fc002907291c8ee677b Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: [PATCH 21/30] mmiotrace: print header using the read hook. Now the header is printed only for `trace_pipe' file. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- kernel/trace/trace_mmiotrace.c | 60 ++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 79be4a18ec1e..6d2edbdde939 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -12,6 +12,10 @@ #include "trace.h" +struct header_iter { + struct pci_dev *dev; +}; + static struct trace_array *mmio_trace_array; static void mmio_reset_data(struct trace_array *tr) @@ -89,17 +93,57 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) return ret; } -/* XXX: This is not called for trace_pipe file! */ -static void mmio_print_header(struct trace_iterator *iter) +static void destroy_header_iter(struct header_iter *hiter) { + if (!hiter) + return; + pci_dev_put(hiter->dev); + kfree(hiter); +} + +static void mmio_pipe_open(struct trace_iterator *iter) +{ + struct header_iter *hiter; struct trace_seq *s = &iter->seq; - struct pci_dev *dev = NULL; trace_seq_printf(s, "VERSION 20070824\n"); - for_each_pci_dev(dev) - mmio_print_pcidev(s, dev); - /* XXX: return value? What if header is very long? */ + hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); + if (!hiter) + return; + + hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL); + iter->private = hiter; +} + +/* XXX: This is not called when the pipe is closed! */ +static void mmio_close(struct trace_iterator *iter) +{ + struct header_iter *hiter = iter->private; + destroy_header_iter(hiter); + iter->private = NULL; +} + +static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp, + char __user *ubuf, size_t cnt, loff_t *ppos) +{ + ssize_t ret; + struct header_iter *hiter = iter->private; + struct trace_seq *s = &iter->seq; + + if (!hiter) + return 0; + + mmio_print_pcidev(s, hiter->dev); + hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev); + + if (!hiter->dev) { + destroy_header_iter(hiter); + iter->private = NULL; + } + + ret = trace_seq_to_user(s, ubuf, cnt); + return (ret == -EBUSY) ? 0 : ret; } static int mmio_print_rw(struct trace_iterator *iter) @@ -190,7 +234,9 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, - .open = mmio_print_header, + .pipe_open = mmio_pipe_open, + .close = mmio_close, + .read = mmio_read, .ctrl_update = mmio_trace_ctrl_update, .print_line = mmio_print_line, }; From 2039238b79b51a50f8477f53f33750e1c3fc146a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: [PATCH 22/30] mmiotrace: print overrun counts Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- kernel/trace/trace_mmiotrace.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 6d2edbdde939..d0f649a2203d 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -17,11 +17,13 @@ struct header_iter { }; static struct trace_array *mmio_trace_array; +static bool overrun_detected; static void mmio_reset_data(struct trace_array *tr) { int cpu; + overrun_detected = false; tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) @@ -124,12 +126,34 @@ static void mmio_close(struct trace_iterator *iter) iter->private = NULL; } +static unsigned long count_overruns(struct trace_iterator *iter) +{ + int cpu; + unsigned long cnt = 0; + for_each_online_cpu(cpu) { + cnt += iter->overrun[cpu]; + iter->overrun[cpu] = 0; + } + return cnt; +} + static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { ssize_t ret; struct header_iter *hiter = iter->private; struct trace_seq *s = &iter->seq; + unsigned long n; + + n = count_overruns(iter); + if (n) { + /* XXX: This is later than where events were lost. */ + trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n); + if (!overrun_detected) + pr_warning("mmiotrace has lost events.\n"); + overrun_detected = true; + goto print_out; + } if (!hiter) return 0; @@ -142,6 +166,7 @@ static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp, iter->private = NULL; } +print_out: ret = trace_seq_to_user(s, ubuf, cnt); return (ret == -EBUSY) ? 0 : ret; } From e0fd5c2fa188311667267c02a702ae699a9fc2bd Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: [PATCH 23/30] mmiotrace: do not print bogus pid for maps either Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- kernel/trace/trace_mmiotrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index d0f649a2203d..3c1dacdc2d85 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -225,12 +225,12 @@ static int mmio_print_map(struct trace_iterator *iter) ret = trace_seq_printf(s, "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", secs, usec_rem, m->map_id, m->phys, m->virt, m->len, - 0UL, entry->pid); + 0UL, 0); break; case MMIO_UNPROBE: ret = trace_seq_printf(s, "UNMAP %lu.%06lu %d 0x%lx %d\n", - secs, usec_rem, m->map_id, 0UL, entry->pid); + secs, usec_rem, m->map_id, 0UL, 0); break; default: ret = trace_seq_printf(s, "map what?\n"); From 6f6f394d9ca61fcc73ecfd2f2bf58e92dc1ab269 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: [PATCH 24/30] doc: update mmiotrace doc to current status Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- Documentation/tracers/mmiotrace.txt | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt index 84246f703875..a4afb560a45b 100644 --- a/Documentation/tracers/mmiotrace.txt +++ b/Documentation/tracers/mmiotrace.txt @@ -25,7 +25,8 @@ Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is disabled by default, so it is safe to have this set to yes. SMP systems are supported, but tracing is unreliable and may miss events if more than one CPU is on-line, therefore mmiotrace takes all but one CPU off-line during run-time -activation [not implemented]. +activation. You can re-enable CPUs by hand, but you have been warned, there +is no way to automatically detect if you are losing events due to CPUs racing. Usage Quick Reference @@ -37,7 +38,7 @@ $ cat /debug/tracing/trace_pipe > mydump.txt & Start X or whatever. $ echo "X is up" > /debug/tracing/marker $ echo none > /debug/tracing/current_tracer -Check kernel log. +Check for lost events. Usage @@ -67,12 +68,22 @@ do. Shut down mmiotrace (requires root privileges): $ echo none > /debug/tracing/current_tracer -The 'cat' process exits. If it does not, kill it by 'fg' and pressing ctrl+c. +The 'cat' process exits. If it does not, kill it by issuing 'fg' command and +pressing ctrl+c. -[This feature is not implemented yet!] -Check your kernel log. If there are messages about mmiotrace losing events, -this is due to buffer overrun, and the trace is incomplete. You should enlarge -the buffers and try again. [How?] +Check that mmiotrace did not lose events due to a buffer filling up. Either +$ grep -i lost mydump.txt +which tells you exactly how many events were lost, or use +$ dmesg +to view your kernel log and look for "mmiotrace has lost events" warning. If +events were lost, the trace is incomplete. You should enlarge the buffers and +try again. Buffers are enlarged by first seeing how large the current buffers +are: +$ cat /debug/tracing/trace_entries +gives you a number. Approximately double this number and write it back, for +instance: +$ echo 128000 > /debug/tracing/trace_entries +Then start again from the top. If you are doing a trace for a driver project, e.g. Nouveau, you should also do the following before sending your results: From 970e6fa03885f32cc43e42cb08c73a5f54cd8bd9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: [PATCH 25/30] mmiotrace: code style cleanups From c2da03771e29159627c5c7b9509ec70bce9f91ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 28 Apr 2008 21:25:22 +0300 Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 4 ++-- arch/x86/mm/mmio-mod.c | 7 +++---- arch/x86/mm/testmmiotrace.c | 2 +- include/linux/mmiotrace.h | 6 +----- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 3ad27b8504a5..6a92d9111b64 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -17,10 +17,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 1f77d8532037..a8d2a0019da4 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -418,11 +418,10 @@ static void enter_uniprocessor(void) for_each_cpu_mask(cpu, downed_cpus) { err = cpu_down(cpu); - if (!err) { + if (!err) pr_info(NAME "CPU%d is down.\n", cpu); - } else { + else pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); - } } if (num_online_cpus() > 1) pr_warning(NAME "multiple CPUs still online, " diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index cfa60b227c8d..d877c5b423ef 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -2,7 +2,7 @@ * Written by Pekka Paalanen, 2008 */ #include -#include +#include #define MODULE_NAME "testmmiotrace" diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index dd6b64b160fc..de8e91258da7 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,9 +1,7 @@ #ifndef MMIOTRACE_H #define MMIOTRACE_H -#include - -#ifdef __KERNEL__ +#include #include @@ -84,6 +82,4 @@ extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -#endif /* __KERNEL__ */ - #endif /* MMIOTRACE_H */ From 87e547fe41a8b57d6d80afc67a0031fbe477eb0d Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: [PATCH 26/30] x86 mmiotrace: fix page-unaligned ioremaps mmiotrace_ioremap() expects to receive the original unaligned map phys address and size. Also fix {un,}register_kmmio_probe() to deal properly with unaligned size. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 +++- arch/x86/mm/kmmio.c | 13 +++++++++++-- arch/x86/mm/mmio-mod.c | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8927c878544d..a7c80a6e8622 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -123,6 +123,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, { unsigned long pfn, offset, vaddr; resource_size_t last_addr; + const resource_size_t unaligned_phys_addr = phys_addr; + const unsigned long unaligned_size = size; struct vm_struct *area; unsigned long new_prot_val; pgprot_t prot; @@ -236,7 +238,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, } ret_addr = (void __iomem *) (vaddr + offset); - mmiotrace_ioremap(phys_addr, size, ret_addr); + mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); return ret_addr; } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 6a92d9111b64..93b1797666cb 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -351,11 +351,19 @@ static void release_kmmio_fault_page(unsigned long page, } } +/* + * With page-unaligned ioremaps, one or two armed pages may contain + * addresses from outside the intended mapping. Events for these addresses + * are currently silently dropped. The events may result only from programming + * mistakes by accessing addresses before the beginning or past the end of a + * mapping. + */ int register_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; int ret = 0; unsigned long size = 0; + const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { @@ -364,7 +372,7 @@ int register_kmmio_probe(struct kmmio_probe *p) } kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); - while (size < p->len) { + while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("kmmio: Unable to set page fault.\n"); size += PAGE_SIZE; @@ -436,11 +444,12 @@ void unregister_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; unsigned long size = 0; + const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; spin_lock_irqsave(&kmmio_lock, flags); - while (size < p->len) { + while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index a8d2a0019da4..278998c1998f 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -280,6 +280,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, { static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + /* These are page-unaligned. */ struct mmiotrace_map map = { .phys = offset, .virt = (unsigned long)addr, From dee310d0adf41019aca476052ac3085ff286d9be Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: [PATCH 27/30] x86 mmiotrace: use resource_size_t for phys addresses Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 11 ++++++----- include/linux/mmiotrace.h | 14 +++++++------- kernel/trace/trace_mmiotrace.c | 20 ++++++++++++-------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 278998c1998f..3b04a0126121 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -48,7 +48,7 @@ struct trap_reason { struct remap_trace { struct list_head list; struct kmmio_probe probe; - unsigned long phys; + resource_size_t phys; unsigned long id; }; @@ -275,7 +275,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, put_cpu_var(pf_reason); } -static void ioremap_trace_core(unsigned long offset, unsigned long size, +static void ioremap_trace_core(resource_size_t offset, unsigned long size, void __iomem *addr) { static atomic_t next_id; @@ -319,13 +319,14 @@ not_enabled: spin_unlock_irq(&trace_lock); } -void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr) { if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); + pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", + (unsigned long long)offset, size, addr); if ((filter_offset) && (offset != filter_offset)) return; ioremap_trace_core(offset, size, addr); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index de8e91258da7..5cbbc374e945 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -2,7 +2,6 @@ #define MMIOTRACE_H #include - #include struct kmmio_probe; @@ -37,14 +36,15 @@ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); /* Called from ioremap.c */ #ifdef CONFIG_MMIOTRACE -extern void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr); extern void mmiotrace_iounmap(volatile void __iomem *addr); #else -static inline void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +static inline void mmiotrace_ioremap(resource_size_t offset, + unsigned long size, void __iomem *addr) { } + static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } @@ -60,7 +60,7 @@ enum mm_io_opcode { }; struct mmiotrace_rw { - unsigned long phys; /* PCI address of register */ + resource_size_t phys; /* PCI address of register */ unsigned long value; unsigned long pc; /* optional program counter */ int map_id; @@ -69,7 +69,7 @@ struct mmiotrace_rw { }; struct mmiotrace_map { - unsigned long phys; /* base address in PCI space */ + resource_size_t phys; /* base address in PCI space */ unsigned long virt; /* base virtual address */ unsigned long len; /* mapping size */ int map_id; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 3c1dacdc2d85..b13dc19dcbb4 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -184,20 +184,23 @@ static int mmio_print_rw(struct trace_iterator *iter) switch (entry->mmiorw.opcode) { case MMIO_READ: ret = trace_seq_printf(s, - "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", - rw->width, secs, usec_rem, rw->map_id, rw->phys, + "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_WRITE: ret = trace_seq_printf(s, - "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", - rw->width, secs, usec_rem, rw->map_id, rw->phys, + "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_UNKNOWN_OP: ret = trace_seq_printf(s, - "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", - secs, usec_rem, rw->map_id, rw->phys, + "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", + secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, (rw->value >> 0) & 0xff, rw->pc, 0); break; @@ -223,8 +226,9 @@ static int mmio_print_map(struct trace_iterator *iter) switch (entry->mmiorw.opcode) { case MMIO_PROBE: ret = trace_seq_printf(s, - "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", - secs, usec_rem, m->map_id, m->phys, m->virt, m->len, + "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, + (unsigned long long)m->phys, m->virt, m->len, 0UL, 0); break; case MMIO_UNPROBE: From a50445d76c22a34ae149704ea5adaef171c8acb7 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: [PATCH 28/30] mmiotrace: rename kmmio_probe::user_data to :private. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 4 ++-- include/linux/mmiotrace.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 3b04a0126121..ed0e0e90b3ef 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -191,7 +191,7 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); - struct remap_trace *trace = p->user_data; + struct remap_trace *trace = p->private; /* it doesn't make sense to have more than one active trace per cpu */ if (my_reason->active_traces) @@ -299,7 +299,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, .len = size, .pre_handler = pre, .post_handler = post, - .user_data = trace + .private = trace }, .phys = offset, .id = atomic_inc_return(&next_id) diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 5cbbc374e945..61d19e1b7a0b 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -18,7 +18,7 @@ struct kmmio_probe { unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *user_data; + void *private; }; /* kmmio is active by some kmmio_probes? */ From 790e2a290b499b0400254e6870ec27969065d122 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:14 +0200 Subject: [PATCH 29/30] x86 mmiotrace: page level is unsigned Fixes some sparse warnings. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 13 +++++++------ arch/x86/mm/mmio-mod.c | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93b1797666cb..b65871e6bba6 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -104,11 +104,12 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_page_present(unsigned long addr, bool present, int *pglevel) +static void set_page_present(unsigned long addr, bool present, + unsigned int *pglevel) { pteval_t pteval; pmdval_t pmdval; - int level; + unsigned int level; pmd_t *pmd; pte_t *pte = lookup_address(addr, &level); @@ -145,15 +146,15 @@ static void set_page_present(unsigned long addr, bool present, int *pglevel) } /** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, int *page_level) +static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, false, page_level); + set_page_present(page & PAGE_MASK, false, pglevel); } /** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, int *page_level) +static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, true, page_level); + set_page_present(page & PAGE_MASK, true, pglevel); } /* diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index ed0e0e90b3ef..e7397e108beb 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -137,7 +137,7 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, static void print_pte(unsigned long address) { - int level; + unsigned int level; pte_t *pte = lookup_address(address, &level); if (!pte) { From 668a6c3654560aef8741642478973e205a4f02bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 May 2008 13:35:24 +0200 Subject: [PATCH 30/30] - fix mmioftrace + rcu merge interaction Signed-off-by: Thomas Gleixner --- arch/x86/mm/kmmio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index b65871e6bba6..93d82038af4b 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include