From bd2172f58094b3f8afa017e68f3f0b57577824e1 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 16 Dec 2008 16:19:54 +0100 Subject: [PATCH 01/31] oprofile: rename kernel-wide identifiers This patch renames kernel-wide identifiers to something more oprofile specific names. Cc: Andrew Morton Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 12 ++++++------ drivers/oprofile/event_buffer.c | 4 ++-- drivers/oprofile/oprof.c | 4 ++-- drivers/oprofile/oprof.h | 8 ++++---- drivers/oprofile/oprofile_files.c | 27 ++++++++++++++------------- 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 61090969158f..fcf96f608d86 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -66,7 +66,7 @@ void free_cpu_buffers(void) unsigned long oprofile_get_cpu_buffer_size(void) { - return fs_cpu_buffer_size; + return oprofile_cpu_buffer_size; } void oprofile_cpu_buffer_inc_smpl_lost(void) @@ -81,7 +81,7 @@ int alloc_cpu_buffers(void) { int i; - unsigned long buffer_size = fs_cpu_buffer_size; + unsigned long buffer_size = oprofile_cpu_buffer_size; op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); if (!op_ring_buffer_read) @@ -238,7 +238,7 @@ void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); - if (!backtrace_depth) { + if (!oprofile_backtrace_depth) { log_sample(cpu_buf, pc, is_kernel, event); return; } @@ -251,7 +251,7 @@ void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, * source of this event */ if (log_sample(cpu_buf, pc, is_kernel, event)) - oprofile_ops.backtrace(regs, backtrace_depth); + oprofile_ops.backtrace(regs, oprofile_backtrace_depth); oprofile_end_trace(cpu_buf); } @@ -308,8 +308,8 @@ void oprofile_add_ibs_sample(struct pt_regs * const regs, if (fail) goto fail; - if (backtrace_depth) - 
oprofile_ops.backtrace(regs, backtrace_depth); + if (oprofile_backtrace_depth) + oprofile_ops.backtrace(regs, oprofile_backtrace_depth); return; diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 191a3202cecc..2b7ae366ceb1 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -73,8 +73,8 @@ int alloc_event_buffer(void) unsigned long flags; spin_lock_irqsave(&oprofilefs_lock, flags); - buffer_size = fs_buffer_size; - buffer_watershed = fs_buffer_watershed; + buffer_size = oprofile_buffer_size; + buffer_watershed = oprofile_buffer_watershed; spin_unlock_irqrestore(&oprofilefs_lock, flags); if (buffer_watershed >= buffer_size) diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index cd375907f26f..3cffce90f82a 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -23,7 +23,7 @@ struct oprofile_operations oprofile_ops; unsigned long oprofile_started; -unsigned long backtrace_depth; +unsigned long oprofile_backtrace_depth; static unsigned long is_setup; static DEFINE_MUTEX(start_mutex); @@ -172,7 +172,7 @@ int oprofile_set_backtrace(unsigned long val) goto out; } - backtrace_depth = val; + oprofile_backtrace_depth = val; out: mutex_unlock(&start_mutex); diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 5df0c21a608f..c288d3c24b50 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -21,12 +21,12 @@ void oprofile_stop(void); struct oprofile_operations; -extern unsigned long fs_buffer_size; -extern unsigned long fs_cpu_buffer_size; -extern unsigned long fs_buffer_watershed; +extern unsigned long oprofile_buffer_size; +extern unsigned long oprofile_cpu_buffer_size; +extern unsigned long oprofile_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; -extern unsigned long backtrace_depth; +extern unsigned long oprofile_backtrace_depth; struct super_block; struct dentry; diff --git 
a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index d8201998b0b7..5d36ffc30dd5 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -14,17 +14,18 @@ #include "oprofile_stats.h" #include "oprof.h" -#define FS_BUFFER_SIZE_DEFAULT 131072 -#define FS_CPU_BUFFER_SIZE_DEFAULT 8192 -#define FS_BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */ +#define BUFFER_SIZE_DEFAULT 131072 +#define CPU_BUFFER_SIZE_DEFAULT 8192 +#define BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */ -unsigned long fs_buffer_size; -unsigned long fs_cpu_buffer_size; -unsigned long fs_buffer_watershed; +unsigned long oprofile_buffer_size; +unsigned long oprofile_cpu_buffer_size; +unsigned long oprofile_buffer_watershed; static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); + return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, + offset); } @@ -125,16 +126,16 @@ static const struct file_operations dump_fops = { void oprofile_create_files(struct super_block *sb, struct dentry *root) { /* reinitialize default values */ - fs_buffer_size = FS_BUFFER_SIZE_DEFAULT; - fs_cpu_buffer_size = FS_CPU_BUFFER_SIZE_DEFAULT; - fs_buffer_watershed = FS_BUFFER_WATERSHED_DEFAULT; + oprofile_buffer_size = BUFFER_SIZE_DEFAULT; + oprofile_cpu_buffer_size = CPU_BUFFER_SIZE_DEFAULT; + oprofile_buffer_watershed = BUFFER_WATERSHED_DEFAULT; oprofilefs_create_file(sb, root, "enable", &enable_fops); oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); - oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); - oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); - oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size); + 
oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed); + oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &oprofile_cpu_buffer_size); oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); From 6d2c53f3cd81e33eec17aa99845d43e599986982 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 24 Dec 2008 16:53:53 +0100 Subject: [PATCH 02/31] oprofile: rename cpu buffer functions This patch renames cpu buffer functions to something more oprofile specific names. Functions will be moved to the global name space. Cc: Andrew Morton Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 10 +++++----- drivers/oprofile/cpu_buffer.c | 4 ++-- drivers/oprofile/cpu_buffer.h | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 737bd9484822..d295d92b57f0 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -331,7 +331,7 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) off_t offset; struct op_sample *sample; - sample = cpu_buffer_read_entry(cpu); + sample = op_cpu_buffer_read_entry(cpu); if (!sample) goto Error; rip = sample->eip; @@ -370,7 +370,7 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ for (i = 0; i < count; i++) { - sample = cpu_buffer_read_entry(cpu); + sample = op_cpu_buffer_read_entry(cpu); if (!sample) goto Error; add_event_entry(sample->eip); @@ -537,11 +537,11 @@ void sync_buffer(int cpu) add_cpu_switch(cpu); - cpu_buffer_reset(cpu); - available = cpu_buffer_entries(cpu); + op_cpu_buffer_reset(cpu); + available = op_cpu_buffer_entries(cpu); for (i = 0; i < available; ++i) { - struct op_sample *s = cpu_buffer_read_entry(cpu); + struct op_sample *s = 
op_cpu_buffer_read_entry(cpu); if (!s) break; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index fcf96f608d86..e52c085cd186 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -152,14 +152,14 @@ add_sample(struct oprofile_cpu_buffer *cpu_buf, struct op_entry entry; int ret; - ret = cpu_buffer_write_entry(&entry); + ret = op_cpu_buffer_write_entry(&entry); if (ret) return ret; entry.sample->eip = pc; entry.sample->event = event; - ret = cpu_buffer_write_commit(&entry); + ret = op_cpu_buffer_write_commit(&entry); if (ret) return ret; diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index aacb0f0bc566..83d491e273fe 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -64,7 +64,7 @@ DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); * reset these to invalid values; the next sample collected will * populate the buffer with proper values to initialize the buffer */ -static inline void cpu_buffer_reset(int cpu) +static inline void op_cpu_buffer_reset(int cpu) { struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); @@ -72,7 +72,7 @@ static inline void cpu_buffer_reset(int cpu) cpu_buf->last_task = NULL; } -static inline int cpu_buffer_write_entry(struct op_entry *entry) +static inline int op_cpu_buffer_write_entry(struct op_entry *entry) { entry->event = ring_buffer_lock_reserve(op_ring_buffer_write, sizeof(struct op_sample), @@ -88,13 +88,13 @@ static inline int cpu_buffer_write_entry(struct op_entry *entry) return 0; } -static inline int cpu_buffer_write_commit(struct op_entry *entry) +static inline int op_cpu_buffer_write_commit(struct op_entry *entry) { return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, entry->irq_flags); } -static inline struct op_sample *cpu_buffer_read_entry(int cpu) +static inline struct op_sample *op_cpu_buffer_read_entry(int cpu) { struct ring_buffer_event *e; e = 
ring_buffer_consume(op_ring_buffer_read, cpu, NULL); @@ -111,7 +111,7 @@ static inline struct op_sample *cpu_buffer_read_entry(int cpu) } /* "acquire" as many cpu buffer slots as we can */ -static inline unsigned long cpu_buffer_entries(int cpu) +static inline unsigned long op_cpu_buffer_entries(int cpu) { return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); From 9966718daee592fbdc523703b2d8200009642506 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 16 Dec 2008 16:19:54 +0100 Subject: [PATCH 03/31] oprofile: remove ring buffer inline functions in cpu_buffer.h This patch moves ring buffer inline functions to cpu_buffer.c. Cc: Andrew Morton Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 48 +++++++++++++++++++++++++++++++-- drivers/oprofile/cpu_buffer.h | 50 +++-------------------------------- 2 files changed, 50 insertions(+), 48 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index e52c085cd186..cd67d4dd30b7 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -45,8 +45,8 @@ * can be changed to a single buffer solution when the ring buffer * access is implemented as non-locking atomic code. 
*/ -struct ring_buffer *op_ring_buffer_read; -struct ring_buffer *op_ring_buffer_write; +static struct ring_buffer *op_ring_buffer_read; +static struct ring_buffer *op_ring_buffer_write; DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); static void wq_sync_buffer(struct work_struct *work); @@ -145,6 +145,50 @@ void end_cpu_work(void) flush_scheduled_work(); } +int op_cpu_buffer_write_entry(struct op_entry *entry) +{ + entry->event = ring_buffer_lock_reserve(op_ring_buffer_write, + sizeof(struct op_sample), + &entry->irq_flags); + if (entry->event) + entry->sample = ring_buffer_event_data(entry->event); + else + entry->sample = NULL; + + if (!entry->sample) + return -ENOMEM; + + return 0; +} + +int op_cpu_buffer_write_commit(struct op_entry *entry) +{ + return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, + entry->irq_flags); +} + +struct op_sample *op_cpu_buffer_read_entry(int cpu) +{ + struct ring_buffer_event *e; + e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); + if (e) + return ring_buffer_event_data(e); + if (ring_buffer_swap_cpu(op_ring_buffer_read, + op_ring_buffer_write, + cpu)) + return NULL; + e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); + if (e) + return ring_buffer_event_data(e); + return NULL; +} + +unsigned long op_cpu_buffer_entries(int cpu) +{ + return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) + + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); +} + static inline int add_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, unsigned long event) diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 83d491e273fe..cd28abc06960 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -54,8 +54,6 @@ struct oprofile_cpu_buffer { struct delayed_work work; }; -extern struct ring_buffer *op_ring_buffer_read; -extern struct ring_buffer *op_ring_buffer_write; DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); /* @@ -72,50 +70,10 @@ static 
inline void op_cpu_buffer_reset(int cpu) cpu_buf->last_task = NULL; } -static inline int op_cpu_buffer_write_entry(struct op_entry *entry) -{ - entry->event = ring_buffer_lock_reserve(op_ring_buffer_write, - sizeof(struct op_sample), - &entry->irq_flags); - if (entry->event) - entry->sample = ring_buffer_event_data(entry->event); - else - entry->sample = NULL; - - if (!entry->sample) - return -ENOMEM; - - return 0; -} - -static inline int op_cpu_buffer_write_commit(struct op_entry *entry) -{ - return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, - entry->irq_flags); -} - -static inline struct op_sample *op_cpu_buffer_read_entry(int cpu) -{ - struct ring_buffer_event *e; - e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); - if (e) - return ring_buffer_event_data(e); - if (ring_buffer_swap_cpu(op_ring_buffer_read, - op_ring_buffer_write, - cpu)) - return NULL; - e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); - if (e) - return ring_buffer_event_data(e); - return NULL; -} - -/* "acquire" as many cpu buffer slots as we can */ -static inline unsigned long op_cpu_buffer_entries(int cpu) -{ - return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) - + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); -} +int op_cpu_buffer_write_entry(struct op_entry *entry); +int op_cpu_buffer_write_commit(struct op_entry *entry); +struct op_sample *op_cpu_buffer_read_entry(int cpu); +unsigned long op_cpu_buffer_entries(int cpu); /* transient events for the CPU buffer -> event buffer */ #define CPU_IS_KERNEL 1 From 83bd9243956f30d91851b272988a237999b35b10 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 15 Dec 2008 15:09:50 +0100 Subject: [PATCH 04/31] x86/oprofile: fix pci_dev use count for AMD northbridge devices This patch fixes the PCI device use count for AMD northbridge devices. In case of an IBS LVT initialization failure, the PCI device is released now by calling pci_dev_put(). 
If there are no initialization errors, the devices are released in pci_get_device() while iterating. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 98658f25f542..c5b5c7fb3ced 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -409,6 +409,7 @@ static int init_ibs_nmi(void) | IBSCTL_LVTOFFSETVAL); pci_read_config_dword(cpu_cfg, IBSCTL, &value); if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { + pci_dev_put(cpu_cfg); printk(KERN_DEBUG "Failed to setup IBS LVT offset, " "IBSCTL = 0x%08x", value); return 1; From 300157768f050dabc73a99d958b504282088a132 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 23 Dec 2008 01:35:12 +0100 Subject: [PATCH 05/31] oprofile: reordering some code in cpu_buffer.c Reordering code to keep alloc/free functions together. Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index cd67d4dd30b7..b353b19bd786 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -54,16 +54,6 @@ static void wq_sync_buffer(struct work_struct *work); #define DEFAULT_TIMER_EXPIRE (HZ / 10) static int work_enabled; -void free_cpu_buffers(void) -{ - if (op_ring_buffer_read) - ring_buffer_free(op_ring_buffer_read); - op_ring_buffer_read = NULL; - if (op_ring_buffer_write) - ring_buffer_free(op_ring_buffer_write); - op_ring_buffer_write = NULL; -} - unsigned long oprofile_get_cpu_buffer_size(void) { return oprofile_cpu_buffer_size; @@ -77,6 +67,16 @@ void oprofile_cpu_buffer_inc_smpl_lost(void) cpu_buf->sample_lost_overflow++; } +void free_cpu_buffers(void) +{ + if (op_ring_buffer_read) + ring_buffer_free(op_ring_buffer_read); + op_ring_buffer_read = NULL; + if (op_ring_buffer_write) + 
ring_buffer_free(op_ring_buffer_write); + op_ring_buffer_write = NULL; +} + int alloc_cpu_buffers(void) { int i; From d45d23bed4bf7b25b7dcc336552a251db1aa1279 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 16 Dec 2008 12:00:10 +0100 Subject: [PATCH 06/31] oprofile: add inline function __oprofile_add_ext_sample() This patch adds the inline function __oprofile_add_ext_sample() to cpu_buffer.c and thus reduces overhead when calling oprofile_add_sample(). Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index b353b19bd786..9e66c384e016 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -277,8 +277,9 @@ static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) cpu_buf->tracing = 0; } -void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, - unsigned long event, int is_kernel) +static inline void +__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); @@ -299,12 +300,18 @@ void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, oprofile_end_trace(cpu_buf); } +void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel) +{ + __oprofile_add_ext_sample(pc, regs, event, is_kernel); +} + void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) { int is_kernel = !user_mode(regs); unsigned long pc = profile_pc(regs); - oprofile_add_ext_sample(pc, regs, event, is_kernel); + __oprofile_add_ext_sample(pc, regs, event, is_kernel); } #ifdef CONFIG_OPROFILE_IBS From 9741b309bb4493eedd3cdb5c97b566338a0da2cc Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 18 Dec 2008 19:44:20 +0100 Subject: [PATCH 07/31] oprofile: simplify add_sample() This 
patch removes add_us_sample() and simplifies add_sample(). Code is much more readable now. Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 39 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index d295d92b57f0..0abe29e7e4c7 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -392,11 +392,29 @@ static void add_sample_entry(unsigned long offset, unsigned long event) } -static int add_us_sample(struct mm_struct *mm, struct op_sample *s) +/* + * Add a sample to the global event buffer. If possible the + * sample is converted into a persistent dentry/offset pair + * for later lookup from userspace. Return 0 on failure. + */ +static int +add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) { unsigned long cookie; off_t offset; + if (in_kernel) { + add_sample_entry(s->eip, s->event); + return 1; + } + + /* add userspace sample */ + + if (!mm) { + atomic_inc(&oprofile_stats.sample_lost_no_mm); + return 0; + } + cookie = lookup_dcookie(mm, s->eip, &offset); if (cookie == INVALID_COOKIE) { @@ -415,25 +433,6 @@ static int add_us_sample(struct mm_struct *mm, struct op_sample *s) } -/* Add a sample to the global event buffer. If possible the - * sample is converted into a persistent dentry/offset pair - * for later lookup from userspace. - */ -static int -add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) -{ - if (in_kernel) { - add_sample_entry(s->eip, s->event); - return 1; - } else if (mm) { - return add_us_sample(mm, s); - } else { - atomic_inc(&oprofile_stats.sample_lost_no_mm); - } - return 0; -} - - static void release_mm(struct mm_struct *mm) { if (!mm) From 317f33bce6d43367a2fd170bc87ba18a88d2621d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 18 Dec 2008 19:44:20 +0100 Subject: [PATCH 08/31] oprofile: simplify sync_buffer() Make code more readable. 
No functional changes. Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 0abe29e7e4c7..22cdb5108360 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -579,12 +579,20 @@ void sync_buffer(int cpu) add_user_ctx_switch(new, cookie); break; } - } else if (state >= sb_bt_start && - !add_sample(mm, s, in_kernel)) { - if (state == sb_bt_start) { - state = sb_bt_ignore; - atomic_inc(&oprofile_stats.bt_lost_no_mapping); - } + continue; + } + + if (state < sb_bt_start) + /* ignore sample */ + continue; + + if (add_sample(mm, s, in_kernel)) + continue; + + /* ignore backtraces if failed to add a sample */ + if (state == sb_bt_start) { + state = sb_bt_ignore; + atomic_inc(&oprofile_stats.bt_lost_no_mapping); } } release_mm(mm); From 6352d92dec0c4b833c12a169e86762c05d0396f3 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 18 Dec 2008 22:09:13 +0100 Subject: [PATCH 09/31] oprofile: simplify oprofile_begin_trace() This patch removes the unused return parameter in oprofile_begin_trace(). Also, oprofile_begin_trace() and oprofile_end_trace() are inline now. 
Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 9e66c384e016..435bd6e08d5b 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -265,14 +265,13 @@ fail: return 0; } -static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) +static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) { add_code(cpu_buf, CPU_TRACE_BEGIN); cpu_buf->tracing = 1; - return 1; } -static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) +static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) { cpu_buf->tracing = 0; } @@ -288,8 +287,7 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, return; } - if (!oprofile_begin_trace(cpu_buf)) - return; + oprofile_begin_trace(cpu_buf); /* * if log_sample() fail we can't backtrace since we lost the @@ -297,6 +295,7 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, */ if (log_sample(cpu_buf, pc, is_kernel, event)) oprofile_ops.backtrace(regs, oprofile_backtrace_depth); + oprofile_end_trace(cpu_buf); } From 3967e93e063d7ee608f465cbccb65abb518e9d33 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 30 Dec 2008 05:10:58 +0100 Subject: [PATCH 10/31] oprofile: simplify add_sample() in cpu_buffer.c Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 435bd6e08d5b..d92f0020502e 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -203,11 +203,7 @@ add_sample(struct oprofile_cpu_buffer *cpu_buf, entry.sample->eip = pc; entry.sample->event = event; - ret = op_cpu_buffer_write_commit(&entry); - if (ret) - return ret; - - return 0; + return op_cpu_buffer_write_commit(&entry); } static 
inline int From dbe6e2835e32461e7d592077947081c32f3da1d5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 16 Dec 2008 11:01:18 +0100 Subject: [PATCH 11/31] oprofile: simplify add_ibs_begin() Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 22cdb5108360..7415d2e6b3a1 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -333,7 +333,7 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) sample = op_cpu_buffer_read_entry(cpu); if (!sample) - goto Error; + return; rip = sample->eip; #ifdef __LP64__ @@ -372,15 +372,12 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) for (i = 0; i < count; i++) { sample = op_cpu_buffer_read_entry(cpu); if (!sample) - goto Error; + return; add_event_entry(sample->eip); add_event_entry(sample->event); } return; - -Error: - return; } #endif From 8d15df84a42b140a8262a325b987a283ef9f5f63 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 24 Dec 2008 15:42:58 +0100 Subject: [PATCH 12/31] oprofile: remove unused components in struct oprofile_cpu_buffer Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 2 -- drivers/oprofile/cpu_buffer.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index d92f0020502e..b426ae8ad2e2 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -97,8 +97,6 @@ int alloc_cpu_buffers(void) b->last_is_kernel = -1; b->tracing = 0; b->buffer_size = buffer_size; - b->tail_pos = 0; - b->head_pos = 0; b->sample_received = 0; b->sample_lost_overflow = 0; b->backtrace_aborted = 0; diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index cd28abc06960..65b763ad72da 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -40,8 +40,6 @@ 
struct op_entry { }; struct oprofile_cpu_buffer { - volatile unsigned long head_pos; - volatile unsigned long tail_pos; unsigned long buffer_size; struct task_struct *last_task; int last_is_kernel; From f4ff2364417f0092e49f6a3aa474549a56697f2d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 5 Jan 2009 11:27:52 +0100 Subject: [PATCH 13/31] oprofile: remove unused ibs macro Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index b426ae8ad2e2..8ae37c9d0ec4 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -309,8 +309,6 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) #ifdef CONFIG_OPROFILE_IBS -#define MAX_IBS_SAMPLE_SIZE 14 - void oprofile_add_ibs_sample(struct pt_regs * const regs, unsigned int * const ibs_sample, int ibs_code) { From 8350c78734e67ac1f8bfd4eb14b70ff4d01a9a12 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 19 Dec 2008 12:59:28 +0100 Subject: [PATCH 14/31] oprofile: remove backtrace code for ibs This code is broken since a TRACE_BEGIN_CODE is never sent to the daemon. The data becomes corrupt since the backtrace is interpreted as ibs sample. 
Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 2 -- drivers/oprofile/cpu_buffer.c | 10 ++-------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 7415d2e6b3a1..e61e25fda1ad 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -557,11 +557,9 @@ void sync_buffer(int cpu) break; #ifdef CONFIG_OPROFILE_IBS case IBS_FETCH_BEGIN: - state = sb_bt_start; add_ibs_begin(cpu, IBS_FETCH_CODE, mm); break; case IBS_OP_BEGIN: - state = sb_bt_start; add_ibs_begin(cpu, IBS_OP_CODE, mm); break; #endif diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 8ae37c9d0ec4..92bf8c0d86fe 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -347,17 +347,11 @@ void oprofile_add_ibs_sample(struct pt_regs * const regs, fail = fail || add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); } - if (fail) - goto fail; - - if (oprofile_backtrace_depth) - oprofile_ops.backtrace(regs, oprofile_backtrace_depth); - - return; + if (!fail) + return; fail: cpu_buf->sample_lost_overflow++; - return; } #endif From 6368a1f4d99fe9a1990ef3f04ab2d2ce9dad0a7c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 29 Dec 2008 18:44:21 +0100 Subject: [PATCH 15/31] oprofile: making add_sample_entry() inline Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index e61e25fda1ad..bf8fcc7163da 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -382,7 +382,7 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) #endif -static void add_sample_entry(unsigned long offset, unsigned long event) +static inline void add_sample_entry(unsigned long offset, unsigned long event) { add_event_entry(offset); add_event_entry(event); From 
fc81be8ca29e28bfb89aa23359036a8ad4118d0f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 18 Dec 2008 00:28:27 +0100 Subject: [PATCH 16/31] oprofile: rename variable ibs_allowed to has_ibs in op_model_amd.c This patch renames ibs_allowed to has_ibs. Variable name fits better now. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c5b5c7fb3ced..423a95438cbc 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -106,7 +106,7 @@ struct ibs_op_sample { unsigned int ibs_dc_phys_high; }; -static int ibs_allowed; /* AMD Family10h and later */ +static int has_ibs; /* AMD Family10h and later */ struct op_ibs_config { unsigned long op_enabled; @@ -201,7 +201,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, struct ibs_fetch_sample ibs_fetch; struct ibs_op_sample ibs_op; - if (!ibs_allowed) + if (!has_ibs) return 1; if (ibs_config.fetch_enabled) { @@ -305,14 +305,14 @@ static void op_amd_start(struct op_msrs const * const msrs) } #ifdef CONFIG_OPROFILE_IBS - if (ibs_allowed && ibs_config.fetch_enabled) { + if (has_ibs && ibs_config.fetch_enabled) { low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ + IBS_FETCH_HIGH_ENABLE; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); } - if (ibs_allowed && ibs_config.op_enabled) { + if (has_ibs && ibs_config.op_enabled) { low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ + IBS_OP_LOW_ENABLE; @@ -341,14 +341,14 @@ static void op_amd_stop(struct op_msrs const * const msrs) } #ifdef CONFIG_OPROFILE_IBS - if (ibs_allowed && ibs_config.fetch_enabled) { + if (has_ibs && ibs_config.fetch_enabled) { /* clear max count and enable */ low = 0; high = 0; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); } - if (ibs_allowed && 
ibs_config.op_enabled) { + if (has_ibs && ibs_config.op_enabled) { /* clear max count and enable */ low = 0; high = 0; @@ -437,20 +437,20 @@ static int init_ibs_nmi(void) /* uninitialize the APIC for the IBS interrupts if needed */ static void clear_ibs_nmi(void) { - if (ibs_allowed) + if (has_ibs) on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } /* initialize the APIC for the IBS interrupts if available */ static void ibs_init(void) { - ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); + has_ibs = boot_cpu_has(X86_FEATURE_IBS); - if (!ibs_allowed) + if (!has_ibs) return; if (init_ibs_nmi()) { - ibs_allowed = 0; + has_ibs = 0; return; } @@ -459,7 +459,7 @@ static void ibs_init(void) static void ibs_exit(void) { - if (!ibs_allowed) + if (!has_ibs) return; clear_ibs_nmi(); @@ -479,7 +479,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) if (ret) return ret; - if (!ibs_allowed) + if (!has_ibs) return ret; /* model specific files */ From d0e233846dcef56ae78f6d8fd0e0cba85a2a1489 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 23 Dec 2008 04:03:05 +0100 Subject: [PATCH 17/31] oprofile: rename add_sample() in cpu_buffer.c Rename the function to op_add_sample() since there is a collision with another one with the same name in buffer_sync.c. 
Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 92bf8c0d86fe..ac79f6676033 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -188,8 +188,8 @@ unsigned long op_cpu_buffer_entries(int cpu) } static inline int -add_sample(struct oprofile_cpu_buffer *cpu_buf, - unsigned long pc, unsigned long event) +op_add_sample(struct oprofile_cpu_buffer *cpu_buf, + unsigned long pc, unsigned long event) { struct op_entry entry; int ret; @@ -207,7 +207,7 @@ add_sample(struct oprofile_cpu_buffer *cpu_buf, static inline int add_code(struct oprofile_cpu_buffer *buffer, unsigned long value) { - return add_sample(buffer, ESCAPE_CODE, value); + return op_add_sample(buffer, ESCAPE_CODE, value); } /* This must be safe from any context. It's safe writing here @@ -249,7 +249,7 @@ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, goto fail; } - if (add_sample(cpu_buf, pc, event)) + if (op_add_sample(cpu_buf, pc, event)) goto fail; return 1; @@ -337,14 +337,14 @@ void oprofile_add_ibs_sample(struct pt_regs * const regs, } fail = fail || add_code(cpu_buf, ibs_code); - fail = fail || add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); - fail = fail || add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); - fail = fail || add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); + fail = fail || op_add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); + fail = fail || op_add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); + fail = fail || op_add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); if (ibs_code == IBS_OP_BEGIN) { - fail = fail || add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]); - fail = fail || add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]); - fail = fail || add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); + fail = fail || op_add_sample(cpu_buf, ibs_sample[6], 
ibs_sample[7]); + fail = fail || op_add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]); + fail = fail || op_add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); } if (!fail) @@ -376,7 +376,7 @@ void oprofile_add_trace(unsigned long pc) if (pc == ESCAPE_CODE) goto fail; - if (add_sample(cpu_buf, pc, 0)) + if (op_add_sample(cpu_buf, pc, 0)) goto fail; return; From d358e75fc40cc3bbab11654ba0a88b232c543d12 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 5 Jan 2009 13:14:04 +0100 Subject: [PATCH 18/31] oprofile: rename variables in add_ibs_begin() This unifies usage of variable names within oprofile. Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index bf8fcc7163da..21fd249b6e0b 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -325,36 +325,36 @@ static void add_trace_begin(void) */ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) { - unsigned long rip; + unsigned long pc; int i, count; - unsigned long ibs_cookie = 0; + unsigned long cookie = 0; off_t offset; struct op_sample *sample; sample = op_cpu_buffer_read_entry(cpu); if (!sample) return; - rip = sample->eip; + pc = sample->eip; #ifdef __LP64__ - rip += sample->event << 32; + pc += sample->event << 32; #endif if (mm) { - ibs_cookie = lookup_dcookie(mm, rip, &offset); + cookie = lookup_dcookie(mm, pc, &offset); - if (ibs_cookie == NO_COOKIE) - offset = rip; - if (ibs_cookie == INVALID_COOKIE) { + if (cookie == NO_COOKIE) + offset = pc; + if (cookie == INVALID_COOKIE) { atomic_inc(&oprofile_stats.sample_lost_no_mapping); - offset = rip; + offset = pc; } - if (ibs_cookie != last_cookie) { - add_cookie_switch(ibs_cookie); - last_cookie = ibs_cookie; + if (cookie != last_cookie) { + add_cookie_switch(cookie); + last_cookie = cookie; } } else - offset = rip; + offset = pc; 
add_event_entry(ESCAPE_CODE); add_event_entry(code); From 2cc28b9f261dd28d69767a34682ce55a27d928ed Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 25 Dec 2008 17:26:07 +0100 Subject: [PATCH 19/31] oprofile: add op_cpu_buffer_write_reserve() This function prepares the cpu buffer to write a sample. Struct op_entry is used during operations on the ring buffer while struct op_sample contains the data that is stored in the ring buffer. Struct entry can be uninitialized. The function reserves a data array that is specified by size. Use op_cpu_buffer_write_commit() after preparing the sample. In case of errors a null pointer is returned, otherwise the pointer to the sample. Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 43 ++++++++++++++++++++++++----------- drivers/oprofile/cpu_buffer.h | 9 ++++++-- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index ac79f6676033..934ff159e70e 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -1,11 +1,12 @@ /** * @file cpu_buffer.c * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Barry Kasindorf + * @author Robert Richter * * Each CPU has a local buffer that stores PC value/event * pairs. We also log context switches when we notice them. @@ -143,20 +144,36 @@ void end_cpu_work(void) flush_scheduled_work(); } -int op_cpu_buffer_write_entry(struct op_entry *entry) +/* + * This function prepares the cpu buffer to write a sample. + * + * Struct op_entry is used during operations on the ring buffer while + * struct op_sample contains the data that is stored in the ring + * buffer. Struct entry can be uninitialized. The function reserves a + * data array that is specified by size. Use + * op_cpu_buffer_write_commit() after preparing the sample. 
In case of + * errors a null pointer is returned, otherwise the pointer to the + * sample. + * + */ +struct op_sample +*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size) { - entry->event = ring_buffer_lock_reserve(op_ring_buffer_write, - sizeof(struct op_sample), - &entry->irq_flags); + entry->event = ring_buffer_lock_reserve + (op_ring_buffer_write, sizeof(struct op_sample) + + size * sizeof(entry->sample->data[0]), &entry->irq_flags); if (entry->event) entry->sample = ring_buffer_event_data(entry->event); else entry->sample = NULL; if (!entry->sample) - return -ENOMEM; + return NULL; - return 0; + entry->size = size; + entry->data = entry->sample->data; + + return entry->sample; } int op_cpu_buffer_write_commit(struct op_entry *entry) @@ -192,14 +209,14 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, unsigned long event) { struct op_entry entry; - int ret; + struct op_sample *sample; - ret = op_cpu_buffer_write_entry(&entry); - if (ret) - return ret; + sample = op_cpu_buffer_write_reserve(&entry, 0); + if (!sample) + return -ENOMEM; - entry.sample->eip = pc; - entry.sample->event = event; + sample->eip = pc; + sample->event = event; return op_cpu_buffer_write_commit(&entry); } diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 65b763ad72da..2d4bfdeb7fba 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -1,10 +1,11 @@ /** * @file cpu_buffer.h * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon + * @author Robert Richter */ #ifndef OPROFILE_CPU_BUFFER_H @@ -31,12 +32,15 @@ void end_cpu_work(void); struct op_sample { unsigned long eip; unsigned long event; + unsigned long data[0]; }; struct op_entry { struct ring_buffer_event *event; struct op_sample *sample; unsigned long irq_flags; + unsigned long size; + unsigned long *data; }; struct oprofile_cpu_buffer { @@ 
-68,7 +72,8 @@ static inline void op_cpu_buffer_reset(int cpu) cpu_buf->last_task = NULL; } -int op_cpu_buffer_write_entry(struct op_entry *entry); +struct op_sample +*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size); int op_cpu_buffer_write_commit(struct op_entry *entry); struct op_sample *op_cpu_buffer_read_entry(int cpu); unsigned long op_cpu_buffer_entries(int cpu); From 2d87b14cf8d0b07720de26d90789d02124141616 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 30 Dec 2008 04:10:46 +0100 Subject: [PATCH 20/31] oprofile: modify op_cpu_buffer_read_entry() This implements the support of samples with attached data. Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 23 +++++++++++++---------- drivers/oprofile/cpu_buffer.c | 14 +++++++++++--- drivers/oprofile/cpu_buffer.h | 2 +- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 21fd249b6e0b..908202afbae9 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -329,9 +329,10 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) int i, count; unsigned long cookie = 0; off_t offset; + struct op_entry entry; struct op_sample *sample; - sample = op_cpu_buffer_read_entry(cpu); + sample = op_cpu_buffer_read_entry(&entry, cpu); if (!sample) return; pc = sample->eip; @@ -370,7 +371,7 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ for (i = 0; i < count; i++) { - sample = op_cpu_buffer_read_entry(cpu); + sample = op_cpu_buffer_read_entry(&entry, cpu); if (!sample) return; add_event_entry(sample->eip); @@ -528,6 +529,8 @@ void sync_buffer(int cpu) sync_buffer_state state = sb_buffer_start; unsigned int i; unsigned long available; + struct op_entry entry; + struct op_sample *sample; mutex_lock(&buffer_mutex); @@ -537,19 +540,19 @@ void sync_buffer(int cpu) available = 
op_cpu_buffer_entries(cpu); for (i = 0; i < available; ++i) { - struct op_sample *s = op_cpu_buffer_read_entry(cpu); - if (!s) + sample = op_cpu_buffer_read_entry(&entry, cpu); + if (!sample) break; - if (is_code(s->eip)) { - switch (s->event) { + if (is_code(sample->eip)) { + switch (sample->event) { case 0: case CPU_IS_KERNEL: /* kernel/userspace switch */ - in_kernel = s->event; + in_kernel = sample->event; if (state == sb_buffer_start) state = sb_sample_start; - add_kernel_ctx_switch(s->event); + add_kernel_ctx_switch(sample->event); break; case CPU_TRACE_BEGIN: state = sb_bt_start; @@ -566,7 +569,7 @@ void sync_buffer(int cpu) default: /* userspace context switch */ oldmm = mm; - new = (struct task_struct *)s->event; + new = (struct task_struct *)sample->event; release_mm(oldmm); mm = take_tasks_mm(new); if (mm != oldmm) @@ -581,7 +584,7 @@ void sync_buffer(int cpu) /* ignore sample */ continue; - if (add_sample(mm, s, in_kernel)) + if (add_sample(mm, sample, in_kernel)) continue; /* ignore backtraces if failed to add a sample */ diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 934ff159e70e..400f7fcffdbe 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -182,20 +182,28 @@ int op_cpu_buffer_write_commit(struct op_entry *entry) entry->irq_flags); } -struct op_sample *op_cpu_buffer_read_entry(int cpu) +struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) { struct ring_buffer_event *e; e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); if (e) - return ring_buffer_event_data(e); + goto event; if (ring_buffer_swap_cpu(op_ring_buffer_read, op_ring_buffer_write, cpu)) return NULL; e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); if (e) - return ring_buffer_event_data(e); + goto event; return NULL; + +event: + entry->event = e; + entry->sample = ring_buffer_event_data(e); + entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample)) + / 
sizeof(entry->sample->data[0]); + entry->data = entry->sample->data; + return entry->sample; } unsigned long op_cpu_buffer_entries(int cpu) diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 2d4bfdeb7fba..d7c0545ef8b2 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -75,7 +75,7 @@ static inline void op_cpu_buffer_reset(int cpu) struct op_sample *op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size); int op_cpu_buffer_write_commit(struct op_entry *entry); -struct op_sample *op_cpu_buffer_read_entry(int cpu); +struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu); unsigned long op_cpu_buffer_entries(int cpu); /* transient events for the CPU buffer -> event buffer */ From ae735e9964b4584923f2997d98a8d80ae9c1a75c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 25 Dec 2008 17:26:07 +0100 Subject: [PATCH 21/31] oprofile: rework implementation of cpu buffer events Special events such as task or context switches are marked with an escape code in the cpu buffer followed by an event code or a task identifier. There is one escape code per event. To make escape sequences also available for data samples the internal cpu buffer format must be changed. The current implementation does not allow the extension of event codes since this would lead to collisions with the task identifiers. To avoid this, this patch introduces an event mask that allows the storage of multiple events with one escape code. Now, task identifiers are stored in the data section of the sample. The implementation also allows the usage of custom data in a sample. As a side effect the new code is much more readable and easier to understand. 
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 8 +- drivers/oprofile/buffer_sync.c | 44 +++++----- drivers/oprofile/cpu_buffer.c | 139 +++++++++++++++++-------------- drivers/oprofile/cpu_buffer.h | 12 +-- 4 files changed, 107 insertions(+), 96 deletions(-) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 423a95438cbc..f101724db80a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -2,7 +2,7 @@ * @file op_model_amd.c * athlon / K7 / K8 / Family 10h model-specific MSR operations * - * @remark Copyright 2002-2008 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon @@ -10,7 +10,7 @@ * @author Graydon Hoare * @author Robert Richter * @author Barry Kasindorf -*/ + */ #include #include @@ -62,8 +62,8 @@ static unsigned long reset_value[NUM_COUNTERS]; /* Codes used in cpu_buffer.c */ /* This produces duplicate code, need to be fixed */ -#define IBS_FETCH_BEGIN 3 -#define IBS_OP_BEGIN 4 +#define IBS_FETCH_BEGIN (1UL << 4) +#define IBS_OP_BEGIN (1UL << 5) /* * The function interface needs to be fixed, something like add diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 908202afbae9..d969bb13a252 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -1,11 +1,12 @@ /** * @file buffer_sync.c * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Barry Kasindorf + * @author Robert Richter * * This is the core of the buffer management. 
Each * CPU buffer is processed and entered into the @@ -529,6 +530,7 @@ void sync_buffer(int cpu) sync_buffer_state state = sb_buffer_start; unsigned int i; unsigned long available; + unsigned long flags; struct op_entry entry; struct op_sample *sample; @@ -545,38 +547,34 @@ void sync_buffer(int cpu) break; if (is_code(sample->eip)) { - switch (sample->event) { - case 0: - case CPU_IS_KERNEL: - /* kernel/userspace switch */ - in_kernel = sample->event; - if (state == sb_buffer_start) - state = sb_sample_start; - add_kernel_ctx_switch(sample->event); - break; - case CPU_TRACE_BEGIN: + flags = sample->event; + if (flags & TRACE_BEGIN) { state = sb_bt_start; add_trace_begin(); - break; -#ifdef CONFIG_OPROFILE_IBS - case IBS_FETCH_BEGIN: - add_ibs_begin(cpu, IBS_FETCH_CODE, mm); - break; - case IBS_OP_BEGIN: - add_ibs_begin(cpu, IBS_OP_CODE, mm); - break; -#endif - default: + } + if (flags & KERNEL_CTX_SWITCH) { + /* kernel/userspace switch */ + in_kernel = flags & IS_KERNEL; + if (state == sb_buffer_start) + state = sb_sample_start; + add_kernel_ctx_switch(flags & IS_KERNEL); + } + if (flags & USER_CTX_SWITCH) { /* userspace context switch */ oldmm = mm; - new = (struct task_struct *)sample->event; + new = (struct task_struct *)sample->data[0]; release_mm(oldmm); mm = take_tasks_mm(new); if (mm != oldmm) cookie = get_exec_dcookie(mm); add_user_ctx_switch(new, cookie); - break; } +#ifdef CONFIG_OPROFILE_IBS + if (flags & IBS_FETCH_BEGIN) + add_ibs_begin(cpu, IBS_FETCH_CODE, mm); + if (flags & IBS_OP_BEGIN) + add_ibs_begin(cpu, IBS_OP_CODE, mm); +#endif continue; } diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 400f7fcffdbe..e859d23cfc57 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -212,6 +212,59 @@ unsigned long op_cpu_buffer_entries(int cpu) + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); } +static int +op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace, + int is_kernel, 
struct task_struct *task) +{ + struct op_entry entry; + struct op_sample *sample; + unsigned long flags; + int size; + + flags = 0; + + if (backtrace) + flags |= TRACE_BEGIN; + + /* notice a switch from user->kernel or vice versa */ + is_kernel = !!is_kernel; + if (cpu_buf->last_is_kernel != is_kernel) { + cpu_buf->last_is_kernel = is_kernel; + flags |= KERNEL_CTX_SWITCH; + if (is_kernel) + flags |= IS_KERNEL; + } + + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; + flags |= USER_CTX_SWITCH; + } + + if (!flags) + /* nothing to do */ + return 0; + + if (flags & USER_CTX_SWITCH) + size = 1; + else + size = 0; + + sample = op_cpu_buffer_write_reserve(&entry, size); + if (!sample) + return -ENOMEM; + + sample->eip = ESCAPE_CODE; + sample->event = flags; + + if (size) + sample->data[0] = (unsigned long)task; + + op_cpu_buffer_write_commit(&entry); + + return 0; +} + static inline int op_add_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, unsigned long event) @@ -229,26 +282,18 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf, return op_cpu_buffer_write_commit(&entry); } -static inline int -add_code(struct oprofile_cpu_buffer *buffer, unsigned long value) -{ - return op_add_sample(buffer, ESCAPE_CODE, value); -} - -/* This must be safe from any context. It's safe writing here - * because of the head/tail separation of the writer and reader - * of the CPU buffer. +/* + * This must be safe from any context. * * is_kernel is needed because on some architectures you cannot * tell if you are in kernel or user space simply by looking at * pc. 
We tag this in the buffer by generating kernel enter/exit * events whenever is_kernel changes */ -static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, - int is_kernel, unsigned long event) +static int +log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, + unsigned long backtrace, int is_kernel, unsigned long event) { - struct task_struct *task; - cpu_buf->sample_received++; if (pc == ESCAPE_CODE) { @@ -256,23 +301,8 @@ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, return 0; } - is_kernel = !!is_kernel; - - task = current; - - /* notice a switch from user->kernel or vice versa */ - if (cpu_buf->last_is_kernel != is_kernel) { - cpu_buf->last_is_kernel = is_kernel; - if (add_code(cpu_buf, is_kernel)) - goto fail; - } - - /* notice a task switch */ - if (cpu_buf->last_task != task) { - cpu_buf->last_task = task; - if (add_code(cpu_buf, (unsigned long)task)) - goto fail; - } + if (op_add_code(cpu_buf, backtrace, is_kernel, current)) + goto fail; if (op_add_sample(cpu_buf, pc, event)) goto fail; @@ -286,7 +316,6 @@ fail: static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) { - add_code(cpu_buf, CPU_TRACE_BEGIN); cpu_buf->tracing = 1; } @@ -300,21 +329,21 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, unsigned long event, int is_kernel) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); - - if (!oprofile_backtrace_depth) { - log_sample(cpu_buf, pc, is_kernel, event); - return; - } - - oprofile_begin_trace(cpu_buf); + unsigned long backtrace = oprofile_backtrace_depth; /* * if log_sample() fail we can't backtrace since we lost the * source of this event */ - if (log_sample(cpu_buf, pc, is_kernel, event)) - oprofile_ops.backtrace(regs, oprofile_backtrace_depth); + if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event)) + /* failed */ + return; + if (!backtrace) + return; + + oprofile_begin_trace(cpu_buf); + 
oprofile_ops.backtrace(regs, backtrace); oprofile_end_trace(cpu_buf); } @@ -339,29 +368,14 @@ void oprofile_add_ibs_sample(struct pt_regs * const regs, { int is_kernel = !user_mode(regs); struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); - struct task_struct *task; int fail = 0; cpu_buf->sample_received++; - /* notice a switch from user->kernel or vice versa */ - if (cpu_buf->last_is_kernel != is_kernel) { - if (add_code(cpu_buf, is_kernel)) - goto fail; - cpu_buf->last_is_kernel = is_kernel; - } + /* backtraces disabled for ibs */ + fail = fail || op_add_code(cpu_buf, 0, is_kernel, current); - /* notice a task switch */ - if (!is_kernel) { - task = current; - if (cpu_buf->last_task != task) { - if (add_code(cpu_buf, (unsigned long)task)) - goto fail; - cpu_buf->last_task = task; - } - } - - fail = fail || add_code(cpu_buf, ibs_code); + fail = fail || op_add_sample(cpu_buf, ESCAPE_CODE, ibs_code); fail = fail || op_add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); fail = fail || op_add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); fail = fail || op_add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); @@ -372,11 +386,8 @@ void oprofile_add_ibs_sample(struct pt_regs * const regs, fail = fail || op_add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); } - if (!fail) - return; - -fail: - cpu_buf->sample_lost_overflow++; + if (fail) + cpu_buf->sample_lost_overflow++; } #endif @@ -384,7 +395,7 @@ fail: void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); - log_sample(cpu_buf, pc, is_kernel, event); + log_sample(cpu_buf, pc, 0, is_kernel, event); } void oprofile_add_trace(unsigned long pc) diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index d7c0545ef8b2..e634dcf2f26f 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -78,10 +78,12 @@ int op_cpu_buffer_write_commit(struct op_entry *entry); struct op_sample 
*op_cpu_buffer_read_entry(struct op_entry *entry, int cpu); unsigned long op_cpu_buffer_entries(int cpu); -/* transient events for the CPU buffer -> event buffer */ -#define CPU_IS_KERNEL 1 -#define CPU_TRACE_BEGIN 2 -#define IBS_FETCH_BEGIN 3 -#define IBS_OP_BEGIN 4 +/* extra data flags */ +#define KERNEL_CTX_SWITCH (1UL << 0) +#define IS_KERNEL (1UL << 1) +#define TRACE_BEGIN (1UL << 2) +#define USER_CTX_SWITCH (1UL << 3) +#define IBS_FETCH_BEGIN (1UL << 4) +#define IBS_OP_BEGIN (1UL << 5) #endif /* OPROFILE_CPU_BUFFER_H */ From d9928c25a6960cf128c2078a89fe6f8e0180ff60 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 25 Dec 2008 17:26:07 +0100 Subject: [PATCH 22/31] oprofile: add op_cpu_buffer_add_data() This function can be used to attach data to a sample. It returns the remaining free buffer size that has been reserved with op_cpu_buffer_write_reserve(). Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 2 +- drivers/oprofile/cpu_buffer.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index e859d23cfc57..1b6590746be4 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -258,7 +258,7 @@ op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace, sample->event = flags; if (size) - sample->data[0] = (unsigned long)task; + op_cpu_buffer_add_data(&entry, (unsigned long)task); op_cpu_buffer_write_commit(&entry); diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index e634dcf2f26f..e178dd2799c4 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -78,6 +78,18 @@ int op_cpu_buffer_write_commit(struct op_entry *entry); struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu); unsigned long op_cpu_buffer_entries(int cpu); +/* returns the remaining free size of data in the entry */ +static inline +int op_cpu_buffer_add_data(struct op_entry 
*entry, unsigned long val) +{ + if (!entry->size) + return 0; + *entry->data = val; + entry->size--; + entry->data++; + return entry->size; +} + /* extra data flags */ #define KERNEL_CTX_SWITCH (1UL << 0) #define IS_KERNEL (1UL << 1) From bd7dc46f770d317ada1348294ff1f319243b803b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 6 Jan 2009 03:56:50 +0100 Subject: [PATCH 23/31] oprofile: add op_cpu_buffer_get_data() This function provides access to attached data of a sample. It returns the size of data including the current value. Also, op_cpu_buffer_get_size() is available to check if there is data attached. Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 6 ++++-- drivers/oprofile/cpu_buffer.h | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index d969bb13a252..f9031d31eeb7 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -524,6 +524,7 @@ void sync_buffer(int cpu) { struct mm_struct *mm = NULL; struct mm_struct *oldmm; + unsigned long val; struct task_struct *new; unsigned long cookie = 0; int in_kernel = 1; @@ -559,10 +560,11 @@ void sync_buffer(int cpu) state = sb_sample_start; add_kernel_ctx_switch(flags & IS_KERNEL); } - if (flags & USER_CTX_SWITCH) { + if (flags & USER_CTX_SWITCH + && op_cpu_buffer_get_data(&entry, &val)) { /* userspace context switch */ + new = (struct task_struct *)val; oldmm = mm; - new = (struct task_struct *)sample->data[0]; release_mm(oldmm); mm = take_tasks_mm(new); if (mm != oldmm) diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index e178dd2799c4..f34376046573 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -90,6 +90,26 @@ int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val) return entry->size; } +/* returns the size of data in the entry */ +static inline +int op_cpu_buffer_get_size(struct 
op_entry *entry) +{ + return entry->size; +} + +/* returns 0 if empty or the size of data including the current value */ +static inline +int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val) +{ + int size = entry->size; + if (!size) + return 0; + *val = *entry->data; + entry->size--; + entry->data++; + return size; +} + /* extra data flags */ #define KERNEL_CTX_SWITCH (1UL << 0) #define IS_KERNEL (1UL << 1) From 1acda878e20ea0cd3708ba66dca67d52eaafdd2b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 5 Jan 2009 10:35:31 +0100 Subject: [PATCH 24/31] oprofile: use new data sample format for ibs The new ring buffer implementation allows the storage of samples with different size. This patch implements the usage of the new sample format to store ibs samples in the cpu buffer. Until now, writing to the cpu buffer could lead to incomplete sampling sequences since IBS samples were transferred in multiple samples. Due to a full buffer, data could be lost at any time. This can't happen any more since the complete data is reserved in advance and then stored in a single sample.
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 119 +++++++++++-------------------- drivers/oprofile/buffer_sync.c | 53 ++++---------- drivers/oprofile/cpu_buffer.c | 39 +++++----- drivers/oprofile/cpu_buffer.h | 2 - 4 files changed, 76 insertions(+), 137 deletions(-) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index f101724db80a..cf310aeb462c 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -22,6 +22,7 @@ #include "op_x86_model.h" #include "op_counter.h" +#include "../../../drivers/oprofile/cpu_buffer.h" #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 @@ -60,51 +61,16 @@ static unsigned long reset_value[NUM_COUNTERS]; #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ -/* Codes used in cpu_buffer.c */ -/* This produces duplicate code, need to be fixed */ -#define IBS_FETCH_BEGIN (1UL << 4) -#define IBS_OP_BEGIN (1UL << 5) - /* * The function interface needs to be fixed, something like add * data. Should then be added to linux/oprofile.h. 
*/ -extern void -oprofile_add_ibs_sample(struct pt_regs * const regs, - unsigned int * const ibs_sample, int ibs_code); +extern +void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs, + unsigned long pc, int code, int size); -struct ibs_fetch_sample { - /* MSRC001_1031 IBS Fetch Linear Address Register */ - unsigned int ibs_fetch_lin_addr_low; - unsigned int ibs_fetch_lin_addr_high; - /* MSRC001_1030 IBS Fetch Control Register */ - unsigned int ibs_fetch_ctl_low; - unsigned int ibs_fetch_ctl_high; - /* MSRC001_1032 IBS Fetch Physical Address Register */ - unsigned int ibs_fetch_phys_addr_low; - unsigned int ibs_fetch_phys_addr_high; -}; - -struct ibs_op_sample { - /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ - unsigned int ibs_op_rip_low; - unsigned int ibs_op_rip_high; - /* MSRC001_1035 IBS Op Data Register */ - unsigned int ibs_op_data1_low; - unsigned int ibs_op_data1_high; - /* MSRC001_1036 IBS Op Data 2 Register */ - unsigned int ibs_op_data2_low; - unsigned int ibs_op_data2_high; - /* MSRC001_1037 IBS Op Data 3 Register */ - unsigned int ibs_op_data3_low; - unsigned int ibs_op_data3_high; - /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ - unsigned int ibs_dc_linear_low; - unsigned int ibs_dc_linear_high; - /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ - unsigned int ibs_dc_phys_low; - unsigned int ibs_dc_phys_high; -}; +#define IBS_FETCH_SIZE 6 +#define IBS_OP_SIZE 12 static int has_ibs; /* AMD Family10h and later */ @@ -197,9 +163,9 @@ static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - unsigned int low, high; - struct ibs_fetch_sample ibs_fetch; - struct ibs_op_sample ibs_op; + u32 low, high; + u64 msr; + struct op_entry entry; if (!has_ibs) return 1; @@ -207,21 +173,19 @@ op_amd_handle_ibs(struct pt_regs * const regs, if (ibs_config.fetch_enabled) { rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); if (high & IBS_FETCH_HIGH_VALID_BIT) { - 
ibs_fetch.ibs_fetch_ctl_high = high; - ibs_fetch.ibs_fetch_ctl_low = low; - rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); - ibs_fetch.ibs_fetch_lin_addr_high = high; - ibs_fetch.ibs_fetch_lin_addr_low = low; - rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); - ibs_fetch.ibs_fetch_phys_addr_high = high; - ibs_fetch.ibs_fetch_phys_addr_low = low; - - oprofile_add_ibs_sample(regs, - (unsigned int *)&ibs_fetch, - IBS_FETCH_BEGIN); + rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); + oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE, + IBS_FETCH_SIZE); + op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + op_cpu_buffer_add_data(&entry, low); + op_cpu_buffer_add_data(&entry, high); + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); + op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + op_cpu_buffer_write_commit(&entry); /* reenable the IRQ */ - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); high &= ~IBS_FETCH_HIGH_VALID_BIT; high |= IBS_FETCH_HIGH_ENABLE; low &= IBS_FETCH_LOW_MAX_CNT_MASK; @@ -232,30 +196,29 @@ op_amd_handle_ibs(struct pt_regs * const regs, if (ibs_config.op_enabled) { rdmsr(MSR_AMD64_IBSOPCTL, low, high); if (low & IBS_OP_LOW_VALID_BIT) { - rdmsr(MSR_AMD64_IBSOPRIP, low, high); - ibs_op.ibs_op_rip_low = low; - ibs_op.ibs_op_rip_high = high; - rdmsr(MSR_AMD64_IBSOPDATA, low, high); - ibs_op.ibs_op_data1_low = low; - ibs_op.ibs_op_data1_high = high; - rdmsr(MSR_AMD64_IBSOPDATA2, low, high); - ibs_op.ibs_op_data2_low = low; - ibs_op.ibs_op_data2_high = high; - rdmsr(MSR_AMD64_IBSOPDATA3, low, high); - ibs_op.ibs_op_data3_low = low; - ibs_op.ibs_op_data3_high = high; - rdmsr(MSR_AMD64_IBSDCLINAD, low, high); - ibs_op.ibs_dc_linear_low = low; - ibs_op.ibs_dc_linear_high = high; - rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); - ibs_op.ibs_dc_phys_low = low; - ibs_op.ibs_dc_phys_high = high; + rdmsrl(MSR_AMD64_IBSOPRIP, msr); + oprofile_add_data(&entry, regs, msr, IBS_OP_CODE, + IBS_OP_SIZE); + 
op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA, msr); + op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA2, msr); + op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA3, msr); + op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + rdmsrl(MSR_AMD64_IBSDCLINAD, msr); + op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); + op_cpu_buffer_add_data(&entry, (u32)msr); + op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + op_cpu_buffer_write_commit(&entry); /* reenable the IRQ */ - oprofile_add_ibs_sample(regs, - (unsigned int *)&ibs_op, - IBS_OP_BEGIN); - rdmsr(MSR_AMD64_IBSOPCTL, low, high); high = 0; low &= ~IBS_OP_LOW_VALID_BIT; low |= IBS_OP_LOW_ENABLE; diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index f9031d31eeb7..d692fdc1a211 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -318,29 +318,18 @@ static void add_trace_begin(void) #ifdef CONFIG_OPROFILE_IBS -#define IBS_FETCH_CODE_SIZE 2 -#define IBS_OP_CODE_SIZE 5 - -/* - * Add IBS fetch and op entries to event buffer - */ -static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) +static void add_data(struct op_entry *entry, struct mm_struct *mm) { - unsigned long pc; - int i, count; - unsigned long cookie = 0; + unsigned long code, pc, val; + unsigned long cookie; off_t offset; - struct op_entry entry; - struct op_sample *sample; - sample = op_cpu_buffer_read_entry(&entry, cpu); - if (!sample) + if (!op_cpu_buffer_get_data(entry, &code)) + return; + if (!op_cpu_buffer_get_data(entry, &pc)) + return; + if (!op_cpu_buffer_get_size(entry)) return; - pc = sample->eip; - -#ifdef __LP64__ - pc += sample->event 
<< 32; -#endif if (mm) { cookie = lookup_dcookie(mm, pc, &offset); @@ -362,24 +351,8 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) add_event_entry(code); add_event_entry(offset); /* Offset from Dcookie */ - /* we send the Dcookie offset, but send the raw Linear Add also*/ - add_event_entry(sample->eip); - add_event_entry(sample->event); - - if (code == IBS_FETCH_CODE) - count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/ - else - count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ - - for (i = 0; i < count; i++) { - sample = op_cpu_buffer_read_entry(&entry, cpu); - if (!sample) - return; - add_event_entry(sample->eip); - add_event_entry(sample->event); - } - - return; + while (op_cpu_buffer_get_data(entry, &val)) + add_event_entry(val); } #endif @@ -572,10 +545,8 @@ void sync_buffer(int cpu) add_user_ctx_switch(new, cookie); } #ifdef CONFIG_OPROFILE_IBS - if (flags & IBS_FETCH_BEGIN) - add_ibs_begin(cpu, IBS_FETCH_CODE, mm); - if (flags & IBS_OP_BEGIN) - add_ibs_begin(cpu, IBS_OP_CODE, mm); + if (op_cpu_buffer_get_size(&entry)) + add_data(&entry, mm); #endif continue; } diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 1b6590746be4..ddba9d01f09b 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -363,31 +363,38 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) #ifdef CONFIG_OPROFILE_IBS -void oprofile_add_ibs_sample(struct pt_regs * const regs, - unsigned int * const ibs_sample, int ibs_code) +/* + * Add samples with data to the ring buffer. + * + * Use op_cpu_buffer_add_data(&entry, val) to add data and + * op_cpu_buffer_write_commit(&entry) to commit the sample. 
+ */ +void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs, + unsigned long pc, int code, int size) { + struct op_sample *sample; int is_kernel = !user_mode(regs); struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); - int fail = 0; cpu_buf->sample_received++; - /* backtraces disabled for ibs */ - fail = fail || op_add_code(cpu_buf, 0, is_kernel, current); + /* no backtraces for samples with data */ + if (op_add_code(cpu_buf, 0, is_kernel, current)) + goto fail; - fail = fail || op_add_sample(cpu_buf, ESCAPE_CODE, ibs_code); - fail = fail || op_add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); - fail = fail || op_add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); - fail = fail || op_add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); + sample = op_cpu_buffer_write_reserve(entry, size + 2); + if (!sample) + goto fail; + sample->eip = ESCAPE_CODE; + sample->event = 0; /* no flags */ - if (ibs_code == IBS_OP_BEGIN) { - fail = fail || op_add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]); - fail = fail || op_add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]); - fail = fail || op_add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); - } + op_cpu_buffer_add_data(entry, code); + op_cpu_buffer_add_data(entry, pc); - if (fail) - cpu_buf->sample_lost_overflow++; + return; + +fail: + cpu_buf->sample_lost_overflow++; } #endif diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index f34376046573..525cc4d13d8d 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -115,7 +115,5 @@ int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val) #define IS_KERNEL (1UL << 1) #define TRACE_BEGIN (1UL << 2) #define USER_CTX_SWITCH (1UL << 3) -#define IBS_FETCH_BEGIN (1UL << 4) -#define IBS_OP_BEGIN (1UL << 5) #endif /* OPROFILE_CPU_BUFFER_H */ From 465634adc1d09b490c8ee31885575be39d375d53 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 Jan 2009 15:32:11 +0100 Subject: [PATCH 25/31] 
ring_buffer: fix ring_buffer_event_length() Function ring_buffer_event_length() provides an interface to detect the length of data stored in an entry. However, the length contains offsets depending on the internal usage. This makes it unusable. This patch fixes this and now ring_buffer_event_length() returns the aligned length that has been used in ring_buffer_lock_reserve(). Cc: Steven Rostedt Signed-off-by: Robert Richter --- kernel/trace/ring_buffer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 30d57dd01a85..d42b882dfe4b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -117,7 +117,13 @@ rb_event_length(struct ring_buffer_event *event) */ unsigned ring_buffer_event_length(struct ring_buffer_event *event) { - return rb_event_length(event); + unsigned length = rb_event_length(event); + if (event->type != RINGBUF_TYPE_DATA) + return length; + length -= RB_EVNT_HDR_SIZE; + if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) + length -= sizeof(event->array[0]); + return length; } EXPORT_SYMBOL_GPL(ring_buffer_event_length); From ebf8d974e298018f0b4ee02b1b097bf5500d3d27 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 Jan 2009 00:20:57 +0100 Subject: [PATCH 26/31] oprofile: remove #ifdef CONFIG_OPROFILE_IBS in non-ibs code The ifdefs can be removed since the code is no longer ibs specific and can be used for other purposes as well. IBS specific code is only in op_model_amd.c.
Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 6 ------ drivers/oprofile/cpu_buffer.c | 4 ---- 2 files changed, 10 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index d692fdc1a211..ac014cb27915 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -316,8 +316,6 @@ static void add_trace_begin(void) add_event_entry(TRACE_BEGIN_CODE); } -#ifdef CONFIG_OPROFILE_IBS - static void add_data(struct op_entry *entry, struct mm_struct *mm) { unsigned long code, pc, val; @@ -355,8 +353,6 @@ static void add_data(struct op_entry *entry, struct mm_struct *mm) add_event_entry(val); } -#endif - static inline void add_sample_entry(unsigned long offset, unsigned long event) { add_event_entry(offset); @@ -544,10 +540,8 @@ void sync_buffer(int cpu) cookie = get_exec_dcookie(mm); add_user_ctx_switch(new, cookie); } -#ifdef CONFIG_OPROFILE_IBS if (op_cpu_buffer_get_size(&entry)) add_data(&entry, mm); -#endif continue; } diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index ddba9d01f09b..b846af632c81 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -361,8 +361,6 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) __oprofile_add_ext_sample(pc, regs, event, is_kernel); } -#ifdef CONFIG_OPROFILE_IBS - /* * Add samples with data to the ring buffer. * @@ -397,8 +395,6 @@ fail: cpu_buf->sample_lost_overflow++; } -#endif - void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); From 14f0ca8eaea42a5b5a69cfcb699665dd2618db5f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 Jan 2009 21:50:22 +0100 Subject: [PATCH 27/31] oprofile: make new cpu buffer functions part of the api This patch creates the new functions oprofile_write_reserve() oprofile_add_data() oprofile_write_commit() and makes them part of the oprofile api. 
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 57 ++++++++++++++------------------ drivers/oprofile/cpu_buffer.c | 17 ++++++++-- drivers/oprofile/cpu_buffer.h | 8 +---- include/linux/oprofile.h | 18 ++++++++++ 4 files changed, 57 insertions(+), 43 deletions(-) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index cf310aeb462c..8fdf06e4edf9 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -22,7 +22,6 @@ #include "op_x86_model.h" #include "op_counter.h" -#include "../../../drivers/oprofile/cpu_buffer.h" #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 @@ -61,14 +60,6 @@ static unsigned long reset_value[NUM_COUNTERS]; #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ -/* - * The function interface needs to be fixed, something like add - * data. Should then be added to linux/oprofile.h. - */ -extern -void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs, - unsigned long pc, int code, int size); - #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 @@ -174,16 +165,16 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); if (high & IBS_FETCH_HIGH_VALID_BIT) { rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); - oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE, - IBS_FETCH_SIZE); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); - op_cpu_buffer_add_data(&entry, low); - op_cpu_buffer_add_data(&entry, high); + oprofile_write_reserve(&entry, regs, msr, + IBS_FETCH_CODE, IBS_FETCH_SIZE); + oprofile_add_data(&entry, (u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, low); + oprofile_add_data(&entry, high); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); - op_cpu_buffer_write_commit(&entry); + oprofile_add_data(&entry, 
(u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_write_commit(&entry); /* reenable the IRQ */ high &= ~IBS_FETCH_HIGH_VALID_BIT; @@ -197,26 +188,26 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsr(MSR_AMD64_IBSOPCTL, low, high); if (low & IBS_OP_LOW_VALID_BIT) { rdmsrl(MSR_AMD64_IBSOPRIP, msr); - oprofile_add_data(&entry, regs, msr, IBS_OP_CODE, - IBS_OP_SIZE); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + oprofile_write_reserve(&entry, regs, msr, + IBS_OP_CODE, IBS_OP_SIZE); + oprofile_add_data(&entry, (u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA, msr); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA2, msr); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA3, msr); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSDCLINAD, msr); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); - op_cpu_buffer_add_data(&entry, (u32)msr); - op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); - op_cpu_buffer_write_commit(&entry); + oprofile_add_data(&entry, (u32)msr); + oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_write_commit(&entry); /* reenable the IRQ */ high = 0; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index b846af632c81..2e03b6d796d3 100644 --- a/drivers/oprofile/cpu_buffer.c +++ 
b/drivers/oprofile/cpu_buffer.c @@ -364,10 +364,11 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) /* * Add samples with data to the ring buffer. * - * Use op_cpu_buffer_add_data(&entry, val) to add data and - * op_cpu_buffer_write_commit(&entry) to commit the sample. + * Use oprofile_add_data(&entry, val) to add data and + * oprofile_write_commit(&entry) to commit the sample. */ -void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs, +void +oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, unsigned long pc, int code, int size) { struct op_sample *sample; @@ -395,6 +396,16 @@ fail: cpu_buf->sample_lost_overflow++; } +int oprofile_add_data(struct op_entry *entry, unsigned long val) +{ + return op_cpu_buffer_add_data(entry, val); +} + +int oprofile_write_commit(struct op_entry *entry) +{ + return op_cpu_buffer_write_commit(entry); +} + void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 525cc4d13d8d..63f81c44846a 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -35,13 +35,7 @@ struct op_sample { unsigned long data[0]; }; -struct op_entry { - struct ring_buffer_event *event; - struct op_sample *sample; - unsigned long irq_flags; - unsigned long size; - unsigned long *data; -}; +struct op_entry; struct oprofile_cpu_buffer { unsigned long buffer_size; diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1ce9fe572e51..1d9518bc4c58 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -164,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start, unsigned long oprofile_get_cpu_buffer_size(void); void oprofile_cpu_buffer_inc_smpl_lost(void); +/* cpu buffer functions */ + +struct op_sample; + +struct op_entry { + struct ring_buffer_event 
*event; + struct op_sample *sample; + unsigned long irq_flags; + unsigned long size; + unsigned long *data; +}; + +void oprofile_write_reserve(struct op_entry *entry, + struct pt_regs * const regs, + unsigned long pc, int code, int size); +int oprofile_add_data(struct op_entry *entry, unsigned long val); +int oprofile_write_commit(struct op_entry *entry); + #endif /* OPROFILE_H */ From 9b93418e7ee59dbc96d44cfde7f65f886e54dba9 Mon Sep 17 00:00:00 2001 From: Carl Love Date: Mon, 1 Dec 2008 16:18:34 -0800 Subject: [PATCH 28/31] powerpc/oprofile: IBM CELL: cleanup and restructuring This patch restructures and cleans up the code a bit to make it easier to add new functionality later. The patch makes no functional changes to the existing code. Signed-off-by: Carl Love Signed-off-by: Robert Richter --- arch/powerpc/oprofile/cell/spu_profiler.c | 24 +- arch/powerpc/oprofile/op_model_cell.c | 320 ++++++++++++---------- 2 files changed, 191 insertions(+), 153 deletions(-) diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index dd499c3e9da7..8b1b9ccaff9f 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -31,8 +31,8 @@ static unsigned int profiling_interval; #define SPU_PC_MASK 0xFFFF -static DEFINE_SPINLOCK(sample_array_lock); -unsigned long sample_array_lock_flags; +static DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); +unsigned long oprof_spu_smpl_arry_lck_flags; void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) { @@ -145,13 +145,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer) * sample array must be loaded and then processed for a given * cpu. The sample array is not per cpu. 
*/ - spin_lock_irqsave(&sample_array_lock, - sample_array_lock_flags); + spin_lock_irqsave(&oprof_spu_smpl_arry_lck, + oprof_spu_smpl_arry_lck_flags); num_samples = cell_spu_pc_collection(cpu); if (num_samples == 0) { - spin_unlock_irqrestore(&sample_array_lock, - sample_array_lock_flags); + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, + oprof_spu_smpl_arry_lck_flags); continue; } @@ -162,8 +162,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer) num_samples); } - spin_unlock_irqrestore(&sample_array_lock, - sample_array_lock_flags); + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, + oprof_spu_smpl_arry_lck_flags); } smp_wmb(); /* insure spu event buffer updates are written */ @@ -182,13 +182,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer) static struct hrtimer timer; /* - * Entry point for SPU profiling. + * Entry point for SPU cycle profiling. * NOTE: SPU profiling is done system-wide, not per-CPU. * * cycles_reset is the count value specified by the user when * setting up OProfile to count SPU_CYCLES. */ -int start_spu_profiling(unsigned int cycles_reset) +int start_spu_profiling_cycles(unsigned int cycles_reset) { ktime_t kt; @@ -212,10 +212,10 @@ int start_spu_profiling(unsigned int cycles_reset) return 0; } -void stop_spu_profiling(void) +void stop_spu_profiling_cycles(void) { spu_prof_running = 0; hrtimer_cancel(&timer); kfree(samples); - pr_debug("SPU_PROF: stop_spu_profiling issued\n"); + pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n"); } diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 25a4ec2514a3..ad7f32c848f8 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -40,14 +40,9 @@ #include "../platforms/cell/interrupt.h" #include "cell/pr_util.h" -static void cell_global_stop_spu(void); - -/* - * spu_cycle_reset is the number of cycles between samples. 
- * This variable is used for SPU profiling and should ONLY be set - * at the beginning of cell_reg_setup; otherwise, it's read-only. - */ -static unsigned int spu_cycle_reset; +#define PPU_PROFILING 0 +#define SPU_PROFILING_CYCLES 1 +#define SPU_PROFILING_EVENTS 2 #define NUM_SPUS_PER_NODE 8 #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ @@ -66,6 +61,14 @@ static unsigned int spu_cycle_reset; #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ +/* + * spu_cycle_reset is the number of cycles between samples. + * This variable is used for SPU profiling and should ONLY be set + * at the beginning of cell_reg_setup; otherwise, it's read-only. + */ +static unsigned int spu_cycle_reset; +static unsigned int profiling_mode; + struct pmc_cntrl_data { unsigned long vcntr; unsigned long evnts; @@ -122,7 +125,6 @@ static struct { #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); - static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; /* @@ -165,7 +167,7 @@ static int spu_rtas_token; /* token for SPU cycle profiling */ static u32 reset_value[NR_PHYS_CTRS]; static int num_counters; static int oprofile_running; -static DEFINE_SPINLOCK(virt_cntr_lock); +static DEFINE_SPINLOCK(cntr_lock); static u32 ctr_enabled; @@ -367,7 +369,7 @@ static void write_pm_cntrl(int cpu) if (pm_regs.pm_cntrl.stop_at_max == 1) val |= CBE_PM_STOP_AT_MAX; - if (pm_regs.pm_cntrl.trace_mode == 1) + if (pm_regs.pm_cntrl.trace_mode != 0) val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); if (pm_regs.pm_cntrl.freeze == 1) @@ -441,7 +443,7 @@ static void cell_virtual_cntr(unsigned long data) * not both playing with the counters on the same node. 
*/ - spin_lock_irqsave(&virt_cntr_lock, flags); + spin_lock_irqsave(&cntr_lock, flags); prev_hdw_thread = hdw_thread; @@ -527,7 +529,7 @@ static void cell_virtual_cntr(unsigned long data) cbe_enable_pm(cpu); } - spin_unlock_irqrestore(&virt_cntr_lock, flags); + spin_unlock_irqrestore(&cntr_lock, flags); mod_timer(&timer_virt_cntr, jiffies + HZ / 10); } @@ -541,44 +543,30 @@ static void start_virt_cntrs(void) add_timer(&timer_virt_cntr); } -/* This function is called once for all cpus combined */ -static int cell_reg_setup(struct op_counter_config *ctr, +static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { - int i, j, cpu; - spu_cycle_reset = 0; - - if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { - spu_cycle_reset = ctr[0].count; - - /* - * Each node will need to make the rtas call to start - * and stop SPU profiling. Get the token once and store it. - */ - spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); - - if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { - printk(KERN_ERR - "%s: rtas token ibm,cbe-spu-perftools unknown\n", - __func__); - return -EIO; - } - } - - pm_rtas_token = rtas_token("ibm,cbe-perftools"); + spu_cycle_reset = ctr[0].count; /* - * For all events excetp PPU CYCLEs, each node will need to make - * the rtas cbe-perftools call to setup and reset the debug bus. - * Make the token lookup call once and store it in the global - * variable pm_rtas_token. + * Each node will need to make the rtas call to start + * and stop SPU profiling. Get the token once and store it. 
*/ - if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { + spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); + + if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { printk(KERN_ERR - "%s: rtas token ibm,cbe-perftools unknown\n", + "%s: rtas token ibm,cbe-spu-perftools unknown\n", __func__); return -EIO; } + return 0; +} + +static int cell_reg_setup_ppu(struct op_counter_config *ctr, + struct op_system_config *sys, int num_ctrs) +{ + int i, j, cpu; num_counters = num_ctrs; @@ -665,6 +653,41 @@ static int cell_reg_setup(struct op_counter_config *ctr, } +/* This function is called once for all cpus combined */ +static int cell_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, int num_ctrs) +{ + int ret; + + spu_cycle_reset = 0; + + /* + * For all events except PPU CYCLEs, each node will need to make + * the rtas cbe-perftools call to setup and reset the debug bus. + * Make the token lookup call once and store it in the global + * variable pm_rtas_token. + */ + pm_rtas_token = rtas_token("ibm,cbe-perftools"); + + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { + printk(KERN_ERR + "%s: rtas token ibm,cbe-perftools unknown\n", + __func__); + return -EIO; + } + + if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { + profiling_mode = SPU_PROFILING_CYCLES; + ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); + } else { + profiling_mode = PPU_PROFILING; + ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); + } + + return ret; +} + + /* This function is called once for each cpu */ static int cell_cpu_setup(struct op_counter_config *cntr) @@ -673,7 +696,11 @@ static int cell_cpu_setup(struct op_counter_config *cntr) u32 num_enabled = 0; int i; - if (spu_cycle_reset) + /* Cycle based SPU profiling does not use the performance + * counters. The trace array is configured to collect + * the data. + */ + if (profiling_mode == SPU_PROFILING_CYCLES) return 0; /* There is one performance monitor per processor chip (i.e. 
node), @@ -686,7 +713,6 @@ static int cell_cpu_setup(struct op_counter_config *cntr) cbe_disable_pm(cpu); cbe_disable_pm_interrupts(cpu); - cbe_write_pm(cpu, pm_interval, 0); cbe_write_pm(cpu, pm_start_stop, 0); cbe_write_pm(cpu, group_control, pm_regs.group_control); cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); @@ -885,7 +911,94 @@ static struct notifier_block cpu_freq_notifier_block = { }; #endif -static int cell_global_start_spu(struct op_counter_config *ctr) +/* + * Note the generic OProfile stop calls do not support returning + * an error on stop. Hence, will not return an error if the FW + * calls fail on stop. Failure to reset the debug bus is not an issue. + * Failure to disable the SPU profiling is not an issue. The FW calls + * to enable the performance counters and debug bus will work even if + * the hardware was not cleanly reset. + */ +static void cell_global_stop_spu_cycles(void) +{ + int subfunc, rtn_value; + unsigned int lfsr_value; + int cpu; + + oprofile_running = 0; + +#ifdef CONFIG_CPU_FREQ + cpufreq_unregister_notifier(&cpu_freq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); +#endif + + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + subfunc = 3; /* + * 2 - activate SPU tracing, + * 3 - deactivate + */ + lfsr_value = 0x8f100000; + + rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, + subfunc, cbe_cpu_to_node(cpu), + lfsr_value); + + if (unlikely(rtn_value != 0)) { + printk(KERN_ERR + "%s: rtas call ibm,cbe-spu-perftools " \ + "failed, return = %d\n", + __func__, rtn_value); + } + + /* Deactivate the signals */ + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); + } + + if (profiling_mode == SPU_PROFILING_CYCLES) + stop_spu_profiling_cycles(); +} + +static void cell_global_stop_ppu(void) +{ + int cpu; + + /* + * This routine will be called once for the system. + * There is one performance monitor per node, so we + * only need to perform this function once per node. 
+ */ + del_timer_sync(&timer_virt_cntr); + oprofile_running = 0; + smp_wmb(); + + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + cbe_sync_irq(cbe_cpu_to_node(cpu)); + /* Stop the counters */ + cbe_disable_pm(cpu); + + /* Deactivate the signals */ + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); + + /* Deactivate interrupts */ + cbe_disable_pm_interrupts(cpu); + } +} + +static void cell_global_stop(void) +{ + if (profiling_mode == PPU_PROFILING) + cell_global_stop_ppu(); + else + cell_global_stop_spu_cycles(); +} + +static int cell_global_start_spu_cycles(struct op_counter_config *ctr) { int subfunc; unsigned int lfsr_value; @@ -955,14 +1068,14 @@ static int cell_global_start_spu(struct op_counter_config *ctr) if (unlikely(ret != 0)) { printk(KERN_ERR - "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", - __func__, ret); + "%s: rtas call ibm,cbe-spu-perftools failed, " \ + "return = %d\n", __func__, ret); rtas_error = -EIO; goto out; } } - rtas_error = start_spu_profiling(spu_cycle_reset); + rtas_error = start_spu_profiling_cycles(spu_cycle_reset); if (rtas_error) goto out_stop; @@ -970,7 +1083,7 @@ static int cell_global_start_spu(struct op_counter_config *ctr) return 0; out_stop: - cell_global_stop_spu(); /* clean up the PMU/debug bus */ + cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */ out: return rtas_error; } @@ -1024,99 +1137,15 @@ static int cell_global_start_ppu(struct op_counter_config *ctr) static int cell_global_start(struct op_counter_config *ctr) { - if (spu_cycle_reset) - return cell_global_start_spu(ctr); + if (profiling_mode == SPU_PROFILING_CYCLES) + return cell_global_start_spu_cycles(ctr); else return cell_global_start_ppu(ctr); } -/* - * Note the generic OProfile stop calls do not support returning - * an error on stop. Hence, will not return an error if the FW - * calls fail on stop. Failure to reset the debug bus is not an issue. - * Failure to disable the SPU profiling is not an issue. 
The FW calls - * to enable the performance counters and debug bus will work even if - * the hardware was not cleanly reset. - */ -static void cell_global_stop_spu(void) -{ - int subfunc, rtn_value; - unsigned int lfsr_value; - int cpu; - oprofile_running = 0; - -#ifdef CONFIG_CPU_FREQ - cpufreq_unregister_notifier(&cpu_freq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); -#endif - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - subfunc = 3; /* - * 2 - activate SPU tracing, - * 3 - deactivate - */ - lfsr_value = 0x8f100000; - - rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, - subfunc, cbe_cpu_to_node(cpu), - lfsr_value); - - if (unlikely(rtn_value != 0)) { - printk(KERN_ERR - "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", - __func__, rtn_value); - } - - /* Deactivate the signals */ - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); - } - - stop_spu_profiling(); -} - -static void cell_global_stop_ppu(void) -{ - int cpu; - - /* - * This routine will be called once for the system. - * There is one performance monitor per node, so we - * only need to perform this function once per node. - */ - del_timer_sync(&timer_virt_cntr); - oprofile_running = 0; - smp_wmb(); - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - cbe_sync_irq(cbe_cpu_to_node(cpu)); - /* Stop the counters */ - cbe_disable_pm(cpu); - - /* Deactivate the signals */ - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); - - /* Deactivate interrupts */ - cbe_disable_pm_interrupts(cpu); - } -} - -static void cell_global_stop(void) -{ - if (spu_cycle_reset) - cell_global_stop_spu(); - else - cell_global_stop_ppu(); -} - -static void cell_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) +static void cell_handle_interrupt_ppu(struct pt_regs *regs, + struct op_counter_config *ctr) { u32 cpu; u64 pc; @@ -1132,7 +1161,7 @@ static void cell_handle_interrupt(struct pt_regs *regs, * routine are not running at the same time. 
See the * cell_virtual_cntr() routine for additional comments. */ - spin_lock_irqsave(&virt_cntr_lock, flags); + spin_lock_irqsave(&cntr_lock, flags); /* * Need to disable and reenable the performance counters @@ -1185,7 +1214,14 @@ static void cell_handle_interrupt(struct pt_regs *regs, */ cbe_enable_pm(cpu); } - spin_unlock_irqrestore(&virt_cntr_lock, flags); + spin_unlock_irqrestore(&cntr_lock, flags); +} + +static void cell_handle_interrupt(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + if (profiling_mode == PPU_PROFILING) + cell_handle_interrupt_ppu(regs, ctr); } /* @@ -1195,7 +1231,8 @@ static void cell_handle_interrupt(struct pt_regs *regs, */ static int cell_sync_start(void) { - if (spu_cycle_reset) + if ((profiling_mode == SPU_PROFILING_CYCLES) || + (profiling_mode == SPU_PROFILING_EVENTS)) return spu_sync_start(); else return DO_GENERIC_SYNC; @@ -1203,7 +1240,8 @@ static int cell_sync_start(void) static int cell_sync_stop(void) { - if (spu_cycle_reset) + if ((profiling_mode == SPU_PROFILING_CYCLES) || + (profiling_mode == SPU_PROFILING_EVENTS)) return spu_sync_stop(); else return 1; From 014cef91ecef9d5e85f9c98a2efbf8a8c4710510 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 8 Jan 2009 15:29:47 +0100 Subject: [PATCH 29/31] powerpc/oprofile: fix cell/pr_util.h Signed-off-by: Robert Richter --- arch/powerpc/oprofile/cell/pr_util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h index 628009c01958..bca7207bd92a 100644 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -89,9 +89,9 @@ void vma_map_free(struct vma_to_fileoffset_map *map); * Entry point for SPU profiling. * cycles_reset is the SPU_CYCLES count value specified by the user. 
*/ -int start_spu_profiling(unsigned int cycles_reset); +int start_spu_profiling_cycles(unsigned int cycles_reset); -void stop_spu_profiling(void); +void stop_spu_profiling_cycles(void); /* add the necessary profiling hooks */ From 883823291d22e06736f1056da6d8303291d6bbf9 Mon Sep 17 00:00:00 2001 From: Carl Love Date: Mon, 1 Dec 2008 16:18:36 -0800 Subject: [PATCH 30/31] powerpc/oprofile: IBM CELL: add SPU event profiling support This patch adds the SPU event based profiling functionality for the IBM Cell processor. Previously, the CELL OProfile kernel code supported PPU event, PPU cycle profiling and SPU cycle profiling. The addition of SPU event profiling allows the users to identify where in their SPU code various SPU events are occurring. This should help users further identify issues with their code. Note, SPU profiling has some limitations due to HW constraints. Only one event at a time can be used for profiling and SPU event profiling must be time sliced across all of the SPUs in a node. The patch adds a new arch specific file to the OProfile file system. The file has bit 0 set to indicate that the kernel supports SPU event profiling. The user tool must check this file/bit to make sure the kernel supports SPU event profiling before trying to do SPU event profiling. The user tool check is part of the user tool patch for SPU event profiling.
Signed-off-by: Carl Love Signed-off-by: Robert Richter --- arch/powerpc/include/asm/cell-pmu.h | 2 + arch/powerpc/include/asm/oprofile_impl.h | 6 + arch/powerpc/oprofile/cell/pr_util.h | 7 +- arch/powerpc/oprofile/cell/spu_profiler.c | 34 +- arch/powerpc/oprofile/common.c | 22 + arch/powerpc/oprofile/op_model_cell.c | 490 +++++++++++++++++++++- 6 files changed, 545 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h index 8066eede3a0c..b4b7338ad79e 100644 --- a/arch/powerpc/include/asm/cell-pmu.h +++ b/arch/powerpc/include/asm/cell-pmu.h @@ -37,9 +37,11 @@ #define CBE_PM_STOP_AT_MAX 0x40000000 #define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3) #define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28) +#define CBE_PM_TRACE_BUF_OVFLW(bit) (((bit) & 0x1) << 17) #define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18) #define CBE_PM_FREEZE_ALL_CTRS 0x00100000 #define CBE_PM_ENABLE_EXT_TRACE 0x00008000 +#define CBE_PM_SPU_ADDR_TRACE_SET(msk) (((msk) & 0x3) << 9) /* Macros for the trace_address register. */ #define CBE_PM_TRACE_BUF_FULL 0x00000800 diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h index 95035c602ba6..639dc96077ab 100644 --- a/arch/powerpc/include/asm/oprofile_impl.h +++ b/arch/powerpc/include/asm/oprofile_impl.h @@ -32,6 +32,12 @@ struct op_system_config { unsigned long mmcr0; unsigned long mmcr1; unsigned long mmcra; +#ifdef CONFIG_OPROFILE_CELL + /* Register for oprofile user tool to check cell kernel profiling + * suport. 
+ */ + unsigned long cell_support; +#endif #endif unsigned long enable_kernel; unsigned long enable_user; diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h index bca7207bd92a..a048b0b72be3 100644 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -30,6 +30,10 @@ extern struct delayed_work spu_work; extern int spu_prof_running; +#define TRACE_ARRAY_SIZE 1024 + +extern spinlock_t oprof_spu_smpl_arry_lck; + struct spu_overlay_info { /* map of sections within an SPU overlay */ unsigned int vma; /* SPU virtual memory address from elf */ unsigned int size; /* size of section from elf */ @@ -90,9 +94,10 @@ void vma_map_free(struct vma_to_fileoffset_map *map); * cycles_reset is the SPU_CYCLES count value specified by the user. */ int start_spu_profiling_cycles(unsigned int cycles_reset); +void start_spu_profiling_events(void); void stop_spu_profiling_cycles(void); - +void stop_spu_profiling_events(void); /* add the necessary profiling hooks */ int spu_sync_start(void); diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index 8b1b9ccaff9f..de170b7ae71b 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -18,11 +18,21 @@ #include #include "pr_util.h" -#define TRACE_ARRAY_SIZE 1024 #define SCALE_SHIFT 14 static u32 *samples; +/* spu_prof_running is a flag used to indicate if spu profiling is enabled + * or not. It is set by the routines start_spu_profiling_cycles() and + * start_spu_profiling_events(). The flag is cleared by the routines + * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These + * routines are called via global_start() and global_stop() which are called in + * op_powerpc_start() and op_powerpc_stop(). These routines are called once + * per system as a result of the user starting/stopping oprofile. 
Hence, only + * one CPU per user at a time will be changing the value of spu_prof_running. + * In general, OProfile does not protect against multiple users trying to run + * OProfile at a time. + */ int spu_prof_running; static unsigned int profiling_interval; @@ -31,7 +41,7 @@ static unsigned int profiling_interval; #define SPU_PC_MASK 0xFFFF -static DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); +DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); unsigned long oprof_spu_smpl_arry_lck_flags; void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) @@ -212,6 +222,21 @@ int start_spu_profiling_cycles(unsigned int cycles_reset) return 0; } +/* + * Entry point for SPU event profiling. + * NOTE: SPU profiling is done system-wide, not per-CPU. + * + * cycles_reset is the count value specified by the user when + * setting up OProfile to count SPU_CYCLES. + */ +void start_spu_profiling_events(void) +{ + spu_prof_running = 1; + schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); + + return; +} + void stop_spu_profiling_cycles(void) { spu_prof_running = 0; @@ -219,3 +244,8 @@ void stop_spu_profiling_cycles(void) kfree(samples); pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n"); } + +void stop_spu_profiling_events(void) +{ + spu_prof_running = 0; +} diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index 17807acb05d9..21f16edf6c8d 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c @@ -132,6 +132,28 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); +#ifdef CONFIG_OPROFILE_CELL + /* create a file the user tool can check to see what level of profiling + * support exits with this kernel. 
Initialize bit mask to indicate + * what support the kernel has: + * bit 0 - Supports SPU event profiling in addition to PPU + * event and cycles; and SPU cycle profiling + * bits 1-31 - Currently unused. + * + * If the file does not exist, then the kernel only supports SPU + * cycle profiling, PPU event and cycle profiling. + */ + oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support); + sys.cell_support = 0x1; /* Note, the user OProfile tool must check + * that this bit is set before attempting to + * user SPU event profiling. Older kernels + * will not have this file, hence the user + * tool is not allowed to do SPU event + * profiling on older kernels. Older kernels + * will accept SPU events but collected data + * is garbage. + */ +#endif #endif for (i = 0; i < model->num_counters; ++i) { diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index ad7f32c848f8..ff96cbfb89bb 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -44,6 +44,12 @@ #define SPU_PROFILING_CYCLES 1 #define SPU_PROFILING_EVENTS 2 +#define SPU_EVENT_NUM_START 4100 +#define SPU_EVENT_NUM_STOP 4399 +#define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */ +#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */ +#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */ + #define NUM_SPUS_PER_NODE 8 #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ @@ -61,6 +67,12 @@ #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ +/* Minumum HW interval timer setting to send value to trace buffer is 10 cycle. + * To configure counter to send value every N cycles set counter to + * 2^32 - 1 - N. + */ +#define NUM_INTERVAL_CYC 0xFFFFFFFF - 10 + /* * spu_cycle_reset is the number of cycles between samples. 
* This variable is used for SPU profiling and should ONLY be set @@ -68,6 +80,7 @@ */ static unsigned int spu_cycle_reset; static unsigned int profiling_mode; +static int spu_evnt_phys_spu_indx; struct pmc_cntrl_data { unsigned long vcntr; @@ -108,6 +121,8 @@ struct pm_cntrl { u16 trace_mode; u16 freeze; u16 count_mode; + u16 spu_addr_trace; + u8 trace_buf_ovflw; }; static struct { @@ -125,6 +140,7 @@ static struct { #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); +static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE]; static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; /* @@ -154,6 +170,7 @@ static u32 hdw_thread; static u32 virt_cntr_inter_mask; static struct timer_list timer_virt_cntr; +static struct timer_list timer_spu_event_swap; /* * pm_signal needs to be global since it is initialized in @@ -372,9 +389,13 @@ static void write_pm_cntrl(int cpu) if (pm_regs.pm_cntrl.trace_mode != 0) val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); + if (pm_regs.pm_cntrl.trace_buf_ovflw == 1) + val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw); if (pm_regs.pm_cntrl.freeze == 1) val |= CBE_PM_FREEZE_ALL_CTRS; + val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace); + /* * Routine set_count_mode must be called previously to set * the count mode based on the user selection of user and kernel. @@ -563,9 +584,184 @@ static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, return 0; } +/* Unfortunately, the hardware will only support event profiling + * on one SPU per node at a time. Therefore, we must time slice + * the profiling across all SPUs in the node. Note, we do this + * in parallel for each node. The following routine is called + * periodically based on kernel timer to switch which SPU is + * being monitored in a round robbin fashion. 
+ */ +static void spu_evnt_swap(unsigned long data) +{ + int node; + int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx; + unsigned long flags; + int cpu; + int ret; + u32 interrupt_mask; + + + /* enable interrupts on cntr 0 */ + interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0); + + hdw_thread = 0; + + /* Make sure spu event interrupt handler and spu event swap + * don't access the counters simultaneously. + */ + spin_lock_irqsave(&cntr_lock, flags); + + cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx; + + if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE) + spu_evnt_phys_spu_indx = 0; + + pm_signal[0].sub_unit = spu_evnt_phys_spu_indx; + pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; + pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; + + /* switch the SPU being profiled on each node */ + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + node = cbe_cpu_to_node(cpu); + cur_phys_spu = (node * NUM_SPUS_PER_NODE) + + cur_spu_evnt_phys_spu_indx; + nxt_phys_spu = (node * NUM_SPUS_PER_NODE) + + spu_evnt_phys_spu_indx; + + /* + * stop counters, save counter values, restore counts + * for previous physical SPU + */ + cbe_disable_pm(cpu); + cbe_disable_pm_interrupts(cpu); + + spu_pm_cnt[cur_phys_spu] + = cbe_read_ctr(cpu, 0); + + /* restore previous count for the next spu to sample */ + /* NOTE, hardware issue, counter will not start if the + * counter value is at max (0xFFFFFFFF). 
+ */ + if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF) + cbe_write_ctr(cpu, 0, 0xFFFFFFF0); + else + cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]); + + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); + + /* setup the debug bus measure the one event and + * the two events to route the next SPU's PC on + * the debug bus + */ + ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3); + if (ret) + printk(KERN_ERR + "%s: pm_rtas_activate_signals failed, SPU event swap\n", + __func__); + + /* clear the trace buffer, don't want to take PC for + * previous SPU*/ + cbe_write_pm(cpu, trace_address, 0); + + enable_ctr(cpu, 0, pm_regs.pm07_cntrl); + + /* Enable interrupts on the CPU thread that is starting */ + cbe_enable_pm_interrupts(cpu, hdw_thread, + interrupt_mask); + cbe_enable_pm(cpu); + } + + spin_unlock_irqrestore(&cntr_lock, flags); + + /* swap approximately every 0.1 seconds */ + mod_timer(&timer_spu_event_swap, jiffies + HZ / 25); +} + +static void start_spu_event_swap(void) +{ + init_timer(&timer_spu_event_swap); + timer_spu_event_swap.function = spu_evnt_swap; + timer_spu_event_swap.data = 0UL; + timer_spu_event_swap.expires = jiffies + HZ / 25; + add_timer(&timer_spu_event_swap); +} + +static int cell_reg_setup_spu_events(struct op_counter_config *ctr, + struct op_system_config *sys, int num_ctrs) +{ + int i; + + /* routine is called once for all nodes */ + + spu_evnt_phys_spu_indx = 0; + /* + * For all events except PPU CYCLEs, each node will need to make + * the rtas cbe-perftools call to setup and reset the debug bus. + * Make the token lookup call once and store it in the global + * variable pm_rtas_token. + */ + pm_rtas_token = rtas_token("ibm,cbe-perftools"); + + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { + printk(KERN_ERR + "%s: rtas token ibm,cbe-perftools unknown\n", + __func__); + return -EIO; + } + + /* setup the pm_control register settings, + * settings will be written per node by the + * cell_cpu_setup() function. 
+ */ + pm_regs.pm_cntrl.trace_buf_ovflw = 1; + + /* Use the occurrence trace mode to have SPU PC saved + * to the trace buffer. Occurrence data in trace buffer + * is not used. Bit 2 must be set to store SPU addresses. + */ + pm_regs.pm_cntrl.trace_mode = 2; + + pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus + event 2 & 3 */ + + /* setup the debug bus event array with the SPU PC routing events. + * Note, pm_signal[0] will be filled in by set_pm_event() call below. + */ + pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100; + pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A); + pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100; + pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; + + pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100; + pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B); + pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100; + pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; + + /* Set the user selected spu event to profile on, + * note, only one SPU profiling event is supported + */ + num_counters = 1; /* Only support one SPU event at a time */ + set_pm_event(0, ctr[0].event, ctr[0].unit_mask); + + reset_value[0] = 0xFFFFFFFF - ctr[0].count; + + /* global, used by cell_cpu_setup */ + ctr_enabled |= 1; + + /* Initialize the count for each SPU to the reset value */ + for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++) + spu_pm_cnt[i] = reset_value[0]; + + return 0; +} + static int cell_reg_setup_ppu(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { + /* routine is called once for all nodes */ int i, j, cpu; num_counters = num_ctrs; @@ -577,14 +773,6 @@ static int cell_reg_setup_ppu(struct op_counter_config *ctr, __func__); return -EIO; } - pm_regs.group_control = 0; - pm_regs.debug_bus_control = 0; - - /* setup the pm_control register */ - memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl)); - pm_regs.pm_cntrl.stop_at_max = 1; - pm_regs.pm_cntrl.trace_mode = 0; - 
pm_regs.pm_cntrl.freeze = 1; set_count_mode(sys->enable_kernel, sys->enable_user); @@ -657,10 +845,20 @@ static int cell_reg_setup_ppu(struct op_counter_config *ctr, static int cell_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { - int ret; - + int ret=0; spu_cycle_reset = 0; + /* initialize the spu_arr_trace value, will be reset if + * doing spu event profiling. + */ + pm_regs.group_control = 0; + pm_regs.debug_bus_control = 0; + pm_regs.pm_cntrl.stop_at_max = 1; + pm_regs.pm_cntrl.trace_mode = 0; + pm_regs.pm_cntrl.freeze = 1; + pm_regs.pm_cntrl.trace_buf_ovflw = 0; + pm_regs.pm_cntrl.spu_addr_trace = 0; + /* * For all events except PPU CYCLEs, each node will need to make * the rtas cbe-perftools call to setup and reset the debug bus. @@ -679,6 +877,18 @@ static int cell_reg_setup(struct op_counter_config *ctr, if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { profiling_mode = SPU_PROFILING_CYCLES; ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); + } else if ((ctr[0].event >= SPU_EVENT_NUM_START) && + (ctr[0].event <= SPU_EVENT_NUM_STOP)) { + profiling_mode = SPU_PROFILING_EVENTS; + spu_cycle_reset = ctr[0].count; + + /* for SPU event profiling, need to setup the + * pm_signal array with the events to route the + * SPU PC before making the FW call. Note, only + * one SPU event for profiling can be specified + * at a time. + */ + cell_reg_setup_spu_events(ctr, sys, num_ctrs); } else { profiling_mode = PPU_PROFILING; ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); @@ -695,6 +905,7 @@ static int cell_cpu_setup(struct op_counter_config *cntr) u32 cpu = smp_processor_id(); u32 num_enabled = 0; int i; + int ret; /* Cycle based SPU profiling does not use the performance * counters. The trace array is configured to collect @@ -729,7 +940,20 @@ static int cell_cpu_setup(struct op_counter_config *cntr) * The pm_rtas_activate_signals will return -EIO if the FW * call failed. 
*/ - return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); + if (profiling_mode == SPU_PROFILING_EVENTS) { + /* For SPU event profiling also need to setup the + * pm interval timer + */ + ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), + num_enabled+2); + /* store PC from debug bus to Trace buffer as often + * as possible (every 10 cycles) + */ + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); + return ret; + } else + return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), + num_enabled); } #define ENTRIES 303 @@ -926,6 +1150,7 @@ static void cell_global_stop_spu_cycles(void) int cpu; oprofile_running = 0; + smp_wmb(); #ifdef CONFIG_CPU_FREQ cpufreq_unregister_notifier(&cpu_freq_notifier_block, @@ -957,8 +1182,33 @@ static void cell_global_stop_spu_cycles(void) pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); } - if (profiling_mode == SPU_PROFILING_CYCLES) - stop_spu_profiling_cycles(); + stop_spu_profiling_cycles(); +} + +static void cell_global_stop_spu_events(void) +{ + int cpu; + oprofile_running = 0; + + stop_spu_profiling_events(); + smp_wmb(); + + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + cbe_sync_irq(cbe_cpu_to_node(cpu)); + /* Stop the counters */ + cbe_disable_pm(cpu); + cbe_write_pm07_control(cpu, 0, 0); + + /* Deactivate the signals */ + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); + + /* Deactivate interrupts */ + cbe_disable_pm_interrupts(cpu); + } + del_timer_sync(&timer_spu_event_swap); } static void cell_global_stop_ppu(void) @@ -994,6 +1244,8 @@ static void cell_global_stop(void) { if (profiling_mode == PPU_PROFILING) cell_global_stop_ppu(); + else if (profiling_mode == SPU_PROFILING_EVENTS) + cell_global_stop_spu_events(); else cell_global_stop_spu_cycles(); } @@ -1088,6 +1340,69 @@ out: return rtas_error; } +static int cell_global_start_spu_events(struct op_counter_config *ctr) +{ + int cpu; + u32 interrupt_mask = 0; + int rtn = 0; + + hdw_thread = 0; + + /* spu event profiling, uses the 
performance counters to generate + * an interrupt. The hardware is setup to store the SPU program + * counter into the trace array. The occurrence mode is used to + * enable storing data to the trace buffer. The bits are set + * to send/store the SPU address in the trace buffer. The debug + * bus must be setup to route the SPU program counter onto the + * debug bus. The occurrence data in the trace buffer is not used. + */ + + /* This routine gets called once for the system. + * There is one performance monitor per node, so we + * only need to perform this function once per node. + */ + + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + /* + * Setup SPU event-based profiling. + * Set perf_mon_control bit 0 to a zero before + * enabling spu collection hardware. + * + * Only support one SPU event on one SPU per node. + */ + if (ctr_enabled & 1) { + cbe_write_ctr(cpu, 0, reset_value[0]); + enable_ctr(cpu, 0, pm_regs.pm07_cntrl); + interrupt_mask |= + CBE_PM_CTR_OVERFLOW_INTR(0); + } else { + /* Disable counter */ + cbe_write_pm07_control(cpu, 0, 0); + } + + cbe_get_and_clear_pm_interrupts(cpu); + cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); + cbe_enable_pm(cpu); + + /* clear the trace buffer */ + cbe_write_pm(cpu, trace_address, 0); + } + + /* Start the timer to time slice collecting the event profile + * on each of the SPUs. Note, can collect profile on one SPU + * per node at a time. 
+ */ + start_spu_event_swap(); + start_spu_profiling_events(); + oprofile_running = 1; + smp_wmb(); + + return rtn; +} + static int cell_global_start_ppu(struct op_counter_config *ctr) { u32 cpu, i; @@ -1139,11 +1454,158 @@ static int cell_global_start(struct op_counter_config *ctr) { if (profiling_mode == SPU_PROFILING_CYCLES) return cell_global_start_spu_cycles(ctr); + else if (profiling_mode == SPU_PROFILING_EVENTS) + return cell_global_start_spu_events(ctr); else return cell_global_start_ppu(ctr); } +/* The SPU interrupt handler + * + * SPU event profiling works as follows: + * The pm_signal[0] holds the one SPU event to be measured. It is routed on + * the debug bus using word 0 or 1. The value of pm_signal[1] and + * pm_signal[2] contain the necessary events to route the SPU program + * counter for the selected SPU onto the debug bus using words 2 and 3. + * The pm_interval register is setup to write the SPU PC value into the + * trace buffer at the maximum rate possible. The trace buffer is configured + * to store the PCs, wrapping when it is full. The performance counter is + * intialized to the max hardware count minus the number of events, N, between + * samples. Once the N events have occured, a HW counter overflow occurs + * causing the generation of a HW counter interrupt which also stops the + * writing of the SPU PC values to the trace buffer. Hence the last PC + * written to the trace buffer is the SPU PC that we want. Unfortunately, + * we have to read from the beginning of the trace buffer to get to the + * last value written. We just hope the PPU has nothing better to do then + * service this interrupt. The PC for the specific SPU being profiled is + * extracted from the trace buffer processed and stored. The trace buffer + * is cleared, interrupts are cleared, the counter is reset to max - N. 
+ * A kernel timer is used to periodically call the routine spu_evnt_swap() + * to switch to the next physical SPU in the node to profile in round robbin + * order. This way data is collected for all SPUs on the node. It does mean + * that we need to use a relatively small value of N to ensure enough samples + * on each SPU are collected each SPU is being profiled 1/8 of the time. + * It may also be necessary to use a longer sample collection period. + */ +static void cell_handle_interrupt_spu(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + u32 cpu, cpu_tmp; + u64 trace_entry; + u32 interrupt_mask; + u64 trace_buffer[2]; + u64 last_trace_buffer; + u32 sample; + u32 trace_addr; + unsigned long sample_array_lock_flags; + int spu_num; + unsigned long flags; + + /* Make sure spu event interrupt handler and spu event swap + * don't access the counters simultaneously. + */ + cpu = smp_processor_id(); + spin_lock_irqsave(&cntr_lock, flags); + + cpu_tmp = cpu; + cbe_disable_pm(cpu); + + interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); + + sample = 0xABCDEF; + trace_entry = 0xfedcba; + last_trace_buffer = 0xdeadbeaf; + + if ((oprofile_running == 1) && (interrupt_mask != 0)) { + /* disable writes to trace buff */ + cbe_write_pm(cpu, pm_interval, 0); + + /* only have one perf cntr being used, cntr 0 */ + if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0)) + && ctr[0].enabled) + /* The SPU PC values will be read + * from the trace buffer, reset counter + */ + + cbe_write_ctr(cpu, 0, reset_value[0]); + + trace_addr = cbe_read_pm(cpu, trace_address); + + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { + /* There is data in the trace buffer to process + * Read the buffer until you get to the last + * entry. This is the value we want. 
+ */ + + cbe_read_trace_buffer(cpu, trace_buffer); + trace_addr = cbe_read_pm(cpu, trace_address); + } + + /* SPU Address 16 bit count format for 128 bit + * HW trace buffer is used for the SPU PC storage + * HDR bits 0:15 + * SPU Addr 0 bits 16:31 + * SPU Addr 1 bits 32:47 + * unused bits 48:127 + * + * HDR: bit4 = 1 SPU Address 0 valid + * HDR: bit5 = 1 SPU Address 1 valid + * - unfortunately, the valid bits don't seem to work + * + * Note trace_buffer[0] holds bits 0:63 of the HW + * trace buffer, trace_buffer[1] holds bits 64:127 + */ + + trace_entry = trace_buffer[0] + & 0x00000000FFFF0000; + + /* only top 16 of the 18 bit SPU PC address + * is stored in trace buffer, hence shift right + * by 16 -2 bits */ + sample = trace_entry >> 14; + last_trace_buffer = trace_buffer[0]; + + spu_num = spu_evnt_phys_spu_indx + + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE); + + /* make sure only one process at a time is calling + * spu_sync_buffer() + */ + spin_lock_irqsave(&oprof_spu_smpl_arry_lck, + sample_array_lock_flags); + spu_sync_buffer(spu_num, &sample, 1); + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, + sample_array_lock_flags); + + smp_wmb(); /* insure spu event buffer updates are written + * don't want events intermingled... */ + + /* The counters were frozen by the interrupt. + * Reenable the interrupt and restart the counters. + */ + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); + cbe_enable_pm_interrupts(cpu, hdw_thread, + virt_cntr_inter_mask); + + /* clear the trace buffer, re-enable writes to trace buff */ + cbe_write_pm(cpu, trace_address, 0); + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); + + /* The writes to the various performance counters only writes + * to a latch. The new values (interrupt setting bits, reset + * counter value etc.) are not copied to the actual registers + * until the performance monitor is enabled. In order to get + * this to work as desired, the permormance monitor needs to + * be disabled while writing to the latches. 
This is a + * HW design issue. + */ + write_pm_cntrl(cpu); + cbe_enable_pm(cpu); + } + spin_unlock_irqrestore(&cntr_lock, flags); +} + static void cell_handle_interrupt_ppu(struct pt_regs *regs, struct op_counter_config *ctr) { @@ -1222,6 +1684,8 @@ static void cell_handle_interrupt(struct pt_regs *regs, { if (profiling_mode == PPU_PROFILING) cell_handle_interrupt_ppu(regs, ctr); + else + cell_handle_interrupt_spu(regs, ctr); } /* From 25006644e6042aab4bb7cdc4bfc5777cd3141df7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 8 Jan 2009 15:39:49 +0100 Subject: [PATCH 31/31] powerpc/oprofile: fix whitespaces in op_model_cell.c Signed-off-by: Robert Richter --- arch/powerpc/oprofile/op_model_cell.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index ff96cbfb89bb..ae06c6236d9c 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -355,13 +355,13 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) { if (bus_word & (1 << i)) { pm_regs.debug_bus_control |= - (bus_type << (30 - (2 * i))); + (bus_type << (30 - (2 * i))); for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { if (input_bus[j] == 0xff) { input_bus[j] = i; pm_regs.group_control |= - (i << (30 - (2 * j))); + (i << (30 - (2 * j))); break; } @@ -503,7 +503,7 @@ static void cell_virtual_cntr(unsigned long data) cbe_disable_pm_interrupts(cpu); for (i = 0; i < num_counters; i++) { per_cpu(pmc_values, cpu + prev_hdw_thread)[i] - = cbe_read_ctr(cpu, i); + = cbe_read_ctr(cpu, i); if (per_cpu(pmc_values, cpu + next_hdw_thread)[i] == 0xFFFFFFFF) @@ -639,7 +639,7 @@ static void spu_evnt_swap(unsigned long data) cbe_disable_pm_interrupts(cpu); spu_pm_cnt[cur_phys_spu] - = cbe_read_ctr(cpu, 0); + = cbe_read_ctr(cpu, 0); /* restore previous count for the next spu to sample */ /* NOTE, hardware issue, counter 
will not start if the @@ -658,9 +658,8 @@ static void spu_evnt_swap(unsigned long data) */ ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3); if (ret) - printk(KERN_ERR - "%s: pm_rtas_activate_signals failed, SPU event swap\n", - __func__); + printk(KERN_ERR "%s: pm_rtas_activate_signals failed, " + "SPU event swap\n", __func__); /* clear the trace buffer, don't want to take PC for * previous SPU*/ @@ -1316,7 +1315,7 @@ static int cell_global_start_spu_cycles(struct op_counter_config *ctr) /* start profiling */ ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, - cbe_cpu_to_node(cpu), lfsr_value); + cbe_cpu_to_node(cpu), lfsr_value); if (unlikely(ret != 0)) { printk(KERN_ERR @@ -1397,7 +1396,7 @@ static int cell_global_start_spu_events(struct op_counter_config *ctr) */ start_spu_event_swap(); start_spu_profiling_events(); - oprofile_running = 1; + oprofile_running = 1; smp_wmb(); return rtn; @@ -1422,8 +1421,7 @@ static int cell_global_start_ppu(struct op_counter_config *ctr) if (ctr_enabled & (1 << i)) { cbe_write_ctr(cpu, i, reset_value[i]); enable_ctr(cpu, i, pm_regs.pm07_cntrl); - interrupt_mask |= - CBE_PM_CTR_OVERFLOW_INTR(i); + interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i); } else { /* Disable counter */ cbe_write_pm07_control(cpu, i, 0); @@ -1517,13 +1515,13 @@ static void cell_handle_interrupt_spu(struct pt_regs *regs, trace_entry = 0xfedcba; last_trace_buffer = 0xdeadbeaf; - if ((oprofile_running == 1) && (interrupt_mask != 0)) { + if ((oprofile_running == 1) && (interrupt_mask != 0)) { /* disable writes to trace buff */ cbe_write_pm(cpu, pm_interval, 0); /* only have one perf cntr being used, cntr 0 */ if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0)) - && ctr[0].enabled) + && ctr[0].enabled) /* The SPU PC values will be read * from the trace buffer, reset counter */