From 18e1d1b115f79ab1f9137f2b4c72f87c218a5fb9 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Fri, 13 Mar 2015 20:04:18 +0200 Subject: [PATCH 01/15] arc: copy_thread(): rename 'arg' argument to 'kthread_arg' The 'arg' argument to copy_thread() is only ever used when forking a new kernel thread. Hence, rename it to 'kthread_arg' for clarity. Signed-off-by: Alex Dowad Signed-off-by: Vineet Gupta --- arch/arc/kernel/process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 98c00a2d4dd9..7a8ea6ed2c5c 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -49,7 +49,10 @@ void arch_cpu_idle(void) asmlinkage void ret_from_fork(void); -/* Layout of Child kernel mode stack as setup at the end of this function is +/* + * Copy architecture-specific thread state + * + * Layout of Child kernel mode stack as setup at the end of this function is * * | ... | * | ... | @@ -81,7 +84,7 @@ asmlinkage void ret_from_fork(void); * ------------------ <===== END of PAGE */ int copy_thread(unsigned long clone_flags, - unsigned long usp, unsigned long arg, + unsigned long usp, unsigned long kthread_arg, struct task_struct *p) { struct pt_regs *c_regs; /* child's pt_regs */ @@ -112,7 +115,7 @@ int copy_thread(unsigned long clone_flags, if (unlikely(p->flags & PF_KTHREAD)) { memset(c_regs, 0, sizeof(struct pt_regs)); - c_callee->r13 = arg; /* argument to kernel thread */ + c_callee->r13 = kthread_arg; c_callee->r14 = usp; /* function */ return 0; From 5971d81517752749a8155bb3f9b1ff1309bf3ea5 Mon Sep 17 00:00:00 2001 From: Mischa Jonker Date: Thu, 16 May 2013 23:16:29 +0200 Subject: [PATCH 02/15] ARC: [nsimosci] Update defconfig Signed-off-by: Mischa Jonker --- arch/arc/configs/nsimosci_defconfig | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 278dacf2a3f9..d2ac4e56ba1d 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -2,6 +2,9 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -9,7 +12,7 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs" +CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_SLUB_DEBUG is not set @@ -21,12 +24,9 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_PLAT_FPGA_LEGACY=y -# CONFIG_ARC_IDE is not set -# CONFIG_ARCTANGENT_EMAC is not set # CONFIG_ARC_HAS_RTSC is not set CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" # CONFIG_COMPACTION is not set -# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -39,23 +39,23 @@ CONFIG_INET=y # CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_MOUSEDEV is not set +CONFIG_INPUT_EVDEV=y # CONFIG_MOUSE_PS2_ALPS is not set # CONFIG_MOUSE_PS2_LOGIPS2PP is not set # CONFIG_MOUSE_PS2_SYNAPTICS is not set +# CONFIG_MOUSE_PS2_CYPRESS is not set # CONFIG_MOUSE_PS2_TRACKPOINT is not set CONFIG_MOUSE_PS2_TOUCHKIT=y -# CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_ARC=y -CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_FB=y @@ -72,4 +72,3 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_XZ_DEC=y From 1425d5e72c8c41300d66a24b81ad911cb1239e97 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 27 Mar 2014 11:59:02 +0530 Subject: [PATCH 03/15] ARC: Fix WRITE_BCR * There was obvious bit rot due to lack of use * Old naming was confusing since BCR are read only Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index be33db8a2ee3..221380a73b0a 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -173,11 +173,11 @@ } \ } -#define WRITE_BCR(reg, into) \ +#define WRITE_AUX(reg, into) \ { \ unsigned int tmp; \ if (sizeof(tmp) == sizeof(into)) { \ - tmp = (*(unsigned int *)(into)); \ + tmp = (*(unsigned int *)&(into)); \ write_aux_reg(reg, tmp); \ } else { \ extern void bogus_undefined(void); \ From dc9e234f91c77a98a8911bf02619275f51b14bfc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 22 Sep 2014 17:54:45 +0530 Subject: [PATCH 04/15] ARC: cosmetic: Remove unused ECR bitfield masks Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 221380a73b0a..fe2c3cf82281 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -50,11 +50,7 @@ * [15: 8] = Exception Cause Code * [ 7: 0] = Exception Parameters (for certain types only) */ -#define ECR_VEC_MASK 0xff0000 -#define ECR_CODE_MASK 0x00ff00 -#define ECR_PARAM_MASK 0x0000ff - -/* Exception Cause Vector Values */ +#define ECR_V_MEM_ERR 0x01 #define ECR_V_INSN_ERR 0x02 #define ECR_V_MACH_CHK 0x20 #define ECR_V_ITLB_MISS 0x21 @@ -62,7 +58,8 @@ #define ECR_V_PROTV 0x23 #define ECR_V_TRAP 0x25 -/* Protection Violation Exception Cause Code Values */ +/* DTLB Miss and Protection Violation Cause Codes */ + #define ECR_C_PROTV_INST_FETCH 0x00 #define ECR_C_PROTV_LOAD 0x01 #define ECR_C_PROTV_STORE 0x02 From 0dfb8ec70fd67be02096eaf9898feb94950d6f06 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Oct 2014 15:29:50 +0530 Subject: [PATCH 05/15] ARC: rename unhandled exception handler Signed-off-by: Vineet Gupta --- arch/arc/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 3eadfdabc322..c927aa84e652 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -42,7 +42,7 @@ void die(const char *str, struct pt_regs *regs, unsigned long address) * -for kernel, chk if due to copy_(to|from)_user, otherwise die() */ static noinline int -handle_exception(const char *str, struct pt_regs *regs, siginfo_t *info) +unhandled_exception(const char *str, struct pt_regs *regs, siginfo_t *info) { if (user_mode(regs)) { struct task_struct *tsk = current; @@ -71,7 +71,7 @@ int name(unsigned long address, struct pt_regs *regs) \ .si_code = sicode, \ .si_addr = (void __user *)address, \ }; \ - return handle_exception(str, regs, &info);\ + return unhandled_exception(str, regs, &info);\ } /* From de60c1a1849c57e864f02f0d921993982b1648f8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 7 Nov 2014 19:19:37 +0530 Subject: [PATCH 06/15] ARC: fold __builtin_constant_p() into test_bit() This makes test_bit() more like its siblings *_bit() routines. Also add some comments about the constant @nr micro-optimization Signed-off-by: Vineet Gupta --- arch/arc/include/asm/bitops.h | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 1a5bf07eefe2..4051e9525939 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -32,6 +32,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *m) m += nr >> 5; + /* + * ARC ISA micro-optimization: + * + * Instructions dealing with bitpos only consider lower 5 bits (0-31) + * e.g (x << 33) is handled like (x << 1) by ASL instruction + * (mem pointer still needs adjustment to point to next word) + * + * Hence the masking to clamp @nr arg can be elided in general. + * + * However if @nr is a constant (above assumed it in a register), + * and greater than 31, gcc can optimize away (x << 33) to 0, + * as overflow, given the 32-bit ISA. Thus masking needs to be done + * for constant @nr, but no code is generated due to const prop. + */ if (__builtin_constant_p(nr)) nr &= 0x1f; @@ -374,29 +388,20 @@ __test_and_change_bit(unsigned long nr, volatile unsigned long *m) * This routine doesn't need to be atomic. */ static inline int -__constant_test_bit(unsigned int nr, const volatile unsigned long *addr) -{ - return ((1UL << (nr & 31)) & - (((const volatile unsigned int *)addr)[nr >> 5])) != 0; -} - -static inline int -__test_bit(unsigned int nr, const volatile unsigned long *addr) +test_bit(unsigned int nr, const volatile unsigned long *addr) { unsigned long mask; addr += nr >> 5; - /* ARC700 only considers 5 bits in bit-fiddling insn */ + if (__builtin_constant_p(nr)) + nr &= 0x1f; + mask = 1 << nr; return ((mask & *addr) != 0); } -#define test_bit(nr, addr) (__builtin_constant_p(nr) ? \ - __constant_test_bit((nr), (addr)) : \ - __test_bit((nr), (addr))) - /* * Count the number of zeros, starting from MSB * Helper for fls( ) friends From a44ec8bd2a55c7644d458d8e79d83bd9204e1796 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sun, 8 Mar 2015 14:18:21 +0530 Subject: [PATCH 07/15] ARC: Fix RTT boot printing Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 1 + arch/arc/kernel/setup.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index fe2c3cf82281..e2b1b1211b0d 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -30,6 +30,7 @@ #define ARC_REG_D_UNCACH_BCR 0x6A #define ARC_REG_BPU_BCR 0xc0 #define ARC_REG_ISA_CFG_BCR 0xc1 +#define ARC_REG_RTT_BCR 0xF2 #define ARC_REG_SMART_BCR 0xFF /* status32 Bits Positions */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 900f68a70088..1d167c6df8ca 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -120,7 +120,10 @@ static void read_arc_build_cfg_regs(void) READ_BCR(ARC_REG_SMART_BCR, bcr); cpu->extn.smart = bcr.ver ? 1 : 0; - cpu->extn.debug = cpu->extn.ap | cpu->extn.smart; + READ_BCR(ARC_REG_RTT_BCR, bcr); + cpu->extn.rtt = bcr.ver ? 1 : 0; + + cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt; } static const struct cpuinfo_data arc_cpu_tbl[] = { From f2e2013f757204d4e0a009597722c75bb2332796 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Feb 2015 13:59:53 +0530 Subject: [PATCH 08/15] ARC: mem init spring cleaning - No functional changes Signed-off-by: Vineet Gupta --- arch/arc/mm/init.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 523412369f70..d44eedd8c322 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -71,7 +71,7 @@ early_param("initrd", early_initrd); */ void __init setup_arch_memory(void) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, 0 }; + unsigned long zones_size[MAX_NR_ZONES]; unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz; init_mm.start_code = (unsigned long)_text; @@ -90,7 +90,7 @@ void __init setup_arch_memory(void) /*------------- externs in mm need setting up ---------------*/ /* first page of system - kernel .vector starts here */ - min_low_pfn = PFN_DOWN(CONFIG_LINUX_LINK_BASE); + min_low_pfn = ARCH_PFN_OFFSET; /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ max_low_pfn = max_pfn = PFN_DOWN(end_mem); @@ -111,7 +111,7 @@ void __init setup_arch_memory(void) /*-------------- node setup --------------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; + zones_size[ZONE_NORMAL] = max_mapnr; /* * We can't use the helper free_area_init(zones[]) because it uses @@ -123,6 +123,8 @@ void __init setup_arch_memory(void) zones_size, /* num pages per zone */ min_low_pfn, /* first pfn of node */ NULL); /* NO holes */ + + high_memory = (void *)end_mem; } /* @@ -133,7 +135,6 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { - high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz); free_all_bootmem(); mem_init_print_info(NULL); } From 03c94fcf954d6bc5e23460e200d23a2c0fe5cd2e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 17 Nov 2014 17:13:03 +0530 Subject: [PATCH 09/15] ARC: perf: make @arc_pmu static global Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index ae1c485cbc68..64261c2711b1 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -25,6 +25,8 @@ struct arc_pmu { int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; }; +static struct arc_pmu *arc_pmu; + /* read counter #idx; note that counter# != event# on ARC! */ static uint64_t arc_pmu_read_counter(int idx) { @@ -47,7 +49,6 @@ static uint64_t arc_pmu_read_counter(int idx) static void arc_perf_event_update(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); uint64_t prev_raw_count, new_raw_count; int64_t delta; @@ -95,7 +96,6 @@ static int arc_pmu_cache_event(u64 config) /* initializes hw_perf_event structure if event is supported */ static int arc_pmu_event_init(struct perf_event *event) { - struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); struct hw_perf_event *hwc = &event->hw; int ret; @@ -183,8 +183,6 @@ static void arc_pmu_stop(struct perf_event *event, int flags) static void arc_pmu_del(struct perf_event *event, int flags) { - struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); - arc_pmu_stop(event, PERF_EF_UPDATE); __clear_bit(event->hw.idx, arc_pmu->used_mask); @@ -194,7 +192,6 @@ static void arc_pmu_del(struct perf_event *event, int flags) /* allocate hardware counter and optionally start counting */ static int arc_pmu_add(struct perf_event *event, int flags) { - struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; From bde80c237e49983e2b26dfa9925325a070b71de7 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 15 Apr 2015 19:44:07 +0530 Subject: [PATCH 10/15] ARC: perf: Add some comments/debug stuff Signed-off-by: Vineet Gupta --- arch/arc/include/asm/perf_event.h | 53 ++++++++++++++----------------- arch/arc/kernel/perf_event.c | 18 ++++++++--- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index cbf755e32a03..1c45667c1367 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -57,26 +57,7 @@ struct arc_reg_cc_build { #define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) /* - * The "generalized" performance events seem to really be a copy - * of the available events on x86 processors; the mapping to ARC - * events is not always possible 1-to-1. Fortunately, there doesn't - * seem to be an exact definition for these events, so we can cheat - * a bit where necessary. - * - * In particular, the following PERF events may behave a bit differently - * compared to other architectures: - * - * PERF_COUNT_HW_CPU_CYCLES - * Cycles not in halted state - * - * PERF_COUNT_HW_REF_CPU_CYCLES - * Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES - * for now as we don't do Dynamic Voltage/Frequency Scaling (yet) - * - * PERF_COUNT_HW_BUS_CYCLES - * Unclear what this means, Intel uses 0x013c, which according to - * their datasheet means "unhalted reference cycles". It sounds similar - * to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it. + * Some ARC pct quirks: * * PERF_COUNT_HW_STALLED_CYCLES_BACKEND * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND @@ -91,21 +72,35 @@ struct arc_reg_cc_build { * Note that I$ cache misses aren't counted by either of the two! */ +/* + * ARC PCT has hardware conditions with fixed "names" but variable "indexes" + * (based on a specific RTL build) + * Below is the static map between perf generic/arc specific event_id and + * h/w condition names. + * At the time of probe, we loop thru each index and find it's name to + * complete the mapping of perf event_id to h/w index as latter is needed + * to program the counter really + */ static const char * const arc_pmu_ev_hw_map[] = { + /* count cycles */ [PERF_COUNT_HW_CPU_CYCLES] = "crun", [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", [PERF_COUNT_HW_BUS_CYCLES] = "crun", - [PERF_COUNT_HW_INSTRUCTIONS] = "iall", - [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", - [PERF_COUNT_ARC_DCLM] = "dclm", - [PERF_COUNT_ARC_DCSM] = "dcsm", - [PERF_COUNT_ARC_ICM] = "icm", - [PERF_COUNT_ARC_BPOK] = "bpok", - [PERF_COUNT_ARC_EDTLB] = "edtlb", - [PERF_COUNT_ARC_EITLB] = "eitlb", + + /* counts condition */ + [PERF_COUNT_HW_INSTRUCTIONS] = "iall", + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", + [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ + [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ + + [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ + [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ + [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ + [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ + [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ }; #define C(_x) PERF_COUNT_HW_CACHE_##_x diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 64261c2711b1..181baeed4495 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -90,6 +90,10 @@ static int arc_pmu_cache_event(u64 config) if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; + pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n", + cache_type, cache_op, cache_result, ret, + arc_pmu_ev_hw_map[ret]); + return ret; } @@ -106,8 +110,9 @@ static int arc_pmu_event_init(struct perf_event *event) if (arc_pmu->ev_hw_idx[event->attr.config] < 0) return -ENOENT; hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; - pr_debug("initializing event %d with cfg %d\n", - (int) event->attr.config, (int) hwc->config); + pr_debug("init event %d with h/w %d \'%s\'\n", + (int) event->attr.config, (int) hwc->config, + arc_pmu_ev_hw_map[event->attr.config]); return 0; case PERF_TYPE_HW_CACHE: ret = arc_pmu_cache_event(event->attr.config); @@ -260,19 +265,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev) arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); cc_name.str[8] = 0; - for (i = 0; i < PERF_COUNT_HW_MAX; i++) + for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) arc_pmu->ev_hw_idx[i] = -1; + /* loop thru all available h/w condition indexes */ for (j = 0; j < cc_bcr.c; j++) { write_aux_reg(ARC_REG_CC_INDEX, j); cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); + + /* See if it has been mapped to a perf event_id */ for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { if (arc_pmu_ev_hw_map[i] && !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && strlen(arc_pmu_ev_hw_map[i])) { - pr_debug("mapping %d to idx %d with name %s\n", - i, j, cc_name.str); + pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", + i, cc_name.str, j); arc_pmu->ev_hw_idx[i] = j; } } From 0a8a47679351f00145ddeaa0a05b510e67b780be Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 7 Jan 2015 13:14:07 +0530 Subject: [PATCH 11/15] ARC: perf: support cache hit/miss ratio Signed-off-by: Vineet Gupta --- arch/arc/include/asm/perf_event.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 1c45667c1367..2b8880e953a2 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -54,7 +54,10 @@ struct arc_reg_cc_build { #define PERF_COUNT_ARC_BPOK (PERF_COUNT_HW_MAX + 3) #define PERF_COUNT_ARC_EDTLB (PERF_COUNT_HW_MAX + 4) #define PERF_COUNT_ARC_EITLB (PERF_COUNT_HW_MAX + 5) -#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) +#define PERF_COUNT_ARC_LDC (PERF_COUNT_HW_MAX + 6) +#define PERF_COUNT_ARC_STC (PERF_COUNT_HW_MAX + 7) + +#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8) /* * Some ARC pct quirks: @@ -96,6 +99,9 @@ static const char * const arc_pmu_ev_hw_map[] = { [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ + [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */ + [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */ + [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ @@ -109,11 +115,11 @@ static const char * const arc_pmu_ev_hw_map[] = { static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC, [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM, }, [C(OP_PREFETCH)] = { @@ -123,7 +129,7 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, [C(L1I)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS, [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM, }, [C(OP_WRITE)] = { @@ -151,9 +157,10 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, [C(DTLB)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB, }, + /* DTLB LD/ST Miss not segregated by h/w*/ [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, From 389e3160b9b0002f8e7c95e9c0af5da6da311892 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 12 Nov 2013 11:00:03 +0100 Subject: [PATCH 12/15] ARC: perf: Add kernel callchain support Signed-off-by: Mischa Jonker Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 181baeed4495..a6ad1e09e4be 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -16,6 +16,7 @@ #include #include #include +#include struct arc_pmu { struct pmu pmu; @@ -25,6 +26,34 @@ struct arc_pmu { int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; }; +struct arc_callchain_trace { + int depth; + void *perf_stuff; +}; + +static int callchain_trace(unsigned int addr, void *data) +{ + struct arc_callchain_trace *ctrl = data; + struct perf_callchain_entry *entry = ctrl->perf_stuff; + perf_callchain_store(entry, addr); + + if (ctrl->depth++ < 3) + return 0; + + return -1; +} + +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) +{ + struct arc_callchain_trace ctrl = { + .depth = 0, + .perf_stuff = entry, + }; + + arc_unwind_core(NULL, regs, callchain_trace, &ctrl); +} + static struct arc_pmu *arc_pmu; /* read counter #idx; note that counter# != event# on ARC! */ From 22f6b899125063c5dc955dd378c408b14d80b020 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 12 Jul 2013 15:55:54 +0200 Subject: [PATCH 13/15] ARC: perf: add user space attribution in callchains The actual user space unwinding is more involved, so simply capture the user space PC Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index a6ad1e09e4be..109118a4245e 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -54,6 +54,16 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) arc_unwind_core(NULL, regs, callchain_trace, &ctrl); } +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) +{ + /* + * User stack can't be unwound trivially with kernel dwarf unwinder + * So for now just record the user PC + */ + perf_callchain_store(entry, instruction_pointer(regs)); +} + static struct arc_pmu *arc_pmu; /* read counter #idx; note that counter# != event# on ARC! */ From 30fdd373f24cc50e250c71a6e2df89505e267804 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 15 Apr 2015 16:35:38 +0530 Subject: [PATCH 14/15] ARC: perf: Rename DT binding to not confuse with power mgmt Signed-off-by: Vineet Gupta --- Documentation/devicetree/bindings/arc/pct.txt | 20 ++++++++++++++++ Documentation/devicetree/bindings/arc/pmu.txt | 24 ------------------- arch/arc/boot/dts/angel4.dts | 2 +- arch/arc/kernel/perf_event.c | 4 ++-- 4 files changed, 23 insertions(+), 27 deletions(-) create mode 100644 Documentation/devicetree/bindings/arc/pct.txt delete mode 100644 Documentation/devicetree/bindings/arc/pmu.txt diff --git a/Documentation/devicetree/bindings/arc/pct.txt b/Documentation/devicetree/bindings/arc/pct.txt new file mode 100644 index 000000000000..7b9588444f20 --- /dev/null +++ b/Documentation/devicetree/bindings/arc/pct.txt @@ -0,0 +1,20 @@ +* ARC Performance Counters + +The ARC700 can be configured with a pipeline performance monitor for counting +CPU and cache events like cache misses and hits. Like conventional PCT there +are 100+ hardware conditions dynamically mapped to upto 32 counters + +Note that: + * The ARC 700 PCT does not support interrupts; although HW events may be + counted, the HW events themselves cannot serve as a trigger for a sample. + +Required properties: + +- compatible : should contain + "snps,arc700-pct" + +Example: + +pmu { + compatible = "snps,arc700-pct"; +}; diff --git a/Documentation/devicetree/bindings/arc/pmu.txt b/Documentation/devicetree/bindings/arc/pmu.txt deleted file mode 100644 index 49d517340de3..000000000000 --- a/Documentation/devicetree/bindings/arc/pmu.txt +++ /dev/null @@ -1,24 +0,0 @@ -* ARC Performance Monitor Unit - -The ARC 700 can be configured with a pipeline performance monitor for counting -CPU and cache events like cache misses and hits. - -Note that: - * ARC 700 refers to a family of ARC processor cores; - - There is only one type of PMU available for the whole family; - - The PMU may support different sets of events; supported events are probed - at boot time, as required by the reference manual. - - * The ARC 700 PMU does not support interrupts; although HW events may be - counted, the HW events themselves cannot serve as a trigger for a sample. - -Required properties: - -- compatible : should contain - "snps,arc700-pmu" - -Example: - -pmu { - compatible = "snps,arc700-pmu"; -}; diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts index 757e0c62c4f9..3b076fbd8366 100644 --- a/arch/arc/boot/dts/angel4.dts +++ b/arch/arc/boot/dts/angel4.dts @@ -64,7 +64,7 @@ }; arcpmu0: pmu { - compatible = "snps,arc700-pmu"; + compatible = "snps,arc700-pct"; }; }; }; diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 109118a4245e..165e0b604167 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -346,7 +346,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id arc_pmu_match[] = { - { .compatible = "snps,arc700-pmu" }, + { .compatible = "snps,arc700-pct" }, {}, }; MODULE_DEVICE_TABLE(of, arc_pmu_match); @@ -354,7 +354,7 @@ MODULE_DEVICE_TABLE(of, arc_pmu_match); static struct platform_driver arc_pmu_driver = { .driver = { - .name = "arc700-pmu", + .name = "arc700-pct", .of_match_table = of_match_ptr(arc_pmu_match), }, .probe = arc_pmu_device_probe, From d8f6ad85cbb740b7e8ca5275b12838fab685540c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 20 Apr 2015 16:49:30 +0530 Subject: [PATCH 15/15] ARC: perf: don't add code for impossible case Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 165e0b604167..fd2ec50102f2 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -288,10 +288,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS); READ_BCR(ARC_REG_CC_BUILD, cc_bcr); - if (!cc_bcr.v) { - pr_err("Performance counters exist, but no countable conditions?\n"); - return -ENODEV; - } + BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */ arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL); if (!arc_pmu)