powerpc/perf: Add data source encodings for power10 platform
The code currently represents memory/cache level data using the PERF_MEM_LVL_* namespace, which is in the process of deprecation in favour of the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent cache/memory data based on the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent data coming from local memory/remote memory/distant memory and remote/distant cache hits. In order to represent data coming from OpenCAPI cache/memory, we use the LVLNUM "PMEM" field, which is used to represent persistent memory accesses. Result on a power10 system with the patch changes: localhost:# ./perf mem report --sort="mem,sym,dso" --stdio # Overhead Samples Memory access Symbol Shared Object # ........ ............ ........................ .......................... ................ # 29.46% 2331 L1 or L1 hit [.] __random libc-2.28.so 23.11% 2121 L1 or L1 hit [.] producer_populate_cache producer_consumer 18.56% 1758 L1 or L1 hit [.] __random_r libc-2.28.so 15.64% 1559 L2 or L2 hit [.] __random libc-2.28.so ..... 0.09% 5 Remote socket, same board Any cache hit [.] __random libc-2.28.so 0.07% 4 Remote socket, same board Any cache hit [.] __random libc-2.28.so ..... Signed-off-by: Kajol Jain <kjain@linux.ibm.com> Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211206091749.87585-5-kjain@linux.ibm.com
This commit is contained in:
Родитель
4a20ee1061
Коммит
6ed05a8efd
|
@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
|
|||
ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
|
||||
break;
|
||||
case 4:
|
||||
if (sub_idx <= 1)
|
||||
ret = PH(LVL, LOC_RAM);
|
||||
else if (sub_idx > 1 && sub_idx <= 2)
|
||||
ret = PH(LVL, REM_RAM1);
|
||||
else
|
||||
ret = PH(LVL, REM_RAM2);
|
||||
ret |= P(SNOOP, HIT);
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
|
||||
ret = P(SNOOP, HIT);
|
||||
|
||||
if (sub_idx == 1)
|
||||
ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
|
||||
else if (sub_idx == 2 || sub_idx == 3)
|
||||
ret |= P(LVL, HIT) | LEVEL(PMEM);
|
||||
else if (sub_idx == 4)
|
||||
ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
|
||||
else if (sub_idx == 5 || sub_idx == 7)
|
||||
ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
|
||||
else if (sub_idx == 6)
|
||||
ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
|
||||
} else {
|
||||
if (sub_idx <= 1)
|
||||
ret = PH(LVL, LOC_RAM);
|
||||
else if (sub_idx > 1 && sub_idx <= 2)
|
||||
ret = PH(LVL, REM_RAM1);
|
||||
else
|
||||
ret = PH(LVL, REM_RAM2);
|
||||
ret |= P(SNOOP, HIT);
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
|
||||
|
@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
|
|||
}
|
||||
break;
|
||||
case 6:
|
||||
ret = PH(LVL, REM_CCE2);
|
||||
if ((sub_idx == 0) || (sub_idx == 2))
|
||||
ret |= P(SNOOP, HIT);
|
||||
else if ((sub_idx == 1) || (sub_idx == 3))
|
||||
ret |= P(SNOOP, HITM);
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
|
||||
if (sub_idx == 0)
|
||||
ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
|
||||
P(SNOOP, HIT) | P(HOPS, 2);
|
||||
else if (sub_idx == 1)
|
||||
ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
|
||||
P(SNOOP, HITM) | P(HOPS, 2);
|
||||
else if (sub_idx == 2)
|
||||
ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
|
||||
P(SNOOP, HIT) | P(HOPS, 3);
|
||||
else if (sub_idx == 3)
|
||||
ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
|
||||
P(SNOOP, HITM) | P(HOPS, 3);
|
||||
} else {
|
||||
ret = PH(LVL, REM_CCE2);
|
||||
if (sub_idx == 0 || sub_idx == 2)
|
||||
ret |= P(SNOOP, HIT);
|
||||
else if (sub_idx == 1 || sub_idx == 3)
|
||||
ret |= P(SNOOP, HITM);
|
||||
}
|
||||
break;
|
||||
case 7:
|
||||
ret = PM(LVL, L1);
|
||||
|
|
Загрузка…
Ссылка в новой задаче