perf/x86: Add support for perf text poke event for text_poke_bp_batch() callers
Add support for perf text poke event for text_poke_bp_batch() callers. That
includes jump labels. See comments for more details.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200512121922.8997-3-adrian.hunter@intel.com
This commit is contained in:
Родитель
e17d43b93e
Коммит
d769811ca9
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/stringify.h>
|
||||
|
@ -1001,6 +1002,7 @@ struct text_poke_loc {
|
|||
s32 rel32;
|
||||
u8 opcode;
|
||||
const u8 text[POKE_MAX_OPCODE_SIZE];
|
||||
u8 old;
|
||||
};
|
||||
|
||||
struct bp_patching_desc {
|
||||
|
@ -1168,8 +1170,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
|
|||
/*
|
||||
* First step: add an int3 trap to the address that will be patched.
|
||||
*/
|
||||
for (i = 0; i < nr_entries; i++)
|
||||
for (i = 0; i < nr_entries; i++) {
|
||||
tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
|
||||
text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
|
||||
}
|
||||
|
||||
text_poke_sync();
|
||||
|
||||
|
@ -1177,14 +1181,45 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
|
|||
* Second step: update all but the first byte of the patched range.
|
||||
*/
|
||||
for (do_sync = 0, i = 0; i < nr_entries; i++) {
|
||||
u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
|
||||
int len = text_opcode_size(tp[i].opcode);
|
||||
|
||||
if (len - INT3_INSN_SIZE > 0) {
|
||||
memcpy(old + INT3_INSN_SIZE,
|
||||
text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
|
||||
len - INT3_INSN_SIZE);
|
||||
text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
|
||||
(const char *)tp[i].text + INT3_INSN_SIZE,
|
||||
len - INT3_INSN_SIZE);
|
||||
do_sync++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit a perf event to record the text poke, primarily to
|
||||
* support Intel PT decoding which must walk the executable code
|
||||
* to reconstruct the trace. The flow up to here is:
|
||||
* - write INT3 byte
|
||||
* - IPI-SYNC
|
||||
* - write instruction tail
|
||||
* At this point the actual control flow will be through the
|
||||
* INT3 and handler and not hit the old or new instruction.
|
||||
* Intel PT outputs FUP/TIP packets for the INT3, so the flow
|
||||
* can still be decoded. Subsequently:
|
||||
* - emit RECORD_TEXT_POKE with the new instruction
|
||||
* - IPI-SYNC
|
||||
* - write first byte
|
||||
* - IPI-SYNC
|
||||
* So before the text poke event timestamp, the decoder will see
|
||||
* either the old instruction flow or FUP/TIP of INT3. After the
|
||||
* text poke event timestamp, the decoder will see either the
|
||||
* new instruction flow or FUP/TIP of INT3. Thus decoders can
|
||||
* use the timestamp as the point at which to modify the
|
||||
* executable code.
|
||||
* The old instruction is recorded so that the event can be
|
||||
* processed forwards or backwards.
|
||||
*/
|
||||
perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
|
||||
tp[i].text, len);
|
||||
}
|
||||
|
||||
if (do_sync) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче