- Rename the staging files to give them some meaning.
   Just stage1, stage2, etc. does not show what they are for
 
 - Check for NULL from allocation in bootconfig
 
 - Hold event mutex for dyn_event call in user events
 
 - Mark user events as broken (to work on the API)
 
 - Remove eBPF updates from user events
 
 - Remove user events from uapi header to keep it from being installed.
 
 - Move ftrace_graph_is_dead() into inline as it is called from hot paths
   and also convert it into a static branch.
 -----BEGIN PGP SIGNATURE-----
 
 iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCYkmyIBQccm9zdGVkdEBn
 b29kbWlzLm9yZwAKCRAp5XQQmuv6qutfAQD90gbUgFMFe2akF5sKhonF5T6mm0+w
 BsWqNlBEKBxmfwD+Krfpxql/PKp/gCufcIUUkYC4E6Wl9akf3eO1qQel1Ao=
 =ZTn1
 -----END PGP SIGNATURE-----

Merge tag 'trace-v5.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull more tracing updates from Steven Rostedt:

 - Rename the staging files to give them some meaning. Just
   stage1, stage2, etc. does not show what they are for

 - Check for NULL from allocation in bootconfig

 - Hold event mutex for dyn_event call in user events

 - Mark user events as broken (to work on the API)

 - Remove eBPF updates from user events

 - Remove user events from uapi header to keep it from being installed.

 - Move ftrace_graph_is_dead() into inline as it is called from hot
   paths and also convert it into a static branch.

* tag 'trace-v5.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  tracing: Move user_events.h temporarily out of include/uapi
  ftrace: Make ftrace_graph_is_dead() a static branch
  tracing: Set user_events to BROKEN
  tracing/user_events: Remove eBPF interfaces
  tracing/user_events: Hold event_mutex during dyn_event_add
  proc: bootconfig: Add null pointer check
  tracing: Rename the staging files for trace_events
This commit is contained in:
Linus Torvalds 2022-04-03 12:26:01 -07:00
Родитель 34a53ff911 5cfff569ca
Коммит 09bb8856d4
15 изменённых файлов: 44 добавлений и 167 удалений

Просмотреть файл

@ -7,7 +7,7 @@ user_events: User-based Event Tracing
Overview Overview
-------- --------
User based trace events allow user processes to create events and trace data User based trace events allow user processes to create events and trace data
that can be viewed via existing tools, such as ftrace, perf and eBPF. that can be viewed via existing tools, such as ftrace and perf.
To enable this feature, build your kernel with CONFIG_USER_EVENTS=y. To enable this feature, build your kernel with CONFIG_USER_EVENTS=y.
Programs can view status of the events via Programs can view status of the events via
@ -67,8 +67,7 @@ The command string format is as follows::
Supported Flags Supported Flags
^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^
**BPF_ITER** - EBPF programs attached to this event will get the raw iovec None yet
struct instead of any data copies for max performance.
Field Format Field Format
^^^^^^^^^^^^ ^^^^^^^^^^^^
@ -160,7 +159,7 @@ The following values are defined to aid in checking what has been attached:
**EVENT_STATUS_FTRACE** - Bit set if ftrace has been attached (Bit 0). **EVENT_STATUS_FTRACE** - Bit set if ftrace has been attached (Bit 0).
**EVENT_STATUS_PERF** - Bit set if perf/eBPF has been attached (Bit 1). **EVENT_STATUS_PERF** - Bit set if perf has been attached (Bit 1).
Writing Data Writing Data
------------ ------------
@ -204,13 +203,6 @@ It's advised for user programs to do the following::
**NOTE:** *The write_index is not emitted out into the trace being recorded.* **NOTE:** *The write_index is not emitted out into the trace being recorded.*
EBPF
----
EBPF programs that attach to a user-based event tracepoint are given a pointer
to a struct user_bpf_context. The bpf context contains the data type (which can
be a user or kernel buffer, or can be a pointer to the iovec) and the data
length that was emitted (minus the write_index).
Example Code Example Code
------------ ------------
See sample code in samples/user_events. See sample code in samples/user_events.

Просмотреть файл

@ -32,6 +32,8 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
int ret = 0; int ret = 0;
key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL);
if (!key)
return -ENOMEM;
xbc_for_each_key_value(leaf, val) { xbc_for_each_key_value(leaf, val) {
ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);

Просмотреть файл

@ -9,6 +9,7 @@
#include <linux/trace_recursion.h> #include <linux/trace_recursion.h>
#include <linux/trace_clock.h> #include <linux/trace_clock.h>
#include <linux/jump_label.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/linkage.h> #include <linux/linkage.h>
#include <linux/bitops.h> #include <linux/bitops.h>
@ -1018,7 +1019,20 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
extern int register_ftrace_graph(struct fgraph_ops *ops); extern int register_ftrace_graph(struct fgraph_ops *ops);
extern void unregister_ftrace_graph(struct fgraph_ops *ops); extern void unregister_ftrace_graph(struct fgraph_ops *ops);
extern bool ftrace_graph_is_dead(void); /**
* ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
*
* ftrace_graph_stop() is called when a severe error is detected in
* the function graph tracing. This function is called by the critical
* paths of function graph to keep those paths from doing any more harm.
*/
DECLARE_STATIC_KEY_FALSE(kill_ftrace_graph);
static inline bool ftrace_graph_is_dead(void)
{
return static_branch_unlikely(&kill_ftrace_graph);
}
extern void ftrace_graph_stop(void); extern void ftrace_graph_stop(void);
/* The current handlers in use */ /* The current handlers in use */

Просмотреть файл

@ -32,9 +32,6 @@
/* Create dynamic location entry within a 32-bit value */ /* Create dynamic location entry within a 32-bit value */
#define DYN_LOC(offset, size) ((size) << 16 | (offset)) #define DYN_LOC(offset, size) ((size) << 16 | (offset))
/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */
#define FLAG_BPF_ITER (1 << 0)
/* /*
* Describes an event registration and stores the results of the registration. * Describes an event registration and stores the results of the registration.
* This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum
@ -63,54 +60,4 @@ struct user_reg {
/* Requests to delete a user_event */ /* Requests to delete a user_event */
#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) #define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*)
/* Data type that was passed to the BPF program */
enum {
/* Data resides in kernel space */
USER_BPF_DATA_KERNEL,
/* Data resides in user space */
USER_BPF_DATA_USER,
/* Data is a pointer to a user_bpf_iter structure */
USER_BPF_DATA_ITER,
};
/*
* Describes an iovec iterator that BPF programs can use to access data for
* a given user_event write() / writev() call.
*/
struct user_bpf_iter {
/* Offset of the data within the first iovec */
__u32 iov_offset;
/* Number of iovec structures */
__u32 nr_segs;
/* Pointer to iovec structures */
const struct iovec *iov;
};
/* Context that BPF programs receive when attached to a user_event */
struct user_bpf_context {
/* Data type being passed (see union below) */
__u32 data_type;
/* Length of the data */
__u32 data_len;
/* Pointer to data, varies by data type */
union {
/* Kernel data (data_type == USER_BPF_DATA_KERNEL) */
void *kdata;
/* User data (data_type == USER_BPF_DATA_USER) */
void *udata;
/* Direct iovec (data_type == USER_BPF_DATA_ITER) */
struct user_bpf_iter *iter;
};
};
#endif /* _UAPI_LINUX_USER_EVENTS_H */ #endif /* _UAPI_LINUX_USER_EVENTS_H */

Просмотреть файл

@ -35,7 +35,7 @@
/* Stage 1 creates the structure of the recorded event layout */ /* Stage 1 creates the structure of the recorded event layout */
#include "stages/stage1_defines.h" #include "stages/stage1_struct_define.h"
#undef DECLARE_CUSTOM_EVENT_CLASS #undef DECLARE_CUSTOM_EVENT_CLASS
#define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ #define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
@ -56,7 +56,7 @@
/* Stage 2 creates the custom class */ /* Stage 2 creates the custom class */
#include "stages/stage2_defines.h" #include "stages/stage2_data_offsets.h"
#undef DECLARE_CUSTOM_EVENT_CLASS #undef DECLARE_CUSTOM_EVENT_CLASS
#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -71,7 +71,7 @@
/* Stage 3 create the way to print the custom event */ /* Stage 3 create the way to print the custom event */
#include "stages/stage3_defines.h" #include "stages/stage3_trace_output.h"
#undef DECLARE_CUSTOM_EVENT_CLASS #undef DECLARE_CUSTOM_EVENT_CLASS
#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -102,7 +102,7 @@ static struct trace_event_functions trace_custom_event_type_funcs_##call = { \
/* Stage 4 creates the offset layout for the fields */ /* Stage 4 creates the offset layout for the fields */
#include "stages/stage4_defines.h" #include "stages/stage4_event_fields.h"
#undef DECLARE_CUSTOM_EVENT_CLASS #undef DECLARE_CUSTOM_EVENT_CLASS
#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, func, print) \ #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, func, print) \
@ -114,7 +114,7 @@ static struct trace_event_fields trace_custom_event_fields_##call[] = { \
/* Stage 5 creates the helper function for dynamic fields */ /* Stage 5 creates the helper function for dynamic fields */
#include "stages/stage5_defines.h" #include "stages/stage5_get_offsets.h"
#undef DECLARE_CUSTOM_EVENT_CLASS #undef DECLARE_CUSTOM_EVENT_CLASS
#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -134,7 +134,7 @@ static inline notrace int trace_custom_event_get_offsets_##call( \
/* Stage 6 creates the probe function that records the event */ /* Stage 6 creates the probe function that records the event */
#include "stages/stage6_defines.h" #include "stages/stage6_event_callback.h"
#undef DECLARE_CUSTOM_EVENT_CLASS #undef DECLARE_CUSTOM_EVENT_CLASS
#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -182,7 +182,7 @@ static inline void ftrace_test_custom_probe_##call(void) \
/* Stage 7 creates the actual class and event structure for the custom event */ /* Stage 7 creates the actual class and event structure for the custom event */
#include "stages/stage7_defines.h" #include "stages/stage7_class_define.h"
#undef DECLARE_CUSTOM_EVENT_CLASS #undef DECLARE_CUSTOM_EVENT_CLASS
#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \

Просмотреть файл

@ -45,7 +45,7 @@
PARAMS(print)); \ PARAMS(print)); \
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
#include "stages/stage1_defines.h" #include "stages/stage1_struct_define.h"
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
@ -109,7 +109,7 @@
* The size of an array is also encoded, in the higher 16 bits of <item>. * The size of an array is also encoded, in the higher 16 bits of <item>.
*/ */
#include "stages/stage2_defines.h" #include "stages/stage2_data_offsets.h"
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -181,7 +181,7 @@
* in binary. * in binary.
*/ */
#include "stages/stage3_defines.h" #include "stages/stage3_trace_output.h"
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -236,7 +236,7 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#include "stages/stage4_defines.h" #include "stages/stage4_event_fields.h"
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
@ -249,7 +249,7 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#include "stages/stage5_defines.h" #include "stages/stage5_get_offsets.h"
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -372,7 +372,7 @@ static inline notrace int trace_event_get_offsets_##call( \
#define _TRACE_PERF_INIT(call) #define _TRACE_PERF_INIT(call)
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
#include "stages/stage6_defines.h" #include "stages/stage6_event_callback.h"
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@ -418,7 +418,7 @@ static inline void ftrace_test_probe_##call(void) \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#include "stages/stage7_defines.h" #include "stages/stage7_class_define.h"
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \

Просмотреть файл

@ -7,6 +7,7 @@
* *
* Highly modified by Steven Rostedt (VMware). * Highly modified by Steven Rostedt (VMware).
*/ */
#include <linux/jump_label.h>
#include <linux/suspend.h> #include <linux/suspend.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <linux/slab.h> #include <linux/slab.h>
@ -23,24 +24,12 @@
#define ASSIGN_OPS_HASH(opsname, val) #define ASSIGN_OPS_HASH(opsname, val)
#endif #endif
static bool kill_ftrace_graph; DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
int ftrace_graph_active; int ftrace_graph_active;
/* Both enabled by default (can be cleared by function_graph tracer flags */ /* Both enabled by default (can be cleared by function_graph tracer flags */
static bool fgraph_sleep_time = true; static bool fgraph_sleep_time = true;
/**
* ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
*
* ftrace_graph_stop() is called when a severe error is detected in
* the function graph tracing. This function is called by the critical
* paths of function graph to keep those paths from doing any more harm.
*/
bool ftrace_graph_is_dead(void)
{
return kill_ftrace_graph;
}
/** /**
* ftrace_graph_stop - set to permanently disable function graph tracing * ftrace_graph_stop - set to permanently disable function graph tracing
* *
@ -51,7 +40,7 @@ bool ftrace_graph_is_dead(void)
*/ */
void ftrace_graph_stop(void) void ftrace_graph_stop(void)
{ {
kill_ftrace_graph = true; static_branch_enable(&kill_ftrace_graph);
} }
/* Add a function return address to the trace stack on thread info.*/ /* Add a function return address to the trace stack on thread info.*/

Просмотреть файл

@ -47,9 +47,6 @@
#define MAX_FIELD_ARRAY_SIZE 1024 #define MAX_FIELD_ARRAY_SIZE 1024
#define MAX_FIELD_ARG_NAME 256 #define MAX_FIELD_ARG_NAME 256
#define MAX_BPF_COPY_SIZE PAGE_SIZE
#define MAX_STACK_BPF_DATA 512
static char *register_page_data; static char *register_page_data;
static DEFINE_MUTEX(reg_mutex); static DEFINE_MUTEX(reg_mutex);
@ -410,19 +407,6 @@ parse:
type[0] != 'u', FILTER_OTHER); type[0] != 'u', FILTER_OTHER);
} }
static void user_event_parse_flags(struct user_event *user, char *flags)
{
char *flag;
if (flags == NULL)
return;
while ((flag = strsep(&flags, ",")) != NULL) {
if (strcmp(flag, "BPF_ITER") == 0)
user->flags |= FLAG_BPF_ITER;
}
}
static int user_event_parse_fields(struct user_event *user, char *args) static int user_event_parse_fields(struct user_event *user, char *args)
{ {
char *field; char *field;
@ -718,64 +702,14 @@ discard:
} }
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
static void user_event_bpf(struct user_event *user, struct iov_iter *i)
{
struct user_bpf_context context;
struct user_bpf_iter bpf_i;
char fast_data[MAX_STACK_BPF_DATA];
void *temp = NULL;
if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
/* Raw iterator */
context.data_type = USER_BPF_DATA_ITER;
context.data_len = i->count;
context.iter = &bpf_i;
bpf_i.iov_offset = i->iov_offset;
bpf_i.iov = i->iov;
bpf_i.nr_segs = i->nr_segs;
} else if (i->nr_segs == 1 && iter_is_iovec(i)) {
/* Single buffer from user */
context.data_type = USER_BPF_DATA_USER;
context.data_len = i->count;
context.udata = i->iov->iov_base + i->iov_offset;
} else {
/* Multi buffer from user */
struct iov_iter copy = *i;
size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);
context.data_type = USER_BPF_DATA_KERNEL;
context.kdata = fast_data;
if (unlikely(copy_size > sizeof(fast_data))) {
temp = kmalloc(copy_size, GFP_NOWAIT);
if (temp)
context.kdata = temp;
else
copy_size = sizeof(fast_data);
}
context.data_len = copy_nofault(context.kdata,
copy_size, &copy);
}
trace_call_bpf(&user->call, &context);
kfree(temp);
}
/* /*
* Writes the user supplied payload out to perf ring buffer or eBPF program. * Writes the user supplied payload out to perf ring buffer.
*/ */
static void user_event_perf(struct user_event *user, struct iov_iter *i, static void user_event_perf(struct user_event *user, struct iov_iter *i,
void *tpdata, bool *faulted) void *tpdata, bool *faulted)
{ {
struct hlist_head *perf_head; struct hlist_head *perf_head;
if (bpf_prog_array_valid(&user->call))
user_event_bpf(user, i);
perf_head = this_cpu_ptr(user->call.perf_events); perf_head = this_cpu_ptr(user->call.perf_events);
if (perf_head && !hlist_empty(perf_head)) { if (perf_head && !hlist_empty(perf_head)) {
@ -1141,8 +1075,6 @@ static int user_event_parse(char *name, char *args, char *flags,
user->tracepoint.name = name; user->tracepoint.name = name;
user_event_parse_flags(user, flags);
ret = user_event_parse_fields(user, args); ret = user_event_parse_fields(user, args);
if (ret) if (ret)
@ -1170,11 +1102,11 @@ static int user_event_parse(char *name, char *args, char *flags,
#endif #endif
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
ret = user_event_trace_register(user); ret = user_event_trace_register(user);
mutex_unlock(&event_mutex);
if (ret) if (ret)
goto put_user; goto put_user_lock;
user->index = index; user->index = index;
@ -1186,8 +1118,12 @@ static int user_event_parse(char *name, char *args, char *flags,
set_bit(user->index, page_bitmap); set_bit(user->index, page_bitmap);
hash_add(register_table, &user->node, key); hash_add(register_table, &user->node, key);
mutex_unlock(&event_mutex);
*newuser = user; *newuser = user;
return 0; return 0;
put_user_lock:
mutex_unlock(&event_mutex);
put_user: put_user:
user_event_destroy_fields(user); user_event_destroy_fields(user);
user_event_destroy_validators(user); user_event_destroy_validators(user);
@ -1580,9 +1516,6 @@ static int user_seq_show(struct seq_file *m, void *p)
busy++; busy++;
} }
if (flags & FLAG_BPF_ITER)
seq_puts(m, " FLAG:BPF_ITER");
seq_puts(m, "\n"); seq_puts(m, "\n");
active++; active++;
} }