From 393c3714081a53795bbff0e985d24146def6f57f Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 18 Nov 2021 15:00:08 -0800 Subject: [PATCH 01/43] kernfs: switch global kernfs_rwsem lock to per-fs lock The kernfs implementation has big lock granularity(kernfs_rwsem) so every kernfs-based(e.g., sysfs, cgroup) fs are able to compete the lock. It makes trouble for some cases to wait the global lock for a long time even though they are totally independent contexts each other. A general example is process A goes under direct reclaim with holding the lock when it accessed the file in sysfs and process B is waiting the lock with exclusive mode and then process C is waiting the lock until process B could finish the job after it gets the lock from process A. This patch switches the global kernfs_rwsem to per-fs lock, which put the rwsem into kernfs_root. Suggested-by: Tejun Heo Acked-by: Tejun Heo Signed-off-by: Minchan Kim Link: https://lore.kernel.org/r/20211118230008.2679780-1-minchan@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 110 ++++++++++++++++++++++++----------------- fs/kernfs/file.c | 6 ++- fs/kernfs/inode.c | 22 ++++++--- fs/kernfs/mount.c | 15 +++--- fs/kernfs/symlink.c | 5 +- include/linux/kernfs.h | 2 + 6 files changed, 97 insertions(+), 63 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8e0a1378a4b1..13cae0ccce74 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -17,7 +17,6 @@ #include "kernfs-internal.h" -DECLARE_RWSEM(kernfs_rwsem); static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ @@ -26,7 +25,7 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ static bool kernfs_active(struct kernfs_node *kn) { - lockdep_assert_held(&kernfs_rwsem); + lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem); return atomic_read(&kn->active) >= 0; } @@ -457,14 +456,15 @@ void kernfs_put_active(struct kernfs_node *kn) * return after draining is complete. */ static void kernfs_drain(struct kernfs_node *kn) - __releases(&kernfs_rwsem) __acquires(&kernfs_rwsem) + __releases(&kernfs_root(kn)->kernfs_rwsem) + __acquires(&kernfs_root(kn)->kernfs_rwsem) { struct kernfs_root *root = kernfs_root(kn); - lockdep_assert_held_write(&kernfs_rwsem); + lockdep_assert_held_write(&root->kernfs_rwsem); WARN_ON_ONCE(kernfs_active(kn)); - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); @@ -483,7 +483,7 @@ static void kernfs_drain(struct kernfs_node *kn) kernfs_drain_open_files(kn); - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); } /** @@ -718,11 +718,12 @@ err_unlock: int kernfs_add_one(struct kernfs_node *kn) { struct kernfs_node *parent = kn->parent; + struct kernfs_root *root = kernfs_root(parent); struct kernfs_iattrs *ps_iattr; bool has_ns; int ret; - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); @@ -753,7 +754,7 @@ int kernfs_add_one(struct kernfs_node *kn) ps_iattr->ia_mtime = ps_iattr->ia_ctime; } - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); /* * Activate the new node unless CREATE_DEACTIVATED is requested. @@ -767,7 +768,7 @@ int kernfs_add_one(struct kernfs_node *kn) return 0; out_unlock: - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); return ret; } @@ -788,7 +789,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, bool has_ns = kernfs_ns_enabled(parent); unsigned int hash; - lockdep_assert_held(&kernfs_rwsem); + lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem); if (has_ns != (bool)ns) { WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", @@ -820,7 +821,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, size_t len; char *p, *name; - lockdep_assert_held_read(&kernfs_rwsem); + lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem); /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ spin_lock_irq(&kernfs_rename_lock); @@ -859,11 +860,12 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { struct kernfs_node *kn; + struct kernfs_root *root = kernfs_root(parent); - down_read(&kernfs_rwsem); + down_read(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); kernfs_get(kn); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return kn; } @@ -883,11 +885,12 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) { struct kernfs_node *kn; + struct kernfs_root *root = kernfs_root(parent); - down_read(&kernfs_rwsem); + down_read(&root->kernfs_rwsem); kn = kernfs_walk_ns(parent, path, ns); kernfs_get(kn); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return kn; } @@ -912,6 +915,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, return ERR_PTR(-ENOMEM); idr_init(&root->ino_idr); + init_rwsem(&root->kernfs_rwsem); INIT_LIST_HEAD(&root->supers); /* @@ -1035,6 +1039,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) { struct kernfs_node *kn; + struct kernfs_root *root; if (flags & LOOKUP_RCU) return -ECHILD; @@ -1046,18 +1051,19 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) /* If the kernfs parent node has changed discard and * proceed to ->lookup. */ - down_read(&kernfs_rwsem); spin_lock(&dentry->d_lock); parent = kernfs_dentry_node(dentry->d_parent); if (parent) { + spin_unlock(&dentry->d_lock); + root = kernfs_root(parent); + down_read(&root->kernfs_rwsem); if (kernfs_dir_changed(parent, dentry)) { - spin_unlock(&dentry->d_lock); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return 0; } - } - spin_unlock(&dentry->d_lock); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); + } else + spin_unlock(&dentry->d_lock); /* The kernfs parent node hasn't changed, leave the * dentry negative and return success. @@ -1066,7 +1072,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) } kn = kernfs_dentry_node(dentry); - down_read(&kernfs_rwsem); + root = kernfs_root(kn); + down_read(&root->kernfs_rwsem); /* The kernfs node has been deactivated */ if (!kernfs_active(kn)) @@ -1085,10 +1092,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) kernfs_info(dentry->d_sb)->ns != kn->ns) goto out_bad; - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return 1; out_bad: - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return 0; } @@ -1102,10 +1109,12 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, { struct kernfs_node *parent = dir->i_private; struct kernfs_node *kn; + struct kernfs_root *root; struct inode *inode = NULL; const void *ns = NULL; - down_read(&kernfs_rwsem); + root = kernfs_root(parent); + down_read(&root->kernfs_rwsem); if (kernfs_ns_enabled(parent)) ns = kernfs_info(dir->i_sb)->ns; @@ -1116,7 +1125,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, * create a negative. */ if (!kernfs_active(kn)) { - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return NULL; } inode = kernfs_get_inode(dir->i_sb, kn); @@ -1131,7 +1140,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, */ if (!IS_ERR(inode)) kernfs_set_rev(parent, dentry); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); /* instantiate and hash (possibly negative) dentry */ return d_splice_alias(inode, dentry); @@ -1254,7 +1263,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, { struct rb_node *rbn; - lockdep_assert_held_write(&kernfs_rwsem); + lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem); /* if first iteration, visit leftmost descendant which may be root */ if (!pos) @@ -1289,8 +1298,9 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, void kernfs_activate(struct kernfs_node *kn) { struct kernfs_node *pos; + struct kernfs_root *root = kernfs_root(kn); - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) { @@ -1304,14 +1314,14 @@ void kernfs_activate(struct kernfs_node *kn) pos->flags |= KERNFS_ACTIVATED; } - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); } static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos; - lockdep_assert_held_write(&kernfs_rwsem); + lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); /* * Short-circuit if non-root @kn has already finished removal. @@ -1381,9 +1391,11 @@ static void __kernfs_remove(struct kernfs_node *kn) */ void kernfs_remove(struct kernfs_node *kn) { - down_write(&kernfs_rwsem); + struct kernfs_root *root = kernfs_root(kn); + + down_write(&root->kernfs_rwsem); __kernfs_remove(kn); - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); } /** @@ -1469,8 +1481,9 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn) bool kernfs_remove_self(struct kernfs_node *kn) { bool ret; + struct kernfs_root *root = kernfs_root(kn); - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); kernfs_break_active_protection(kn); /* @@ -1498,9 +1511,9 @@ bool kernfs_remove_self(struct kernfs_node *kn) atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) break; - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); schedule(); - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); } finish_wait(waitq, &wait); WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); @@ -1513,7 +1526,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) */ kernfs_unbreak_active_protection(kn); - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); return ret; } @@ -1530,6 +1543,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns) { struct kernfs_node *kn; + struct kernfs_root *root; if (!parent) { WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n", @@ -1537,13 +1551,14 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, return -ENOENT; } - down_write(&kernfs_rwsem); + root = kernfs_root(parent); + down_write(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); if (kn) __kernfs_remove(kn); - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); if (kn) return 0; @@ -1562,6 +1577,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { struct kernfs_node *old_parent; + struct kernfs_root *root; const char *old_name = NULL; int error; @@ -1569,7 +1585,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, if (!kn->parent) return -EINVAL; - down_write(&kernfs_rwsem); + root = kernfs_root(kn); + down_write(&root->kernfs_rwsem); error = -ENOENT; if (!kernfs_active(kn) || !kernfs_active(new_parent) || @@ -1623,7 +1640,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, error = 0; out: - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); return error; } @@ -1694,11 +1711,14 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) struct dentry *dentry = file->f_path.dentry; struct kernfs_node *parent = kernfs_dentry_node(dentry); struct kernfs_node *pos = file->private_data; + struct kernfs_root *root; const void *ns = NULL; if (!dir_emit_dots(file, ctx)) return 0; - down_read(&kernfs_rwsem); + + root = kernfs_root(parent); + down_read(&root->kernfs_rwsem); if (kernfs_ns_enabled(parent)) ns = kernfs_info(dentry->d_sb)->ns; @@ -1715,12 +1735,12 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) file->private_data = pos; kernfs_get(pos); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); if (!dir_emit(ctx, name, len, ino, type)) return 0; - down_read(&kernfs_rwsem); + down_read(&root->kernfs_rwsem); } - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); file->private_data = NULL; ctx->pos = INT_MAX; return 0; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 60e2a86c535e..9414a7a60a9f 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -847,6 +847,7 @@ static void kernfs_notify_workfn(struct work_struct *work) { struct kernfs_node *kn; struct kernfs_super_info *info; + struct kernfs_root *root; repeat: /* pop one off the notify_list */ spin_lock_irq(&kernfs_notify_lock); @@ -859,8 +860,9 @@ repeat: kn->attr.notify_next = NULL; spin_unlock_irq(&kernfs_notify_lock); + root = kernfs_root(kn); /* kick fsnotify */ - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; @@ -898,7 +900,7 @@ repeat: iput(inode); } - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); kernfs_put(kn); goto repeat; } diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index c0eae1725435..3d783d80f5da 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -99,10 +99,11 @@ int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { int ret; + struct kernfs_root *root = kernfs_root(kn); - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); ret = __kernfs_setattr(kn, iattr); - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); return ret; } @@ -111,12 +112,14 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, { struct inode *inode = d_inode(dentry); struct kernfs_node *kn = inode->i_private; + struct kernfs_root *root; int error; if (!kn) return -EINVAL; - down_write(&kernfs_rwsem); + root = kernfs_root(kn); + down_write(&root->kernfs_rwsem); error = setattr_prepare(&init_user_ns, dentry, iattr); if (error) goto out; @@ -129,7 +132,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, setattr_copy(&init_user_ns, inode, iattr); out: - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); return error; } @@ -184,13 +187,14 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns, { struct inode *inode = d_inode(path->dentry); struct kernfs_node *kn = inode->i_private; + struct kernfs_root *root = kernfs_root(kn); - down_read(&kernfs_rwsem); + down_read(&root->kernfs_rwsem); spin_lock(&inode->i_lock); kernfs_refresh_inode(kn, inode); generic_fillattr(&init_user_ns, inode, stat); spin_unlock(&inode->i_lock); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return 0; } @@ -274,19 +278,21 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns, struct inode *inode, int mask) { struct kernfs_node *kn; + struct kernfs_root *root; int ret; if (mask & MAY_NOT_BLOCK) return -ECHILD; kn = inode->i_private; + root = kernfs_root(kn); - down_read(&kernfs_rwsem); + down_read(&root->kernfs_rwsem); spin_lock(&inode->i_lock); kernfs_refresh_inode(kn, inode); ret = generic_permission(&init_user_ns, inode, mask); spin_unlock(&inode->i_lock); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return ret; } diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index f2f909d09f52..cfa79715fc1a 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -236,6 +236,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); + struct kernfs_root *kf_root = kfc->root; struct inode *inode; struct dentry *root; @@ -255,9 +256,9 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k sb->s_shrink.seeks = 0; /* get root inode, initialize and unlock it */ - down_read(&kernfs_rwsem); + down_read(&kf_root->kernfs_rwsem); inode = kernfs_get_inode(sb, info->root->kn); - up_read(&kernfs_rwsem); + up_read(&kf_root->kernfs_rwsem); if (!inode) { pr_debug("kernfs: could not get root inode\n"); return -ENOMEM; @@ -334,6 +335,7 @@ int kernfs_get_tree(struct fs_context *fc) if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); + struct kernfs_root *root = kfc->root; kfc->new_sb_created = true; @@ -344,9 +346,9 @@ int kernfs_get_tree(struct fs_context *fc) } sb->s_flags |= SB_ACTIVE; - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); list_add(&info->node, &info->root->supers); - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); } fc->root = dget(sb->s_root); @@ -371,10 +373,11 @@ void kernfs_free_fs_context(struct fs_context *fc) void kernfs_kill_sb(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); + struct kernfs_root *root = info->root; - down_write(&kernfs_rwsem); + down_write(&root->kernfs_rwsem); list_del(&info->node); - up_write(&kernfs_rwsem); + up_write(&root->kernfs_rwsem); /* * Remove the superblock from fs_supers/s_instances diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 19a6c71c6ff5..0ab13824822f 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -113,11 +113,12 @@ static int kernfs_getlink(struct inode *inode, char *path) struct kernfs_node *kn = inode->i_private; struct kernfs_node *parent = kn->parent; struct kernfs_node *target = kn->symlink.target_kn; + struct kernfs_root *root = kernfs_root(parent); int error; - down_read(&kernfs_rwsem); + down_read(&root->kernfs_rwsem); error = kernfs_get_target_path(parent, target, path); - up_read(&kernfs_rwsem); + up_read(&root->kernfs_rwsem); return error; } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 3ccce6f24548..9f650986a81b 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -16,6 +16,7 @@ #include #include #include +#include struct file; struct dentry; @@ -197,6 +198,7 @@ struct kernfs_root { struct list_head supers; wait_queue_head_t deactivate_waitq; + struct rw_semaphore kernfs_rwsem; }; struct kernfs_open_file { From a6914afcdf0e3fb853fce0e0c04710be7427b62f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Nov 2021 12:31:28 +0200 Subject: [PATCH 02/43] kobject: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211110103128.59888-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index efd56f990a46..c740062b4b1a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -19,10 +19,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include From 2043727c2882928a10161ddee52b196b7db402fd Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Fri, 5 Nov 2021 15:15:09 +0800 Subject: [PATCH 03/43] driver core: platform: Make use of the helper function dev_err_probe() When possible using dev_err_probe() helps to properly deal with the PROBE_DEFER error, the benefit is that DEFER issue will be logged in the devices_deferred debugfs file. Signed-off-by: Cai Huoqing Link: https://lore.kernel.org/r/20211105071509.969-1-caihuoqing@baidu.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 598acf93a360..7109351366c8 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -258,8 +258,9 @@ int platform_get_irq(struct platform_device *dev, unsigned int num) int ret; ret = platform_get_irq_optional(dev, num); - if (ret < 0 && ret != -EPROBE_DEFER) - dev_err(&dev->dev, "IRQ index %u not found\n", num); + if (ret < 0) + return dev_err_probe(&dev->dev, ret, + "IRQ index %u not found\n", num); return ret; } From 3722e7c3c654ad8fedc0767f617519a12b4c7bfd Mon Sep 17 00:00:00 2001 From: Kohei Tarumizu Date: Wed, 1 Dec 2021 20:59:56 +0900 Subject: [PATCH 04/43] docs: document the sysfs ABI for "nohz_full" Add missing documentation of sysfs ABI for "nohz_full". It was added by commit 6570a9a1ce3a("show nohz_full cpus in sysfs"). However, there is no documentation for these interface. Signed-off-by: Kohei Tarumizu Link: https://lore.kernel.org/r/20211201115957.254224-2-tarumizu.kohei@fujitsu.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 69c65da16dff..f84c32f0bdf6 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -666,3 +666,10 @@ Description: Preferred MTE tag checking mode ================ ============================================== See also: Documentation/arm64/memory-tagging-extension.rst + +What: /sys/devices/system/cpu/nohz_full +Date: Apr 2015 +Contact: Linux kernel mailing list +Description: + (RO) the list of CPUs that are in nohz_full mode. + These CPUs are set by boot parameter "nohz_full=". From 02bf607413e6110321ae75698c8ecbfa82eaafa8 Mon Sep 17 00:00:00 2001 From: Kohei Tarumizu Date: Wed, 1 Dec 2021 20:59:57 +0900 Subject: [PATCH 05/43] docs: document the sysfs ABI for "isolated" Add missing documentation of sysfs ABI for "isolated". It was added by commit 59f30abe94bf("show isolated cpus in sysfs"). However, there is no documentation for these interface. Signed-off-by: Kohei Tarumizu Link: https://lore.kernel.org/r/20211201115957.254224-3-tarumizu.kohei@fujitsu.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index f84c32f0bdf6..61f5676a7429 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -673,3 +673,11 @@ Contact: Linux kernel mailing list Description: (RO) the list of CPUs that are in nohz_full mode. These CPUs are set by boot parameter "nohz_full=". + +What: /sys/devices/system/cpu/isolated +Date: Apr 2015 +Contact: Linux kernel mailing list +Description: + (RO) the list of CPUs that are isolated and don't + participate in load balancing. These CPUs are set by + boot parameter "isolcpus=". From 555a0ce4558d87d5b97c4321f34b19e051c7b0c1 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 1 Dec 2021 15:16:48 -0800 Subject: [PATCH 06/43] kernfs: prevent early freeing of root node Marek reported the warning below. ========================= WARNING: held lock freed! 5.16.0-rc2+ #10984 Not tainted ------------------------- kworker/1:0/18 is freeing memory ffff00004034e200-ffff00004034e3ff, with a lock still held there! ffff00004034e348 (&root->kernfs_rwsem){++++}-{3:3}, at: __kernfs_remove+0x310/0x37c 3 locks held by kworker/1:0/18: #0: ffff000040107938 ((wq_completion)cgroup_destroy){+.+.}-{0:0}, at: process_one_work+0x1f0/0x6f0 #1: ffff80000b55bdc0 ((work_completion)(&(&css->destroy_rwork)->work)){+.+.}-{0:0}, at: process_one_work+0x1f0/0x6f0 #2: ffff00004034e348 (&root->kernfs_rwsem){++++}-{3:3}, at: __kernfs_remove+0x310/0x37c stack backtrace: CPU: 1 PID: 18 Comm: kworker/1:0 Not tainted 5.16.0-rc2+ #10984 Hardware name: Raspberry Pi 4 Model B (DT) Workqueue: cgroup_destroy css_free_rwork_fn Call trace: dump_backtrace+0x0/0x1ac show_stack+0x18/0x24 dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 debug_check_no_locks_freed+0x124/0x140 kfree+0xf0/0x3a4 kernfs_put+0x1f8/0x224 __kernfs_remove+0x1b8/0x37c kernfs_destroy_root+0x38/0x50 css_free_rwork_fn+0x288/0x3d4 process_one_work+0x288/0x6f0 worker_thread+0x74/0x470 kthread+0x188/0x194 ret_from_fork+0x10/0x20 Since kernfs moves the kernfs_rwsem lock into root, it couldn't hold the lock when the root node is tearing down. Thus, get the refcount of root node. Fixes: 393c3714081a ("kernfs: switch global kernfs_rwsem lock to per-fs lock") Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Acked-by: Tejun Heo Signed-off-by: Minchan Kim Link: https://lore.kernel.org/r/20211201231648.1027165-1-minchan@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 13cae0ccce74..e6d9772ddb4c 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -961,7 +961,13 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, */ void kernfs_destroy_root(struct kernfs_root *root) { - kernfs_remove(root->kn); /* will also free @root */ + /* + * kernfs_remove holds kernfs_rwsem from the root so the root + * shouldn't be freed during the operation. + */ + kernfs_get(root->kn); + kernfs_remove(root->kn); + kernfs_put(root->kn); /* will also free @root */ } /** From 2c4dcd7fd57b20a21b65da04d89c38a7217d79cf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Nov 2021 14:03:07 +0100 Subject: [PATCH 07/43] topology/sysfs: export die attributes only if an architectures has support The die_id and die_cpus topology sysfs attributes have been added with commit 0e344d8c709f ("cpu/topology: Export die_id") and commit 2e4c54dac7b3 ("topology: Create core_cpus and die_cpus sysfs attributes"). While they are currently only used and useful for x86 they are still present with bogus default values for all architectures. Instead of enforcing such new sysfs attributes to all architectures, make them only optional visible if an architecture opts in by defining both the topology_die_id and topology_die_cpumask attributes. This is similar to what was done when the book and drawer topology levels were introduced: avoid useless and therefore confusing sysfs attributes for architectures which cannot make use of them. This should not break any existing applications, since this is a rather new interface and applications should be able to handle also older kernel versions without such attributes - besides that they contain only useful information for x86. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20211129130309.3256168-2-hca@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/cputopology.rst | 3 +++ drivers/base/topology.c | 8 ++++++++ include/linux/topology.h | 4 ++++ 3 files changed, 15 insertions(+) diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst index 6b62e182baf4..c68d07533c45 100644 --- a/Documentation/admin-guide/cputopology.rst +++ b/Documentation/admin-guide/cputopology.rst @@ -11,6 +11,9 @@ Architecture-neutral, drivers/base/topology.c, exports these attributes. However, the book and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively. +The die hierarchy related sysfs files will only be created if an architecture +provides the related macros as described below. + CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390, where they reflect the cpu and cache hierarchy. diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 8f2b641d0b8c..f079a55793ec 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -45,8 +45,10 @@ static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \ define_id_show_func(physical_package_id); static DEVICE_ATTR_RO(physical_package_id); +#ifdef TOPOLOGY_DIE_SYSFS define_id_show_func(die_id); static DEVICE_ATTR_RO(die_id); +#endif define_id_show_func(cluster_id); static DEVICE_ATTR_RO(cluster_id); @@ -70,9 +72,11 @@ define_siblings_read_func(cluster_cpus, cluster_cpumask); static BIN_ATTR_RO(cluster_cpus, 0); static BIN_ATTR_RO(cluster_cpus_list, 0); +#ifdef TOPOLOGY_DIE_SYSFS define_siblings_read_func(die_cpus, die_cpumask); static BIN_ATTR_RO(die_cpus, 0); static BIN_ATTR_RO(die_cpus_list, 0); +#endif define_siblings_read_func(package_cpus, core_cpumask); static BIN_ATTR_RO(package_cpus, 0); @@ -103,8 +107,10 @@ static struct bin_attribute *bin_attrs[] = { &bin_attr_core_siblings_list, &bin_attr_cluster_cpus, &bin_attr_cluster_cpus_list, +#ifdef TOPOLOGY_DIE_SYSFS &bin_attr_die_cpus, &bin_attr_die_cpus_list, +#endif &bin_attr_package_cpus, &bin_attr_package_cpus_list, #ifdef CONFIG_SCHED_BOOK @@ -120,7 +126,9 @@ static struct bin_attribute *bin_attrs[] = { static struct attribute *default_attrs[] = { &dev_attr_physical_package_id.attr, +#ifdef TOPOLOGY_DIE_SYSFS &dev_attr_die_id.attr, +#endif &dev_attr_cluster_id.attr, &dev_attr_core_id.attr, #ifdef CONFIG_SCHED_BOOK diff --git a/include/linux/topology.h b/include/linux/topology.h index 0b3704ad13c8..8d1bdae76230 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -180,6 +180,10 @@ static inline int cpu_to_mem(int cpu) #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ +#if defined(topology_die_id) && defined(topology_die_cpumask) +#define TOPOLOGY_DIE_SYSFS +#endif + #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) #endif From e795707703b32fecdd7467afcc33ff1e92416c05 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Nov 2021 14:03:08 +0100 Subject: [PATCH 08/43] topology/sysfs: export cluster attributes only if an architectures has support The cluster_id and cluster_cpus topology sysfs attributes have been added with commit c5e22feffdd7 ("topology: Represent clusters of CPUs within a die"). They are currently only used for x86, arm64, and riscv (via generic arch topology), however they are still present with bogus default values for all other architectures. Instead of enforcing such new sysfs attributes to all architectures, make them only optional visible if an architecture opts in by defining both the topology_cluster_id and topology_cluster_cpumask attributes. This is similar to what was done when the book and drawer topology levels were introduced: avoid useless and therefore confusing sysfs attributes for architectures which cannot make use of them. This should not break any existing applications, since this is a new interface introduced with the v5.16 merge window. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20211129130309.3256168-3-hca@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/cputopology.rst | 4 ++-- drivers/base/topology.c | 8 ++++++++ include/linux/topology.h | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst index c68d07533c45..ad2238b41439 100644 --- a/Documentation/admin-guide/cputopology.rst +++ b/Documentation/admin-guide/cputopology.rst @@ -11,8 +11,8 @@ Architecture-neutral, drivers/base/topology.c, exports these attributes. However, the book and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively. -The die hierarchy related sysfs files will only be created if an architecture -provides the related macros as described below. +The die and cluster hierarchy related sysfs files will only be created if an +architecture provides the related macros as described below. CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390, where they reflect the cpu and cache hierarchy. diff --git a/drivers/base/topology.c b/drivers/base/topology.c index f079a55793ec..9d049724e4b4 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -50,8 +50,10 @@ define_id_show_func(die_id); static DEVICE_ATTR_RO(die_id); #endif +#ifdef TOPOLOGY_CLUSTER_SYSFS define_id_show_func(cluster_id); static DEVICE_ATTR_RO(cluster_id); +#endif define_id_show_func(core_id); static DEVICE_ATTR_RO(core_id); @@ -68,9 +70,11 @@ define_siblings_read_func(core_siblings, core_cpumask); static BIN_ATTR_RO(core_siblings, 0); static BIN_ATTR_RO(core_siblings_list, 0); +#ifdef TOPOLOGY_CLUSTER_SYSFS define_siblings_read_func(cluster_cpus, cluster_cpumask); static BIN_ATTR_RO(cluster_cpus, 0); static BIN_ATTR_RO(cluster_cpus_list, 0); +#endif #ifdef TOPOLOGY_DIE_SYSFS define_siblings_read_func(die_cpus, die_cpumask); @@ -105,8 +109,10 @@ static struct bin_attribute *bin_attrs[] = { &bin_attr_thread_siblings_list, &bin_attr_core_siblings, &bin_attr_core_siblings_list, +#ifdef TOPOLOGY_CLUSTER_SYSFS &bin_attr_cluster_cpus, &bin_attr_cluster_cpus_list, +#endif #ifdef TOPOLOGY_DIE_SYSFS &bin_attr_die_cpus, &bin_attr_die_cpus_list, @@ -129,7 +135,9 @@ static struct attribute *default_attrs[] = { #ifdef TOPOLOGY_DIE_SYSFS &dev_attr_die_id.attr, #endif +#ifdef TOPOLOGY_CLUSTER_SYSFS &dev_attr_cluster_id.attr, +#endif &dev_attr_core_id.attr, #ifdef CONFIG_SCHED_BOOK &dev_attr_book_id.attr, diff --git a/include/linux/topology.h b/include/linux/topology.h index 8d1bdae76230..d52be69037db 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -183,6 +183,9 @@ static inline int cpu_to_mem(int cpu) #if defined(topology_die_id) && defined(topology_die_cpumask) #define TOPOLOGY_DIE_SYSFS #endif +#if defined(topology_cluster_id) && defined(topology_cluster_cpumask) +#define TOPOLOGY_CLUSTER_SYSFS +#endif #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) From f1045056c726440469d89d23c13734bcd6c0d15b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Nov 2021 14:03:09 +0100 Subject: [PATCH 09/43] topology/sysfs: rework book and drawer topology ifdefery Provide default defines for the topology_book_[id|cpumask] and topology_drawer_[id|cpumask] macros just like for each other topology level. This way all topology levels are handled in a similar way. Still the the book and drawer levels are only used on s390, and also the sysfs attributes are only created on s390. However other architectures may opt in if wanted. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20211129130309.3256168-4-hca@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/cputopology.rst | 36 ++++++++++------------- drivers/base/topology.c | 12 ++++---- include/linux/topology.h | 18 ++++++++++++ 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst index ad2238b41439..677ba1c2c820 100644 --- a/Documentation/admin-guide/cputopology.rst +++ b/Documentation/admin-guide/cputopology.rst @@ -8,14 +8,9 @@ to /proc/cpuinfo output of some architectures. They reside in Documentation/ABI/stable/sysfs-devices-system-cpu. Architecture-neutral, drivers/base/topology.c, exports these attributes. -However, the book and drawer related sysfs files will only be created if -CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively. - -The die and cluster hierarchy related sysfs files will only be created if an -architecture provides the related macros as described below. - -CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390, -where they reflect the cpu and cache hierarchy. +However the die, cluster, book, and drawer hierarchy related sysfs files will +only be created if an architecture provides the related macros as described +below. For an architecture to support this feature, it must define some of these macros in include/asm-XXX/topology.h:: @@ -42,19 +37,18 @@ To be consistent on all architectures, include/linux/topology.h provides default definitions for any of the above macros that are not defined by include/asm-XXX/topology.h: -1) topology_physical_package_id: -1 -2) topology_die_id: -1 -3) topology_cluster_id: -1 -4) topology_core_id: 0 -5) topology_sibling_cpumask: just the given CPU -6) topology_core_cpumask: just the given CPU -7) topology_cluster_cpumask: just the given CPU -8) topology_die_cpumask: just the given CPU - -For architectures that don't support books (CONFIG_SCHED_BOOK) there are no -default definitions for topology_book_id() and topology_book_cpumask(). -For architectures that don't support drawers (CONFIG_SCHED_DRAWER) there are -no default definitions for topology_drawer_id() and topology_drawer_cpumask(). + 1) topology_physical_package_id: -1 + 2) topology_die_id: -1 + 3) topology_cluster_id: -1 + 4) topology_core_id: 0 + 5) topology_book_id: -1 + 6) topology_drawer_id: -1 + 7) topology_sibling_cpumask: just the given CPU + 8) topology_core_cpumask: just the given CPU + 9) topology_cluster_cpumask: just the given CPU +10) topology_die_cpumask: just the given CPU +11) topology_book_cpumask: just the given CPU +12) topology_drawer_cpumask: just the given CPU Additionally, CPU topology information is provided under /sys/devices/system/cpu and includes these files. The internal diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 9d049724e4b4..fc24e89f9592 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -86,7 +86,7 @@ define_siblings_read_func(package_cpus, core_cpumask); static BIN_ATTR_RO(package_cpus, 0); static BIN_ATTR_RO(package_cpus_list, 0); -#ifdef CONFIG_SCHED_BOOK +#ifdef TOPOLOGY_BOOK_SYSFS define_id_show_func(book_id); static DEVICE_ATTR_RO(book_id); define_siblings_read_func(book_siblings, book_cpumask); @@ -94,7 +94,7 @@ static BIN_ATTR_RO(book_siblings, 0); static BIN_ATTR_RO(book_siblings_list, 0); #endif -#ifdef CONFIG_SCHED_DRAWER +#ifdef TOPOLOGY_DRAWER_SYSFS define_id_show_func(drawer_id); static DEVICE_ATTR_RO(drawer_id); define_siblings_read_func(drawer_siblings, drawer_cpumask); @@ -119,11 +119,11 @@ static struct bin_attribute *bin_attrs[] = { #endif &bin_attr_package_cpus, &bin_attr_package_cpus_list, -#ifdef CONFIG_SCHED_BOOK +#ifdef TOPOLOGY_BOOK_SYSFS &bin_attr_book_siblings, &bin_attr_book_siblings_list, #endif -#ifdef CONFIG_SCHED_DRAWER +#ifdef TOPOLOGY_DRAWER_SYSFS &bin_attr_drawer_siblings, &bin_attr_drawer_siblings_list, #endif @@ -139,10 +139,10 @@ static struct attribute *default_attrs[] = { &dev_attr_cluster_id.attr, #endif &dev_attr_core_id.attr, -#ifdef CONFIG_SCHED_BOOK +#ifdef TOPOLOGY_BOOK_SYSFS &dev_attr_book_id.attr, #endif -#ifdef CONFIG_SCHED_DRAWER +#ifdef TOPOLOGY_DRAWER_SYSFS &dev_attr_drawer_id.attr, #endif NULL diff --git a/include/linux/topology.h b/include/linux/topology.h index d52be69037db..a6e201758ae9 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -186,6 +186,12 @@ static inline int cpu_to_mem(int cpu) #if defined(topology_cluster_id) && defined(topology_cluster_cpumask) #define TOPOLOGY_CLUSTER_SYSFS #endif +#if defined(topology_book_id) && defined(topology_book_cpumask) +#define TOPOLOGY_BOOK_SYSFS +#endif +#if defined(topology_drawer_id) && defined(topology_drawer_cpumask) +#define TOPOLOGY_DRAWER_SYSFS +#endif #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) @@ -199,6 +205,12 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif +#ifndef topology_book_id +#define topology_book_id(cpu) ((void)(cpu), -1) +#endif +#ifndef topology_drawer_id +#define topology_drawer_id(cpu) ((void)(cpu), -1) +#endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif @@ -211,6 +223,12 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_die_cpumask #define topology_die_cpumask(cpu) cpumask_of(cpu) #endif +#ifndef topology_book_cpumask +#define topology_book_cpumask(cpu) cpumask_of(cpu) +#endif +#ifndef topology_drawer_cpumask +#define topology_drawer_cpumask(cpu) cpumask_of(cpu) +#endif #if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) static inline const struct cpumask *cpu_smt_mask(int cpu) From b247703873c4d5088eee52d7902495b24967ce8e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:42:59 -0800 Subject: [PATCH 10/43] Documentation/auxiliary_bus: Clarify auxiliary_device creation The documentation for creating an auxiliary device is a 3 step not a 2 step process. Specifically the requirements of setting the name, id, dev.release, and dev.parent fields was not clear as a precursor to the '2 step' process documented. Clarify by declaring this a 3 step process starting with setting the fields of struct auxiliary_device correctly. Also add some sample code and tie the change into the rest of the documentation. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-2-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/auxiliary_bus.rst | 81 ++++++++++++++++------ drivers/base/auxiliary.c | 4 +- 2 files changed, 63 insertions(+), 22 deletions(-) diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst index ef902daf0d68..7dbb4f16462a 100644 --- a/Documentation/driver-api/auxiliary_bus.rst +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -71,26 +71,14 @@ they are not physical devices that are controlled by DT/ACPI. The same argument applies for not using MFD in this scenario as MFD relies on individual function devices being physical devices. -Auxiliary Device -================ +Auxiliary Device Creation +========================= An auxiliary_device represents a part of its parent device's functionality. It is given a name that, combined with the registering drivers KBUILD_MODNAME, creates a match_name that is used for driver binding, and an id that combined with the match_name provide a unique name to register with the bus subsystem. -Registering an auxiliary_device is a two-step process. First call -auxiliary_device_init(), which checks several aspects of the auxiliary_device -struct and performs a device_initialize(). After this step completes, any -error state must have a call to auxiliary_device_uninit() in its resolution path. -The second step in registering an auxiliary_device is to perform a call to -auxiliary_device_add(), which sets the name of the device and add the device to -the bus. - -Unregistering an auxiliary_device is also a two-step process to mirror the -register process. First call auxiliary_device_delete(), then call -auxiliary_device_uninit(). - .. code-block:: c struct auxiliary_device { @@ -99,15 +87,68 @@ auxiliary_device_uninit(). u32 id; }; -If two auxiliary_devices both with a match_name "mod.foo" are registered onto -the bus, they must have unique id values (e.g. "x" and "y") so that the -registered devices names are "mod.foo.x" and "mod.foo.y". If match_name + id -are not unique, then the device_add fails and generates an error message. +Registering an auxiliary_device is a three-step process. + +First, a 'struct auxiliary_device' needs to be defined or allocated for each +sub-device desired. The name, id, dev.release, and dev.parent fields of this +structure must be filled in as follows. + +The 'name' field is to be given a name that is recognized by the auxiliary +driver. If two auxiliary_devices with the same match_name, eg +"mod.MY_DEVICE_NAME", are registered onto the bus, they must have unique id +values (e.g. "x" and "y") so that the registered devices names are "mod.foo.x" +and "mod.foo.y". If match_name + id are not unique, then the device_add fails +and generates an error message. The auxiliary_device.dev.type.release or auxiliary_device.dev.release must be -populated with a non-NULL pointer to successfully register the auxiliary_device. +populated with a non-NULL pointer to successfully register the +auxiliary_device. This release call is where resources associated with the +auxiliary device must be free'ed. Because once the device is placed on the bus +the parent driver can not tell what other code may have a reference to this +data. + +The auxiliary_device.dev.parent should be set. Typically to the registering +drivers device. + +Second, call auxiliary_device_init(), which checks several aspects of the +auxiliary_device struct and performs a device_initialize(). After this step +completes, any error state must have a call to auxiliary_device_uninit() in its +resolution path. + +The third and final step in registering an auxiliary_device is to perform a +call to auxiliary_device_add(), which sets the name of the device and adds the +device to the bus. + +.. code-block:: c + + struct auxiliary_device *my_aux_dev = my_aux_dev_alloc(xxx); + + /* Step 1: */ + my_aux_dev->name = MY_DEVICE_NAME; + my_aux_dev->id = my_unique_id_alloc(xxx); + my_aux_dev->dev.release = my_aux_dev_release; + my_aux_dev->dev.parent = my_dev; + + /* Step 2: */ + if (auxiliary_device_init(my_aux_dev)) + goto fail; + + /* Step 3: */ + if (auxiliary_device_add(my_aux_dev)) { + auxiliary_device_uninit(my_aux_dev); + goto fail; + } + +Unregistering an auxiliary_device is a two-step process to mirror the register +process. First call auxiliary_device_delete(), then call +auxiliary_device_uninit(). + + +.. code-block:: c + + auxiliary_device_delete(my_dev->my_aux_dev); + auxiliary_device_uninit(my_dev->my_aux_dev); -The auxiliary_device.dev.parent must also be populated. Auxiliary Device Memory Model and Lifespan ------------------------------------------ diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 9230c9472bb0..70a8dbcd31b7 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -117,7 +117,7 @@ static struct bus_type auxiliary_bus_type = { * auxiliary_device_init - check auxiliary_device and initialize * @auxdev: auxiliary device struct * - * This is the first step in the two-step process to register an + * This is the second step in the three-step process to register an * auxiliary_device. * * When this function returns an error code, then the device_initialize will @@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(auxiliary_device_init); * @auxdev: auxiliary bus device to add to the bus * @modname: name of the parent device's driver module * - * This is the second step in the two-step process to register an + * This is the third step in the three-step process to register an * auxiliary_device. * * This function must be called after a successful call to From 0d058a206adadf0c38f891b19dbdcb2963a1cd20 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:00 -0800 Subject: [PATCH 11/43] Documentation/auxiliary_bus: Clarify match_name Provide example code for how the match name is formed and where it is supposed to be set. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-3-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/auxiliary_bus.rst | 33 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst index 7dbb4f16462a..b041a72dc322 100644 --- a/Documentation/driver-api/auxiliary_bus.rst +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -78,6 +78,9 @@ An auxiliary_device represents a part of its parent device's functionality. It is given a name that, combined with the registering drivers KBUILD_MODNAME, creates a match_name that is used for driver binding, and an id that combined with the match_name provide a unique name to register with the bus subsystem. +For example, a driver registering an auxiliary device is named 'foo_mod.ko' and +the subdevice is named 'foo_dev'. The match name is therefore +'foo_mod.foo_dev'. .. code-block:: c @@ -95,9 +98,9 @@ structure must be filled in as follows. The 'name' field is to be given a name that is recognized by the auxiliary driver. If two auxiliary_devices with the same match_name, eg -"mod.MY_DEVICE_NAME", are registered onto the bus, they must have unique id -values (e.g. "x" and "y") so that the registered devices names are "mod.foo.x" -and "mod.foo.y". If match_name + id are not unique, then the device_add fails +"foo_mod.foo_dev", are registered onto the bus, they must have unique id +values (e.g. "x" and "y") so that the registered devices names are "foo_mod.foo_dev.x" +and "foo_mod.foo_dev.y". If match_name + id are not unique, then the device_add fails and generates an error message. The auxiliary_device.dev.type.release or auxiliary_device.dev.release must be @@ -121,6 +124,10 @@ device to the bus. .. code-block:: c + #define MY_DEVICE_NAME "foo_dev" + + ... + struct auxiliary_device *my_aux_dev = my_aux_dev_alloc(xxx); /* Step 1: */ @@ -139,6 +146,9 @@ device to the bus. goto fail; } + ... + + Unregistering an auxiliary_device is a two-step process to mirror the register process. First call auxiliary_device_delete(), then call auxiliary_device_uninit(). @@ -205,6 +215,23 @@ Auxiliary drivers register themselves with the bus by calling auxiliary_driver_register(). The id_table contains the match_names of auxiliary devices that a driver can bind with. +.. code-block:: c + + static const struct auxiliary_device_id my_auxiliary_id_table[] = { + { .name = "foo_mod.foo_dev" }, + {}, + }; + + MODULE_DEVICE_TABLE(auxiliary, my_auxiliary_id_table); + + struct auxiliary_driver my_drv = { + .name = "myauxiliarydrv", + .id_table = my_auxiliary_id_table, + .probe = my_drv_probe, + .remove = my_drv_remove + }; + + Example Usage ============= From cb2ba75935558e65f2c4fe411a78c375b2c64cb4 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:01 -0800 Subject: [PATCH 12/43] Documentation/auxiliary_bus: Update Auxiliary device lifespan It was unclear when the auxiliary device objects were to be free'ed by the parent (registering) driver. Also there are some patterns like using devm_add_action_or_reset() which are helpful to mention to those using the interface to ensure they don't double free or miss freeing the auxiliary devices. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-4-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/auxiliary_bus.rst | 32 ++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst index b041a72dc322..3786e4664a1e 100644 --- a/Documentation/driver-api/auxiliary_bus.rst +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -164,9 +164,15 @@ Auxiliary Device Memory Model and Lifespan ------------------------------------------ The registering driver is the entity that allocates memory for the -auxiliary_device and register it on the auxiliary bus. It is important to note +auxiliary_device and registers it on the auxiliary bus. It is important to note that, as opposed to the platform bus, the registering driver is wholly -responsible for the management for the memory used for the driver object. +responsible for the management of the memory used for the device object. + +To be clear the memory for the auxiliary_device is freed in the release() +callback defined by the registering driver. The registering driver should only +call auxiliary_device_delete() and then auxiliary_device_uninit() when it is +done with the device. The release() function is then automatically called if +and when other code releases their reference to the devices. A parent object, defined in the shared header file, contains the auxiliary_device. It also contains a pointer to the shared object(s), which @@ -177,18 +183,22 @@ from the pointer to the auxiliary_device, that is passed during the call to the auxiliary_driver's probe function, up to the parent object, and then have access to the shared object(s). -The memory for the auxiliary_device is freed only in its release() callback -flow as defined by its registering driver. - The memory for the shared object(s) must have a lifespan equal to, or greater -than, the lifespan of the memory for the auxiliary_device. The auxiliary_driver -should only consider that this shared object is valid as long as the -auxiliary_device is still registered on the auxiliary bus. It is up to the -registering driver to manage (e.g. free or keep available) the memory for the -shared object beyond the life of the auxiliary_device. +than, the lifespan of the memory for the auxiliary_device. The +auxiliary_driver should only consider that the shared object is valid as long +as the auxiliary_device is still registered on the auxiliary bus. It is up to +the registering driver to manage (e.g. free or keep available) the memory for +the shared object beyond the life of the auxiliary_device. The registering driver must unregister all auxiliary devices before its own -driver.remove() is completed. +driver.remove() is completed. An easy way to ensure this is to use the +devm_add_action_or_reset() call to register a function against the parent device +which unregisters the auxiliary device object(s). + +Finally, any operations which operate on the auxiliary devices must continue to +function (if only to return an error) after the registering driver unregisters +the auxiliary device. + Auxiliary Drivers ================= From 05021dca787be566886875dc0c683552e1c67e5e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:02 -0800 Subject: [PATCH 13/43] Documentation/auxiliary_bus: Clarify __auxiliary_driver_register __auxiliary_driver_register is not intended to be called directly unless a custom name is required. Add documentation for this fact. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-5-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 70a8dbcd31b7..7192f7d03a05 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -225,6 +225,11 @@ EXPORT_SYMBOL_GPL(auxiliary_find_device); * @auxdrv: auxiliary_driver structure * @owner: owning module/driver * @modname: KBUILD_MODNAME for parent driver + * + * The expectation is that users will call the "auxiliary_driver_register" + * macro so that the caller's KBUILD_MODNAME is automatically inserted for the + * modname parameter. Only if a user requires a custom name would this version + * be called directly. */ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *owner, const char *modname) From 14866a7db8da1f61fb6135c461b733694eea9580 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:03 -0800 Subject: [PATCH 14/43] Documentation/auxiliary_bus: Add example code for module_auxiliary_driver() Add an example code snipit to the module_auxiliary_driver() documentation which is consistent with the other example code in the elsewhere in the documentation. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-6-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index fc51d45f106b..605b27aab693 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -66,6 +66,10 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); * Helper macro for auxiliary drivers which do not do anything special in * module init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() + * + * .. code-block:: c + * + * module_auxiliary_driver(my_drv); */ #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) From 8a2d6ffe7740cb8c944968aa9b0705e20afeef87 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:04 -0800 Subject: [PATCH 15/43] Documentation/auxiliary_bus: Clarify the release of devices from find device auxiliary_find_device() takes a proper get_device() reference on the device before returning the matched device. Users of this call should be informed that they need to properly release this reference with put_device(). Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-7-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 7192f7d03a05..ab5315681a42 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -202,6 +202,8 @@ EXPORT_SYMBOL_GPL(__auxiliary_device_add); * This function returns a reference to a device that is 'found' * for later use, as determined by the @match callback. * + * The reference returned should be released with put_device(). + * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. From e1b5186810cc7d4ec60447032636b8e6772dbbc6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:05 -0800 Subject: [PATCH 16/43] Documentation/auxiliary_bus: Move the text into the code The code and documentation are more difficult to maintain when kept separately. This is further compounded when the standard structure documentation infrastructure is not used. Move the documentation into the code, use the standard documentation infrastructure, add current documented functions, and reference the text in the rst file. Suggested-by: Greg Kroah-Hartman Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-8-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/auxiliary_bus.rst | 298 ++------------------- drivers/base/auxiliary.c | 141 ++++++++++ include/linux/auxiliary_bus.h | 160 +++++++++++ 3 files changed, 318 insertions(+), 281 deletions(-) diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst index 3786e4664a1e..cec84908fbc0 100644 --- a/Documentation/driver-api/auxiliary_bus.rst +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -6,309 +6,45 @@ Auxiliary Bus ============= -In some subsystems, the functionality of the core device (PCI/ACPI/other) is -too complex for a single device to be managed by a monolithic driver -(e.g. Sound Open Firmware), multiple devices might implement a common -intersection of functionality (e.g. NICs + RDMA), or a driver may want to -export an interface for another subsystem to drive (e.g. SIOV Physical Function -export Virtual Function management). A split of the functionality into child- -devices representing sub-domains of functionality makes it possible to -compartmentalize, layer, and distribute domain-specific concerns via a Linux -device-driver model. - -An example for this kind of requirement is the audio subsystem where a single -IP is handling multiple entities such as HDMI, Soundwire, local devices such as -mics/speakers etc. The split for the core's functionality can be arbitrary or -be defined by the DSP firmware topology and include hooks for test/debug. This -allows for the audio core device to be minimal and focused on hardware-specific -control and communication. - -Each auxiliary_device represents a part of its parent functionality. The -generic behavior can be extended and specialized as needed by encapsulating an -auxiliary_device within other domain-specific structures and the use of .ops -callbacks. Devices on the auxiliary bus do not share any structures and the use -of a communication channel with the parent is domain-specific. - -Note that ops are intended as a way to augment instance behavior within a class -of auxiliary devices, it is not the mechanism for exporting common -infrastructure from the parent. Consider EXPORT_SYMBOL_NS() to convey -infrastructure from the parent module to the auxiliary module(s). - +.. kernel-doc:: drivers/base/auxiliary.c + :doc: PURPOSE When Should the Auxiliary Bus Be Used ===================================== -The auxiliary bus is to be used when a driver and one or more kernel modules, -who share a common header file with the driver, need a mechanism to connect and -provide access to a shared object allocated by the auxiliary_device's -registering driver. The registering driver for the auxiliary_device(s) and the -kernel module(s) registering auxiliary_drivers can be from the same subsystem, -or from multiple subsystems. +.. kernel-doc:: drivers/base/auxiliary.c + :doc: USAGE -The emphasis here is on a common generic interface that keeps subsystem -customization out of the bus infrastructure. - -One example is a PCI network device that is RDMA-capable and exports a child -device to be driven by an auxiliary_driver in the RDMA subsystem. The PCI -driver allocates and registers an auxiliary_device for each physical -function on the NIC. The RDMA driver registers an auxiliary_driver that claims -each of these auxiliary_devices. This conveys data/ops published by the parent -PCI device/driver to the RDMA auxiliary_driver. - -Another use case is for the PCI device to be split out into multiple sub -functions. For each sub function an auxiliary_device is created. A PCI sub -function driver binds to such devices that creates its own one or more class -devices. A PCI sub function auxiliary device is likely to be contained in a -struct with additional attributes such as user defined sub function number and -optional attributes such as resources and a link to the parent device. These -attributes could be used by systemd/udev; and hence should be initialized -before a driver binds to an auxiliary_device. - -A key requirement for utilizing the auxiliary bus is that there is no -dependency on a physical bus, device, register accesses or regmap support. -These individual devices split from the core cannot live on the platform bus as -they are not physical devices that are controlled by DT/ACPI. The same -argument applies for not using MFD in this scenario as MFD relies on individual -function devices being physical devices. Auxiliary Device Creation ========================= -An auxiliary_device represents a part of its parent device's functionality. It -is given a name that, combined with the registering drivers KBUILD_MODNAME, -creates a match_name that is used for driver binding, and an id that combined -with the match_name provide a unique name to register with the bus subsystem. -For example, a driver registering an auxiliary device is named 'foo_mod.ko' and -the subdevice is named 'foo_dev'. The match name is therefore -'foo_mod.foo_dev'. - -.. code-block:: c - - struct auxiliary_device { - struct device dev; - const char *name; - u32 id; - }; - -Registering an auxiliary_device is a three-step process. - -First, a 'struct auxiliary_device' needs to be defined or allocated for each -sub-device desired. The name, id, dev.release, and dev.parent fields of this -structure must be filled in as follows. - -The 'name' field is to be given a name that is recognized by the auxiliary -driver. If two auxiliary_devices with the same match_name, eg -"foo_mod.foo_dev", are registered onto the bus, they must have unique id -values (e.g. "x" and "y") so that the registered devices names are "foo_mod.foo_dev.x" -and "foo_mod.foo_dev.y". If match_name + id are not unique, then the device_add fails -and generates an error message. - -The auxiliary_device.dev.type.release or auxiliary_device.dev.release must be -populated with a non-NULL pointer to successfully register the -auxiliary_device. This release call is where resources associated with the -auxiliary device must be free'ed. Because once the device is placed on the bus -the parent driver can not tell what other code may have a reference to this -data. - -The auxiliary_device.dev.parent should be set. Typically to the registering -drivers device. - -Second, call auxiliary_device_init(), which checks several aspects of the -auxiliary_device struct and performs a device_initialize(). After this step -completes, any error state must have a call to auxiliary_device_uninit() in its -resolution path. - -The third and final step in registering an auxiliary_device is to perform a -call to auxiliary_device_add(), which sets the name of the device and adds the -device to the bus. - -.. code-block:: c - - #define MY_DEVICE_NAME "foo_dev" - - ... - - struct auxiliary_device *my_aux_dev = my_aux_dev_alloc(xxx); - - /* Step 1: */ - my_aux_dev->name = MY_DEVICE_NAME; - my_aux_dev->id = my_unique_id_alloc(xxx); - my_aux_dev->dev.release = my_aux_dev_release; - my_aux_dev->dev.parent = my_dev; - - /* Step 2: */ - if (auxiliary_device_init(my_aux_dev)) - goto fail; - - /* Step 3: */ - if (auxiliary_device_add(my_aux_dev)) { - auxiliary_device_uninit(my_aux_dev); - goto fail; - } - - ... - - -Unregistering an auxiliary_device is a two-step process to mirror the register -process. First call auxiliary_device_delete(), then call -auxiliary_device_uninit(). - - -.. code-block:: c - - auxiliary_device_delete(my_dev->my_aux_dev); - auxiliary_device_uninit(my_dev->my_aux_dev); +.. kernel-doc:: include/linux/auxiliary_bus.h + :identifiers: auxiliary_device +.. kernel-doc:: drivers/base/auxiliary.c + :identifiers: auxiliary_device_init __auxiliary_device_add + auxiliary_find_device Auxiliary Device Memory Model and Lifespan ------------------------------------------ -The registering driver is the entity that allocates memory for the -auxiliary_device and registers it on the auxiliary bus. It is important to note -that, as opposed to the platform bus, the registering driver is wholly -responsible for the management of the memory used for the device object. - -To be clear the memory for the auxiliary_device is freed in the release() -callback defined by the registering driver. The registering driver should only -call auxiliary_device_delete() and then auxiliary_device_uninit() when it is -done with the device. The release() function is then automatically called if -and when other code releases their reference to the devices. - -A parent object, defined in the shared header file, contains the -auxiliary_device. It also contains a pointer to the shared object(s), which -also is defined in the shared header. Both the parent object and the shared -object(s) are allocated by the registering driver. This layout allows the -auxiliary_driver's registering module to perform a container_of() call to go -from the pointer to the auxiliary_device, that is passed during the call to the -auxiliary_driver's probe function, up to the parent object, and then have -access to the shared object(s). - -The memory for the shared object(s) must have a lifespan equal to, or greater -than, the lifespan of the memory for the auxiliary_device. The -auxiliary_driver should only consider that the shared object is valid as long -as the auxiliary_device is still registered on the auxiliary bus. It is up to -the registering driver to manage (e.g. free or keep available) the memory for -the shared object beyond the life of the auxiliary_device. - -The registering driver must unregister all auxiliary devices before its own -driver.remove() is completed. An easy way to ensure this is to use the -devm_add_action_or_reset() call to register a function against the parent device -which unregisters the auxiliary device object(s). - -Finally, any operations which operate on the auxiliary devices must continue to -function (if only to return an error) after the registering driver unregisters -the auxiliary device. +.. kernel-doc:: include/linux/auxiliary_bus.h + :doc: DEVICE_LIFESPAN Auxiliary Drivers ================= -Auxiliary drivers follow the standard driver model convention, where -discovery/enumeration is handled by the core, and drivers -provide probe() and remove() methods. They support power management -and shutdown notifications using the standard conventions. - -.. code-block:: c - - struct auxiliary_driver { - int (*probe)(struct auxiliary_device *, - const struct auxiliary_device_id *id); - void (*remove)(struct auxiliary_device *); - void (*shutdown)(struct auxiliary_device *); - int (*suspend)(struct auxiliary_device *, pm_message_t); - int (*resume)(struct auxiliary_device *); - struct device_driver driver; - const struct auxiliary_device_id *id_table; - }; - -Auxiliary drivers register themselves with the bus by calling -auxiliary_driver_register(). The id_table contains the match_names of auxiliary -devices that a driver can bind with. - -.. code-block:: c - - static const struct auxiliary_device_id my_auxiliary_id_table[] = { - { .name = "foo_mod.foo_dev" }, - {}, - }; - - MODULE_DEVICE_TABLE(auxiliary, my_auxiliary_id_table); - - struct auxiliary_driver my_drv = { - .name = "myauxiliarydrv", - .id_table = my_auxiliary_id_table, - .probe = my_drv_probe, - .remove = my_drv_remove - }; +.. kernel-doc:: include/linux/auxiliary_bus.h + :identifiers: auxiliary_driver module_auxiliary_driver +.. kernel-doc:: drivers/base/auxiliary.c + :identifiers: __auxiliary_driver_register auxiliary_driver_unregister Example Usage ============= -Auxiliary devices are created and registered by a subsystem-level core device -that needs to break up its functionality into smaller fragments. One way to -extend the scope of an auxiliary_device is to encapsulate it within a domain- -pecific structure defined by the parent device. This structure contains the -auxiliary_device and any associated shared data/callbacks needed to establish -the connection with the parent. +.. kernel-doc:: drivers/base/auxiliary.c + :doc: EXAMPLE -An example is: - -.. code-block:: c - - struct foo { - struct auxiliary_device auxdev; - void (*connect)(struct auxiliary_device *auxdev); - void (*disconnect)(struct auxiliary_device *auxdev); - void *data; - }; - -The parent device then registers the auxiliary_device by calling -auxiliary_device_init(), and then auxiliary_device_add(), with the pointer to -the auxdev member of the above structure. The parent provides a name for the -auxiliary_device that, combined with the parent's KBUILD_MODNAME, creates a -match_name that is be used for matching and binding with a driver. - -Whenever an auxiliary_driver is registered, based on the match_name, the -auxiliary_driver's probe() is invoked for the matching devices. The -auxiliary_driver can also be encapsulated inside custom drivers that make the -core device's functionality extensible by adding additional domain-specific ops -as follows: - -.. code-block:: c - - struct my_ops { - void (*send)(struct auxiliary_device *auxdev); - void (*receive)(struct auxiliary_device *auxdev); - }; - - - struct my_driver { - struct auxiliary_driver auxiliary_drv; - const struct my_ops ops; - }; - -An example of this type of usage is: - -.. code-block:: c - - const struct auxiliary_device_id my_auxiliary_id_table[] = { - { .name = "foo_mod.foo_dev" }, - { }, - }; - - const struct my_ops my_custom_ops = { - .send = my_tx, - .receive = my_rx, - }; - - const struct my_driver my_drv = { - .auxiliary_drv = { - .name = "myauxiliarydrv", - .id_table = my_auxiliary_id_table, - .probe = my_probe, - .remove = my_remove, - .shutdown = my_shutdown, - }, - .ops = my_custom_ops, - }; diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index ab5315681a42..8c5e65930617 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -17,6 +17,147 @@ #include #include "base.h" +/** + * DOC: PURPOSE + * + * In some subsystems, the functionality of the core device (PCI/ACPI/other) is + * too complex for a single device to be managed by a monolithic driver (e.g. + * Sound Open Firmware), multiple devices might implement a common intersection + * of functionality (e.g. NICs + RDMA), or a driver may want to export an + * interface for another subsystem to drive (e.g. SIOV Physical Function export + * Virtual Function management). A split of the functionality into child- + * devices representing sub-domains of functionality makes it possible to + * compartmentalize, layer, and distribute domain-specific concerns via a Linux + * device-driver model. + * + * An example for this kind of requirement is the audio subsystem where a + * single IP is handling multiple entities such as HDMI, Soundwire, local + * devices such as mics/speakers etc. The split for the core's functionality + * can be arbitrary or be defined by the DSP firmware topology and include + * hooks for test/debug. This allows for the audio core device to be minimal + * and focused on hardware-specific control and communication. + * + * Each auxiliary_device represents a part of its parent functionality. The + * generic behavior can be extended and specialized as needed by encapsulating + * an auxiliary_device within other domain-specific structures and the use of + * .ops callbacks. Devices on the auxiliary bus do not share any structures and + * the use of a communication channel with the parent is domain-specific. + * + * Note that ops are intended as a way to augment instance behavior within a + * class of auxiliary devices, it is not the mechanism for exporting common + * infrastructure from the parent. Consider EXPORT_SYMBOL_NS() to convey + * infrastructure from the parent module to the auxiliary module(s). + */ + +/** + * DOC: USAGE + * + * The auxiliary bus is to be used when a driver and one or more kernel + * modules, who share a common header file with the driver, need a mechanism to + * connect and provide access to a shared object allocated by the + * auxiliary_device's registering driver. The registering driver for the + * auxiliary_device(s) and the kernel module(s) registering auxiliary_drivers + * can be from the same subsystem, or from multiple subsystems. + * + * The emphasis here is on a common generic interface that keeps subsystem + * customization out of the bus infrastructure. + * + * One example is a PCI network device that is RDMA-capable and exports a child + * device to be driven by an auxiliary_driver in the RDMA subsystem. The PCI + * driver allocates and registers an auxiliary_device for each physical + * function on the NIC. The RDMA driver registers an auxiliary_driver that + * claims each of these auxiliary_devices. This conveys data/ops published by + * the parent PCI device/driver to the RDMA auxiliary_driver. + * + * Another use case is for the PCI device to be split out into multiple sub + * functions. For each sub function an auxiliary_device is created. A PCI sub + * function driver binds to such devices that creates its own one or more class + * devices. A PCI sub function auxiliary device is likely to be contained in a + * struct with additional attributes such as user defined sub function number + * and optional attributes such as resources and a link to the parent device. + * These attributes could be used by systemd/udev; and hence should be + * initialized before a driver binds to an auxiliary_device. + * + * A key requirement for utilizing the auxiliary bus is that there is no + * dependency on a physical bus, device, register accesses or regmap support. + * These individual devices split from the core cannot live on the platform bus + * as they are not physical devices that are controlled by DT/ACPI. The same + * argument applies for not using MFD in this scenario as MFD relies on + * individual function devices being physical devices. + */ + +/** + * DOC: EXAMPLE + * + * Auxiliary devices are created and registered by a subsystem-level core + * device that needs to break up its functionality into smaller fragments. One + * way to extend the scope of an auxiliary_device is to encapsulate it within a + * domain- pecific structure defined by the parent device. This structure + * contains the auxiliary_device and any associated shared data/callbacks + * needed to establish the connection with the parent. + * + * An example is: + * + * .. code-block:: c + * + * struct foo { + * struct auxiliary_device auxdev; + * void (*connect)(struct auxiliary_device *auxdev); + * void (*disconnect)(struct auxiliary_device *auxdev); + * void *data; + * }; + * + * The parent device then registers the auxiliary_device by calling + * auxiliary_device_init(), and then auxiliary_device_add(), with the pointer + * to the auxdev member of the above structure. The parent provides a name for + * the auxiliary_device that, combined with the parent's KBUILD_MODNAME, + * creates a match_name that is be used for matching and binding with a driver. + * + * Whenever an auxiliary_driver is registered, based on the match_name, the + * auxiliary_driver's probe() is invoked for the matching devices. The + * auxiliary_driver can also be encapsulated inside custom drivers that make + * the core device's functionality extensible by adding additional + * domain-specific ops as follows: + * + * .. code-block:: c + * + * struct my_ops { + * void (*send)(struct auxiliary_device *auxdev); + * void (*receive)(struct auxiliary_device *auxdev); + * }; + * + * + * struct my_driver { + * struct auxiliary_driver auxiliary_drv; + * const struct my_ops ops; + * }; + * + * An example of this type of usage is: + * + * .. code-block:: c + * + * const struct auxiliary_device_id my_auxiliary_id_table[] = { + * { .name = "foo_mod.foo_dev" }, + * { }, + * }; + * + * const struct my_ops my_custom_ops = { + * .send = my_tx, + * .receive = my_rx, + * }; + * + * const struct my_driver my_drv = { + * .auxiliary_drv = { + * .name = "myauxiliarydrv", + * .id_table = my_auxiliary_id_table, + * .probe = my_probe, + * .remove = my_remove, + * .shutdown = my_shutdown, + * }, + * .ops = my_custom_ops, + * }; + */ + static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, const struct auxiliary_device *auxdev) { diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 605b27aab693..e6d8b5c16226 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -11,12 +11,172 @@ #include #include +/** + * DOC: DEVICE_LIFESPAN + * + * The registering driver is the entity that allocates memory for the + * auxiliary_device and registers it on the auxiliary bus. It is important to + * note that, as opposed to the platform bus, the registering driver is wholly + * responsible for the management of the memory used for the device object. + * + * To be clear the memory for the auxiliary_device is freed in the release() + * callback defined by the registering driver. The registering driver should + * only call auxiliary_device_delete() and then auxiliary_device_uninit() when + * it is done with the device. The release() function is then automatically + * called if and when other code releases their reference to the devices. + * + * A parent object, defined in the shared header file, contains the + * auxiliary_device. It also contains a pointer to the shared object(s), which + * also is defined in the shared header. Both the parent object and the shared + * object(s) are allocated by the registering driver. This layout allows the + * auxiliary_driver's registering module to perform a container_of() call to go + * from the pointer to the auxiliary_device, that is passed during the call to + * the auxiliary_driver's probe function, up to the parent object, and then + * have access to the shared object(s). + * + * The memory for the shared object(s) must have a lifespan equal to, or + * greater than, the lifespan of the memory for the auxiliary_device. The + * auxiliary_driver should only consider that the shared object is valid as + * long as the auxiliary_device is still registered on the auxiliary bus. It + * is up to the registering driver to manage (e.g. free or keep available) the + * memory for the shared object beyond the life of the auxiliary_device. + * + * The registering driver must unregister all auxiliary devices before its own + * driver.remove() is completed. An easy way to ensure this is to use the + * devm_add_action_or_reset() call to register a function against the parent + * device which unregisters the auxiliary device object(s). + * + * Finally, any operations which operate on the auxiliary devices must continue + * to function (if only to return an error) after the registering driver + * unregisters the auxiliary device. + */ + +/** + * struct auxiliary_device - auxiliary device object. + * @dev: Device, + * The release and parent fields of the device structure must be filled + * in + * @name: Match name found by the auxiliary device driver, + * @id: unique identitier if multiple devices of the same name are exported, + * + * An auxiliary_device represents a part of its parent device's functionality. + * It is given a name that, combined with the registering drivers + * KBUILD_MODNAME, creates a match_name that is used for driver binding, and an + * id that combined with the match_name provide a unique name to register with + * the bus subsystem. For example, a driver registering an auxiliary device is + * named 'foo_mod.ko' and the subdevice is named 'foo_dev'. The match name is + * therefore 'foo_mod.foo_dev'. + * + * Registering an auxiliary_device is a three-step process. + * + * First, a 'struct auxiliary_device' needs to be defined or allocated for each + * sub-device desired. The name, id, dev.release, and dev.parent fields of + * this structure must be filled in as follows. + * + * The 'name' field is to be given a name that is recognized by the auxiliary + * driver. If two auxiliary_devices with the same match_name, eg + * "foo_mod.foo_dev", are registered onto the bus, they must have unique id + * values (e.g. "x" and "y") so that the registered devices names are + * "foo_mod.foo_dev.x" and "foo_mod.foo_dev.y". If match_name + id are not + * unique, then the device_add fails and generates an error message. + * + * The auxiliary_device.dev.type.release or auxiliary_device.dev.release must + * be populated with a non-NULL pointer to successfully register the + * auxiliary_device. This release call is where resources associated with the + * auxiliary device must be free'ed. Because once the device is placed on the + * bus the parent driver can not tell what other code may have a reference to + * this data. + * + * The auxiliary_device.dev.parent should be set. Typically to the registering + * drivers device. + * + * Second, call auxiliary_device_init(), which checks several aspects of the + * auxiliary_device struct and performs a device_initialize(). After this step + * completes, any error state must have a call to auxiliary_device_uninit() in + * its resolution path. + * + * The third and final step in registering an auxiliary_device is to perform a + * call to auxiliary_device_add(), which sets the name of the device and adds + * the device to the bus. + * + * .. code-block:: c + * + * #define MY_DEVICE_NAME "foo_dev" + * + * ... + * + * struct auxiliary_device *my_aux_dev = my_aux_dev_alloc(xxx); + * + * // Step 1: + * my_aux_dev->name = MY_DEVICE_NAME; + * my_aux_dev->id = my_unique_id_alloc(xxx); + * my_aux_dev->dev.release = my_aux_dev_release; + * my_aux_dev->dev.parent = my_dev; + * + * // Step 2: + * if (auxiliary_device_init(my_aux_dev)) + * goto fail; + * + * // Step 3: + * if (auxiliary_device_add(my_aux_dev)) { + * auxiliary_device_uninit(my_aux_dev); + * goto fail; + * } + * + * ... + * + * + * Unregistering an auxiliary_device is a two-step process to mirror the + * register process. First call auxiliary_device_delete(), then call + * auxiliary_device_uninit(). + * + * .. code-block:: c + * + * auxiliary_device_delete(my_dev->my_aux_dev); + * auxiliary_device_uninit(my_dev->my_aux_dev); + */ struct auxiliary_device { struct device dev; const char *name; u32 id; }; +/** + * struct auxiliary_driver - Definition of an auxiliary bus driver + * @probe: Called when a matching device is added to the bus. + * @remove: Called when device is removed from the bus. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called to put the device to sleep mode. Usually to a power state. + * @resume: Called to bring a device from sleep mode. + * @name: Driver name. + * @driver: Core driver structure. + * @id_table: Table of devices this driver should match on the bus. + * + * Auxiliary drivers follow the standard driver model convention, where + * discovery/enumeration is handled by the core, and drivers provide probe() + * and remove() methods. They support power management and shutdown + * notifications using the standard conventions. + * + * Auxiliary drivers register themselves with the bus by calling + * auxiliary_driver_register(). The id_table contains the match_names of + * auxiliary devices that a driver can bind with. + * + * .. code-block:: c + * + * static const struct auxiliary_device_id my_auxiliary_id_table[] = { + * { .name = "foo_mod.foo_dev" }, + * {}, + * }; + * + * MODULE_DEVICE_TABLE(auxiliary, my_auxiliary_id_table); + * + * struct auxiliary_driver my_drv = { + * .name = "myauxiliarydrv", + * .id_table = my_auxiliary_id_table, + * .probe = my_drv_probe, + * .remove = my_drv_remove + * }; + */ struct auxiliary_driver { int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id); void (*remove)(struct auxiliary_device *auxdev); From c097af1d0a8483b44fa30e86b311991d76b6ae67 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Sun, 28 Nov 2021 23:24:55 +0000 Subject: [PATCH 17/43] device property: Check fwnode->secondary when finding properties fwnode_property_get_reference_args() searches for named properties against a fwnode_handle, but these could instead be against the fwnode's secondary. If the property isn't found against the primary, check the secondary to see if it's there instead. Reviewed-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20211128232455.39332-1-djrscally@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index f1f35b48ab8b..11e3d6308eb0 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -478,8 +478,17 @@ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { - return fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, - nargs, index, args); + int ret; + + ret = fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, + nargs, index, args); + + if (ret < 0 && !IS_ERR_OR_NULL(fwnode) && + !IS_ERR_OR_NULL(fwnode->secondary)) + ret = fwnode_call_int_op(fwnode->secondary, get_reference_args, + prop, nargs_prop, nargs, index, args); + + return ret; } EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); From aa483f3ce655ed9ee4f32d050d1822eec2d20ada Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Dec 2021 13:53:11 +0100 Subject: [PATCH 18/43] topology/sysfs: get rid of htmldoc warning Stephen Rothwell reported the following warning caused by commit f1045056c726 ("topology/sysfs: rework book and drawer topology ifdefery"): Documentation/admin-guide/cputopology.rst:49: WARNING: Block quote ends without a blank line; unexpected unindent. To fix this remove the extra indentation again. Fixes: f1045056c726 ("topology/sysfs: rework book and drawer topology ifdefery") Reported-by: Stephen Rothwell Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/Ya4Ht2K9x2+lUtuR@osiris Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/cputopology.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst index 677ba1c2c820..d29cacc9b3c3 100644 --- a/Documentation/admin-guide/cputopology.rst +++ b/Documentation/admin-guide/cputopology.rst @@ -37,15 +37,15 @@ To be consistent on all architectures, include/linux/topology.h provides default definitions for any of the above macros that are not defined by include/asm-XXX/topology.h: - 1) topology_physical_package_id: -1 - 2) topology_die_id: -1 - 3) topology_cluster_id: -1 - 4) topology_core_id: 0 - 5) topology_book_id: -1 - 6) topology_drawer_id: -1 - 7) topology_sibling_cpumask: just the given CPU - 8) topology_core_cpumask: just the given CPU - 9) topology_cluster_cpumask: just the given CPU +1) topology_physical_package_id: -1 +2) topology_die_id: -1 +3) topology_cluster_id: -1 +4) topology_core_id: 0 +5) topology_book_id: -1 +6) topology_drawer_id: -1 +7) topology_sibling_cpumask: just the given CPU +8) topology_core_cpumask: just the given CPU +9) topology_cluster_cpumask: just the given CPU 10) topology_die_cpumask: just the given CPU 11) topology_book_cpumask: just the given CPU 12) topology_drawer_cpumask: just the given CPU From 79f1c7304295bbbc611bc53cfd5425b777b3e840 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 14:30:08 +0200 Subject: [PATCH 19/43] kernfs: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Acked-by: Tejun Heo Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211209123008.3391-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 9f650986a81b..861c4f0f8a29 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -6,7 +6,6 @@ #ifndef __LINUX_KERNFS_H #define __LINUX_KERNFS_H -#include #include #include #include @@ -14,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,6 +24,7 @@ struct dentry; struct iattr; struct seq_file; struct vm_area_struct; +struct vm_operations_struct; struct super_block; struct file_system_type; struct poll_table_struct; From 80b3485f7d7bdb2468f2a9d6a346a1132d248309 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 7 Dec 2021 17:50:10 -0800 Subject: [PATCH 20/43] PCI: Add #defines for accessing PCIe DVSEC fields Add #defines for accessing Vendor ID, Revision, Length, and ID offsets in the Designated Vendor Specific Extended Capability (DVSEC). Defined in PCIe r5.0, sec 7.9.6. Reviewed-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211208015015.891275-2-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/pci_regs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ff6ccbc6efe9..318f3f1f9e92 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1086,7 +1086,11 @@ /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER1_VID(x) ((x) & 0xffff) +#define PCI_DVSEC_HEADER1_REV(x) (((x) >> 16) & 0xf) +#define PCI_DVSEC_HEADER1_LEN(x) (((x) >> 20) & 0xfff) #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ +#define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ From 365481e42a8a95c55e43e8cc236138718e762e7b Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 7 Dec 2021 17:50:11 -0800 Subject: [PATCH 21/43] driver core: auxiliary bus: Add driver data helpers Adds get/set driver data helpers for auxiliary devices. Reviewed-by: Mark Gross Reviewed-by: Andy Shevchenko Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211208015015.891275-3-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index e6d8b5c16226..de21d9d24a95 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -188,6 +188,16 @@ struct auxiliary_driver { const struct auxiliary_device_id *id_table; }; +static inline void *auxiliary_get_drvdata(struct auxiliary_device *auxdev) +{ + return dev_get_drvdata(&auxdev->dev); +} + +static inline void auxiliary_set_drvdata(struct auxiliary_device *auxdev, void *data) +{ + dev_set_drvdata(&auxdev->dev, data); +} + static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev) { return container_of(dev, struct auxiliary_device, dev); From a3c8f906ed5fc1d4895b5e1a5c6ad6e942d6c0ca Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 7 Dec 2021 17:50:12 -0800 Subject: [PATCH 22/43] platform/x86/intel: Move intel_pmt from MFD to Auxiliary Bus Intel Platform Monitoring Technology (PMT) support is indicated by presence of an Intel defined PCIe Designated Vendor Specific Extended Capabilities (DVSEC) structure with a PMT specific ID. The current MFD implementation creates child devices for each PMT feature, currently telemetry, watcher, and crashlog. However DVSEC structures may also be used by Intel to indicate support for other features. The Out Of Band Management Services Module (OOBMSM) uses DVSEC to enumerate several features, including PMT. In order to support them it is necessary to modify the intel_pmt driver to handle the creation of the child devices more generically. To that end, modify the driver to create child devices for any VSEC/DVSEC features on supported devices (indicated by PCI ID). Additionally, move the implementation from MFD to the Auxiliary bus. VSEC/DVSEC features are really multifunctional PCI devices, not platform devices as MFD was designed for. Auxiliary bus gives more flexibility by allowing the definition of custom structures that can be shared between associated auxiliary devices and the parent device. Also, rename the driver from intel_pmt to intel_vsec to better reflect the purpose. This series also removes the current runtime pm support which was not complete to begin with. None of the current devices require runtime pm. However the support will be replaced when a device is added that requires it. Reviewed-by: Mark Gross Acked-by: Hans de Goede Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211208015015.891275-4-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 12 +- drivers/mfd/Kconfig | 10 - drivers/mfd/Makefile | 1 - drivers/mfd/intel_pmt.c | 261 ------------- drivers/platform/x86/intel/Kconfig | 11 + drivers/platform/x86/intel/Makefile | 2 + drivers/platform/x86/intel/pmt/Kconfig | 4 +- drivers/platform/x86/intel/pmt/class.c | 21 +- drivers/platform/x86/intel/pmt/class.h | 5 +- drivers/platform/x86/intel/pmt/crashlog.c | 47 +-- drivers/platform/x86/intel/pmt/telemetry.c | 46 +-- drivers/platform/x86/intel/vsec.c | 408 +++++++++++++++++++++ drivers/platform/x86/intel/vsec.h | 43 +++ 13 files changed, 536 insertions(+), 335 deletions(-) delete mode 100644 drivers/mfd/intel_pmt.c create mode 100644 drivers/platform/x86/intel/vsec.c create mode 100644 drivers/platform/x86/intel/vsec.h diff --git a/MAINTAINERS b/MAINTAINERS index 5250298d2817..ea919889075f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9741,10 +9741,9 @@ S: Maintained F: drivers/mfd/intel_soc_pmic* F: include/linux/mfd/intel_soc_pmic* -INTEL PMT DRIVER -M: "David E. Box" -S: Maintained -F: drivers/mfd/intel_pmt.c +INTEL PMT DRIVERS +M: David E. Box +S: Supported F: drivers/platform/x86/intel/pmt/ INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT @@ -9811,6 +9810,11 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/uncore-frequency.c +INTEL VENDOR SPECIFIC EXTENDED CAPABILITIES DRIVER +M: David E. Box +S: Supported +F: drivers/platform/x86/intel/vsec.* + INTEL VIRTUAL BUTTON DRIVER M: AceLan Kao L: platform-driver-x86@vger.kernel.org diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 3fb480818599..ac7b23eb62c2 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -692,16 +692,6 @@ config MFD_INTEL_PMC_BXT Register and P-unit access. In addition this creates devices for iTCO watchdog and telemetry that are part of the PMC. -config MFD_INTEL_PMT - tristate "Intel Platform Monitoring Technology (PMT) support" - depends on X86 && PCI - select MFD_CORE - help - The Intel Platform Monitoring Technology (PMT) is an interface that - provides access to hardware monitor registers. This driver supports - Telemetry, Watcher, and Crashlog PMT capabilities/devices for - platforms starting from Tiger Lake. - config MFD_IPAQ_MICRO bool "Atmel Micro ASIC (iPAQ h3100/h3600/h3700) Support" depends on SA1100_H3100 || SA1100_H3600 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 0b1b629aef3e..31734d9318e2 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -211,7 +211,6 @@ obj-$(CONFIG_MFD_INTEL_LPSS) += intel-lpss.o obj-$(CONFIG_MFD_INTEL_LPSS_PCI) += intel-lpss-pci.o obj-$(CONFIG_MFD_INTEL_LPSS_ACPI) += intel-lpss-acpi.o obj-$(CONFIG_MFD_INTEL_PMC_BXT) += intel_pmc_bxt.o -obj-$(CONFIG_MFD_INTEL_PMT) += intel_pmt.o obj-$(CONFIG_MFD_PALMAS) += palmas.o obj-$(CONFIG_MFD_VIPERBOARD) += viperboard.o obj-$(CONFIG_MFD_NTXEC) += ntxec.o diff --git a/drivers/mfd/intel_pmt.c b/drivers/mfd/intel_pmt.c deleted file mode 100644 index dd7eb614c28e..000000000000 --- a/drivers/mfd/intel_pmt.c +++ /dev/null @@ -1,261 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Intel Platform Monitoring Technology PMT driver - * - * Copyright (c) 2020, Intel Corporation. - * All Rights Reserved. - * - * Author: David E. Box - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Intel DVSEC capability vendor space offsets */ -#define INTEL_DVSEC_ENTRIES 0xA -#define INTEL_DVSEC_SIZE 0xB -#define INTEL_DVSEC_TABLE 0xC -#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) -#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) -#define INTEL_DVSEC_ENTRY_SIZE 4 - -/* PMT capabilities */ -#define DVSEC_INTEL_ID_TELEMETRY 2 -#define DVSEC_INTEL_ID_WATCHER 3 -#define DVSEC_INTEL_ID_CRASHLOG 4 - -struct intel_dvsec_header { - u16 length; - u16 id; - u8 num_entries; - u8 entry_size; - u8 tbir; - u32 offset; -}; - -enum pmt_quirks { - /* Watcher capability not supported */ - PMT_QUIRK_NO_WATCHER = BIT(0), - - /* Crashlog capability not supported */ - PMT_QUIRK_NO_CRASHLOG = BIT(1), - - /* Use shift instead of mask to read discovery table offset */ - PMT_QUIRK_TABLE_SHIFT = BIT(2), - - /* DVSEC not present (provided in driver data) */ - PMT_QUIRK_NO_DVSEC = BIT(3), -}; - -struct pmt_platform_info { - unsigned long quirks; - struct intel_dvsec_header **capabilities; -}; - -static const struct pmt_platform_info tgl_info = { - .quirks = PMT_QUIRK_NO_WATCHER | PMT_QUIRK_NO_CRASHLOG | - PMT_QUIRK_TABLE_SHIFT, -}; - -/* DG1 Platform with DVSEC quirk*/ -static struct intel_dvsec_header dg1_telemetry = { - .length = 0x10, - .id = 2, - .num_entries = 1, - .entry_size = 3, - .tbir = 0, - .offset = 0x466000, -}; - -static struct intel_dvsec_header *dg1_capabilities[] = { - &dg1_telemetry, - NULL -}; - -static const struct pmt_platform_info dg1_info = { - .quirks = PMT_QUIRK_NO_DVSEC, - .capabilities = dg1_capabilities, -}; - -static int pmt_add_dev(struct pci_dev *pdev, struct intel_dvsec_header *header, - unsigned long quirks) -{ - struct device *dev = &pdev->dev; - struct resource *res, *tmp; - struct mfd_cell *cell; - const char *name; - int count = header->num_entries; - int size = header->entry_size; - int id = header->id; - int i; - - switch (id) { - case DVSEC_INTEL_ID_TELEMETRY: - name = "pmt_telemetry"; - break; - case DVSEC_INTEL_ID_WATCHER: - if (quirks & PMT_QUIRK_NO_WATCHER) { - dev_info(dev, "Watcher not supported\n"); - return -EINVAL; - } - name = "pmt_watcher"; - break; - case DVSEC_INTEL_ID_CRASHLOG: - if (quirks & PMT_QUIRK_NO_CRASHLOG) { - dev_info(dev, "Crashlog not supported\n"); - return -EINVAL; - } - name = "pmt_crashlog"; - break; - default: - return -EINVAL; - } - - if (!header->num_entries || !header->entry_size) { - dev_err(dev, "Invalid count or size for %s header\n", name); - return -EINVAL; - } - - cell = devm_kzalloc(dev, sizeof(*cell), GFP_KERNEL); - if (!cell) - return -ENOMEM; - - res = devm_kcalloc(dev, count, sizeof(*res), GFP_KERNEL); - if (!res) - return -ENOMEM; - - if (quirks & PMT_QUIRK_TABLE_SHIFT) - header->offset >>= 3; - - /* - * The PMT DVSEC contains the starting offset and count for a block of - * discovery tables, each providing access to monitoring facilities for - * a section of the device. Create a resource list of these tables to - * provide to the driver. - */ - for (i = 0, tmp = res; i < count; i++, tmp++) { - tmp->start = pdev->resource[header->tbir].start + - header->offset + i * (size << 2); - tmp->end = tmp->start + (size << 2) - 1; - tmp->flags = IORESOURCE_MEM; - } - - cell->resources = res; - cell->num_resources = count; - cell->name = name; - - return devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, cell, 1, NULL, 0, - NULL); -} - -static int pmt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - struct pmt_platform_info *info; - unsigned long quirks = 0; - bool found_devices = false; - int ret, pos = 0; - - ret = pcim_enable_device(pdev); - if (ret) - return ret; - - info = (struct pmt_platform_info *)id->driver_data; - - if (info) - quirks = info->quirks; - - if (info && (info->quirks & PMT_QUIRK_NO_DVSEC)) { - struct intel_dvsec_header **header; - - header = info->capabilities; - while (*header) { - ret = pmt_add_dev(pdev, *header, quirks); - if (ret) - dev_warn(&pdev->dev, - "Failed to add device for DVSEC id %d\n", - (*header)->id); - else - found_devices = true; - - ++header; - } - } else { - do { - struct intel_dvsec_header header; - u32 table; - u16 vid; - - pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC); - if (!pos) - break; - - pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vid); - if (vid != PCI_VENDOR_ID_INTEL) - continue; - - pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, - &header.id); - pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, - &header.num_entries); - pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, - &header.entry_size); - pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, - &table); - - header.tbir = INTEL_DVSEC_TABLE_BAR(table); - header.offset = INTEL_DVSEC_TABLE_OFFSET(table); - - ret = pmt_add_dev(pdev, &header, quirks); - if (ret) - continue; - - found_devices = true; - } while (true); - } - - if (!found_devices) - return -ENODEV; - - pm_runtime_put(&pdev->dev); - pm_runtime_allow(&pdev->dev); - - return 0; -} - -static void pmt_pci_remove(struct pci_dev *pdev) -{ - pm_runtime_forbid(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); -} - -#define PCI_DEVICE_ID_INTEL_PMT_ADL 0x467d -#define PCI_DEVICE_ID_INTEL_PMT_DG1 0x490e -#define PCI_DEVICE_ID_INTEL_PMT_OOBMSM 0x09a7 -#define PCI_DEVICE_ID_INTEL_PMT_TGL 0x9a0d -static const struct pci_device_id pmt_pci_ids[] = { - { PCI_DEVICE_DATA(INTEL, PMT_ADL, &tgl_info) }, - { PCI_DEVICE_DATA(INTEL, PMT_DG1, &dg1_info) }, - { PCI_DEVICE_DATA(INTEL, PMT_OOBMSM, NULL) }, - { PCI_DEVICE_DATA(INTEL, PMT_TGL, &tgl_info) }, - { } -}; -MODULE_DEVICE_TABLE(pci, pmt_pci_ids); - -static struct pci_driver pmt_pci_driver = { - .name = "intel-pmt", - .id_table = pmt_pci_ids, - .probe = pmt_pci_probe, - .remove = pmt_pci_remove, -}; -module_pci_driver(pmt_pci_driver); - -MODULE_AUTHOR("David E. Box "); -MODULE_DESCRIPTION("Intel Platform Monitoring Technology PMT driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig index 38ce3e344589..35a5d1a5eba8 100644 --- a/drivers/platform/x86/intel/Kconfig +++ b/drivers/platform/x86/intel/Kconfig @@ -184,4 +184,15 @@ config INTEL_UNCORE_FREQ_CONTROL To compile this driver as a module, choose M here: the module will be called intel-uncore-frequency. +config INTEL_VSEC + tristate "Intel Vendor Specific Extended Capabilities Driver" + depends on PCI + select AUXILIARY_BUS + help + Adds support for feature drivers exposed using Intel PCIe VSEC and + DVSEC. + + To compile this driver as a module, choose M here: the module will + be called intel_vsec. + endif # X86_PLATFORM_DRIVERS_INTEL diff --git a/drivers/platform/x86/intel/Makefile b/drivers/platform/x86/intel/Makefile index 7c24be2423d8..8ecdf709fb17 100644 --- a/drivers/platform/x86/intel/Makefile +++ b/drivers/platform/x86/intel/Makefile @@ -26,6 +26,8 @@ intel_int0002_vgpio-y := int0002_vgpio.o obj-$(CONFIG_INTEL_INT0002_VGPIO) += intel_int0002_vgpio.o intel_oaktrail-y := oaktrail.o obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o +intel_vsec-y := vsec.o +obj-$(CONFIG_INTEL_VSEC) += intel_vsec.o # Intel PMIC / PMC / P-Unit drivers intel_bxtwc_tmu-y := bxtwc_tmu.o diff --git a/drivers/platform/x86/intel/pmt/Kconfig b/drivers/platform/x86/intel/pmt/Kconfig index d630f883a717..e916fc966221 100644 --- a/drivers/platform/x86/intel/pmt/Kconfig +++ b/drivers/platform/x86/intel/pmt/Kconfig @@ -17,7 +17,7 @@ config INTEL_PMT_CLASS config INTEL_PMT_TELEMETRY tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver" - depends on MFD_INTEL_PMT + depends on INTEL_VSEC select INTEL_PMT_CLASS help The Intel Platform Monitory Technology (PMT) Telemetry driver provides @@ -29,7 +29,7 @@ config INTEL_PMT_TELEMETRY config INTEL_PMT_CRASHLOG tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver" - depends on MFD_INTEL_PMT + depends on INTEL_VSEC select INTEL_PMT_CLASS help The Intel Platform Monitoring Technology (PMT) crashlog driver provides diff --git a/drivers/platform/x86/intel/pmt/class.c b/drivers/platform/x86/intel/pmt/class.c index 659b1073033c..1c9e3f3ea41c 100644 --- a/drivers/platform/x86/intel/pmt/class.c +++ b/drivers/platform/x86/intel/pmt/class.c @@ -13,6 +13,7 @@ #include #include +#include "../vsec.h" #include "class.h" #define PMT_XA_START 0 @@ -281,31 +282,29 @@ fail_dev_create: return ret; } -int intel_pmt_dev_create(struct intel_pmt_entry *entry, - struct intel_pmt_namespace *ns, - struct platform_device *pdev, int idx) +int intel_pmt_dev_create(struct intel_pmt_entry *entry, struct intel_pmt_namespace *ns, + struct intel_vsec_device *intel_vsec_dev, int idx) { + struct device *dev = &intel_vsec_dev->auxdev.dev; struct intel_pmt_header header; struct resource *disc_res; - int ret = -ENODEV; + int ret; - disc_res = platform_get_resource(pdev, IORESOURCE_MEM, idx); - if (!disc_res) - return ret; + disc_res = &intel_vsec_dev->resource[idx]; - entry->disc_table = devm_platform_ioremap_resource(pdev, idx); + entry->disc_table = devm_ioremap_resource(dev, disc_res); if (IS_ERR(entry->disc_table)) return PTR_ERR(entry->disc_table); - ret = ns->pmt_header_decode(entry, &header, &pdev->dev); + ret = ns->pmt_header_decode(entry, &header, dev); if (ret) return ret; - ret = intel_pmt_populate_entry(entry, &header, &pdev->dev, disc_res); + ret = intel_pmt_populate_entry(entry, &header, dev, disc_res); if (ret) return ret; - return intel_pmt_dev_register(entry, ns, &pdev->dev); + return intel_pmt_dev_register(entry, ns, dev); } EXPORT_SYMBOL_GPL(intel_pmt_dev_create); diff --git a/drivers/platform/x86/intel/pmt/class.h b/drivers/platform/x86/intel/pmt/class.h index 1337019c2873..db11d58867ce 100644 --- a/drivers/platform/x86/intel/pmt/class.h +++ b/drivers/platform/x86/intel/pmt/class.h @@ -2,13 +2,14 @@ #ifndef _INTEL_PMT_CLASS_H #define _INTEL_PMT_CLASS_H -#include #include #include #include #include #include +#include "../vsec.h" + /* PMT access types */ #define ACCESS_BARID 2 #define ACCESS_LOCAL 3 @@ -47,7 +48,7 @@ struct intel_pmt_namespace { bool intel_pmt_is_early_client_hw(struct device *dev); int intel_pmt_dev_create(struct intel_pmt_entry *entry, struct intel_pmt_namespace *ns, - struct platform_device *pdev, int idx); + struct intel_vsec_device *dev, int idx); void intel_pmt_dev_destroy(struct intel_pmt_entry *entry, struct intel_pmt_namespace *ns); #endif diff --git a/drivers/platform/x86/intel/pmt/crashlog.c b/drivers/platform/x86/intel/pmt/crashlog.c index 1c1021f04d3c..34daf9df168b 100644 --- a/drivers/platform/x86/intel/pmt/crashlog.c +++ b/drivers/platform/x86/intel/pmt/crashlog.c @@ -8,6 +8,7 @@ * Author: "Alexander Duyck" */ +#include #include #include #include @@ -15,10 +16,9 @@ #include #include +#include "../vsec.h" #include "class.h" -#define DRV_NAME "pmt_crashlog" - /* Crashlog discovery header types */ #define CRASH_TYPE_OOBMSM 1 @@ -257,34 +257,34 @@ static struct intel_pmt_namespace pmt_crashlog_ns = { /* * initialization */ -static int pmt_crashlog_remove(struct platform_device *pdev) +static void pmt_crashlog_remove(struct auxiliary_device *auxdev) { - struct pmt_crashlog_priv *priv = platform_get_drvdata(pdev); + struct pmt_crashlog_priv *priv = auxiliary_get_drvdata(auxdev); int i; for (i = 0; i < priv->num_entries; i++) intel_pmt_dev_destroy(&priv->entry[i].entry, &pmt_crashlog_ns); - - return 0; } -static int pmt_crashlog_probe(struct platform_device *pdev) +static int pmt_crashlog_probe(struct auxiliary_device *auxdev, + const struct auxiliary_device_id *id) { + struct intel_vsec_device *intel_vsec_dev = auxdev_to_ivdev(auxdev); struct pmt_crashlog_priv *priv; size_t size; int i, ret; - size = struct_size(priv, entry, pdev->num_resources); - priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + size = struct_size(priv, entry, intel_vsec_dev->num_resources); + priv = devm_kzalloc(&auxdev->dev, size, GFP_KERNEL); if (!priv) return -ENOMEM; - platform_set_drvdata(pdev, priv); + auxiliary_set_drvdata(auxdev, priv); - for (i = 0; i < pdev->num_resources; i++) { + for (i = 0; i < intel_vsec_dev->num_resources; i++) { struct intel_pmt_entry *entry = &priv->entry[i].entry; - ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, pdev, i); + ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, intel_vsec_dev, i); if (ret < 0) goto abort_probe; if (ret) @@ -295,26 +295,30 @@ static int pmt_crashlog_probe(struct platform_device *pdev) return 0; abort_probe: - pmt_crashlog_remove(pdev); + pmt_crashlog_remove(auxdev); return ret; } -static struct platform_driver pmt_crashlog_driver = { - .driver = { - .name = DRV_NAME, - }, - .remove = pmt_crashlog_remove, - .probe = pmt_crashlog_probe, +static const struct auxiliary_device_id pmt_crashlog_id_table[] = { + { .name = "intel_vsec.crashlog" }, + {} +}; +MODULE_DEVICE_TABLE(auxiliary, pmt_crashlog_id_table); + +static struct auxiliary_driver pmt_crashlog_aux_driver = { + .id_table = pmt_crashlog_id_table, + .remove = pmt_crashlog_remove, + .probe = pmt_crashlog_probe, }; static int __init pmt_crashlog_init(void) { - return platform_driver_register(&pmt_crashlog_driver); + return auxiliary_driver_register(&pmt_crashlog_aux_driver); } static void __exit pmt_crashlog_exit(void) { - platform_driver_unregister(&pmt_crashlog_driver); + auxiliary_driver_unregister(&pmt_crashlog_aux_driver); xa_destroy(&crashlog_array); } @@ -323,5 +327,4 @@ module_exit(pmt_crashlog_exit); MODULE_AUTHOR("Alexander Duyck "); MODULE_DESCRIPTION("Intel PMT Crashlog driver"); -MODULE_ALIAS("platform:" DRV_NAME); MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c index 38d52651c572..6b6f3e2a617a 100644 --- a/drivers/platform/x86/intel/pmt/telemetry.c +++ b/drivers/platform/x86/intel/pmt/telemetry.c @@ -8,6 +8,7 @@ * Author: "David E. Box" */ +#include #include #include #include @@ -15,10 +16,9 @@ #include #include +#include "../vsec.h" #include "class.h" -#define TELEM_DEV_NAME "pmt_telemetry" - #define TELEM_SIZE_OFFSET 0x0 #define TELEM_GUID_OFFSET 0x4 #define TELEM_BASE_OFFSET 0x8 @@ -79,34 +79,33 @@ static struct intel_pmt_namespace pmt_telem_ns = { .pmt_header_decode = pmt_telem_header_decode, }; -static int pmt_telem_remove(struct platform_device *pdev) +static void pmt_telem_remove(struct auxiliary_device *auxdev) { - struct pmt_telem_priv *priv = platform_get_drvdata(pdev); + struct pmt_telem_priv *priv = auxiliary_get_drvdata(auxdev); int i; for (i = 0; i < priv->num_entries; i++) intel_pmt_dev_destroy(&priv->entry[i], &pmt_telem_ns); - - return 0; } -static int pmt_telem_probe(struct platform_device *pdev) +static int pmt_telem_probe(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id) { + struct intel_vsec_device *intel_vsec_dev = auxdev_to_ivdev(auxdev); struct pmt_telem_priv *priv; size_t size; int i, ret; - size = struct_size(priv, entry, pdev->num_resources); - priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + size = struct_size(priv, entry, intel_vsec_dev->num_resources); + priv = devm_kzalloc(&auxdev->dev, size, GFP_KERNEL); if (!priv) return -ENOMEM; - platform_set_drvdata(pdev, priv); + auxiliary_set_drvdata(auxdev, priv); - for (i = 0; i < pdev->num_resources; i++) { + for (i = 0; i < intel_vsec_dev->num_resources; i++) { struct intel_pmt_entry *entry = &priv->entry[i]; - ret = intel_pmt_dev_create(entry, &pmt_telem_ns, pdev, i); + ret = intel_pmt_dev_create(entry, &pmt_telem_ns, intel_vsec_dev, i); if (ret < 0) goto abort_probe; if (ret) @@ -117,32 +116,35 @@ static int pmt_telem_probe(struct platform_device *pdev) return 0; abort_probe: - pmt_telem_remove(pdev); + pmt_telem_remove(auxdev); return ret; } -static struct platform_driver pmt_telem_driver = { - .driver = { - .name = TELEM_DEV_NAME, - }, - .remove = pmt_telem_remove, - .probe = pmt_telem_probe, +static const struct auxiliary_device_id pmt_telem_id_table[] = { + { .name = "intel_vsec.telemetry" }, + {} +}; +MODULE_DEVICE_TABLE(auxiliary, pmt_telem_id_table); + +static struct auxiliary_driver pmt_telem_aux_driver = { + .id_table = pmt_telem_id_table, + .remove = pmt_telem_remove, + .probe = pmt_telem_probe, }; static int __init pmt_telem_init(void) { - return platform_driver_register(&pmt_telem_driver); + return auxiliary_driver_register(&pmt_telem_aux_driver); } module_init(pmt_telem_init); static void __exit pmt_telem_exit(void) { - platform_driver_unregister(&pmt_telem_driver); + auxiliary_driver_unregister(&pmt_telem_aux_driver); xa_destroy(&telem_array); } module_exit(pmt_telem_exit); MODULE_AUTHOR("David E. Box "); MODULE_DESCRIPTION("Intel PMT Telemetry driver"); -MODULE_ALIAS("platform:" TELEM_DEV_NAME); MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c new file mode 100644 index 000000000000..c3bdd75ed690 --- /dev/null +++ b/drivers/platform/x86/intel/vsec.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Vendor Specific Extended Capabilities auxiliary bus driver + * + * Copyright (c) 2021, Intel Corporation. + * All Rights Reserved. + * + * Author: David E. Box + * + * This driver discovers and creates auxiliary devices for Intel defined PCIe + * "Vendor Specific" and "Designated Vendor Specific" Extended Capabilities, + * VSEC and DVSEC respectively. The driver supports features on specific PCIe + * endpoints that exist primarily to expose them. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "vsec.h" + +/* Intel DVSEC offsets */ +#define INTEL_DVSEC_ENTRIES 0xA +#define INTEL_DVSEC_SIZE 0xB +#define INTEL_DVSEC_TABLE 0xC +#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) +#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) +#define TABLE_OFFSET_SHIFT 3 + +static DEFINE_IDA(intel_vsec_ida); + +/** + * struct intel_vsec_header - Common fields of Intel VSEC and DVSEC registers. + * @rev: Revision ID of the VSEC/DVSEC register space + * @length: Length of the VSEC/DVSEC register space + * @id: ID of the feature + * @num_entries: Number of instances of the feature + * @entry_size: Size of the discovery table for each feature + * @tbir: BAR containing the discovery tables + * @offset: BAR offset of start of the first discovery table + */ +struct intel_vsec_header { + u8 rev; + u16 length; + u16 id; + u8 num_entries; + u8 entry_size; + u8 tbir; + u32 offset; +}; + +/* Platform specific data */ +struct intel_vsec_platform_info { + struct intel_vsec_header **capabilities; + unsigned long quirks; +}; + +enum intel_vsec_id { + VSEC_ID_TELEMETRY = 2, + VSEC_ID_WATCHER = 3, + VSEC_ID_CRASHLOG = 4, +}; + +static enum intel_vsec_id intel_vsec_allow_list[] = { + VSEC_ID_TELEMETRY, + VSEC_ID_WATCHER, + VSEC_ID_CRASHLOG, +}; + +static const char *intel_vsec_name(enum intel_vsec_id id) +{ + switch (id) { + case VSEC_ID_TELEMETRY: + return "telemetry"; + + case VSEC_ID_WATCHER: + return "watcher"; + + case VSEC_ID_CRASHLOG: + return "crashlog"; + + default: + return NULL; + } +} + +static bool intel_vsec_allowed(u16 id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(intel_vsec_allow_list); i++) + if (intel_vsec_allow_list[i] == id) + return true; + + return false; +} + +static bool intel_vsec_disabled(u16 id, unsigned long quirks) +{ + switch (id) { + case VSEC_ID_WATCHER: + return !!(quirks & VSEC_QUIRK_NO_WATCHER); + + case VSEC_ID_CRASHLOG: + return !!(quirks & VSEC_QUIRK_NO_CRASHLOG); + + default: + return false; + } +} + +static void intel_vsec_remove_aux(void *data) +{ + auxiliary_device_delete(data); + auxiliary_device_uninit(data); +} + +static void intel_vsec_dev_release(struct device *dev) +{ + struct intel_vsec_device *intel_vsec_dev = dev_to_ivdev(dev); + + ida_free(intel_vsec_dev->ida, intel_vsec_dev->auxdev.id); + kfree(intel_vsec_dev->resource); + kfree(intel_vsec_dev); +} + +static int intel_vsec_add_aux(struct pci_dev *pdev, struct intel_vsec_device *intel_vsec_dev, + const char *name) +{ + struct auxiliary_device *auxdev = &intel_vsec_dev->auxdev; + int ret; + + ret = ida_alloc(intel_vsec_dev->ida, GFP_KERNEL); + if (ret < 0) { + kfree(intel_vsec_dev); + return ret; + } + + auxdev->id = ret; + auxdev->name = name; + auxdev->dev.parent = &pdev->dev; + auxdev->dev.release = intel_vsec_dev_release; + + ret = auxiliary_device_init(auxdev); + if (ret < 0) { + ida_free(intel_vsec_dev->ida, auxdev->id); + kfree(intel_vsec_dev->resource); + kfree(intel_vsec_dev); + return ret; + } + + ret = auxiliary_device_add(auxdev); + if (ret < 0) { + auxiliary_device_uninit(auxdev); + return ret; + } + + return devm_add_action_or_reset(&pdev->dev, intel_vsec_remove_aux, auxdev); +} + +static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *header, + unsigned long quirks) +{ + struct intel_vsec_device *intel_vsec_dev; + struct resource *res, *tmp; + int i; + + if (!intel_vsec_allowed(header->id) || intel_vsec_disabled(header->id, quirks)) + return -EINVAL; + + if (!header->num_entries) { + dev_dbg(&pdev->dev, "Invalid 0 entry count for header id %d\n", header->id); + return -EINVAL; + } + + if (!header->entry_size) { + dev_dbg(&pdev->dev, "Invalid 0 entry size for header id %d\n", header->id); + return -EINVAL; + } + + intel_vsec_dev = kzalloc(sizeof(*intel_vsec_dev), GFP_KERNEL); + if (!intel_vsec_dev) + return -ENOMEM; + + res = kcalloc(header->num_entries, sizeof(*res), GFP_KERNEL); + if (!res) { + kfree(intel_vsec_dev); + return -ENOMEM; + } + + if (quirks & VSEC_QUIRK_TABLE_SHIFT) + header->offset >>= TABLE_OFFSET_SHIFT; + + /* + * The DVSEC/VSEC contains the starting offset and count for a block of + * discovery tables. Create a resource array of these tables to the + * auxiliary device driver. + */ + for (i = 0, tmp = res; i < header->num_entries; i++, tmp++) { + tmp->start = pdev->resource[header->tbir].start + + header->offset + i * (header->entry_size * sizeof(u32)); + tmp->end = tmp->start + (header->entry_size * sizeof(u32)) - 1; + tmp->flags = IORESOURCE_MEM; + } + + intel_vsec_dev->pcidev = pdev; + intel_vsec_dev->resource = res; + intel_vsec_dev->num_resources = header->num_entries; + intel_vsec_dev->quirks = quirks; + intel_vsec_dev->ida = &intel_vsec_ida; + + return intel_vsec_add_aux(pdev, intel_vsec_dev, intel_vsec_name(header->id)); +} + +static bool intel_vsec_walk_header(struct pci_dev *pdev, unsigned long quirks, + struct intel_vsec_header **header) +{ + bool have_devices = false; + int ret; + + for ( ; *header; header++) { + ret = intel_vsec_add_dev(pdev, *header, quirks); + if (ret) + dev_info(&pdev->dev, "Could not add device for DVSEC id %d\n", + (*header)->id); + else + have_devices = true; + } + + return have_devices; +} + +static bool intel_vsec_walk_dvsec(struct pci_dev *pdev, unsigned long quirks) +{ + bool have_devices = false; + int pos = 0; + + do { + struct intel_vsec_header header; + u32 table, hdr; + u16 vid; + int ret; + + pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC); + if (!pos) + break; + + pci_read_config_dword(pdev, pos + PCI_DVSEC_HEADER1, &hdr); + vid = PCI_DVSEC_HEADER1_VID(hdr); + if (vid != PCI_VENDOR_ID_INTEL) + continue; + + /* Support only revision 1 */ + header.rev = PCI_DVSEC_HEADER1_REV(hdr); + if (header.rev != 1) { + dev_info(&pdev->dev, "Unsupported DVSEC revision %d\n", header.rev); + continue; + } + + header.length = PCI_DVSEC_HEADER1_LEN(hdr); + + pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, &header.num_entries); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, &header.entry_size); + pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, &table); + + header.tbir = INTEL_DVSEC_TABLE_BAR(table); + header.offset = INTEL_DVSEC_TABLE_OFFSET(table); + + pci_read_config_dword(pdev, pos + PCI_DVSEC_HEADER2, &hdr); + header.id = PCI_DVSEC_HEADER2_ID(hdr); + + ret = intel_vsec_add_dev(pdev, &header, quirks); + if (ret) + continue; + + have_devices = true; + } while (true); + + return have_devices; +} + +static bool intel_vsec_walk_vsec(struct pci_dev *pdev, unsigned long quirks) +{ + bool have_devices = false; + int pos = 0; + + do { + struct intel_vsec_header header; + u32 table, hdr; + int ret; + + pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_VNDR); + if (!pos) + break; + + pci_read_config_dword(pdev, pos + PCI_VNDR_HEADER, &hdr); + + /* Support only revision 1 */ + header.rev = PCI_VNDR_HEADER_REV(hdr); + if (header.rev != 1) { + dev_info(&pdev->dev, "Unsupported VSEC revision %d\n", header.rev); + continue; + } + + header.id = PCI_VNDR_HEADER_ID(hdr); + header.length = PCI_VNDR_HEADER_LEN(hdr); + + /* entry, size, and table offset are the same as DVSEC */ + pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, &header.num_entries); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, &header.entry_size); + pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, &table); + + header.tbir = INTEL_DVSEC_TABLE_BAR(table); + header.offset = INTEL_DVSEC_TABLE_OFFSET(table); + + ret = intel_vsec_add_dev(pdev, &header, quirks); + if (ret) + continue; + + have_devices = true; + } while (true); + + return have_devices; +} + +static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct intel_vsec_platform_info *info; + bool have_devices = false; + unsigned long quirks = 0; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + info = (struct intel_vsec_platform_info *)id->driver_data; + if (info) + quirks = info->quirks; + + if (intel_vsec_walk_dvsec(pdev, quirks)) + have_devices = true; + + if (intel_vsec_walk_vsec(pdev, quirks)) + have_devices = true; + + if (info && (info->quirks & VSEC_QUIRK_NO_DVSEC) && + intel_vsec_walk_header(pdev, quirks, info->capabilities)) + have_devices = true; + + if (!have_devices) + return -ENODEV; + + return 0; +} + +/* TGL info */ +static const struct intel_vsec_platform_info tgl_info = { + .quirks = VSEC_QUIRK_NO_WATCHER | VSEC_QUIRK_NO_CRASHLOG | VSEC_QUIRK_TABLE_SHIFT, +}; + +/* DG1 info */ +static struct intel_vsec_header dg1_telemetry = { + .length = 0x10, + .id = 2, + .num_entries = 1, + .entry_size = 3, + .tbir = 0, + .offset = 0x466000, +}; + +static struct intel_vsec_header *dg1_capabilities[] = { + &dg1_telemetry, + NULL +}; + +static const struct intel_vsec_platform_info dg1_info = { + .capabilities = dg1_capabilities, + .quirks = VSEC_QUIRK_NO_DVSEC, +}; + +#define PCI_DEVICE_ID_INTEL_VSEC_ADL 0x467d +#define PCI_DEVICE_ID_INTEL_VSEC_DG1 0x490e +#define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7 +#define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d +static const struct pci_device_id intel_vsec_pci_ids[] = { + { PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, NULL) }, + { PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) }, + { } +}; +MODULE_DEVICE_TABLE(pci, intel_vsec_pci_ids); + +static struct pci_driver intel_vsec_pci_driver = { + .name = "intel_vsec", + .id_table = intel_vsec_pci_ids, + .probe = intel_vsec_pci_probe, +}; +module_pci_driver(intel_vsec_pci_driver); + +MODULE_AUTHOR("David E. Box "); +MODULE_DESCRIPTION("Intel Extended Capabilities auxiliary bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel/vsec.h b/drivers/platform/x86/intel/vsec.h new file mode 100644 index 000000000000..4cc36678e8c5 --- /dev/null +++ b/drivers/platform/x86/intel/vsec.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VSEC_H +#define _VSEC_H + +#include +#include + +struct pci_dev; +struct resource; + +enum intel_vsec_quirks { + /* Watcher feature not supported */ + VSEC_QUIRK_NO_WATCHER = BIT(0), + + /* Crashlog feature not supported */ + VSEC_QUIRK_NO_CRASHLOG = BIT(1), + + /* Use shift instead of mask to read discovery table offset */ + VSEC_QUIRK_TABLE_SHIFT = BIT(2), + + /* DVSEC not present (provided in driver data) */ + VSEC_QUIRK_NO_DVSEC = BIT(3), +}; + +struct intel_vsec_device { + struct auxiliary_device auxdev; + struct pci_dev *pcidev; + struct resource *resource; + struct ida *ida; + unsigned long quirks; + int num_resources; +}; + +static inline struct intel_vsec_device *dev_to_ivdev(struct device *dev) +{ + return container_of(dev, struct intel_vsec_device, auxdev.dev); +} + +static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device *auxdev) +{ + return container_of(auxdev, struct intel_vsec_device, auxdev); +} +#endif From 27963d3da4d2e0602948cf5f346a7035e694c3b4 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 21 Dec 2021 15:58:49 -0800 Subject: [PATCH 23/43] RDMA/irdma: Use auxiliary_device driver data helpers Use auxiliary_get_drvdata and auxiliary_set_drvdata helpers. Reviewed-by: Cezary Rojewski Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211221235852.323752-2-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/irdma/main.c | 4 ++-- drivers/infiniband/hw/mlx5/main.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 51a41359e0b4..9ccf4d683f8a 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -207,7 +207,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev) struct iidc_auxiliary_dev, adev); struct ice_pf *pf = iidc_adev->pf; - struct irdma_device *iwdev = dev_get_drvdata(&aux_dev->dev); + struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev); irdma_ib_unregister_device(iwdev); ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); @@ -294,7 +294,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, true); ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn)); - dev_set_drvdata(&aux_dev->dev, iwdev); + auxiliary_set_drvdata(aux_dev, iwdev); return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5ec8bd2f0b2f..85f526c861e9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4422,7 +4422,7 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev, } mutex_unlock(&mlx5_ib_multiport_mutex); - dev_set_drvdata(&adev->dev, mpi); + auxiliary_set_drvdata(adev, mpi); return 0; } @@ -4430,7 +4430,7 @@ static void mlx5r_mp_remove(struct auxiliary_device *adev) { struct mlx5_ib_multiport_info *mpi; - mpi = dev_get_drvdata(&adev->dev); + mpi = auxiliary_get_drvdata(adev); mutex_lock(&mlx5_ib_multiport_mutex); if (mpi->ibdev) mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); @@ -4480,7 +4480,7 @@ static int mlx5r_probe(struct auxiliary_device *adev, return ret; } - dev_set_drvdata(&adev->dev, dev); + auxiliary_set_drvdata(adev, dev); return 0; } @@ -4488,7 +4488,7 @@ static void mlx5r_remove(struct auxiliary_device *adev) { struct mlx5_ib_dev *dev; - dev = dev_get_drvdata(&adev->dev); + dev = auxiliary_get_drvdata(adev); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } From 3edac08e18961b3f2ac883f1dc209f36deb218be Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 21 Dec 2021 15:58:50 -0800 Subject: [PATCH 24/43] soundwire: intel: Use auxiliary_device driver data helpers Use auxiliary_get_drvdata and auxiliary_set_drvdata helpers. Reviewed-by: Andy Shevchenko Reviewed-by: Cezary Rojewski Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211221235852.323752-3-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/soundwire/intel.c | 8 ++++---- drivers/soundwire/intel_init.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 78037ffdb09b..d082d18e41a9 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1293,7 +1293,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev, bus->ops = &sdw_intel_ops; /* set driver data, accessed by snd_soc_dai_get_drvdata() */ - dev_set_drvdata(dev, cdns); + auxiliary_set_drvdata(auxdev, cdns); /* use generic bandwidth allocation algorithm */ sdw->cdns.bus.compute_params = sdw_compute_params; @@ -1321,7 +1321,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) { struct sdw_cdns_stream_config config; struct device *dev = &auxdev->dev; - struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_bus *bus = &cdns->bus; int link_flags; @@ -1463,7 +1463,7 @@ err_init: static void intel_link_remove(struct auxiliary_device *auxdev) { struct device *dev = &auxdev->dev; - struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_bus *bus = &cdns->bus; @@ -1488,7 +1488,7 @@ int intel_link_process_wakeen_event(struct auxiliary_device *auxdev) void __iomem *shim; u16 wake_sts; - sdw = dev_get_drvdata(dev); + sdw = auxiliary_get_drvdata(auxdev); bus = &sdw->cdns.bus; if (bus->prop.hw_disabled || !sdw->startup_done) { diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index e329022e1669..d99807765dfe 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -244,7 +244,7 @@ static struct sdw_intel_ctx goto err; link = &ldev->link_res; - link->cdns = dev_get_drvdata(&ldev->auxdev.dev); + link->cdns = auxiliary_get_drvdata(&ldev->auxdev); if (!link->cdns) { dev_err(&adev->dev, "failed to get link->cdns\n"); From a5f8ef0baf9a018cde00bd0a960060a306d9021f Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 21 Dec 2021 15:58:51 -0800 Subject: [PATCH 25/43] net/mlx5e: Use auxiliary_device driver data helpers Use auxiliary_get_drvdata and auxiliary_set_drvdata helpers. Reviewed-by: Cezary Rojewski Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211221235852.323752-4-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65571593ec5c..244ce8f4e286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5389,7 +5389,7 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv) static int mlx5e_resume(struct auxiliary_device *adev) { struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); - struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev); + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = edev->mdev; int err; @@ -5412,7 +5412,7 @@ static int mlx5e_resume(struct auxiliary_device *adev) static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) { - struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev); + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; @@ -5456,7 +5456,7 @@ static int mlx5e_probe(struct auxiliary_device *adev, mlx5e_build_nic_netdev(netdev); priv = netdev_priv(netdev); - dev_set_drvdata(&adev->dev, priv); + auxiliary_set_drvdata(adev, priv); priv->profile = profile; priv->ppriv = NULL; @@ -5504,7 +5504,7 @@ err_destroy_netdev: static void mlx5e_remove(struct auxiliary_device *adev) { - struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev); + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); pm_message_t state = {}; mlx5e_dcbnl_delete_app(priv); From 45e3a279841f13243c45928d7ec2e67d56b37067 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 21 Dec 2021 15:58:52 -0800 Subject: [PATCH 26/43] vdpa/mlx5: Use auxiliary_device driver data helpers Use auxiliary_get_drvdata and auxiliary_set_drvdata helpers. Reviewed-by: Cezary Rojewski Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211221235852.323752-5-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 63813fbb5f62..cf59f7e17c6d 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2683,7 +2683,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, if (err) goto reg_err; - dev_set_drvdata(&adev->dev, mgtdev); + auxiliary_set_drvdata(adev, mgtdev); return 0; @@ -2696,7 +2696,7 @@ static void mlx5v_remove(struct auxiliary_device *adev) { struct mlx5_vdpa_mgmtdev *mgtdev; - mgtdev = dev_get_drvdata(&adev->dev); + mgtdev = auxiliary_get_drvdata(adev); vdpa_mgmtdev_unregister(&mgtdev->mgtdev); kfree(mgtdev); } From 67e532a42cf4c6c214ed39e33e617bca29508f4e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Dec 2021 11:42:13 +0100 Subject: [PATCH 27/43] driver core: platform: document registration-failure requirement Add an explicit comment to document that the reference initialised by platform_device_register() needs to be released by a call to platform_device_put() also when registration fails (cf. device_register()). Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211222104213.5673-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 7109351366c8..6cb04ac48bf0 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -763,6 +763,10 @@ EXPORT_SYMBOL_GPL(platform_device_del); /** * platform_device_register - add a platform-level device * @pdev: platform device we're adding + * + * NOTE: _Never_ directly free @pdev after calling this function, even if it + * returned an error! Always use platform_device_put() to give up the + * reference initialised in this function instead. */ int platform_device_register(struct platform_device *pdev) { From ee6d3dd4ed48ab24b74bab3c3977b8218518247d Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Fri, 24 Dec 2021 23:13:45 +0000 Subject: [PATCH 28/43] driver core: make kobj_type constant. This way instances of kobj_type (which contain function pointers) can be stored in .rodata, which means that they cannot be [easily/accidentally] modified at runtime. Signed-off-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20211224231345.777370-1-wedsonaf@google.com Signed-off-by: Greg Kroah-Hartman --- Documentation/core-api/kobject.rst | 4 ++-- drivers/base/bus.c | 2 +- drivers/base/core.c | 2 +- include/linux/kobject.h | 8 ++++---- kernel/params.c | 2 +- lib/kobject.c | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst index 2739f8b72575..d3b5bf9f643a 100644 --- a/Documentation/core-api/kobject.rst +++ b/Documentation/core-api/kobject.rst @@ -118,7 +118,7 @@ Initialization of kobjects Code which creates a kobject must, of course, initialize that object. Some of the internal fields are setup with a (mandatory) call to kobject_init():: - void kobject_init(struct kobject *kobj, struct kobj_type *ktype); + void kobject_init(struct kobject *kobj, const struct kobj_type *ktype); The ktype is required for a kobject to be created properly, as every kobject must have an associated kobj_type. After calling kobject_init(), to @@ -156,7 +156,7 @@ kobject_name():: There is a helper function to both initialize and add the kobject to the kernel at the same time, called surprisingly enough kobject_init_and_add():: - int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, + int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...); The arguments are the same as the individual kobject_init() and diff --git a/drivers/base/bus.c b/drivers/base/bus.c index bdc98c5713d5..a64454f5f8c0 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -165,7 +165,7 @@ static struct kobj_type bus_ktype = { static int bus_uevent_filter(struct kset *kset, struct kobject *kobj) { - struct kobj_type *ktype = get_ktype(kobj); + const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &bus_ktype) return 1; diff --git a/drivers/base/core.c b/drivers/base/core.c index fd034d742447..d712ea11066b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2263,7 +2263,7 @@ static struct kobj_type device_ktype = { static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) { - struct kobj_type *ktype = get_ktype(kobj); + const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &device_ktype) { struct device *dev = kobj_to_dev(kobj); diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c740062b4b1a..683172b2e094 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -66,7 +66,7 @@ struct kobject { struct list_head entry; struct kobject *parent; struct kset *kset; - struct kobj_type *ktype; + const struct kobj_type *ktype; struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; #ifdef CONFIG_DEBUG_KOBJECT_RELEASE @@ -90,13 +90,13 @@ static inline const char *kobject_name(const struct kobject *kobj) return kobj->name; } -extern void kobject_init(struct kobject *kobj, struct kobj_type *ktype); +extern void kobject_init(struct kobject *kobj, const struct kobj_type *ktype); extern __printf(3, 4) __must_check int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...); extern __printf(4, 5) __must_check int kobject_init_and_add(struct kobject *kobj, - struct kobj_type *ktype, struct kobject *parent, + const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...); extern void kobject_del(struct kobject *kobj); @@ -217,7 +217,7 @@ static inline void kset_put(struct kset *k) kobject_put(&k->kobj); } -static inline struct kobj_type *get_ktype(struct kobject *kobj) +static inline const struct kobj_type *get_ktype(struct kobject *kobj) { return kobj->ktype; } diff --git a/kernel/params.c b/kernel/params.c index 8299bd764e42..9b90e3c4d3c0 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -928,7 +928,7 @@ static const struct sysfs_ops module_sysfs_ops = { static int uevent_filter(struct kset *kset, struct kobject *kobj) { - struct kobj_type *ktype = get_ktype(kobj); + const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &module_ktype) return 1; diff --git a/lib/kobject.c b/lib/kobject.c index 4a56f519139d..56fa037501b5 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -65,7 +65,7 @@ void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) */ static int populate_dir(struct kobject *kobj) { - struct kobj_type *t = get_ktype(kobj); + const struct kobj_type *t = get_ktype(kobj); struct attribute *attr; int error = 0; int i; @@ -346,7 +346,7 @@ EXPORT_SYMBOL(kobject_set_name); * to kobject_put(), not by a call to kfree directly to ensure that all of * the memory is cleaned up properly. */ -void kobject_init(struct kobject *kobj, struct kobj_type *ktype) +void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) { char *err_str; @@ -461,7 +461,7 @@ EXPORT_SYMBOL(kobject_add); * same type of error handling after a call to kobject_add() and kobject * lifetime rules are the same here. */ -int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, +int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; @@ -679,7 +679,7 @@ EXPORT_SYMBOL(kobject_get_unless_zero); static void kobject_cleanup(struct kobject *kobj) { struct kobject *parent = kobj->parent; - struct kobj_type *t = get_ktype(kobj); + const struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; pr_debug("kobject: '%s' (%p): %s, parent %p\n", From cf6299b6101903c31bddb0065804b2121ed510c7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 27 Dec 2021 17:39:24 +0100 Subject: [PATCH 29/43] kobject: remove kset from struct kset_uevent_ops callbacks There is no need to pass the pointer to the kset in the struct kset_uevent_ops callbacks as no one uses it, so just remove that pointer entirely. Reviewed-by: Rafael J. Wysocki Reviewed-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20211227163924.3970661-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Documentation/core-api/kobject.rst | 7 +++---- Documentation/translations/zh_CN/core-api/kobject.rst | 7 +++---- drivers/base/bus.c | 2 +- drivers/base/core.c | 11 +++++------ drivers/dma-buf/dma-buf-sysfs-stats.c | 2 +- fs/dlm/lockspace.c | 3 +-- fs/gfs2/sys.c | 3 +-- include/linux/kobject.h | 7 +++---- kernel/params.c | 2 +- lib/kobject_uevent.c | 6 +++--- 10 files changed, 22 insertions(+), 28 deletions(-) diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst index d3b5bf9f643a..3d6e3107315d 100644 --- a/Documentation/core-api/kobject.rst +++ b/Documentation/core-api/kobject.rst @@ -373,10 +373,9 @@ If a kset wishes to control the uevent operations of the kobjects associated with it, it can use the struct kset_uevent_ops to handle it:: struct kset_uevent_ops { - int (* const filter)(struct kset *kset, struct kobject *kobj); - const char *(* const name)(struct kset *kset, struct kobject *kobj); - int (* const uevent)(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env); + int (* const filter)(struct kobject *kobj); + const char *(* const name)(struct kobject *kobj); + int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); }; diff --git a/Documentation/translations/zh_CN/core-api/kobject.rst b/Documentation/translations/zh_CN/core-api/kobject.rst index b7c37794cc7f..95634083dca0 100644 --- a/Documentation/translations/zh_CN/core-api/kobject.rst +++ b/Documentation/translations/zh_CN/core-api/kobject.rst @@ -325,10 +325,9 @@ ksets 结构体kset_uevent_ops来处理它:: struct kset_uevent_ops { - int (* const filter)(struct kset *kset, struct kobject *kobj); - const char *(* const name)(struct kset *kset, struct kobject *kobj); - int (* const uevent)(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env); + int (* const filter)(struct kobject *kobj); + const char *(* const name)(struct kobject *kobj); + int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); }; diff --git a/drivers/base/bus.c b/drivers/base/bus.c index a64454f5f8c0..97936ec49bde 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -163,7 +163,7 @@ static struct kobj_type bus_ktype = { .release = bus_release, }; -static int bus_uevent_filter(struct kset *kset, struct kobject *kobj) +static int bus_uevent_filter(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); diff --git a/drivers/base/core.c b/drivers/base/core.c index d712ea11066b..60d703ebd123 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2261,7 +2261,7 @@ static struct kobj_type device_ktype = { }; -static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) +static int dev_uevent_filter(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); @@ -2275,7 +2275,7 @@ static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) +static const char *dev_uevent_name(struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); @@ -2286,8 +2286,7 @@ static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) return NULL; } -static int dev_uevent(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env) +static int dev_uevent(struct kobject *kobj, struct kobj_uevent_env *env) { struct device *dev = kobj_to_dev(kobj); int retval = 0; @@ -2382,7 +2381,7 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, /* respect filter */ if (kset->uevent_ops && kset->uevent_ops->filter) - if (!kset->uevent_ops->filter(kset, &dev->kobj)) + if (!kset->uevent_ops->filter(&dev->kobj)) goto out; env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); @@ -2390,7 +2389,7 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, return -ENOMEM; /* let the kset specific function add its keys */ - retval = kset->uevent_ops->uevent(kset, &dev->kobj, env); + retval = kset->uevent_ops->uevent(&dev->kobj, env); if (retval) goto out; diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 053baadcada9..2bba0babcb62 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -132,7 +132,7 @@ void dma_buf_stats_teardown(struct dma_buf *dmabuf) /* Statistics files do not need to send uevents. */ -static int dmabuf_sysfs_uevent_filter(struct kset *kset, struct kobject *kobj) +static int dmabuf_sysfs_uevent_filter(struct kobject *kobj) { return 0; } diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 10eddfa6c3d7..0bbb346cb892 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -216,8 +216,7 @@ static int do_uevent(struct dlm_ls *ls, int in) return ls->ls_uevent_result; } -static int dlm_uevent(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env) +static int dlm_uevent(struct kobject *kobj, struct kobj_uevent_env *env) { struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index c0a34d9ddee4..a6002b2d146d 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -767,8 +767,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) wait_for_completion(&sdp->sd_kobj_unregister); } -static int gfs2_uevent(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env) +static int gfs2_uevent(struct kobject *kobj, struct kobj_uevent_env *env) { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); struct super_block *s = sdp->sd_vfs; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 683172b2e094..ad90b49824dc 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -153,10 +153,9 @@ struct kobj_uevent_env { }; struct kset_uevent_ops { - int (* const filter)(struct kset *kset, struct kobject *kobj); - const char *(* const name)(struct kset *kset, struct kobject *kobj); - int (* const uevent)(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env); + int (* const filter)(struct kobject *kobj); + const char *(* const name)(struct kobject *kobj); + int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); }; struct kobj_attribute { diff --git a/kernel/params.c b/kernel/params.c index 9b90e3c4d3c0..5b92310425c5 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -926,7 +926,7 @@ static const struct sysfs_ops module_sysfs_ops = { .store = module_attr_store, }; -static int uevent_filter(struct kset *kset, struct kobject *kobj) +static int uevent_filter(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index c87d5b6a8a55..7c44b7ae4c5c 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -501,7 +501,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } /* skip the event, if the filter returns zero. */ if (uevent_ops && uevent_ops->filter) - if (!uevent_ops->filter(kset, kobj)) { + if (!uevent_ops->filter(kobj)) { pr_debug("kobject: '%s' (%p): %s: filter function " "caused the event to drop!\n", kobject_name(kobj), kobj, __func__); @@ -510,7 +510,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, /* originating subsystem */ if (uevent_ops && uevent_ops->name) - subsystem = uevent_ops->name(kset, kobj); + subsystem = uevent_ops->name(kobj); else subsystem = kobject_name(&kset->kobj); if (!subsystem) { @@ -554,7 +554,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, /* let the kset specific function add its stuff */ if (uevent_ops && uevent_ops->uevent) { - retval = uevent_ops->uevent(kset, kobj, env); + retval = uevent_ops->uevent(kobj, env); if (retval) { pr_debug("kobject: '%s' (%p): %s: uevent() returned " "%d\n", kobject_name(kobj), kobj, From a6b9a6149d850ab94acc4db9473f124a062be322 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 28 Dec 2021 15:42:52 +0100 Subject: [PATCH 30/43] nilfs2: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the nilfs2 code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Acked-by: Ryusuke Konishi Link: https://lore.kernel.org/r/20211228144252.390554-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/sysfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 81f35c5b5a40..379d22e28ed6 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -57,7 +57,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ static struct kobj_type nilfs_##name##_ktype = { \ - .default_attrs = nilfs_##name##_attrs, \ + .default_groups = nilfs_##name##_groups, \ .sysfs_ops = &nilfs_##name##_attr_ops, \ .release = nilfs_##name##_attr_release, \ } @@ -129,6 +129,7 @@ static struct attribute *nilfs_snapshot_attrs[] = { NILFS_SNAPSHOT_ATTR_LIST(README), NULL, }; +ATTRIBUTE_GROUPS(nilfs_snapshot); static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -166,7 +167,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = { }; static struct kobj_type nilfs_snapshot_ktype = { - .default_attrs = nilfs_snapshot_attrs, + .default_groups = nilfs_snapshot_groups, .sysfs_ops = &nilfs_snapshot_attr_ops, .release = nilfs_snapshot_attr_release, }; @@ -226,6 +227,7 @@ static struct attribute *nilfs_mounted_snapshots_attrs[] = { NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README), NULL, }; +ATTRIBUTE_GROUPS(nilfs_mounted_snapshots); NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev); NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev); @@ -339,6 +341,7 @@ static struct attribute *nilfs_checkpoints_attrs[] = { NILFS_CHECKPOINTS_ATTR_LIST(README), NULL, }; +ATTRIBUTE_GROUPS(nilfs_checkpoints); NILFS_DEV_INT_GROUP_OPS(checkpoints, dev); NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev); @@ -428,6 +431,7 @@ static struct attribute *nilfs_segments_attrs[] = { NILFS_SEGMENTS_ATTR_LIST(README), NULL, }; +ATTRIBUTE_GROUPS(nilfs_segments); NILFS_DEV_INT_GROUP_OPS(segments, dev); NILFS_DEV_INT_GROUP_TYPE(segments, dev); @@ -689,6 +693,7 @@ static struct attribute *nilfs_segctor_attrs[] = { NILFS_SEGCTOR_ATTR_LIST(README), NULL, }; +ATTRIBUTE_GROUPS(nilfs_segctor); NILFS_DEV_INT_GROUP_OPS(segctor, dev); NILFS_DEV_INT_GROUP_TYPE(segctor, dev); @@ -816,6 +821,7 @@ static struct attribute *nilfs_superblock_attrs[] = { NILFS_SUPERBLOCK_ATTR_LIST(README), NULL, }; +ATTRIBUTE_GROUPS(nilfs_superblock); NILFS_DEV_INT_GROUP_OPS(superblock, dev); NILFS_DEV_INT_GROUP_TYPE(superblock, dev); @@ -924,6 +930,7 @@ static struct attribute *nilfs_dev_attrs[] = { NILFS_DEV_ATTR_LIST(README), NULL, }; +ATTRIBUTE_GROUPS(nilfs_dev); static ssize_t nilfs_dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -961,7 +968,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = { }; static struct kobj_type nilfs_dev_ktype = { - .default_attrs = nilfs_dev_attrs, + .default_groups = nilfs_dev_groups, .sysfs_ops = &nilfs_dev_attr_ops, .release = nilfs_dev_attr_release, }; From 67aa58e8d4b07b436971326af6319258e0926f33 Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Tue, 28 Dec 2021 17:27:07 +0800 Subject: [PATCH 31/43] driver core: Simplify async probe test code by using ktime_ms_delta() Simplify async probe test code by using ktime_ms_delta(). Signed-off-by: Mark-PK Tsai Link: https://lore.kernel.org/r/20211228092707.29987-1-mark-pk.tsai@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/test/test_async_driver_probe.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c index 3bb7beb127a9..4d1976ca5072 100644 --- a/drivers/base/test/test_async_driver_probe.c +++ b/drivers/base/test/test_async_driver_probe.c @@ -104,7 +104,7 @@ static int __init test_async_probe_init(void) struct platform_device **pdev = NULL; int async_id = 0, sync_id = 0; unsigned long long duration; - ktime_t calltime, delta; + ktime_t calltime; int err, nid, cpu; pr_info("registering first set of asynchronous devices...\n"); @@ -133,8 +133,7 @@ static int __init test_async_probe_init(void) goto err_unregister_async_devs; } - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), calltime); pr_info("registration took %lld msecs\n", duration); if (duration > TEST_PROBE_THRESHOLD) { pr_err("test failed: probe took too long\n"); @@ -161,8 +160,7 @@ static int __init test_async_probe_init(void) async_id++; } - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), calltime); dev_info(&(*pdev)->dev, "registration took %lld msecs\n", duration); if (duration > TEST_PROBE_THRESHOLD) { @@ -197,8 +195,7 @@ static int __init test_async_probe_init(void) goto err_unregister_sync_devs; } - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), calltime); pr_info("registration took %lld msecs\n", duration); if (duration < TEST_PROBE_THRESHOLD) { dev_err(&(*pdev)->dev, @@ -223,8 +220,7 @@ static int __init test_async_probe_init(void) sync_id++; - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), calltime); dev_info(&(*pdev)->dev, "registration took %lld msecs\n", duration); if (duration < TEST_PROBE_THRESHOLD) { From 28f0c335dd4a1a4b44b3e6c6402825a93132e1a4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 22 Dec 2021 17:50:20 +0500 Subject: [PATCH 32/43] devtmpfs: mount with noexec and nosuid devtmpfs is writable. Add the noexec and nosuid as default mount flags to prevent code execution from /dev. The systems who don't use systemd and who rely on CONFIG_DEVTMPFS_MOUNT=y are the ones to be protected by this patch. Other systems are fine with the udev solution. No sane program should be relying on executing from /dev. So this patch reduces the attack surface. It doesn't prevent any specific attack, but it reduces the possibility that someone can use /dev as a place to put executable code. Chrome OS has been carrying this patch for several years. It seems trivial and simple solution to improve the protection of /dev when CONFIG_DEVTMPFS_MOUNT=y. Original patch: https://lore.kernel.org/lkml/20121120215059.GA1859@www.outflux.net/ Cc: ellyjones@chromium.org Cc: Kay Sievers Cc: Roland Eggner Co-developed-by: Muhammad Usama Anjum Signed-off-by: Kees Cook Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/YcMfDOyrg647RCmd@debian-BULLSEYE-live-builder-AMD64 Signed-off-by: Greg Kroah-Hartman --- drivers/base/Kconfig | 11 +++++++++++ drivers/base/devtmpfs.c | 10 ++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index ffcbe2bc460e..6f04b831a5c0 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -62,6 +62,17 @@ config DEVTMPFS_MOUNT rescue mode with init=/bin/sh, even when the /dev directory on the rootfs is completely empty. +config DEVTMPFS_SAFE + bool "Use nosuid,noexec mount options on devtmpfs" + depends on DEVTMPFS + help + This instructs the kernel to include the MS_NOEXEC and MS_NOSUID mount + flags when mounting devtmpfs. + + Notice: If enabled, things like /dev/mem cannot be mmapped + with the PROT_EXEC flag. This can break, for example, non-KMS + video drivers. + config STANDALONE bool "Select only drivers that don't need compile-time external firmware" default y diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 8be352ab4ddb..1e2c2d3882e2 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -29,6 +29,12 @@ #include #include "base.h" +#ifdef CONFIG_DEVTMPFS_SAFE +#define DEVTMPFS_MFLAGS (MS_SILENT | MS_NOEXEC | MS_NOSUID) +#else +#define DEVTMPFS_MFLAGS (MS_SILENT) +#endif + static struct task_struct *thread; static int __initdata mount_dev = IS_ENABLED(CONFIG_DEVTMPFS_MOUNT); @@ -363,7 +369,7 @@ int __init devtmpfs_mount(void) if (!thread) return 0; - err = init_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL); + err = init_mount("devtmpfs", "dev", "devtmpfs", DEVTMPFS_MFLAGS, NULL); if (err) printk(KERN_INFO "devtmpfs: error mounting %i\n", err); else @@ -412,7 +418,7 @@ static noinline int __init devtmpfs_setup(void *p) err = ksys_unshare(CLONE_NEWNS); if (err) goto out; - err = init_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); + err = init_mount("devtmpfs", "/", "devtmpfs", DEVTMPFS_MFLAGS, NULL); if (err) goto out; init_chdir("/.."); /* will traverse into overmounted root */ From f083266487690124481eac0869da850406fb3ed3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 29 Aug 2021 09:18:53 +0200 Subject: [PATCH 33/43] headers/uninline: Uninline single-use function: kobject_has_children() This was the only usage of in , so we'll able to decouple the two after this change. Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 17 +++++++++++++++++ include/linux/kobject.h | 17 ----------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 60d703ebd123..603941b00988 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3028,6 +3028,23 @@ static inline struct kobject *get_glue_dir(struct device *dev) return dev->kobj.parent; } +/** + * kobject_has_children - Returns whether a kobject has children. + * @kobj: the object to test + * + * This will return whether a kobject has other kobjects as children. + * + * It does NOT account for the presence of attribute files, only sub + * directories. It also assumes there is no concurrent addition or + * removal of such children, and thus relies on external locking. + */ +static inline bool kobject_has_children(struct kobject *kobj) +{ + WARN_ON_ONCE(kref_read(&kobj->kref) == 0); + + return kobj->sd && kobj->sd->dir.subdirs; +} + /* * make sure cleaning up dir as the last step, we need to make * sure .release handler of kobject is run with holding the diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ad90b49824dc..c7b47399b36a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -117,23 +117,6 @@ extern void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); -/** - * kobject_has_children - Returns whether a kobject has children. - * @kobj: the object to test - * - * This will return whether a kobject has other kobjects as children. - * - * It does NOT account for the presence of attribute files, only sub - * directories. It also assumes there is no concurrent addition or - * removal of such children, and thus relies on external locking. - */ -static inline bool kobject_has_children(struct kobject *kobj) -{ - WARN_ON_ONCE(kref_read(&kobj->kref) == 0); - - return kobj->sd && kobj->sd->dir.subdirs; -} - struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; From 99a6a4b39575b39c34fc024400b25a52343fbe07 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jan 2022 17:22:40 +0100 Subject: [PATCH 34/43] sh: sq: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the sh sq sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Tested-by: Rob Landley Link: https://lore.kernel.org/r/20220104162240.1309639-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/cpu/sh4/sq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index d432164b23b7..a76b94e41e91 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -324,6 +324,7 @@ static struct attribute *sq_sysfs_attrs[] = { &mapping_attr.attr, NULL, }; +ATTRIBUTE_GROUPS(sq_sysfs); static const struct sysfs_ops sq_sysfs_ops = { .show = sq_sysfs_show, @@ -332,7 +333,7 @@ static const struct sysfs_ops sq_sysfs_ops = { static struct kobj_type ktype_percpu_entry = { .sysfs_ops = &sq_sysfs_ops, - .default_attrs = sq_sysfs_attrs, + .default_groups = sq_sysfs_groups, }; static int sq_dev_add(struct device *dev, struct subsys_interface *sif) From f54dfdf7c625aad722a7d1508f99e8272bc4800c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Jan 2022 18:56:50 +0100 Subject: [PATCH 35/43] firmware: memmap: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the firmware memmap sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Link: https://lore.kernel.org/r/20220105175650.2640758-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/memmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 24945e2da77b..8e59be3782cb 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -69,6 +69,7 @@ static struct attribute *def_attrs[] = { &memmap_type_attr.attr, NULL }; +ATTRIBUTE_GROUPS(def); static const struct sysfs_ops memmap_attr_ops = { .show = memmap_attr_show, @@ -118,7 +119,7 @@ static void __meminit release_firmware_map_entry(struct kobject *kobj) static struct kobj_type __refdata memmap_ktype = { .release = release_firmware_map_entry, .sysfs_ops = &memmap_attr_ops, - .default_attrs = def_attrs, + .default_groups = def_groups, }; /* From ad8a5d1d2f577843dba14e4727c86e952ffbd5f1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Jan 2022 19:31:33 +0100 Subject: [PATCH 36/43] qemu_fw_cfg: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the firmware qemu_fw_cfg sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Cc: Gabriel Somlo Cc: "Michael S. Tsirkin" Cc: qemu-devel@nongnu.org Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20220105183133.2812848-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/qemu_fw_cfg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 172c751a4f6c..c62f05420d32 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -395,7 +395,7 @@ static void fw_cfg_sysfs_cache_cleanup(void) } } -/* default_attrs: per-entry attributes and show methods */ +/* per-entry attributes and show methods */ #define FW_CFG_SYSFS_ATTR(_attr) \ struct fw_cfg_sysfs_attribute fw_cfg_sysfs_attr_##_attr = { \ @@ -428,6 +428,7 @@ static struct attribute *fw_cfg_sysfs_entry_attrs[] = { &fw_cfg_sysfs_attr_name.attr, NULL, }; +ATTRIBUTE_GROUPS(fw_cfg_sysfs_entry); /* sysfs_ops: find fw_cfg_[entry, attribute] and call appropriate show method */ static ssize_t fw_cfg_sysfs_attr_show(struct kobject *kobj, struct attribute *a, @@ -454,7 +455,7 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj) /* kobj_type: ties together all properties required to register an entry */ static struct kobj_type fw_cfg_sysfs_entry_ktype = { - .default_attrs = fw_cfg_sysfs_entry_attrs, + .default_groups = fw_cfg_sysfs_entry_groups, .sysfs_ops = &fw_cfg_sysfs_attr_ops, .release = fw_cfg_sysfs_release_entry, }; From ab6d0f57be58c075bd249a97c81c3557b5e3e7cf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Jan 2022 19:17:19 +0100 Subject: [PATCH 37/43] firmware: dmi-sysfs: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the firmware dmi-sysfs sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Link: https://lore.kernel.org/r/20220105181719.2737782-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi-sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index 8b8127fa8955..3a353776bd34 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -302,12 +302,12 @@ static struct attribute *dmi_sysfs_sel_attrs[] = { &dmi_sysfs_attr_sel_per_log_type_descriptor_length.attr, NULL, }; - +ATTRIBUTE_GROUPS(dmi_sysfs_sel); static struct kobj_type dmi_system_event_log_ktype = { .release = dmi_entry_free, .sysfs_ops = &dmi_sysfs_specialize_attr_ops, - .default_attrs = dmi_sysfs_sel_attrs, + .default_groups = dmi_sysfs_sel_groups, }; typedef u8 (*sel_io_reader)(const struct dmi_system_event_log *sel, @@ -518,6 +518,7 @@ static struct attribute *dmi_sysfs_entry_attrs[] = { &dmi_sysfs_attr_entry_position.attr, NULL, }; +ATTRIBUTE_GROUPS(dmi_sysfs_entry); static ssize_t dmi_entry_raw_read_helper(struct dmi_sysfs_entry *entry, const struct dmi_header *dh, @@ -565,7 +566,7 @@ static void dmi_sysfs_entry_release(struct kobject *kobj) static struct kobj_type dmi_sysfs_entry_ktype = { .release = dmi_sysfs_entry_release, .sysfs_ops = &dmi_sysfs_attr_ops, - .default_attrs = dmi_sysfs_entry_attrs, + .default_groups = dmi_sysfs_entry_groups, }; static struct kset *dmi_kset; From 3407d826c18d9aed3c51545cc26ff9a2fda65463 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Jan 2022 19:26:34 +0100 Subject: [PATCH 38/43] firmware: edd: remove empty default_attrs array The default_attrs array of attributes for the edd sysfs entries is totally empty for some reason, and a list of attributes is added later after the object is created (which should be fixed up later as it's racy). Because this pointer is never used, and is empty, and we are trying to remove all default_attrs usages, just delete it. Link: https://lore.kernel.org/r/20220105182634.2802684-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/edd.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/firmware/edd.c b/drivers/firmware/edd.c index 14d0970a7198..69353dd0ea22 100644 --- a/drivers/firmware/edd.c +++ b/drivers/firmware/edd.c @@ -574,14 +574,6 @@ static EDD_DEVICE_ATTR(interface, 0444, edd_show_interface, edd_has_edd30); static EDD_DEVICE_ATTR(host_bus, 0444, edd_show_host_bus, edd_has_edd30); static EDD_DEVICE_ATTR(mbr_signature, 0444, edd_show_mbr_signature, edd_has_mbr_signature); - -/* These are default attributes that are added for every edd - * device discovered. There are none. - */ -static struct attribute * def_attrs[] = { - NULL, -}; - /* These attributes are conditional and only added for some devices. */ static struct edd_attribute * edd_attrs[] = { &edd_attr_raw_data, @@ -619,7 +611,6 @@ static void edd_release(struct kobject * kobj) static struct kobj_type edd_ktype = { .release = edd_release, .sysfs_ops = &edd_attr_ops, - .default_attrs = def_attrs, }; static struct kset *edd_kset; From 885e50253bfd6750327a265405461496d6af1639 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 31 Dec 2021 11:39:00 +0800 Subject: [PATCH 39/43] driver core: Move driver_sysfs_remove() after driver_sysfs_add() The driver_sysfs_remove() should be called after driver_sysfs_add() in really_probe(). The out-of-order driver_sysfs_remove() tries to remove some nonexistent nodes under the device and driver sysfs nodes. This is allowed, hence this change doesn't fix any problem, just a cleanup. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20211231033901.2168664-2-baolu.lu@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 68ea1f949daa..f6fac016b811 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -577,14 +577,14 @@ re_probe: if (dev->bus->dma_configure) { ret = dev->bus->dma_configure(dev); if (ret) - goto probe_failed; + goto sysfs_failed; } ret = driver_sysfs_add(dev); if (ret) { pr_err("%s: driver_sysfs_add(%s) failed\n", __func__, dev_name(dev)); - goto probe_failed; + goto sysfs_failed; } if (dev->pm_domain && dev->pm_domain->activate) { @@ -657,6 +657,8 @@ dev_groups_failed: else if (drv->remove) drv->remove(dev); probe_failed: + driver_sysfs_remove(dev); +sysfs_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); @@ -666,7 +668,6 @@ pinctrl_bind_failed: arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; - driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) From 00eb74ea2c14418042347eaa34c6b73ac6ec1e76 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 31 Dec 2021 11:39:01 +0800 Subject: [PATCH 40/43] driver core: Make bus notifiers in right order in really_probe() If a driver cannot be bound to a device, the correct bus notifier order should be: - BUS_NOTIFY_BIND_DRIVER: driver is about to be bound - BUS_NOTIFY_DRIVER_NOT_BOUND: driver failed to be bound or no notifier if the failure happens before the actual binding. The really_probe() notifies a BUS_NOTIFY_DRIVER_NOT_BOUND event without a BUS_NOTIFY_BIND_DRIVER if .dma_configure() returns failure. This change makes the notifiers in order. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20211231033901.2168664-3-baolu.lu@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f6fac016b811..9eaaff2f556c 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -577,7 +577,7 @@ re_probe: if (dev->bus->dma_configure) { ret = dev->bus->dma_configure(dev); if (ret) - goto sysfs_failed; + goto pinctrl_bind_failed; } ret = driver_sysfs_add(dev); From 358fcf5ddbec4e6706405847d6a666f5933a6c25 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 4 Jan 2022 18:05:05 +0100 Subject: [PATCH 41/43] debugfs: lockdown: Allow reading debugfs files that are not world readable When the kernel is locked down the kernel allows reading only debugfs files with mode 444. Mode 400 is also valid but is not allowed. Make the 444 into a mask. Fixes: 5496197f9b08 ("debugfs: Restrict debugfs when the kernel is locked down") Signed-off-by: Michal Suchanek Link: https://lore.kernel.org/r/20220104170505.10248-1-msuchanek@suse.de Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 7d162b0efbf0..950c63fa4d0b 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -147,7 +147,7 @@ static int debugfs_locked_down(struct inode *inode, struct file *filp, const struct file_operations *real_fops) { - if ((inode->i_mode & 07777) == 0444 && + if ((inode->i_mode & 07777 & ~0444) == 0 && !(filp->f_mode & FMODE_WRITE) && !real_fops->unlocked_ioctl && !real_fops->compat_ioctl && From 0589e8889dce8e0f0ea5bbf757f38865e2a469c1 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 31 Dec 2021 08:04:25 +0000 Subject: [PATCH 42/43] drivers/firmware: Add missing platform_device_put() in sysfb_create_simplefb Add the missing platform_device_put() before return from sysfb_create_simplefb() in the error handling case. Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211231080431.15385-1-linmq006@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/sysfb_simplefb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index b86761904949..303a491e520d 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -113,12 +113,16 @@ __init int sysfb_create_simplefb(const struct screen_info *si, sysfb_apply_efi_quirks(pd); ret = platform_device_add_resources(pd, &res, 1); - if (ret) + if (ret) { + platform_device_put(pd); return ret; + } ret = platform_device_add_data(pd, mode, sizeof(*mode)); - if (ret) + if (ret) { + platform_device_put(pd); return ret; + } return platform_device_add(pd); } From c9512fd032acfe6f5198c30b6e7e52e0a7df5d31 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jan 2022 11:50:24 +0100 Subject: [PATCH 43/43] kobject documentation: remove default_attrs information Since commit aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") we have been encouraging the use of default_groups instead of default_attrs, so reflect that information in the documentation as well so that no new users get added while the kernel is converted over to not use this field anymore. Cc: "Rafael J. Wysocki" Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220104105024.1014313-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Documentation/core-api/kobject.rst | 5 ++--- Documentation/translations/zh_CN/core-api/kobject.rst | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst index 3d6e3107315d..7310247310a0 100644 --- a/Documentation/core-api/kobject.rst +++ b/Documentation/core-api/kobject.rst @@ -299,7 +299,6 @@ kobj_type:: struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); @@ -313,10 +312,10 @@ call kobject_init() or kobject_init_and_add(). The release field in struct kobj_type is, of course, a pointer to the release() method for this type of kobject. The other two fields (sysfs_ops -and default_attrs) control how objects of this type are represented in +and default_groups) control how objects of this type are represented in sysfs; they are beyond the scope of this document. -The default_attrs pointer is a list of default attributes that will be +The default_groups pointer is a list of default attributes that will be automatically created for any kobject that is registered with this ktype. diff --git a/Documentation/translations/zh_CN/core-api/kobject.rst b/Documentation/translations/zh_CN/core-api/kobject.rst index 95634083dca0..0747b472fdea 100644 --- a/Documentation/translations/zh_CN/core-api/kobject.rst +++ b/Documentation/translations/zh_CN/core-api/kobject.rst @@ -258,7 +258,6 @@ kobject_put()以避免错误的发生是一个很好的做法。 struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); @@ -271,10 +270,10 @@ kobject_init()或kobject_init_and_add()时必须指定一个指向该结构的 指针。 当然,kobj_type结构中的release字段是指向这种类型的kobject的release() -方法的一个指针。另外两个字段(sysfs_ops 和 default_attrs)控制这种 +方法的一个指针。另外两个字段(sysfs_ops 和 default_groups)控制这种 类型的对象如何在 sysfs 中被表示;它们超出了本文的范围。 -default_attrs 指针是一个默认属性的列表,它将为任何用这个 ktype 注册 +default_groups 指针是一个默认属性的列表,它将为任何用这个 ktype 注册 的 kobject 自动创建。