close-range-v5.9
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCXygcpgAKCRCRxhvAZXjc ogPeAQDv1ncqtNroFAC4pJ4tQhH7JSjW0OltiMk/AocY/J2SdQD9GJ15luYJ0/om 697q/Z68sndRynhdoZlMuf3oYuBlHQw= =3ZhE -----END PGP SIGNATURE----- Merge tag 'close-range-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux Pull close_range() implementation from Christian Brauner: "This adds the close_range() syscall. It allows a range of file descriptors, up to all file descriptors of the calling task, to be closed efficiently. This is coordinated with the FreeBSD folks, who have copied our version of this syscall and in the meantime have already merged it in April 2019: https://reviews.freebsd.org/D21627 https://svnweb.freebsd.org/base?view=revision&revision=359836 The syscall originally came up in a discussion around the new mount API and making new file descriptor types cloexec by default. During this discussion, Al suggested the close_range() syscall. First, it helps to close all file descriptors of an exec()ing task. This can be done safely via (quoting Al's example from [1] verbatim): /* that exec is sensitive */ unshare(CLONE_FILES); /* we don't want anything past stderr here */ close_range(3, ~0U); execve(....); The code snippet above is one way of working around the problem that file descriptors are not cloexec by default. This is aggravated by the fact that we can't just switch them over without massively regressing userspace. For a whole class of programs, having an in-kernel method of closing all file descriptors is very helpful (e.g. daemons, service managers, programming language standard libraries, container managers etc.). Second, it allows userspace to avoid implementing closing all file descriptors by parsing through /proc/<pid>/fd/* and calling close() on each file descriptor and other hacks. 
From looking at various large(ish) userspace code bases, this or similar patterns are very common in service managers, container runtimes, and programming language runtimes/standard libraries such as Python or Rust. In addition, the syscall will also work for tasks that do not have procfs mounted and on kernels that do not have procfs support compiled in. In such situations the only way to make sure that all file descriptors are closed is to call close() on each file descriptor up to UINT_MAX, or to resort to RLIMIT_NOFILE/OPEN_MAX trickery. Based on Linus' suggestion, close_range() also comes with a new flag CLOSE_RANGE_UNSHARE to more elegantly handle file descriptor dropping right before exec. This would usually be expressed in the sequence: unshare(CLONE_FILES); close_range(3, ~0U); As pointed out by Linus, it might be desirable to make this part of close_range() itself under a new flag, CLOSE_RANGE_UNSHARE, which is especially handy when we're closing all file descriptors above a certain threshold. A test suite is included, as always." * tag 'close-range-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: tests: add CLOSE_RANGE_UNSHARE tests close_range: add CLOSE_RANGE_UNSHARE tests: add close_range() tests arch: wire-up close_range() open: add close_range()
This commit is contained in:
Коммит
4f30a60aa7
|
@ -475,6 +475,7 @@
|
|||
543 common fspick sys_fspick
|
||||
544 common pidfd_open sys_pidfd_open
|
||||
# 545 reserved for clone3
|
||||
546 common close_range sys_close_range
|
||||
547 common openat2 sys_openat2
|
||||
548 common pidfd_getfd sys_pidfd_getfd
|
||||
549 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -449,6 +449,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -879,6 +879,8 @@ __SYSCALL(__NR_fspick, sys_fspick)
|
|||
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
|
||||
#define __NR_clone3 435
|
||||
__SYSCALL(__NR_clone3, sys_clone3)
|
||||
#define __NR_close_range 436
|
||||
__SYSCALL(__NR_close_range, sys_close_range)
|
||||
#define __NR_openat2 437
|
||||
__SYSCALL(__NR_openat2, sys_openat2)
|
||||
#define __NR_pidfd_getfd 438
|
||||
|
|
|
@ -356,6 +356,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
# 435 reserved for clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -435,6 +435,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 __sys_clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -441,6 +441,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -374,6 +374,7 @@
|
|||
433 n32 fspick sys_fspick
|
||||
434 n32 pidfd_open sys_pidfd_open
|
||||
435 n32 clone3 __sys_clone3
|
||||
436 n32 close_range sys_close_range
|
||||
437 n32 openat2 sys_openat2
|
||||
438 n32 pidfd_getfd sys_pidfd_getfd
|
||||
439 n32 faccessat2 sys_faccessat2
|
||||
|
|
|
@ -350,6 +350,7 @@
|
|||
433 n64 fspick sys_fspick
|
||||
434 n64 pidfd_open sys_pidfd_open
|
||||
435 n64 clone3 __sys_clone3
|
||||
436 n64 close_range sys_close_range
|
||||
437 n64 openat2 sys_openat2
|
||||
438 n64 pidfd_getfd sys_pidfd_getfd
|
||||
439 n64 faccessat2 sys_faccessat2
|
||||
|
|
|
@ -423,6 +423,7 @@
|
|||
433 o32 fspick sys_fspick
|
||||
434 o32 pidfd_open sys_pidfd_open
|
||||
435 o32 clone3 __sys_clone3
|
||||
436 o32 close_range sys_close_range
|
||||
437 o32 openat2 sys_openat2
|
||||
438 o32 pidfd_getfd sys_pidfd_getfd
|
||||
439 o32 faccessat2 sys_faccessat2
|
||||
|
|
|
@ -433,6 +433,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3_wrapper
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -525,6 +525,7 @@
|
|||
435 32 clone3 ppc_clone3 sys_clone3
|
||||
435 64 clone3 sys_clone3
|
||||
435 spu clone3 sys_ni_syscall
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -438,6 +438,7 @@
|
|||
433 common fspick sys_fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3 sys_clone3
|
||||
436 common close_range sys_close_range sys_close_range
|
||||
437 common openat2 sys_openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2 sys_faccessat2
|
||||
|
|
|
@ -438,6 +438,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
# 435 reserved for clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -481,6 +481,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
# 435 reserved for clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -440,6 +440,7 @@
|
|||
433 i386 fspick sys_fspick
|
||||
434 i386 pidfd_open sys_pidfd_open
|
||||
435 i386 clone3 sys_clone3
|
||||
436 i386 close_range sys_close_range
|
||||
437 i386 openat2 sys_openat2
|
||||
438 i386 pidfd_getfd sys_pidfd_getfd
|
||||
439 i386 faccessat2 sys_faccessat2
|
||||
|
|
|
@ -357,6 +357,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
|
@ -406,6 +406,7 @@
|
|||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3
|
||||
436 common close_range sys_close_range
|
||||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
|
|
121
fs/file.c
121
fs/file.c
|
@ -10,6 +10,7 @@
|
|||
#include <linux/syscalls.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -18,6 +19,7 @@
|
|||
#include <linux/bitops.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/close_range.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
|
||||
|
@ -265,12 +267,22 @@ static unsigned int count_open_files(struct fdtable *fdt)
|
|||
return i;
|
||||
}
|
||||
|
||||
static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
|
||||
{
|
||||
unsigned int count;
|
||||
|
||||
count = count_open_files(fdt);
|
||||
if (max_fds < NR_OPEN_DEFAULT)
|
||||
max_fds = NR_OPEN_DEFAULT;
|
||||
return min(count, max_fds);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a new files structure and copy contents from the
|
||||
* passed in files structure.
|
||||
* errorp will be valid only when the returned files_struct is NULL.
|
||||
*/
|
||||
struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
||||
struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
|
||||
{
|
||||
struct files_struct *newf;
|
||||
struct file **old_fds, **new_fds;
|
||||
|
@ -297,7 +309,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
|||
|
||||
spin_lock(&oldf->file_lock);
|
||||
old_fdt = files_fdtable(oldf);
|
||||
open_files = count_open_files(old_fdt);
|
||||
open_files = sane_fdtable_size(old_fdt, max_fds);
|
||||
|
||||
/*
|
||||
* Check whether we need to allocate a larger fd array and fd set.
|
||||
|
@ -328,7 +340,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
|||
*/
|
||||
spin_lock(&oldf->file_lock);
|
||||
old_fdt = files_fdtable(oldf);
|
||||
open_files = count_open_files(old_fdt);
|
||||
open_files = sane_fdtable_size(old_fdt, max_fds);
|
||||
}
|
||||
|
||||
copy_fd_bitmaps(new_fdt, old_fdt, open_files);
|
||||
|
@ -625,12 +637,9 @@ void fd_install(unsigned int fd, struct file *file)
|
|||
|
||||
EXPORT_SYMBOL(fd_install);
|
||||
|
||||
/*
|
||||
* The same warnings as for __alloc_fd()/__fd_install() apply here...
|
||||
*/
|
||||
int __close_fd(struct files_struct *files, unsigned fd)
|
||||
static struct file *pick_file(struct files_struct *files, unsigned fd)
|
||||
{
|
||||
struct file *file;
|
||||
struct file *file = NULL;
|
||||
struct fdtable *fdt;
|
||||
|
||||
spin_lock(&files->file_lock);
|
||||
|
@ -642,15 +651,105 @@ int __close_fd(struct files_struct *files, unsigned fd)
|
|||
goto out_unlock;
|
||||
rcu_assign_pointer(fdt->fd[fd], NULL);
|
||||
__put_unused_fd(files, fd);
|
||||
spin_unlock(&files->file_lock);
|
||||
return filp_close(file, files);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&files->file_lock);
|
||||
return -EBADF;
|
||||
return file;
|
||||
}
|
||||
|
||||
/*
|
||||
* The same warnings as for __alloc_fd()/__fd_install() apply here...
|
||||
*/
|
||||
int __close_fd(struct files_struct *files, unsigned fd)
|
||||
{
|
||||
struct file *file;
|
||||
|
||||
file = pick_file(files, fd);
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
return filp_close(file, files);
|
||||
}
|
||||
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
|
||||
|
||||
/**
|
||||
* __close_range() - Close all file descriptors in a given range.
|
||||
*
|
||||
* @fd: starting file descriptor to close
|
||||
* @max_fd: last file descriptor to close
|
||||
*
|
||||
* This closes a range of file descriptors. All file descriptors
|
||||
* from @fd up to and including @max_fd are closed.
|
||||
*/
|
||||
int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
|
||||
{
|
||||
unsigned int cur_max;
|
||||
struct task_struct *me = current;
|
||||
struct files_struct *cur_fds = me->files, *fds = NULL;
|
||||
|
||||
if (flags & ~CLOSE_RANGE_UNSHARE)
|
||||
return -EINVAL;
|
||||
|
||||
if (fd > max_fd)
|
||||
return -EINVAL;
|
||||
|
||||
rcu_read_lock();
|
||||
cur_max = files_fdtable(cur_fds)->max_fds;
|
||||
rcu_read_unlock();
|
||||
|
||||
/* cap to last valid index into fdtable */
|
||||
cur_max--;
|
||||
|
||||
if (flags & CLOSE_RANGE_UNSHARE) {
|
||||
int ret;
|
||||
unsigned int max_unshare_fds = NR_OPEN_MAX;
|
||||
|
||||
/*
|
||||
* If the requested range is greater than the current maximum,
|
||||
* we're closing everything so only copy all file descriptors
|
||||
* beneath the lowest file descriptor.
|
||||
*/
|
||||
if (max_fd >= cur_max)
|
||||
max_unshare_fds = fd;
|
||||
|
||||
ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We used to share our file descriptor table, and have now
|
||||
* created a private one, make sure we're using it below.
|
||||
*/
|
||||
if (fds)
|
||||
swap(cur_fds, fds);
|
||||
}
|
||||
|
||||
max_fd = min(max_fd, cur_max);
|
||||
while (fd <= max_fd) {
|
||||
struct file *file;
|
||||
|
||||
file = pick_file(cur_fds, fd++);
|
||||
if (!file)
|
||||
continue;
|
||||
|
||||
filp_close(file, cur_fds);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (fds) {
|
||||
/*
|
||||
* We're done closing the files we were supposed to. Time to install
|
||||
* the new file descriptor table and drop the old one.
|
||||
*/
|
||||
task_lock(me);
|
||||
me->files = cur_fds;
|
||||
task_unlock(me);
|
||||
put_files_struct(fds);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* variant of __close_fd that gets a ref on the file for later fput.
|
||||
* The caller must ensure that filp_close() called on the file, and then
|
||||
|
|
17
fs/open.c
17
fs/open.c
|
@ -1310,6 +1310,23 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
|||
return retval;
|
||||
}
|
||||
|
||||
/**
 * close_range() - Close all file descriptors in a given range.
 *
 * @fd:     starting file descriptor to close
 * @max_fd: last file descriptor to close
 * @flags:  passed through to __close_range(), which accepts 0 or
 *          CLOSE_RANGE_UNSHARE and rejects any other bit with -EINVAL
 *
 * This closes a range of file descriptors. All file descriptors
 * from @fd up to and including @max_fd are closed.
 * Currently, errors to close a given file descriptor are ignored.
 *
 * The syscall is a thin wrapper: it returns whatever __close_range()
 * returns.
 */
|
||||
SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
|
||||
unsigned int, flags)
|
||||
{
|
||||
return __close_range(fd, max_fd, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine simulates a hangup on the tty, to arrange that users
|
||||
* are given clean terminals at login time.
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
* as this is the granularity returned by copy_fdset().
|
||||
*/
|
||||
#define NR_OPEN_DEFAULT BITS_PER_LONG
|
||||
/*
 * All-ones unsigned int. Parenthesized so the expansion stays a single
 * operand wherever the macro is used.
 */
#define NR_OPEN_MAX (~0U)
|
||||
|
||||
struct fdtable {
|
||||
unsigned int max_fds;
|
||||
|
@ -109,7 +110,7 @@ struct files_struct *get_files_struct(struct task_struct *);
|
|||
void put_files_struct(struct files_struct *fs);
|
||||
void reset_files_struct(struct files_struct *);
|
||||
int unshare_files(struct files_struct **);
|
||||
struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy;
|
||||
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
|
||||
void do_close_on_exec(struct files_struct *);
|
||||
int iterate_fd(struct files_struct *, unsigned,
|
||||
int (*)(const void *, struct file *, unsigned),
|
||||
|
@ -121,7 +122,10 @@ extern void __fd_install(struct files_struct *files,
|
|||
unsigned int fd, struct file *file);
|
||||
extern int __close_fd(struct files_struct *files,
|
||||
unsigned int fd);
|
||||
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
|
||||
extern int __close_fd_get_file(unsigned int fd, struct file **res);
|
||||
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
|
||||
struct files_struct **new_fdp);
|
||||
|
||||
extern struct kmem_cache *files_cachep;
|
||||
|
||||
|
|
|
@ -444,6 +444,8 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
|
|||
asmlinkage long sys_openat2(int dfd, const char __user *filename,
|
||||
struct open_how *how, size_t size);
|
||||
asmlinkage long sys_close(unsigned int fd);
|
||||
asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd,
|
||||
unsigned int flags);
|
||||
asmlinkage long sys_vhangup(void);
|
||||
|
||||
/* fs/pipe.c */
|
||||
|
|
|
@ -850,6 +850,8 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
|
|||
#define __NR_clone3 435
|
||||
__SYSCALL(__NR_clone3, sys_clone3)
|
||||
#endif
|
||||
#define __NR_close_range 436
|
||||
__SYSCALL(__NR_close_range, sys_close_range)
|
||||
|
||||
#define __NR_openat2 437
|
||||
__SYSCALL(__NR_openat2, sys_openat2)
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
#ifndef _UAPI_LINUX_CLOSE_RANGE_H
|
||||
#define _UAPI_LINUX_CLOSE_RANGE_H
|
||||
|
||||
/* Unshare the file descriptor table before closing file descriptors. */
|
||||
#define CLOSE_RANGE_UNSHARE (1U << 1)
|
||||
|
||||
#endif /* _UAPI_LINUX_CLOSE_RANGE_H */
|
||||
|
|
@ -1479,7 +1479,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
|
|||
goto out;
|
||||
}
|
||||
|
||||
newf = dup_fd(oldf, &error);
|
||||
newf = dup_fd(oldf, NR_OPEN_MAX, &error);
|
||||
if (!newf)
|
||||
goto out;
|
||||
|
||||
|
@ -2866,14 +2866,15 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
|
|||
/*
|
||||
* Unshare file descriptor table if it is being shared
|
||||
*/
|
||||
static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
|
||||
int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
|
||||
struct files_struct **new_fdp)
|
||||
{
|
||||
struct files_struct *fd = current->files;
|
||||
int error = 0;
|
||||
|
||||
if ((unshare_flags & CLONE_FILES) &&
|
||||
(fd && atomic_read(&fd->count) > 1)) {
|
||||
*new_fdp = dup_fd(fd, &error);
|
||||
*new_fdp = dup_fd(fd, max_fds, &error);
|
||||
if (!*new_fdp)
|
||||
return error;
|
||||
}
|
||||
|
@ -2933,7 +2934,7 @@ int ksys_unshare(unsigned long unshare_flags)
|
|||
err = unshare_fs(unshare_flags, &new_fs);
|
||||
if (err)
|
||||
goto bad_unshare_out;
|
||||
err = unshare_fd(unshare_flags, &new_fd);
|
||||
err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
|
||||
if (err)
|
||||
goto bad_unshare_cleanup_fs;
|
||||
err = unshare_userns(unshare_flags, &new_cred);
|
||||
|
@ -3022,7 +3023,7 @@ int unshare_files(struct files_struct **displaced)
|
|||
struct files_struct *copy = NULL;
|
||||
int error;
|
||||
|
||||
error = unshare_fd(CLONE_FILES, ©);
|
||||
error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, ©);
|
||||
if (error || !copy) {
|
||||
*displaced = NULL;
|
||||
return error;
|
||||
|
|
|
@ -6,6 +6,7 @@ TARGETS += breakpoints
|
|||
TARGETS += capabilities
|
||||
TARGETS += cgroup
|
||||
TARGETS += clone3
|
||||
TARGETS += core
|
||||
TARGETS += cpufreq
|
||||
TARGETS += cpu-hotplug
|
||||
TARGETS += drivers/dma-buf
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
close_range_test
|
|
@ -0,0 +1,7 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
CFLAGS += -g -I../../../../usr/include/
|
||||
|
||||
TEST_GEN_PROGS := close_range_test
|
||||
|
||||
include ../lib.mk
|
||||
|
|
@ -0,0 +1,227 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../kselftest_harness.h"
|
||||
#include "../clone3/clone3_selftests.h"
|
||||
|
||||
#ifndef __NR_close_range
|
||||
#define __NR_close_range -1
|
||||
#endif
|
||||
|
||||
#ifndef CLOSE_RANGE_UNSHARE
|
||||
#define CLOSE_RANGE_UNSHARE (1U << 1)
|
||||
#endif
|
||||
|
||||
/*
 * Raw close_range() invocation through syscall(2), using __NR_close_range.
 * The syscall() result is returned converted to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return rc;
}
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#endif
|
||||
|
||||
TEST(close_range)
|
||||
{
|
||||
int i, ret;
|
||||
int open_fds[101];
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
|
||||
int fd;
|
||||
|
||||
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
|
||||
ASSERT_GE(fd, 0) {
|
||||
if (errno == ENOENT)
|
||||
XFAIL(return, "Skipping test since /dev/null does not exist");
|
||||
}
|
||||
|
||||
open_fds[i] = fd;
|
||||
}
|
||||
|
||||
EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
|
||||
if (errno == ENOSYS)
|
||||
XFAIL(return, "close_range() syscall not supported");
|
||||
}
|
||||
|
||||
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
|
||||
|
||||
for (i = 0; i <= 50; i++)
|
||||
EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
|
||||
|
||||
for (i = 51; i <= 100; i++)
|
||||
EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
|
||||
|
||||
/* create a couple of gaps */
|
||||
close(57);
|
||||
close(78);
|
||||
close(81);
|
||||
close(82);
|
||||
close(84);
|
||||
close(90);
|
||||
|
||||
EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
|
||||
|
||||
for (i = 51; i <= 92; i++)
|
||||
EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
|
||||
|
||||
for (i = 93; i <= 100; i++)
|
||||
EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
|
||||
|
||||
/* test that the kernel caps and still closes all fds */
|
||||
EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
|
||||
|
||||
for (i = 93; i <= 99; i++)
|
||||
EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
|
||||
|
||||
EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
|
||||
|
||||
EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
|
||||
|
||||
EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
|
||||
}
|
||||
|
||||
TEST(close_range_unshare)
|
||||
{
|
||||
int i, ret, status;
|
||||
pid_t pid;
|
||||
int open_fds[101];
|
||||
struct clone_args args = {
|
||||
.flags = CLONE_FILES,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
|
||||
int fd;
|
||||
|
||||
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
|
||||
ASSERT_GE(fd, 0) {
|
||||
if (errno == ENOENT)
|
||||
XFAIL(return, "Skipping test since /dev/null does not exist");
|
||||
}
|
||||
|
||||
open_fds[i] = fd;
|
||||
}
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
ret = sys_close_range(open_fds[0], open_fds[50],
|
||||
CLOSE_RANGE_UNSHARE);
|
||||
if (ret)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
for (i = 0; i <= 50; i++)
|
||||
if (fcntl(open_fds[i], F_GETFL) != -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
for (i = 51; i <= 100; i++)
|
||||
if (fcntl(open_fds[i], F_GETFL) == -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
/* create a couple of gaps */
|
||||
close(57);
|
||||
close(78);
|
||||
close(81);
|
||||
close(82);
|
||||
close(84);
|
||||
close(90);
|
||||
|
||||
ret = sys_close_range(open_fds[51], open_fds[92],
|
||||
CLOSE_RANGE_UNSHARE);
|
||||
if (ret)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
for (i = 51; i <= 92; i++)
|
||||
if (fcntl(open_fds[i], F_GETFL) != -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
for (i = 93; i <= 100; i++)
|
||||
if (fcntl(open_fds[i], F_GETFL) == -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
/* test that the kernel caps and still closes all fds */
|
||||
ret = sys_close_range(open_fds[93], open_fds[99],
|
||||
CLOSE_RANGE_UNSHARE);
|
||||
if (ret)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
for (i = 93; i <= 99; i++)
|
||||
if (fcntl(open_fds[i], F_GETFL) != -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
if (fcntl(open_fds[100], F_GETFL) == -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
ret = sys_close_range(open_fds[100], open_fds[100],
|
||||
CLOSE_RANGE_UNSHARE);
|
||||
if (ret)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
if (fcntl(open_fds[100], F_GETFL) != -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
EXPECT_EQ(waitpid(pid, &status, 0), pid);
|
||||
EXPECT_EQ(true, WIFEXITED(status));
|
||||
EXPECT_EQ(0, WEXITSTATUS(status));
|
||||
}
|
||||
|
||||
TEST(close_range_unshare_capped)
|
||||
{
|
||||
int i, ret, status;
|
||||
pid_t pid;
|
||||
int open_fds[101];
|
||||
struct clone_args args = {
|
||||
.flags = CLONE_FILES,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
|
||||
int fd;
|
||||
|
||||
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
|
||||
ASSERT_GE(fd, 0) {
|
||||
if (errno == ENOENT)
|
||||
XFAIL(return, "Skipping test since /dev/null does not exist");
|
||||
}
|
||||
|
||||
open_fds[i] = fd;
|
||||
}
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
ret = sys_close_range(open_fds[0], UINT_MAX,
|
||||
CLOSE_RANGE_UNSHARE);
|
||||
if (ret)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
for (i = 0; i <= 100; i++)
|
||||
if (fcntl(open_fds[i], F_GETFL) != -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
EXPECT_EQ(waitpid(pid, &status, 0), pid);
|
||||
EXPECT_EQ(true, WIFEXITED(status));
|
||||
EXPECT_EQ(0, WEXITSTATUS(status));
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
Загрузка…
Ссылка в новой задаче