vfs: implement readahead(2) using POSIX_FADV_WILLNEED

The implementation of the readahead(2) syscall is identical to that of
fadvise64(POSIX_FADV_WILLNEED), with a few exceptions:
1. readahead(2) returns -EINVAL for !mapping->a_ops, whereas fadvise64()
   ignores the request and returns 0.
2. fadvise64() checks for an integer overflow corner case.
3. fadvise64() calls the optional filesystem fadvise() file operation.

Unite the two implementations by calling vfs_fadvise() from the
readahead(2) syscall. Keep the !mapping->a_ops check in the readahead(2)
syscall to preserve the documented syscall ABI behaviour.

Suggested-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: d1d04ef857 ("ovl: stack file ops")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
This commit is contained in:
Amir Goldstein 2018-08-29 08:41:29 +03:00 коммит произвёл Miklos Szeredi
Родитель 45cd0faae3
Коммит 3d8f761531
3 изменённых файлов: 21 добавлений и 30 удалений

Просмотреть файл

@ -32,7 +32,7 @@ ifdef CONFIG_CROSS_MEMORY_ATTACH
mmu-$(CONFIG_MMU) += process_vm_access.o mmu-$(CONFIG_MMU) += process_vm_access.o
endif endif
obj-y := filemap.o mempool.o oom_kill.o \ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o \ maccess.o page_alloc.o page-writeback.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \ util.o mmzone.o vmstat.o backing-dev.o \
@ -49,7 +49,6 @@ else
obj-y += bootmem.o obj-y += bootmem.o
endif endif
obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o
ifdef CONFIG_MMU ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
endif endif

Просмотреть файл

@ -188,6 +188,8 @@ int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
} }
EXPORT_SYMBOL(vfs_fadvise); EXPORT_SYMBOL(vfs_fadvise);
#ifdef CONFIG_ADVISE_SYSCALLS
int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{ {
struct fd f = fdget(fd); struct fd f = fdget(fd);
@ -215,3 +217,4 @@ SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
} }
#endif #endif
#endif

Просмотреть файл

@ -20,6 +20,7 @@
#include <linux/file.h> #include <linux/file.h>
#include <linux/mm_inline.h> #include <linux/mm_inline.h>
#include <linux/blk-cgroup.h> #include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include "internal.h" #include "internal.h"
@ -575,24 +576,6 @@ page_cache_async_readahead(struct address_space *mapping,
} }
EXPORT_SYMBOL_GPL(page_cache_async_readahead); EXPORT_SYMBOL_GPL(page_cache_async_readahead);
/*
 * do_readahead - validate a mapping and force readahead on a page range.
 * @mapping: address space to read into; rejected if NULL or without a_ops
 * @filp:    file handed through to force_page_cache_readahead()
 * @index:   first page index of the range (caller passes offset >> PAGE_SHIFT)
 * @nr:      number of pages in the range
 *
 * Returns -EINVAL when @mapping is NULL or has no a_ops, 0 without doing
 * any work for DAX mappings, and otherwise the return value of
 * force_page_cache_readahead().
 */
static ssize_t
do_readahead(struct address_space *mapping, struct file *filp,
pgoff_t index, unsigned long nr)
{
/* Readahead is impossible without a mapping and its operations. */
if (!mapping || !mapping->a_ops)
return -EINVAL;
/*
* Readahead doesn't make sense for DAX inodes, but we don't want it
* to report a failure either. Instead, we just return success and
* don't do any work.
*/
if (dax_mapping(mapping))
return 0;
return force_page_cache_readahead(mapping, filp, index, nr);
}
ssize_t ksys_readahead(int fd, loff_t offset, size_t count) ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{ {
ssize_t ret; ssize_t ret;
@ -600,16 +583,22 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
ret = -EBADF; ret = -EBADF;
f = fdget(fd); f = fdget(fd);
if (f.file) { if (!f.file || !(f.file->f_mode & FMODE_READ))
if (f.file->f_mode & FMODE_READ) { goto out;
struct address_space *mapping = f.file->f_mapping;
pgoff_t start = offset >> PAGE_SHIFT; /*
pgoff_t end = (offset + count - 1) >> PAGE_SHIFT; * The readahead() syscall is intended to run only on files
unsigned long len = end - start + 1; * that can execute readahead. If readahead is not possible
ret = do_readahead(mapping, f.file, start, len); * on this file, then we must return -EINVAL.
} */
ret = -EINVAL;
if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
!S_ISREG(file_inode(f.file)->i_mode))
goto out;
ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
fdput(f); fdput(f);
}
return ret; return ret;
} }