Merge branch 'akpm' (rest of patches from Andrew)
Merge the left-over patches from Andrew Morton.

This merges the remaining two patches from Andrew's pile of "little bit
more MM". I mulled it over, and we emailed back and forth with Josef, and
he pointed out where I was wrong. Rule #51 of kernel maintenance: when
somebody makes it clear that they know the code better than you did, stop
arguing and just apply the damn patch.

Add a third patch by me to add a comment for the case that I had thought
was buggy and Josef corrected me on.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  filemap: add a comment about FAULT_FLAG_RETRY_NOWAIT behavior
  filemap: drop the mmap_sem for all blocking operations
  filemap: kill page_cache_read usage in filemap_fault
Commit f91f2ee54a
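The patches below all revolve around one rule: the fault path may only pin vma->vm_file and drop the mmap_sem for blocking I/O when FAULT_FLAG_ALLOW_RETRY is set and FAULT_FLAG_RETRY_NOWAIT is not. As a reading aid, here is a small userspace sketch of that decision; the flag values, struct, and helper name are simplified stand-ins rather than the real kernel definitions, and only the control flow mirrors maybe_unlock_mmap_for_io() from the diff below.

/*
 * Simplified userspace model of the mmap_sem-drop decision made in these
 * patches.  Flag values and types are stand-ins, not kernel definitions;
 * only the control flow mirrors maybe_unlock_mmap_for_io().
 */
#include <stdbool.h>
#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY   0x01   /* stand-in value */
#define FAULT_FLAG_RETRY_NOWAIT  0x02   /* stand-in value */

struct fake_fault {
        unsigned int flags;
        bool file_pinned;       /* models fpin != NULL */
        bool mmap_sem_held;     /* models holding mmap_sem for read */
};

/* Drop the "mmap_sem" only if retry is allowed and NOWAIT is not set. */
static void maybe_unlock_for_io(struct fake_fault *f)
{
        if (f->file_pinned)
                return; /* already dropped and pinned earlier in this fault */

        if ((f->flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
            FAULT_FLAG_ALLOW_RETRY) {
                f->file_pinned = true;    /* get_file() in the real code */
                f->mmap_sem_held = false; /* up_read(&mm->mmap_sem) */
        }
}

int main(void)
{
        unsigned int cases[] = {
                0,
                FAULT_FLAG_ALLOW_RETRY,
                FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT,
        };

        for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
                struct fake_fault f = { .flags = cases[i], .mmap_sem_held = true };

                maybe_unlock_for_io(&f);
                printf("flags=0x%x -> pinned=%d, sem_held=%d\n",
                       f.flags, f.file_pinned, f.mmap_sem_held);
        }
        return 0;
}

Running it shows the semaphore is given up only in the ALLOW_RETRY-only case, which is exactly the corner the new FAULT_FLAG_RETRY_NOWAIT comment in lock_page_maybe_drop_mmap() documents.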
include/linux/pagemap.h

@@ -239,6 +239,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_WRITE		0x00000008
 #define FGP_NOFS		0x00000010
 #define FGP_NOWAIT		0x00000020
+#define FGP_FOR_MMAP		0x00000040
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		int fgp_flags, gfp_t cache_gfp_mask);
mm/filemap.c (208 lines changed)
@@ -1587,6 +1587,9 @@ EXPORT_SYMBOL(find_lock_entry);
  * @gfp_mask and added to the page cache and the VM's LRU
  * list. The page is returned locked and with an increased
  * refcount.
+ * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
+ *   its own locking dance if the page is already in cache, or unlock the page
+ *   before returning if we had to add the page to pagecache.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1641,7 +1644,7 @@ no_page:
 		if (!page)
 			return NULL;
 
-		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;
 
 		/* Init accessed so avoid atomic mark_page_accessed later */
@@ -1655,6 +1658,13 @@ no_page:
 			if (err == -EEXIST)
 				goto repeat;
 		}
+
+		/*
+		 * add_to_page_cache_lru locks the page, and for mmap we expect
+		 * an unlocked page.
+		 */
+		if (page && (fgp_flags & FGP_FOR_MMAP))
+			unlock_page(page);
 	}
 
 	return page;
@@ -2379,64 +2389,98 @@ out:
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
-/**
- * page_cache_read - adds requested page to the page cache if not already there
- * @file:	file to read
- * @offset:	page index
- * @gfp_mask:	memory allocation flags
- *
- * This adds the requested page to the page cache if it isn't already there,
- * and schedules an I/O to read in its contents from disk.
- *
- * Return: %0 on success, negative error code otherwise.
- */
-static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
+#define MMAP_LOTSAMISS	(100)
+static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+					     struct file *fpin)
 {
-	struct address_space *mapping = file->f_mapping;
-	struct page *page;
-	int ret;
+	int flags = vmf->flags;
 
-	do {
-		page = __page_cache_alloc(gfp_mask);
-		if (!page)
-			return -ENOMEM;
+	if (fpin)
+		return fpin;
 
-		ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
-		if (ret == 0)
-			ret = mapping->a_ops->readpage(file, page);
-		else if (ret == -EEXIST)
-			ret = 0; /* losing race to add is OK */
-
-		put_page(page);
-
-	} while (ret == AOP_TRUNCATED_PAGE);
-
-	return ret;
+	/*
+	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+	 * anything, so we only pin the file and drop the mmap_sem if only
+	 * FAULT_FLAG_ALLOW_RETRY is set.
+	 */
+	if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
+	    FAULT_FLAG_ALLOW_RETRY) {
+		fpin = get_file(vmf->vma->vm_file);
+		up_read(&vmf->vma->vm_mm->mmap_sem);
+	}
+	return fpin;
 }
 
-#define MMAP_LOTSAMISS	(100)
+/*
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @page - the page to lock.
+ * @fpin - the pointer to the file we may pin (or is already pinned).
+ *
+ * This works similar to lock_page_or_retry in that it can drop the mmap_sem.
+ * It differs in that it actually returns the page locked if it returns 1 and 0
+ * if it couldn't lock the page.  If we did have to drop the mmap_sem then fpin
+ * will point to the pinned file and needs to be fput()'ed at a later point.
+ */
+static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+				     struct file **fpin)
+{
+	if (trylock_page(page))
+		return 1;
+
+	/*
+	 * NOTE! This will make us return with VM_FAULT_RETRY, but with
+	 * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
+	 * is supposed to work. We have way too many special cases..
+	 */
+	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+		return 0;
+
+	*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
+	if (vmf->flags & FAULT_FLAG_KILLABLE) {
+		if (__lock_page_killable(page)) {
+			/*
+			 * We didn't have the right flags to drop the mmap_sem,
+			 * but all fault_handlers only check for fatal signals
+			 * if we return VM_FAULT_RETRY, so we need to drop the
+			 * mmap_sem here and return 0 if we don't have a fpin.
+			 */
+			if (*fpin == NULL)
+				up_read(&vmf->vma->vm_mm->mmap_sem);
+			return 0;
+		}
+	} else
+		__lock_page(page);
+	return 1;
+}
 
 /*
- * Synchronous readahead happens when we don't even find
- * a page in the page cache at all.
+ * Synchronous readahead happens when we don't even find a page in the page
+ * cache at all.  We don't want to perform IO under the mmap sem, so if we have
+ * to drop the mmap sem we return the file that was pinned in order for us to do
+ * that.  If we didn't pin a file then we return NULL.  The file that is
+ * returned needs to be fput()'ed when we're done with it.
  */
-static void do_sync_mmap_readahead(struct vm_fault *vmf)
+static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 {
 	struct file *file = vmf->vma->vm_file;
 	struct file_ra_state *ra = &file->f_ra;
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin = NULL;
 	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
 	if (vmf->vma->vm_flags & VM_RAND_READ)
-		return;
+		return fpin;
 	if (!ra->ra_pages)
-		return;
+		return fpin;
 
 	if (vmf->vma->vm_flags & VM_SEQ_READ) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_sync_readahead(mapping, ra, file, offset,
 					  ra->ra_pages);
-		return;
+		return fpin;
 	}
 
 	/* Avoid banging the cache line if not needed */
@@ -2448,37 +2492,44 @@ static void do_sync_mmap_readahead(struct vm_fault *vmf)
 	 * stop bothering with read-ahead. It will only hurt.
 	 */
 	if (ra->mmap_miss > MMAP_LOTSAMISS)
-		return;
+		return fpin;
 
 	/*
 	 * mmap read-around
 	 */
+	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 	ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
 	ra->size = ra->ra_pages;
 	ra->async_size = ra->ra_pages / 4;
 	ra_submit(ra, mapping, file);
+	return fpin;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
- * so we want to possibly extend the readahead further..
+ * so we want to possibly extend the readahead further.  We return the file that
+ * was pinned if we have to drop the mmap_sem in order to do IO.
  */
-static void do_async_mmap_readahead(struct vm_fault *vmf,
+static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
 					    struct page *page)
 {
 	struct file *file = vmf->vma->vm_file;
 	struct file_ra_state *ra = &file->f_ra;
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin = NULL;
 	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
 	if (vmf->vma->vm_flags & VM_RAND_READ)
-		return;
+		return fpin;
 	if (ra->mmap_miss > 0)
 		ra->mmap_miss--;
-	if (PageReadahead(page))
+	if (PageReadahead(page)) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_async_readahead(mapping, ra, file,
 					   page, offset, ra->ra_pages);
+	}
+	return fpin;
 }
 
 /**
@@ -2510,6 +2561,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
 	int error;
 	struct file *file = vmf->vma->vm_file;
+	struct file *fpin = NULL;
 	struct address_space *mapping = file->f_mapping;
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
@@ -2531,23 +2583,26 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		do_async_mmap_readahead(vmf, page);
+		fpin = do_async_mmap_readahead(vmf, page);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		do_sync_mmap_readahead(vmf);
 		count_vm_event(PGMAJFAULT);
 		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
+		fpin = do_sync_mmap_readahead(vmf);
 retry_find:
-		page = find_get_page(mapping, offset);
-		if (!page)
-			goto no_cached_page;
+		page = pagecache_get_page(mapping, offset,
+					  FGP_CREAT|FGP_FOR_MMAP,
+					  vmf->gfp_mask);
+		if (!page) {
+			if (fpin)
+				goto out_retry;
+			return vmf_error(-ENOMEM);
+		}
 	}
 
-	if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-		put_page(page);
-		return ret | VM_FAULT_RETRY;
-	}
+	if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+		goto out_retry;
 
 	/* Did it get truncated? */
 	if (unlikely(page->mapping != mapping)) {
@@ -2564,6 +2619,16 @@ retry_find:
 	if (unlikely(!PageUptodate(page)))
 		goto page_not_uptodate;
 
+	/*
+	 * We've made it this far and we had to drop our mmap_sem, now is the
+	 * time to return to the upper layer and have it re-find the vma and
+	 * redo the fault.
+	 */
+	if (fpin) {
+		unlock_page(page);
+		goto out_retry;
+	}
+
 	/*
 	 * Found the page and have a reference on it.
 	 * We must recheck i_size under page lock.
@@ -2578,28 +2643,6 @@ retry_find:
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
 
-no_cached_page:
-	/*
-	 * We're only likely to ever get here if MADV_RANDOM is in
-	 * effect.
-	 */
-	error = page_cache_read(file, offset, vmf->gfp_mask);
-
-	/*
-	 * The page we want has now been added to the page cache.
-	 * In the unlikely event that someone removed it in the
-	 * meantime, we'll just come back here and read it again.
-	 */
-	if (error >= 0)
-		goto retry_find;
-
-	/*
-	 * An error return from page_cache_read can result if the
-	 * system is low on memory, or a problem occurs while trying
-	 * to schedule I/O.
-	 */
-	return vmf_error(error);
-
 page_not_uptodate:
 	/*
 	 * Umm, take care of errors if the page isn't up-to-date.
@@ -2608,12 +2651,15 @@ page_not_uptodate:
 	 * and we need to check for errors.
 	 */
 	ClearPageError(page);
+	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 	error = mapping->a_ops->readpage(file, page);
 	if (!error) {
 		wait_on_page_locked(page);
 		if (!PageUptodate(page))
 			error = -EIO;
 	}
+	if (fpin)
+		goto out_retry;
 	put_page(page);
 
 	if (!error || error == AOP_TRUNCATED_PAGE)
@@ -2622,6 +2668,18 @@ page_not_uptodate:
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
 	return VM_FAULT_SIGBUS;
 
+out_retry:
+	/*
+	 * We dropped the mmap_sem, we need to return to the fault handler to
+	 * re-find the vma and come back and find our hopefully still populated
+	 * page.
+	 */
+	if (page)
+		put_page(page);
+	if (fpin)
+		fput(fpin);
+
+	return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);