Merge branch 'akpm' (rest of patches from Andrew)

Merge the left-over patches from Andrew Morton.

This merges the remaining two patches from Andrew's pile of "little bit
more MM".  I mulled it over, and we emailed back and forth with Josef,
and he pointed out where I was wrong.

Rule #51 of kernel maintenance: when somebody makes it clear that they
know the code better than you did, stop arguing and just apply the damn
patch.

Add a third patch by me to add a comment for the case that I had thought
was buggy and Josef corrected me on.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  filemap: add a comment about FAULT_FLAG_RETRY_NOWAIT behavior
  filemap: drop the mmap_sem for all blocking operations
  filemap: kill page_cache_read usage in filemap_fault
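
[ Illustration only, not part of the commit: the core idea in the two
  Josef patches is that filemap_fault() must not sit on mmap_sem while
  it waits on the page lock or on readpage I/O, so it pins the file,
  drops the semaphore, does the blocking work, and returns
  VM_FAULT_RETRY so the fault handler can redo the fault.  A toy
  userspace sketch of that drop-and-retry shape follows; every name in
  it is made up for the demo, and a pthread rwlock stands in for
  mmap_sem:

	/* Toy analogue of the mmap_sem drop-and-retry dance. */
	#include <pthread.h>
	#include <stdio.h>

	enum fault_result { FAULT_DONE, FAULT_RETRY };

	struct mapping {
		pthread_rwlock_t sem;	/* stands in for mmap_sem */
		int populated;		/* stands in for "page is in cache" */
	};

	/*
	 * Called with m->sem held for reading.  Returns FAULT_DONE with
	 * the lock still held, or FAULT_RETRY with the lock dropped.
	 */
	static enum fault_result fault(struct mapping *m)
	{
		if (m->populated)
			return FAULT_DONE;	/* fast path, nothing blocks */

		pthread_rwlock_unlock(&m->sem);	/* drop the lock before "I/O" */
		m->populated = 1;		/* pretend we read the page */
		return FAULT_RETRY;		/* caller re-takes lock, retries */
	}

	int main(void)
	{
		struct mapping m = { .populated = 0 };
		enum fault_result r;

		pthread_rwlock_init(&m.sem, NULL);
		do {
			pthread_rwlock_rdlock(&m.sem);
			r = fault(&m);
		} while (r == FAULT_RETRY);	/* like fault handlers on VM_FAULT_RETRY */
		pthread_rwlock_unlock(&m.sem);
		printf("fault handled after one retry\n");
		return 0;
	}

  The real code keeps the pinned struct file in "fpin" and fput()s it
  at the out_retry label, as the diff below shows. ]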
Committed by Linus Torvalds on 2019-03-15 12:00:45 -07:00
Parents: f261c4e529 8b0f9fa2e0
Commit: f91f2ee54a
2 changed files with 134 additions and 75 deletions

include/linux/pagemap.h

@@ -239,6 +239,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_WRITE		0x00000008
 #define FGP_NOFS		0x00000010
 #define FGP_NOWAIT		0x00000020
+#define FGP_FOR_MMAP		0x00000040
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		int fgp_flags, gfp_t cache_gfp_mask);

mm/filemap.c

@@ -1587,6 +1587,9 @@ EXPORT_SYMBOL(find_lock_entry);
  * @gfp_mask and added to the page cache and the VM's LRU
  * list. The page is returned locked and with an increased
  * refcount.
+ * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
+ *   its own locking dance if the page is already in cache, or unlock the page
+ *   before returning if we had to add the page to pagecache.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1641,7 +1644,7 @@ no_page:
 		if (!page)
 			return NULL;
 
-		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;
 
 		/* Init accessed so avoid atomic mark_page_accessed later */
@@ -1655,6 +1658,13 @@ no_page:
 			if (err == -EEXIST)
 				goto repeat;
 		}
+
+		/*
+		 * add_to_page_cache_lru locks the page, and for mmap we expect
+		 * an unlocked page.
+		 */
+		if (page && (fgp_flags & FGP_FOR_MMAP))
+			unlock_page(page);
 	}
 
 	return page;
@@ -2379,64 +2389,98 @@ out:
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
-/**
- * page_cache_read - adds requested page to the page cache if not already there
- * @file:	file to read
- * @offset:	page index
- * @gfp_mask:	memory allocation flags
- *
- * This adds the requested page to the page cache if it isn't already there,
- * and schedules an I/O to read in its contents from disk.
- *
- * Return: %0 on success, negative error code otherwise.
- */
-static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
-{
-	struct address_space *mapping = file->f_mapping;
-	struct page *page;
-	int ret;
-
-	do {
-		page = __page_cache_alloc(gfp_mask);
-		if (!page)
-			return -ENOMEM;
-
-		ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
-		if (ret == 0)
-			ret = mapping->a_ops->readpage(file, page);
-		else if (ret == -EEXIST)
-			ret = 0; /* losing race to add is OK */
-
-		put_page(page);
-
-	} while (ret == AOP_TRUNCATED_PAGE);
-
-	return ret;
-}
-
 #define MMAP_LOTSAMISS  (100)
-
+static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+					     struct file *fpin)
+{
+	int flags = vmf->flags;
+
+	if (fpin)
+		return fpin;
+
+	/*
+	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+	 * anything, so we only pin the file and drop the mmap_sem if only
+	 * FAULT_FLAG_ALLOW_RETRY is set.
+	 */
+	if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
+	    FAULT_FLAG_ALLOW_RETRY) {
+		fpin = get_file(vmf->vma->vm_file);
+		up_read(&vmf->vma->vm_mm->mmap_sem);
+	}
+	return fpin;
+}
+
+/*
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @page - the page to lock.
+ * @fpin - the pointer to the file we may pin (or is already pinned).
+ *
+ * This works similar to lock_page_or_retry in that it can drop the mmap_sem.
+ * It differs in that it actually returns the page locked if it returns 1 and 0
+ * if it couldn't lock the page.  If we did have to drop the mmap_sem then fpin
+ * will point to the pinned file and needs to be fput()'ed at a later point.
+ */
+static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+				     struct file **fpin)
+{
+	if (trylock_page(page))
+		return 1;
+
+	/*
+	 * NOTE! This will make us return with VM_FAULT_RETRY, but with
+	 * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
+	 * is supposed to work. We have way too many special cases..
+	 */
+	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+		return 0;
+
+	*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
+	if (vmf->flags & FAULT_FLAG_KILLABLE) {
+		if (__lock_page_killable(page)) {
+			/*
+			 * We didn't have the right flags to drop the mmap_sem,
+			 * but all fault_handlers only check for fatal signals
+			 * if we return VM_FAULT_RETRY, so we need to drop the
+			 * mmap_sem here and return 0 if we don't have a fpin.
+			 */
+			if (*fpin == NULL)
+				up_read(&vmf->vma->vm_mm->mmap_sem);
+			return 0;
+		}
+	} else
+		__lock_page(page);
+	return 1;
+}
 
 /*
- * Synchronous readahead happens when we don't even find
- * a page in the page cache at all.
+ * Synchronous readahead happens when we don't even find a page in the page
+ * cache at all.  We don't want to perform IO under the mmap sem, so if we have
+ * to drop the mmap sem we return the file that was pinned in order for us to do
+ * that.  If we didn't pin a file then we return NULL.  The file that is
+ * returned needs to be fput()'ed when we're done with it.
  */
-static void do_sync_mmap_readahead(struct vm_fault *vmf)
+static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 {
 	struct file *file = vmf->vma->vm_file;
 	struct file_ra_state *ra = &file->f_ra;
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin = NULL;
 	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
 	if (vmf->vma->vm_flags & VM_RAND_READ)
-		return;
+		return fpin;
 	if (!ra->ra_pages)
-		return;
+		return fpin;
 
 	if (vmf->vma->vm_flags & VM_SEQ_READ) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_sync_readahead(mapping, ra, file, offset,
 					  ra->ra_pages);
-		return;
+		return fpin;
 	}
 
 	/* Avoid banging the cache line if not needed */
@@ -2448,37 +2492,44 @@ static void do_sync_mmap_readahead(struct vm_fault *vmf)
 	 * stop bothering with read-ahead. It will only hurt.
 	 */
 	if (ra->mmap_miss > MMAP_LOTSAMISS)
-		return;
+		return fpin;
 
 	/*
 	 * mmap read-around
 	 */
+	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 	ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
 	ra->size = ra->ra_pages;
 	ra->async_size = ra->ra_pages / 4;
 	ra_submit(ra, mapping, file);
+	return fpin;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
- * so we want to possibly extend the readahead further..
+ * so we want to possibly extend the readahead further.  We return the file that
+ * was pinned if we have to drop the mmap_sem in order to do IO.
  */
-static void do_async_mmap_readahead(struct vm_fault *vmf,
-				    struct page *page)
+static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
+					    struct page *page)
 {
 	struct file *file = vmf->vma->vm_file;
 	struct file_ra_state *ra = &file->f_ra;
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin = NULL;
 	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
 	if (vmf->vma->vm_flags & VM_RAND_READ)
-		return;
+		return fpin;
 	if (ra->mmap_miss > 0)
 		ra->mmap_miss--;
-	if (PageReadahead(page))
+	if (PageReadahead(page)) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_async_readahead(mapping, ra, file,
 					   page, offset, ra->ra_pages);
+	}
+	return fpin;
 }
 
 /**
@@ -2510,6 +2561,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
 	int error;
 	struct file *file = vmf->vma->vm_file;
+	struct file *fpin = NULL;
 	struct address_space *mapping = file->f_mapping;
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
@@ -2531,23 +2583,26 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		do_async_mmap_readahead(vmf, page);
+		fpin = do_async_mmap_readahead(vmf, page);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		do_sync_mmap_readahead(vmf);
 		count_vm_event(PGMAJFAULT);
 		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
+		fpin = do_sync_mmap_readahead(vmf);
 retry_find:
-		page = find_get_page(mapping, offset);
-		if (!page)
-			goto no_cached_page;
+		page = pagecache_get_page(mapping, offset,
+					  FGP_CREAT|FGP_FOR_MMAP,
+					  vmf->gfp_mask);
+		if (!page) {
+			if (fpin)
+				goto out_retry;
+			return vmf_error(-ENOMEM);
+		}
 	}
 
-	if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-		put_page(page);
-		return ret | VM_FAULT_RETRY;
-	}
+	if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+		goto out_retry;
 
 	/* Did it get truncated? */
 	if (unlikely(page->mapping != mapping)) {
@@ -2564,6 +2619,16 @@ retry_find:
 	if (unlikely(!PageUptodate(page)))
 		goto page_not_uptodate;
 
+	/*
+	 * We've made it this far and we had to drop our mmap_sem, now is the
+	 * time to return to the upper layer and have it re-find the vma and
+	 * redo the fault.
+	 */
+	if (fpin) {
+		unlock_page(page);
+		goto out_retry;
+	}
+
 	/*
 	 * Found the page and have a reference on it.
 	 * We must recheck i_size under page lock.
@@ -2578,28 +2643,6 @@ retry_find:
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
 
-no_cached_page:
-	/*
-	 * We're only likely to ever get here if MADV_RANDOM is in
-	 * effect.
-	 */
-	error = page_cache_read(file, offset, vmf->gfp_mask);
-
-	/*
-	 * The page we want has now been added to the page cache.
-	 * In the unlikely event that someone removed it in the
-	 * meantime, we'll just come back here and read it again.
-	 */
-	if (error >= 0)
-		goto retry_find;
-
-	/*
-	 * An error return from page_cache_read can result if the
-	 * system is low on memory, or a problem occurs while trying
-	 * to schedule I/O.
-	 */
-	return vmf_error(error);
-
 page_not_uptodate:
 	/*
 	 * Umm, take care of errors if the page isn't up-to-date.
@@ -2608,12 +2651,15 @@ page_not_uptodate:
 	 * and we need to check for errors.
 	 */
 	ClearPageError(page);
+	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 	error = mapping->a_ops->readpage(file, page);
 	if (!error) {
 		wait_on_page_locked(page);
 		if (!PageUptodate(page))
 			error = -EIO;
 	}
+	if (fpin)
+		goto out_retry;
 	put_page(page);
 
 	if (!error || error == AOP_TRUNCATED_PAGE)
@@ -2622,6 +2668,18 @@ page_not_uptodate:
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
 	return VM_FAULT_SIGBUS;
+
+out_retry:
+	/*
+	 * We dropped the mmap_sem, we need to return to the fault handler to
+	 * re-find the vma and come back and find our hopefully still populated
+	 * page.
+	 */
+	if (page)
+		put_page(page);
+	if (fpin)
+		fput(fpin);
+	return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);
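
[ Illustration only: one subtlety worth calling out from
  maybe_unlock_mmap_for_io() above is that the mmap_sem is dropped only
  when FAULT_FLAG_ALLOW_RETRY is set *and* FAULT_FLAG_RETRY_NOWAIT is
  clear.  A tiny standalone check of that flag logic; the flag values
  below are local to the demo, not the kernel's:

	#include <stdio.h>

	#define ALLOW_RETRY	0x1	/* stands in for FAULT_FLAG_ALLOW_RETRY */
	#define RETRY_NOWAIT	0x2	/* stands in for FAULT_FLAG_RETRY_NOWAIT */

	/* Same test as in the patch: ALLOW_RETRY set, RETRY_NOWAIT clear. */
	static int may_drop_mmap_sem(int flags)
	{
		return (flags & (ALLOW_RETRY | RETRY_NOWAIT)) == ALLOW_RETRY;
	}

	int main(void)
	{
		int combos[] = { 0, ALLOW_RETRY, RETRY_NOWAIT,
				 ALLOW_RETRY | RETRY_NOWAIT };

		for (int i = 0; i < 4; i++)
			printf("flags=%#x -> may drop: %d\n",
			       combos[i], may_drop_mmap_sem(combos[i]));
		/* Only ALLOW_RETRY alone (flags=0x1) prints "may drop: 1". */
		return 0;
	}

  With NOWAIT also set we return with mmap_sem still held and let the
  caller see VM_FAULT_RETRY, which is exactly what the new comment in
  lock_page_maybe_drop_mmap() documents. ]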