From b71450e2cc4b3c79f33c5bd276d152af9bd54f79 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 5 May 2022 18:19:13 -0700
Subject: [PATCH 1/3] iomap: iomap_write_failed fix

The @lend parameter of truncate_pagecache_range() should be the offset
of the last byte of the hole, not the first byte beyond it.

Fixes: ae259a9c8593 ("fs: introduce iomap infrastructure")
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 8ce8720093b9..358ee1fb6f0d 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -531,7 +531,8 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 	 * write started inside the existing inode size.
 	 */
 	if (pos + len > i_size)
-		truncate_pagecache_range(inode, max(pos, i_size), pos + len);
+		truncate_pagecache_range(inode, max(pos, i_size),
+					 pos + len - 1);
 }
 
 static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,

From d74999c8c060dfeaf9977b91baa3c795fc183a84 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 5 May 2022 18:19:13 -0700
Subject: [PATCH 2/3] iomap: iomap_write_end cleanup

In iomap_write_end(), only call iomap_write_failed() on the byte range
that has failed.  This should improve code readability, but doesn't fix
an actual bug because iomap_write_failed() is called after updating the
file size here and it only affects the memory beyond the end of the
file.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 358ee1fb6f0d..8fb9b2797fc5 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -734,7 +734,7 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 	folio_put(folio);
 
 	if (ret < len)
-		iomap_write_failed(iter->inode, pos, len);
+		iomap_write_failed(iter->inode, pos + ret, len - ret);
 	return ret;
 }
 

From e9c3a8e820ed0eeb2be05072f29f80d1b79f053b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 16 May 2022 15:27:38 -0700
Subject: [PATCH 3/3] iomap: don't invalidate folios after writeback errors

XFS has the unique behavior (as compared to the other Linux filesystems)
that on writeback errors it will completely invalidate the affected
folio and force the page cache to reread the contents from disk.  All
other filesystems leave the page mapped and up to date.

This is a rude awakening for user programs, since (in the case where
write fails but reread doesn't) file contents will appear to revert to
old disk contents with no notification other than an EIO on fsync.  This
might have been annoying back in the days when iomap dealt with one page
at a time, but with multipage folios, we can now throw away *megabytes*
worth of data for a single write error.

On *most* Linux filesystems, a program can respond to an EIO on write by
redirtying the entire file and scheduling it for writeback.  This isn't
foolproof, since the page that failed writeback is no longer dirty and
could be evicted, but programs that want to recover properly *also*
have to detect XFS and regenerate every write they've made to the file.

When running xfs/314 on arm64, I noticed a UAF when xfs_discard_folio
invalidates multipage folios that could be undergoing writeback.  If,
say, we have a 256K folio caching a mix of written and unwritten
extents, it's possible that we could start writeback of the first (say)
64K of the folio and then hit a writeback error on the next 64K.  We
then free the iop attached to the folio, which is really bad because
writeback completion on the first 64k will trip over the "blocks per
folio > 1 && !iop" assertion.

This can't be fixed by only invalidating the folio if writeback fails at
the start of the folio, since the folio is marked !uptodate, which trips
other assertions elsewhere.  Get rid of the whole behavior entirely.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 1 -
 fs/xfs/xfs_aops.c      | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 8fb9b2797fc5..94b53cbdefad 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1387,7 +1387,6 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		if (wpc->ops->discard_folio)
 			wpc->ops->discard_folio(folio, pos);
 		if (!count) {
-			folio_clear_uptodate(folio);
 			folio_unlock(folio);
 			goto done;
 		}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 90b7f4d127de..f6216d0fb0c2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -464,7 +464,7 @@ xfs_discard_folio(
 	int			error;
 
 	if (xfs_is_shutdown(mp))
-		goto out_invalidate;
+		return;
 
 	xfs_alert_ratelimited(mp,
 		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
@@ -474,8 +474,6 @@ xfs_discard_folio(
 			i_blocks_per_folio(inode, folio) - pageoff_fsb);
 	if (error && !xfs_is_shutdown(mp))
 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
-out_invalidate:
-	iomap_invalidate_folio(folio, offset, folio_size(folio) - offset);
 }
 
 static const struct iomap_writeback_ops xfs_writeback_ops = {