Merge tag 'xfs-4.14-fixes-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:

 - fix some more CONFIG_XFS_RT related build problems

 - fix data loss when writeback at eof races eofblocks gc and loses

 - invalidate page cache after fs finishes a dio write

 - remove dirty page state when invalidating pages so releasepage does
   the right thing when handed a dirty page

* tag 'xfs-4.14-fixes-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: move two more RT specific functions into CONFIG_XFS_RT
  xfs: trim writepage mapping to within eof
  fs: invalidate page cache after end_io() in dio completion
  xfs: cancel dirty pages on invalidation
commit 73d3393ada
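The thread running through the two direct I/O hunks below (fs/direct-io.c and fs/iomap.c) is ordering: the filesystem's ->end_io() completion, which may convert unwritten extents into real allocations, has to run before the page cache over the written range is invalidated, otherwise a racing buffered read can repopulate the cache with zeros from still-unwritten extents. A minimal userspace sketch of that ordering follows; dio_sketch, fs_end_io() and invalidate_range() are illustrative stand-ins, not the kernel structures or APIs.

/*
 * Minimal sketch of the corrected completion ordering; not kernel code.
 * fs_end_io() and invalidate_range() stand in for the filesystem's
 * ->end_io() callback and invalidate_inode_pages2_range().
 */
#include <stdio.h>

struct dio_sketch {
	long long	offset;		/* byte offset of the write */
	long long	size;		/* bytes transferred */
	int		is_write;
	int		error;
};

/* stand-in: filesystem converts unwritten extents covering the range */
static int fs_end_io(const struct dio_sketch *dio)
{
	printf("end_io: convert unwritten extents over [%lld, %lld)\n",
	       dio->offset, dio->offset + dio->size);
	return 0;
}

/* stand-in: drop clean cached pages over the written range */
static void invalidate_range(const struct dio_sketch *dio)
{
	printf("invalidate page cache over [%lld, %lld)\n",
	       dio->offset, dio->offset + dio->size);
}

static void dio_complete_sketch(struct dio_sketch *dio)
{
	/* 1. let the filesystem finish the write (extent conversion etc.) */
	if (fs_end_io(dio))
		return;
	/*
	 * 2. only then invalidate the page cache; doing it earlier would let
	 *    a racing buffered read cache zeros from still-unwritten extents.
	 */
	if (!dio->error && dio->is_write)
		invalidate_range(dio);
}

int main(void)
{
	struct dio_sketch dio = { .offset = 4096, .size = 8192, .is_write = 1 };

	dio_complete_sketch(&dio);
	return 0;
}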
@@ -265,12 +265,24 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 	if (ret == 0)
 		ret = transferred;
 
+	if (dio->end_io) {
+		// XXX: ki_pos??
+		err = dio->end_io(dio->iocb, offset, ret, dio->private);
+		if (err)
+			ret = err;
+	}
+
 	/*
 	 * Try again to invalidate clean pages which might have been cached by
 	 * non-direct readahead, or faulted in by get_user_pages() if the source
 	 * of the write was an mmap'ed region of the file we're writing.  Either
 	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
 	 * this invalidation fails, tough, the write still worked...
+	 *
+	 * And this page cache invalidation has to be after dio->end_io(), as
+	 * some filesystems convert unwritten extents to real allocations in
+	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * zeros from unwritten extents.
 	 */
 	if (flags & DIO_COMPLETE_INVALIDATE &&
 	    ret > 0 && dio->op == REQ_OP_WRITE &&
@@ -281,14 +293,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 		WARN_ON_ONCE(err);
 	}
 
-	if (dio->end_io) {
-		// XXX: ki_pos??
-		err = dio->end_io(dio->iocb, offset, ret, dio->private);
-		if (err)
-			ret = err;
-	}
-
 	if (!(dio->flags & DIO_SKIP_DIO_COUNT))
 		inode_dio_end(dio->inode);
fs/iomap.c
@@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	struct kiocb *iocb = dio->iocb;
 	struct inode *inode = file_inode(iocb->ki_filp);
+	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	/*
-	 * Try again to invalidate clean pages which might have been cached by
-	 * non-direct readahead, or faulted in by get_user_pages() if the source
-	 * of the write was an mmap'ed region of the file we're writing.  Either
-	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
-	 * this invalidation fails, tough, the write still worked...
-	 */
-	if (!dio->error &&
-	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
-		ret = invalidate_inode_pages2_range(inode->i_mapping,
-				iocb->ki_pos >> PAGE_SHIFT,
-				(iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
-
 	if (dio->end_io) {
 		ret = dio->end_io(iocb,
 				dio->error ? dio->error : dio->size,
@@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	if (likely(!ret)) {
 		ret = dio->size;
 		/* check for short read */
-		if (iocb->ki_pos + ret > dio->i_size &&
+		if (offset + ret > dio->i_size &&
 		    !(dio->flags & IOMAP_DIO_WRITE))
-			ret = dio->i_size - iocb->ki_pos;
+			ret = dio->i_size - offset;
 		iocb->ki_pos += ret;
 	}
 
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 *
+	 * And this page cache invalidation has to be after dio->end_io(), as
+	 * some filesystems convert unwritten extents to real allocations in
+	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * zeros from unwritten extents.
+	 */
+	if (!dio->error &&
+	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+		int err;
+		err = invalidate_inode_pages2_range(inode->i_mapping,
+				offset >> PAGE_SHIFT,
+				(offset + dio->size - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(err);
+	}
+
 	inode_dio_end(file_inode(iocb->ki_filp));
 	kfree(dio);
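Both completion paths invalidate the page-index range covering the written bytes, using offset >> PAGE_SHIFT for the first index and (offset + dio->size - 1) >> PAGE_SHIFT for the last, so a write ending exactly on a page boundary does not touch the following page. A small standalone check of that arithmetic; a 4 KiB page size is assumed for illustration, and SKETCH_PAGE_SHIFT and range_to_pages() are hypothetical helpers.

#include <assert.h>
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12	/* 4 KiB pages assumed for illustration */

/* first and last page index covered by the byte range [offset, offset + size) */
static void range_to_pages(long long offset, long long size,
			   long long *first, long long *last)
{
	*first = offset >> SKETCH_PAGE_SHIFT;
	*last  = (offset + size - 1) >> SKETCH_PAGE_SHIFT;
}

int main(void)
{
	long long first, last;

	/* a 4 KiB write at offset 4096 covers exactly page index 1 */
	range_to_pages(4096, 4096, &first, &last);
	assert(first == 1 && last == 1);

	/* a 1-byte write at the last byte of page 1 stays within page 1 */
	range_to_pages(8191, 1, &first, &last);
	assert(first == 1 && last == 1);

	/* an 8 KiB write at 4096 covers pages 1 and 2 */
	range_to_pages(4096, 8192, &first, &last);
	assert(first == 1 && last == 2);

	printf("page range arithmetic checks passed\n");
	return 0;
}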
@@ -3852,6 +3852,17 @@ xfs_trim_extent(
 	}
 }
 
+/* trim extent to within eof */
+void
+xfs_trim_extent_eof(
+	struct xfs_bmbt_irec	*irec,
+	struct xfs_inode	*ip)
+
+{
+	xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
+					      i_size_read(VFS_I(ip))));
+}
+
 /*
  * Trim the returned map to the required bounds
  */
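xfs_trim_extent_eof() is just xfs_trim_extent() with the range [0, EOF in filesystem blocks): anything in the mapping beyond the in-core inode size is clipped off. A hedged standalone sketch of that clamping, using an illustrative extent_sketch struct and trim_extent() helper rather than struct xfs_bmbt_irec and the real xfs_trim_extent().

#include <stdio.h>

/* illustrative stand-in for struct xfs_bmbt_irec (file offset + length only) */
struct extent_sketch {
	long long	startoff;	/* file offset, in fs blocks */
	long long	blockcount;	/* length, in fs blocks */
};

/* clamp an extent to [bno, bno + len), shrinking or emptying it as needed */
static void trim_extent(struct extent_sketch *ext, long long bno, long long len)
{
	long long end = bno + len;

	if (ext->startoff + ext->blockcount < bno || ext->startoff >= end) {
		ext->blockcount = 0;		/* entirely outside the range */
		return;
	}
	if (ext->startoff < bno) {		/* trim the front */
		ext->blockcount -= bno - ext->startoff;
		ext->startoff = bno;
	}
	if (ext->startoff + ext->blockcount > end)	/* trim the tail */
		ext->blockcount = end - ext->startoff;
}

int main(void)
{
	/* mapping [90, 110) against an EOF at block 100 gets trimmed to [90, 100) */
	struct extent_sketch ext = { .startoff = 90, .blockcount = 20 };

	trim_extent(&ext, 0, 100);	/* trim-to-EOF is trim_extent(ext, 0, eof_fsb) */
	printf("startoff=%lld blockcount=%lld\n", ext.startoff, ext.blockcount);
	return 0;
}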
@@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 
 void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
 		xfs_filblks_t len);
+void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
@@ -446,6 +446,19 @@ xfs_imap_valid(
 {
 	offset >>= inode->i_blkbits;
 
+	/*
+	 * We have to make sure the cached mapping is within EOF to protect
+	 * against eofblocks trimming on file release leaving us with a stale
+	 * mapping. Otherwise, a page for a subsequent file extending buffered
+	 * write could get picked up by this writeback cycle and written to the
+	 * wrong blocks.
+	 *
+	 * Note that what we really want here is a generic mapping invalidation
+	 * mechanism to protect us from arbitrary extent modifying contexts, not
+	 * just eofblocks.
+	 */
+	xfs_trim_extent_eof(imap, XFS_I(inode));
+
 	return offset >= imap->br_startoff &&
 		offset < imap->br_startoff + imap->br_blockcount;
 }
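The effect in xfs_imap_valid() is that a cached mapping which now reaches past EOF (for example because eofblocks trimming on file release shrank the file after the mapping was cached) is clipped before the offset check, so offsets beyond the clipped mapping fail the check and get remapped instead of being written to stale blocks. A simplified standalone model of that check; map_sketch and imap_valid() here are illustrative, not the kernel code.

#include <stdio.h>

/* illustrative stand-in for the cached writeback mapping (fs blocks) */
struct map_sketch {
	long long	startoff;
	long long	blockcount;
};

/* mimic xfs_imap_valid(): offset must fall inside the EOF-trimmed mapping */
static int imap_valid(struct map_sketch *map, long long offset_fsb,
		      long long eof_fsb)
{
	/* trim the cached mapping to within EOF before trusting it */
	if (map->startoff + map->blockcount > eof_fsb)
		map->blockcount = eof_fsb > map->startoff ?
					eof_fsb - map->startoff : 0;

	return offset_fsb >= map->startoff &&
	       offset_fsb < map->startoff + map->blockcount;
}

int main(void)
{
	/* mapping [90, 110) cached before eofblocks gc shrank the file to 100 blocks */
	struct map_sketch map = { .startoff = 90, .blockcount = 20 };

	/* block 95 is still valid, block 105 is past EOF and must be remapped */
	printf("offset 95:  %s\n", imap_valid(&map, 95, 100) ? "valid" : "remap");
	printf("offset 105: %s\n", imap_valid(&map, 105, 100) ? "valid" : "remap");
	return 0;
}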
@@ -735,6 +748,14 @@ xfs_vm_invalidatepage(
 {
 	trace_xfs_invalidatepage(page->mapping->host, page, offset,
 				 length);
+
+	/*
+	 * If we are invalidating the entire page, clear the dirty state from it
+	 * so that we can check for attempts to release dirty cached pages in
+	 * xfs_vm_releasepage().
+	 */
+	if (offset == 0 && length >= PAGE_SIZE)
+		cancel_dirty_page(page);
 	block_invalidatepage(page, offset, length);
 }
 
@@ -1190,25 +1211,27 @@ xfs_vm_releasepage(
 	 * mm accommodates an old ext3 case where clean pages might not have had
 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
 	 * ->releasepage() via shrink_active_list(). Conversely,
-	 * block_invalidatepage() can send pages that are still marked dirty
-	 * but otherwise have invalidated buffers.
+	 * block_invalidatepage() can send pages that are still marked dirty but
+	 * otherwise have invalidated buffers.
 	 *
 	 * We want to release the latter to avoid unnecessary buildup of the
-	 * LRU, skip the former and warn if we've left any lingering
-	 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
-	 * or unwritten buffers and warn if the page is not dirty. Otherwise
-	 * try to release the buffers.
+	 * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
+	 * that are entirely invalidated and need to be released. Hence the
+	 * only time we should get dirty pages here is through
+	 * shrink_active_list() and so we can simply skip those now.
+	 *
+	 * warn if we've left any lingering delalloc/unwritten buffers on clean
+	 * or invalidated pages we are about to release.
 	 */
+	if (PageDirty(page))
+		return 0;
+
 	xfs_count_page_state(page, &delalloc, &unwritten);
 
-	if (delalloc) {
-		WARN_ON_ONCE(!PageDirty(page));
+	if (WARN_ON_ONCE(delalloc))
 		return 0;
-	}
-	if (unwritten) {
-		WARN_ON_ONCE(!PageDirty(page));
+	if (WARN_ON_ONCE(unwritten))
 		return 0;
-	}
 
 	return try_to_free_buffers(page);
 }
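With xfs_vm_invalidatepage() now clearing the dirty flag on fully invalidated pages, any dirty page that still reaches ->releasepage() can only have come via shrink_active_list(), so it is skipped outright rather than inspected for delalloc/unwritten buffers. A rough sketch of the new decision order; page_sketch and releasepage_sketch() are stand-ins for the page flag and buffer-state helpers, not the kernel code.

#include <stdio.h>

/* illustrative page state; the real code inspects page flags and buffer heads */
struct page_sketch {
	int	dirty;		/* PageDirty() */
	int	delalloc;	/* has delayed-allocation buffers */
	int	unwritten;	/* has unwritten-extent buffers */
};

/* mirror the new xfs_vm_releasepage() ordering: 1 = buffers released, 0 = skipped */
static int releasepage_sketch(const struct page_sketch *page)
{
	/* dirty pages can only come from shrink_active_list(); just skip them */
	if (page->dirty)
		return 0;

	/* clean/invalidated pages should never still carry these; warn and skip */
	if (page->delalloc || page->unwritten) {
		fprintf(stderr, "warning: lingering delalloc/unwritten buffers\n");
		return 0;
	}

	return 1;	/* try_to_free_buffers() in the real code */
}

int main(void)
{
	struct page_sketch dirty_page = { .dirty = 1 };
	struct page_sketch clean_page = { 0 };

	printf("dirty page released: %d\n", releasepage_sketch(&dirty_page));
	printf("clean page released: %d\n", releasepage_sketch(&clean_page));
	return 0;
}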
@@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper(
 	return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
 }
 
-/* Transform a rtbitmap "record" into a fsmap */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_helper(
-	struct xfs_trans		*tp,
-	struct xfs_rtalloc_rec		*rec,
-	void				*priv)
-{
-	struct xfs_mount		*mp = tp->t_mountp;
-	struct xfs_getfsmap_info	*info = priv;
-	struct xfs_rmap_irec		irec;
-	xfs_daddr_t			rec_daddr;
-
-	rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
-
-	irec.rm_startblock = rec->ar_startblock;
-	irec.rm_blockcount = rec->ar_blockcount;
-	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
-	irec.rm_offset = 0;
-	irec.rm_flags = 0;
-
-	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
-}
-
 /* Transform a bnobt irec into a fsmap */
 STATIC int
 xfs_getfsmap_datadev_bnobt_helper(
@@ -475,6 +452,30 @@ xfs_getfsmap_logdev(
 	return xfs_getfsmap_helper(tp, info, &rmap, 0);
 }
 
+#ifdef CONFIG_XFS_RT
+/* Transform a rtbitmap "record" into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+	struct xfs_trans		*tp,
+	struct xfs_rtalloc_rec		*rec,
+	void				*priv)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_getfsmap_info	*info = priv;
+	struct xfs_rmap_irec		irec;
+	xfs_daddr_t			rec_daddr;
+
+	rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
+
+	irec.rm_startblock = rec->ar_startblock;
+	irec.rm_blockcount = rec->ar_blockcount;
+	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
+	irec.rm_offset = 0;
+	irec.rm_flags = 0;
+
+	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+}
+
 /* Execute a getfsmap query against the realtime device. */
 STATIC int
 __xfs_getfsmap_rtdev(
@@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev(
 	return query_fn(tp, info);
 }
 
-#ifdef CONFIG_XFS_RT
 /* Actually query the realtime bitmap. */
 STATIC int
 xfs_getfsmap_rtdev_rtbitmap_query(
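The fsmap hunks are a build fix: xfs_getfsmap_rtdev_rtbitmap_helper() is only used by the realtime query code, so it now lives inside the CONFIG_XFS_RT block and the stray second #ifdef is dropped, avoiding breakage when CONFIG_XFS_RT is not set. The general guard pattern, sketched with hypothetical names (CONFIG_EXAMPLE_RT, rt_only_helper, rt_query) rather than the actual xfs symbols:

/*
 * Hedged sketch of the guard pattern with hypothetical names: code that is
 * only meaningful with realtime support compiles only when the option is
 * enabled, and a stub keeps callers building when it is not.
 */
#include <stdio.h>
#include <errno.h>

#define CONFIG_EXAMPLE_RT 1	/* flip to 0 to mimic CONFIG_XFS_RT=n */

#if CONFIG_EXAMPLE_RT
/* only defined when realtime support is built in */
struct rt_record {
	long long	startblock;
	long long	blockcount;
};

static int rt_only_helper(const struct rt_record *rec)
{
	printf("rt record: %lld + %lld\n", rec->startblock, rec->blockcount);
	return 0;
}

static int rt_query(void)
{
	struct rt_record rec = { .startblock = 0, .blockcount = 16 };

	return rt_only_helper(&rec);
}
#else
/* stub: realtime queries are simply not supported in this configuration */
static int rt_query(void)
{
	return -EOPNOTSUPP;
}
#endif

int main(void)
{
	printf("rt_query() = %d\n", rt_query());
	return 0;
}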