[XFS] Fix memory corruption with small buffer reads
When we have multiple buffers in a single page for a blocksize == pagesize
filesystem we might overwrite the page contents if two callers hit it
shortly after each other. To prevent that we need to keep the page locked
until I/O is completed and the page is marked uptodate.

Thanks to Eric Sandeen for triaging this bug and finding a reproducible
testcase, and to Dave Chinner for additional advice.

This should fix kernel.org bz #10421.

Tested-by: Eric Sandeen <sandeen@sandeen.net>
SGI-PV: 981813
SGI-Modid: xfs-linux-melb:xfs-kern:31173a
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Parent: c8f5f12e46
Commit: 6ab455eeaf
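Before the diff, a minimal userspace sketch of the race may help. Everything
below (struct page_sim, read_page_locked) is illustrative only, not kernel
API; it models why holding the page lock from lookup until the page is
marked uptodate closes the window.

    /*
     * Illustrative sketch, not kernel code: a "page" shared by several
     * sub-page buffers, with the check-and-read made atomic by holding
     * the page lock across the read -- the pattern _XBF_PAGE_LOCKED
     * implements in the patch below.
     */
    #include <pthread.h>
    #include <stdbool.h>
    #include <string.h>

    struct page_sim {
            pthread_mutex_t lock;       /* models the page lock      */
            bool            uptodate;   /* models PageUptodate()     */
            char            data[4096]; /* shared by several buffers */
    };

    static void read_page_locked(struct page_sim *p, const char *disk)
    {
            pthread_mutex_lock(&p->lock);   /* lookup keeps the page locked */
            if (!p->uptodate) {
                    memcpy(p->data, disk, sizeof(p->data)); /* read "I/O" */
                    p->uptodate = true;     /* mark uptodate first...      */
            }
            pthread_mutex_unlock(&p->lock); /* ...then drop the lock       */
    }

Without the lock held across the read, a second caller can observe
!uptodate just after the first caller modified the page, and its re-read
overwrites that modification — the corruption described above.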
fs/xfs/linux-2.6/xfs_buf.c

@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages(
 		if (unlikely(page == NULL)) {
 			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
+				for (i = 0; i < bp->b_page_count; i++)
+					unlock_page(bp->b_pages[i]);
 				return -ENOMEM;
 			}
 
@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages(
 		ASSERT(!PagePrivate(page));
 		if (!PageUptodate(page)) {
 			page_count--;
-			if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
+			if (blocksize >= PAGE_CACHE_SIZE) {
+				if (flags & XBF_READ)
+					bp->b_flags |= _XBF_PAGE_LOCKED;
+			} else if (!PagePrivate(page)) {
 				if (test_page_region(page, offset, nbytes))
 					page_count++;
 			}
 		}
 
-		unlock_page(page);
 		bp->b_pages[i] = page;
 		offset = 0;
 	}
 
+	if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
+		for (i = 0; i < bp->b_page_count; i++)
+			unlock_page(bp->b_pages[i]);
+	}
+
 	if (page_count == bp->b_page_count)
 		bp->b_flags |= XBF_DONE;
 
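To summarise the two _xfs_buf_lookup_pages() hunks above, here is a
hand-written helper (not part of the patch) expressing when the backing
pages now stay locked:

    /*
     * Illustrative summary, not patch code: pages backing a buffer stay
     * locked across read I/O only for sub-page buffers on filesystems
     * whose block size is at least the page size, because such pages
     * have no per-region uptodate tracking to serialise readers.
     */
    static int keep_pages_locked(int is_read, int blocksize, int page_size)
    {
            return is_read && blocksize >= page_size;
    }

This is also why the readahead -ENOMEM path in the first hunk gains an
unlock loop: the unconditional unlock_page() in the lookup loop is gone,
so the error path must release the pages it already holds.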
@@ -746,6 +755,7 @@ xfs_buf_associate_memory(
 	bp->b_count_desired = len;
 	bp->b_buffer_length = buflen;
 	bp->b_flags |= XBF_MAPPED;
+	bp->b_flags &= ~_XBF_PAGE_LOCKED;
 
 	return 0;
 }
@@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
 	xfs_buf_t		*bp,
 	int			schedule)
 {
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+		bp->b_flags &= ~_XBF_PAGE_LOCKED;
 		xfs_buf_ioend(bp, schedule);
+	}
 }
 
 STATIC void
@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
+
+		if (bp->b_flags & _XBF_PAGE_LOCKED)
+			unlock_page(page);
 	} while (bvec >= bio->bi_io_vec);
 
 	_xfs_buf_ioend(bp, 1);
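The two completion hunks above work as a pair; a simplified summary of the
ordering (written for this note, not quoted from the patch):

    /*
     * Completion order for a locked read (simplified summary):
     *
     *   xfs_buf_bio_end_io()  -- per-bio completion: unlock each page,
     *                            but only if _XBF_PAGE_LOCKED is set,
     *                            i.e. this buffer holds the page locks.
     *   _xfs_buf_ioend()      -- when the last outstanding I/O drops:
     *                            clear _XBF_PAGE_LOCKED, then signal
     *                            completion via xfs_buf_ioend().
     *
     * The pages become uptodate before they are unlocked, so a reader
     * sleeping on the page lock wakes to an uptodate page and skips the
     * re-read that used to overwrite fresh modifications.
     */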
@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
 	 * filesystem block size is not smaller than the page size.
 	 */
 	if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-	    (bp->b_flags & XBF_READ) &&
+	    ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
+	     (XBF_READ|_XBF_PAGE_LOCKED)) &&
 	    (blocksize >= PAGE_CACHE_SIZE)) {
 		bio = bio_alloc(GFP_NOIO, 1);
 
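The _xfs_buf_ioapply() hunk replaces a single-bit test with the standard C
idiom for requiring several flag bits at once; a small standalone
illustration (the flag values below are made up for the example):

    #include <assert.h>

    enum {
            XBF_READ         = 1 << 0, /* values made up for this demo */
            _XBF_PAGE_LOCKED = 1 << 1,
    };

    int main(void)
    {
            unsigned int flags = XBF_READ | _XBF_PAGE_LOCKED;

            /* (flags & mask) == mask holds only when every bit in the
             * mask is set; a plain (flags & mask) test would also pass
             * when just one of the two bits is present. */
            assert((flags & (XBF_READ | _XBF_PAGE_LOCKED)) ==
                   (XBF_READ | _XBF_PAGE_LOCKED));

            flags &= ~_XBF_PAGE_LOCKED;
            assert((flags & (XBF_READ | _XBF_PAGE_LOCKED)) !=
                   (XBF_READ | _XBF_PAGE_LOCKED));
            return 0;
    }

So the special single-bio read path is now taken only when the buffer
actually holds its page locks, matching the unlock in xfs_buf_bio_end_io().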
fs/xfs/linux-2.6/xfs_buf.h

@@ -66,6 +66,25 @@ typedef enum {
 	_XBF_PAGES = (1 << 18),     /* backed by refcounted pages */
 	_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
 	_XBF_DELWRI_Q = (1 << 21),  /* buffer on delwri queue */
+
+	/*
+	 * Special flag for supporting metadata blocks smaller than a FSB.
+	 *
+	 * In this case we can have multiple xfs_buf_t on a single page and
+	 * need to lock out concurrent xfs_buf_t readers as they only
+	 * serialise access to the buffer.
+	 *
+	 * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
+	 * between reads of the page. Hence we can have one thread read the
+	 * page and modify it, but then race with another thread that thinks
+	 * the page is not up-to-date and hence reads it again.
+	 *
+	 * The result is that the first modification to the page is lost.
+	 * This sort of AGF/AGI reading race can happen when unlinking inodes
+	 * that require truncation and results in the AGI unlinked list
+	 * modifications being lost.
+	 */
+	_XBF_PAGE_LOCKED = (1 << 22),
 } xfs_buf_flags_t;
 
 typedef enum {