ext4: fix deadlock in journal_unmap_buffer()
We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and try invalidation again. Since the issue can happen only for a page straddling i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such a page from ext4_setattr(), check the return value and wait if necessary. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Parent
4520fb3c36
Commit
53e872681f
|
@ -2894,7 +2894,7 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
|
||||||
block_invalidatepage(page, offset);
|
block_invalidatepage(page, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ext4_journalled_invalidatepage(struct page *page,
|
static int __ext4_journalled_invalidatepage(struct page *page,
|
||||||
unsigned long offset)
|
unsigned long offset)
|
||||||
{
|
{
|
||||||
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
|
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
|
||||||
|
@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
|
||||||
if (offset == 0)
|
if (offset == 0)
|
||||||
ClearPageChecked(page);
|
ClearPageChecked(page);
|
||||||
|
|
||||||
jbd2_journal_invalidatepage(journal, page, offset);
|
return jbd2_journal_invalidatepage(journal, page, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for aops... */
|
||||||
|
static void ext4_journalled_invalidatepage(struct page *page,
|
||||||
|
unsigned long offset)
|
||||||
|
{
|
||||||
|
WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ext4_releasepage(struct page *page, gfp_t wait)
|
static int ext4_releasepage(struct page *page, gfp_t wait)
|
||||||
|
@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
|
||||||
|
* buffers that are attached to a page straddling i_size and are undergoing
|
||||||
|
* commit. In that case we have to wait for commit to finish and try again.
|
||||||
|
*/
|
||||||
|
static void ext4_wait_for_tail_page_commit(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct page *page;
|
||||||
|
unsigned offset;
|
||||||
|
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
||||||
|
tid_t commit_tid = 0;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
|
||||||
|
/*
|
||||||
|
* All buffers in the last page remain valid? Then there's nothing to
|
||||||
|
* do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
|
||||||
|
* blocksize case
|
||||||
|
*/
|
||||||
|
if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
|
||||||
|
return;
|
||||||
|
while (1) {
|
||||||
|
page = find_lock_page(inode->i_mapping,
|
||||||
|
inode->i_size >> PAGE_CACHE_SHIFT);
|
||||||
|
if (!page)
|
||||||
|
return;
|
||||||
|
ret = __ext4_journalled_invalidatepage(page, offset);
|
||||||
|
unlock_page(page);
|
||||||
|
page_cache_release(page);
|
||||||
|
if (ret != -EBUSY)
|
||||||
|
return;
|
||||||
|
commit_tid = 0;
|
||||||
|
read_lock(&journal->j_state_lock);
|
||||||
|
if (journal->j_committing_transaction)
|
||||||
|
commit_tid = journal->j_committing_transaction->t_tid;
|
||||||
|
read_unlock(&journal->j_state_lock);
|
||||||
|
if (commit_tid)
|
||||||
|
jbd2_log_wait_commit(journal, commit_tid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ext4_setattr()
|
* ext4_setattr()
|
||||||
*
|
*
|
||||||
|
@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attr->ia_valid & ATTR_SIZE) {
|
if (attr->ia_valid & ATTR_SIZE) {
|
||||||
if (attr->ia_size != i_size_read(inode)) {
|
if (attr->ia_size != inode->i_size) {
|
||||||
truncate_setsize(inode, attr->ia_size);
|
loff_t oldsize = inode->i_size;
|
||||||
/* Inode size will be reduced, wait for dio in flight.
|
|
||||||
* Temporarily disable dioread_nolock to prevent
|
i_size_write(inode, attr->ia_size);
|
||||||
* livelock. */
|
/*
|
||||||
|
* Blocks are going to be removed from the inode. Wait
|
||||||
|
* for dio in flight. Temporarily disable
|
||||||
|
* dioread_nolock to prevent livelock.
|
||||||
|
*/
|
||||||
if (orphan) {
|
if (orphan) {
|
||||||
|
if (!ext4_should_journal_data(inode)) {
|
||||||
ext4_inode_block_unlocked_dio(inode);
|
ext4_inode_block_unlocked_dio(inode);
|
||||||
inode_dio_wait(inode);
|
inode_dio_wait(inode);
|
||||||
ext4_inode_resume_unlocked_dio(inode);
|
ext4_inode_resume_unlocked_dio(inode);
|
||||||
|
} else
|
||||||
|
ext4_wait_for_tail_page_commit(inode);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* Truncate pagecache after we've waited for commit
|
||||||
|
* in data=journal mode to make pages freeable.
|
||||||
|
*/
|
||||||
|
truncate_pagecache(inode, oldsize, inode->i_size);
|
||||||
}
|
}
|
||||||
ext4_truncate(inode);
|
ext4_truncate(inode);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
|
||||||
|
|
||||||
BUFFER_TRACE(bh, "entry");
|
BUFFER_TRACE(bh, "entry");
|
||||||
|
|
||||||
retry:
|
|
||||||
/*
|
/*
|
||||||
* It is safe to proceed here without the j_list_lock because the
|
* It is safe to proceed here without the j_list_lock because the
|
||||||
* buffers cannot be stolen by try_to_free_buffers as long as we are
|
* buffers cannot be stolen by try_to_free_buffers as long as we are
|
||||||
|
@ -1935,14 +1934,11 @@ retry:
|
||||||
* for commit and try again.
|
* for commit and try again.
|
||||||
*/
|
*/
|
||||||
if (partial_page) {
|
if (partial_page) {
|
||||||
tid_t tid = journal->j_committing_transaction->t_tid;
|
|
||||||
|
|
||||||
jbd2_journal_put_journal_head(jh);
|
jbd2_journal_put_journal_head(jh);
|
||||||
spin_unlock(&journal->j_list_lock);
|
spin_unlock(&journal->j_list_lock);
|
||||||
jbd_unlock_bh_state(bh);
|
jbd_unlock_bh_state(bh);
|
||||||
write_unlock(&journal->j_state_lock);
|
write_unlock(&journal->j_state_lock);
|
||||||
jbd2_log_wait_commit(journal, tid);
|
return -EBUSY;
|
||||||
goto retry;
|
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* OK, buffer won't be reachable after truncate. We just set
|
* OK, buffer won't be reachable after truncate. We just set
|
||||||
|
@ -2003,21 +1999,23 @@ zap_buffer_unlocked:
|
||||||
* @page: page to flush
|
* @page: page to flush
|
||||||
* @offset: length of page to invalidate.
|
* @offset: length of page to invalidate.
|
||||||
*
|
*
|
||||||
* Reap page buffers containing data after offset in page.
|
* Reap page buffers containing data after offset in page. Can return -EBUSY
|
||||||
*
|
* if buffers are part of the committing transaction and the page is straddling
|
||||||
|
* i_size. Caller then has to wait for current commit and try again.
|
||||||
*/
|
*/
|
||||||
void jbd2_journal_invalidatepage(journal_t *journal,
|
int jbd2_journal_invalidatepage(journal_t *journal,
|
||||||
struct page *page,
|
struct page *page,
|
||||||
unsigned long offset)
|
unsigned long offset)
|
||||||
{
|
{
|
||||||
struct buffer_head *head, *bh, *next;
|
struct buffer_head *head, *bh, *next;
|
||||||
unsigned int curr_off = 0;
|
unsigned int curr_off = 0;
|
||||||
int may_free = 1;
|
int may_free = 1;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
if (!PageLocked(page))
|
if (!PageLocked(page))
|
||||||
BUG();
|
BUG();
|
||||||
if (!page_has_buffers(page))
|
if (!page_has_buffers(page))
|
||||||
return;
|
return 0;
|
||||||
|
|
||||||
/* We will potentially be playing with lists other than just the
|
/* We will potentially be playing with lists other than just the
|
||||||
* data lists (especially for journaled data mode), so be
|
* data lists (especially for journaled data mode), so be
|
||||||
|
@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
|
||||||
if (offset <= curr_off) {
|
if (offset <= curr_off) {
|
||||||
/* This block is wholly outside the truncation point */
|
/* This block is wholly outside the truncation point */
|
||||||
lock_buffer(bh);
|
lock_buffer(bh);
|
||||||
may_free &= journal_unmap_buffer(journal, bh,
|
ret = journal_unmap_buffer(journal, bh, offset > 0);
|
||||||
offset > 0);
|
|
||||||
unlock_buffer(bh);
|
unlock_buffer(bh);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
may_free &= ret;
|
||||||
}
|
}
|
||||||
curr_off = next_off;
|
curr_off = next_off;
|
||||||
bh = next;
|
bh = next;
|
||||||
|
@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
|
||||||
if (may_free && try_to_free_buffers(page))
|
if (may_free && try_to_free_buffers(page))
|
||||||
J_ASSERT(!page_has_buffers(page));
|
J_ASSERT(!page_has_buffers(page));
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *,
|
||||||
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
|
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
|
||||||
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
|
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
|
||||||
extern void journal_sync_buffer (struct buffer_head *);
|
extern void journal_sync_buffer (struct buffer_head *);
|
||||||
extern void jbd2_journal_invalidatepage(journal_t *,
|
extern int jbd2_journal_invalidatepage(journal_t *,
|
||||||
struct page *, unsigned long);
|
struct page *, unsigned long);
|
||||||
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
|
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
|
||||||
extern int jbd2_journal_stop(handle_t *);
|
extern int jbd2_journal_stop(handle_t *);
|
||||||
|
|
Loading…
Reference in new issue