Bug and regression fixes for 6.5-rc3 for ext4's mballoc and jbd2's
checkpoint code.
-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmS9HHIACgkQ8vlZVpUN
gaNg/wf8DAf9PMn2bmQ309Acs5E8Qi1bga/ofNnoBcDNC7k+iKGAGgwOnCn+ity4
32KiA5Yh7tzoYvZHUTE5k297mN+4AX4DyAREh1cVITohRxm3BpXYZzezdLSieS8b
7RAdOinaWzs0dBjwNqkKVrTL3jduD704DnefrtHFvwqzBf/QSVSaACoPACqCFyxx
TFvutv2h5ifjS7fsjKXrXjHUAYMJCYzJNOcTW1OUb8rknUhCaKyoCkFht4PawuVx
h1wTkP87RW/bTgHA7Kqrq4BY2nLg8U0B3U/4qmW7wMjUYyAPLTUXKD3Ewj7XaFMA
UYRBr7xba8GWqyOURb3TvzrwRjqTJg==
=inak
-----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus-6.5-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Bug and regression fixes for 6.5-rc3 for ext4's mballoc and jbd2's
  checkpoint code"

* tag 'ext4_for_linus-6.5-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix rbtree traversal bug in ext4_mb_use_preallocated
  ext4: fix off by one issue in ext4_mb_choose_next_group_best_avail()
  ext4: correct inline offset when handling xattrs in inode body
  jbd2: remove __journal_try_to_free_buffer()
  jbd2: fix a race when checking checkpoint buffer busy
  jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint
  jbd2: remove journal_clean_one_cp_list()
  jbd2: remove t_checkpoint_io_list
  jbd2: recheck chechpointing non-dirty buffer
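The off-by-one fix listed above hinges on how fls() maps a goal length to a buddy order. A minimal user-space sketch of that arithmetic (illustrative only: fls_sketch() and the sample length are stand-ins, not ext4 code) shows why fls(len) - 1 is the largest order whose block count still fits inside the goal length, while the unadjusted fls(len) overshoots by one:

    /* Illustrative only: mirrors the fls()-based order math touched by
     * the ext4_mb_choose_next_group_best_avail() fix; "len" is a made-up
     * goal length in clusters, not an ext4 structure field. */
    #include <stdio.h>

    /* 1-based index of the highest set bit, 0 when x == 0 */
    static int fls_sketch(unsigned int x)
    {
            int i = 0;

            while (x) {
                    i++;
                    x >>= 1;
            }
            return i;
    }

    int main(void)
    {
            unsigned int len = 12;                  /* example goal length */
            int order_old = fls_sketch(len);        /* 4: 1 << 4 = 16 > 12, overshoots */
            int order_new = fls_sketch(len) - 1;    /* 3: 1 << 3 = 8 <= 12, largest fitting order */

            printf("len=%u old=%d new=%d\n", len, order_old, order_new);
            return 0;
    }

Compiled with any C compiler, this prints "len=12 old=4 new=3".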
15b593ba68
@@ -1006,14 +1006,11 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
* fls() instead since we need to know the actual length while modifying
* goal length.
*/
order = fls(ac->ac_g_ex.fe_len);
order = fls(ac->ac_g_ex.fe_len) - 1;
min_order = order - sbi->s_mb_best_avail_max_trim_order;
if (min_order < 0)
min_order = 0;

if (1 << min_order < ac->ac_o_ex.fe_len)
min_order = fls(ac->ac_o_ex.fe_len) + 1;

if (sbi->s_stripe > 0) {
/*
* We are assuming that stripe size is always a multiple of
@@ -1021,9 +1018,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
*/
num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe);
if (1 << min_order < num_stripe_clusters)
min_order = fls(num_stripe_clusters);
/*
* We consider 1 order less because later we round
* up the goal len to num_stripe_clusters
*/
min_order = fls(num_stripe_clusters) - 1;
}

if (1 << min_order < ac->ac_o_ex.fe_len)
min_order = fls(ac->ac_o_ex.fe_len);

for (i = order; i >= min_order; i--) {
int frag_order;
/*
@@ -4761,8 +4765,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *tmp_pa, *cpa = NULL;
ext4_lblk_t tmp_pa_start, tmp_pa_end;
struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
loff_t tmp_pa_end;
struct rb_node *iter;
ext4_fsblk_t goal_block;

@@ -4770,47 +4774,151 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return false;

/* first, try per-file preallocation */
/*
* first, try per-file preallocation by searching the inode pa rbtree.
*
* Here, we can't do a direct traversal of the tree because
* ext4_mb_discard_group_preallocation() can paralelly mark the pa
* deleted and that can cause direct traversal to skip some entries.
*/
read_lock(&ei->i_prealloc_lock);

if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) {
goto try_group_pa;
}

/*
* Step 1: Find a pa with logical start immediately adjacent to the
* original logical start. This could be on the left or right.
*
* (tmp_pa->pa_lstart never changes so we can skip locking for it).
*/
for (iter = ei->i_prealloc_node.rb_node; iter;
iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
tmp_pa_start, iter)) {
tmp_pa->pa_lstart, iter)) {
tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
}

/* all fields in this condition don't change,
* so we can skip locking for them */
tmp_pa_start = tmp_pa->pa_lstart;
tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
/*
* Step 2: The adjacent pa might be to the right of logical start, find
* the left adjacent pa. After this step we'd have a valid tmp_pa whose
* logical start is towards the left of original request's logical start
*/
if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) {
struct rb_node *tmp;
tmp = rb_prev(&tmp_pa->pa_node.inode_node);

/* original request start doesn't lie in this PA */
if (ac->ac_o_ex.fe_logical < tmp_pa_start ||
ac->ac_o_ex.fe_logical >= tmp_pa_end)
continue;

/* non-extent files can't have physical blocks past 2^32 */
if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
(tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
EXT4_MAX_BLOCK_FILE_PHYS)) {
if (tmp) {
tmp_pa = rb_entry(tmp, struct ext4_prealloc_space,
pa_node.inode_node);
} else {
/*
* Since PAs don't overlap, we won't find any
* other PA to satisfy this.
* If there is no adjacent pa to the left then finding
* an overlapping pa is not possible hence stop searching
* inode pa tree
*/
goto try_group_pa;
}
}

BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));

/*
* Step 3: If the left adjacent pa is deleted, keep moving left to find
* the first non deleted adjacent pa. After this step we should have a
* valid tmp_pa which is guaranteed to be non deleted.
*/
for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) {
if (!iter) {
/*
* no non deleted left adjacent pa, so stop searching
* inode pa tree
*/
goto try_group_pa;
}
tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
spin_lock(&tmp_pa->pa_lock);
if (tmp_pa->pa_deleted == 0) {
/*
* We will keep holding the pa_lock from
* this point on because we don't want group discard
* to delete this pa underneath us. Since group
* discard is anyways an ENOSPC operation it
* should be okay for it to wait a few more cycles.
*/
break;
}

/* found preallocated blocks, use them */
spin_lock(&tmp_pa->pa_lock);
if (tmp_pa->pa_deleted == 0 && tmp_pa->pa_free &&
likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
atomic_inc(&tmp_pa->pa_count);
ext4_mb_use_inode_pa(ac, tmp_pa);
} else {
spin_unlock(&tmp_pa->pa_lock);
read_unlock(&ei->i_prealloc_lock);
return true;
}
spin_unlock(&tmp_pa->pa_lock);
}

BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
BUG_ON(tmp_pa->pa_deleted == 1);

/*
* Step 4: We now have the non deleted left adjacent pa. Only this
* pa can possibly satisfy the request hence check if it overlaps
* original logical start and stop searching if it doesn't.
*/
tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);

if (ac->ac_o_ex.fe_logical >= tmp_pa_end) {
spin_unlock(&tmp_pa->pa_lock);
goto try_group_pa;
}

/* non-extent files can't have physical blocks past 2^32 */
if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
(tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
EXT4_MAX_BLOCK_FILE_PHYS)) {
/*
* Since PAs don't overlap, we won't find any other PA to
* satisfy this.
*/
spin_unlock(&tmp_pa->pa_lock);
goto try_group_pa;
}

if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
atomic_inc(&tmp_pa->pa_count);
ext4_mb_use_inode_pa(ac, tmp_pa);
spin_unlock(&tmp_pa->pa_lock);
read_unlock(&ei->i_prealloc_lock);
return true;
} else {
/*
* We found a valid overlapping pa but couldn't use it because
* it had no free blocks. This should ideally never happen
* because:
*
* 1. When a new inode pa is added to rbtree it must have
* pa_free > 0 since otherwise we won't actually need
* preallocation.
*
* 2. An inode pa that is in the rbtree can only have it's
* pa_free become zero when another thread calls:
* ext4_mb_new_blocks
* ext4_mb_use_preallocated
* ext4_mb_use_inode_pa
*
* 3. Further, after the above calls make pa_free == 0, we will
* immediately remove it from the rbtree in:
* ext4_mb_new_blocks
* ext4_mb_release_context
* ext4_mb_put_pa
*
* 4. Since the pa_free becoming 0 and pa_free getting removed
* from tree both happen in ext4_mb_new_blocks, which is always
* called with i_data_sem held for data allocations, we can be
* sure that another process will never see a pa in rbtree with
* pa_free == 0.
*/
WARN_ON_ONCE(tmp_pa->pa_free == 0);
}
spin_unlock(&tmp_pa->pa_lock);
try_group_pa:
read_unlock(&ei->i_prealloc_lock);

/* can we use group allocation? */
@@ -1782,6 +1782,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
memmove(here, (void *)here + size,
(void *)last - (void *)here + sizeof(__u32));
memset(last, 0, size);

/*
* Update i_inline_off - moved ibody region might contain
* system.data attribute. Handling a failure here won't
* cause other complications for setting an xattr.
*/
if (!is_block && ext4_has_inline_data(inode)) {
ret = ext4_find_inline_data_nolock(inode);
if (ret) {
ext4_warning_inode(inode,
"unable to update i_inline_off");
goto out;
}
}
} else if (s->not_found) {
/* Insert new name. */
size_t size = EXT4_XATTR_LEN(name_len);
@@ -27,7 +27,7 @@
*
* Called with j_list_lock held.
*/
static inline void __buffer_unlink_first(struct journal_head *jh)
static inline void __buffer_unlink(struct journal_head *jh)
{
transaction_t *transaction = jh->b_cp_transaction;

@@ -40,45 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
}
}

/*
* Unlink a buffer from a transaction checkpoint(io) list.
*
* Called with j_list_lock held.
*/
static inline void __buffer_unlink(struct journal_head *jh)
{
transaction_t *transaction = jh->b_cp_transaction;

__buffer_unlink_first(jh);
if (transaction->t_checkpoint_io_list == jh) {
transaction->t_checkpoint_io_list = jh->b_cpnext;
if (transaction->t_checkpoint_io_list == jh)
transaction->t_checkpoint_io_list = NULL;
}
}

/*
* Move a buffer from the checkpoint list to the checkpoint io list
*
* Called with j_list_lock held
*/
static inline void __buffer_relink_io(struct journal_head *jh)
{
transaction_t *transaction = jh->b_cp_transaction;

__buffer_unlink_first(jh);

if (!transaction->t_checkpoint_io_list) {
jh->b_cpnext = jh->b_cpprev = jh;
} else {
jh->b_cpnext = transaction->t_checkpoint_io_list;
jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
jh->b_cpprev->b_cpnext = jh;
jh->b_cpnext->b_cpprev = jh;
}
transaction->t_checkpoint_io_list = jh;
}

/*
* Check a checkpoint buffer could be release or not.
*
@@ -183,6 +144,7 @@ __flush_batch(journal_t *journal, int *batch_count)
struct buffer_head *bh = journal->j_chkpt_bhs[i];
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
journal->j_chkpt_bhs[i] = NULL;
}
*batch_count = 0;
}
@@ -242,15 +204,6 @@ restart:
jh = transaction->t_checkpoint_list;
bh = jh2bh(jh);

if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
goto retry;
}
if (jh->b_transaction != NULL) {
transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid;
@@ -285,30 +238,50 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
if (!buffer_dirty(bh)) {
if (!trylock_buffer(bh)) {
/*
* The buffer is locked, it may be writing back, or
* flushing out in the last couple of cycles, or
* re-adding into a new transaction, need to check
* it again until it's unlocked.
*/
get_bh(bh);
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
goto retry;
} else if (!buffer_dirty(bh)) {
unlock_buffer(bh);
BUFFER_TRACE(bh, "remove from checkpoint");
if (__jbd2_journal_remove_checkpoint(jh))
/* The transaction was released; we're done */
/*
* If the transaction was released or the checkpoint
* list was empty, we're done.
*/
if (__jbd2_journal_remove_checkpoint(jh) ||
!transaction->t_checkpoint_list)
goto out;
continue;
} else {
unlock_buffer(bh);
/*
* We are about to write the buffer, it could be
* raced by some other transaction shrink or buffer
* re-log logic once we release the j_list_lock,
* leave it on the checkpoint list and check status
* again to make sure it's clean.
*/
BUFFER_TRACE(bh, "queue");
get_bh(bh);
J_ASSERT_BH(bh, !buffer_jwrite(bh));
journal->j_chkpt_bhs[batch_count++] = bh;
transaction->t_chp_stats.cs_written++;
transaction->t_checkpoint_list = jh->b_cpnext;
}
/*
* Important: we are about to write the buffer, and
* possibly block, while still holding the journal
* lock. We cannot afford to let the transaction
* logic start messing around with this buffer before
* we write it to disk, as that would break
* recoverability.
*/
BUFFER_TRACE(bh, "queue");
get_bh(bh);
J_ASSERT_BH(bh, !buffer_jwrite(bh));
journal->j_chkpt_bhs[batch_count++] = bh;
__buffer_relink_io(jh);
transaction->t_chp_stats.cs_written++;

if ((batch_count == JBD2_NR_BATCH) ||
need_resched() ||
spin_needbreak(&journal->j_list_lock))
need_resched() || spin_needbreak(&journal->j_list_lock) ||
jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
goto unlock_and_flush;
}

@@ -322,38 +295,6 @@ restart:
goto restart;
}

/*
* Now we issued all of the transaction's buffers, let's deal
* with the buffers that are out for I/O.
*/
restart2:
/* Did somebody clean up the transaction in the meanwhile? */
if (journal->j_checkpoint_transactions != transaction ||
transaction->t_tid != this_tid)
goto out;

while (transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
spin_lock(&journal->j_list_lock);
goto restart2;
}

/*
* Now in whatever state the buffer currently is, we
* know that it has been written out and so we can
* drop it from the list
*/
if (__jbd2_journal_remove_checkpoint(jh))
break;
}
out:
spin_unlock(&journal->j_list_lock);
result = jbd2_cleanup_journal_tail(journal);
@@ -409,19 +350,24 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */

/*
* journal_clean_one_cp_list
* journal_shrink_one_cp_list
*
* Find all the written-back checkpoint buffers in the given list and
* release them. If 'destroy' is set, clean all buffers unconditionally.
* Find all the written-back checkpoint buffers in the given list
* and try to release them. If the whole transaction is released, set
* the 'released' parameter. Return the number of released checkpointed
* buffers.
*
* Called with j_list_lock held.
* Returns 1 if we freed the transaction, 0 otherwise.
*/
static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
bool destroy, bool *released)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
unsigned long nr_freed = 0;
int ret;

*released = false;
if (!jh)
return 0;

@@ -430,57 +376,15 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
jh = next_jh;
next_jh = jh->b_cpnext;

if (!destroy && __cp_buffer_busy(jh))
return 0;

if (__jbd2_journal_remove_checkpoint(jh))
return 1;
/*
* This function only frees up some memory
* if possible so we dont have an obligation
* to finish processing. Bail out if preemption
* requested:
*/
if (need_resched())
return 0;
} while (jh != last_jh);

return 0;
}

/*
* journal_shrink_one_cp_list
*
* Find 'nr_to_scan' written-back checkpoint buffers in the given list
* and try to release them. If the whole transaction is released, set
* the 'released' parameter. Return the number of released checkpointed
* buffers.
*
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
unsigned long *nr_to_scan,
bool *released)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
unsigned long nr_freed = 0;
int ret;

if (!jh || *nr_to_scan == 0)
return 0;

last_jh = jh->b_cpprev;
do {
jh = next_jh;
next_jh = jh->b_cpnext;

(*nr_to_scan)--;
if (__cp_buffer_busy(jh))
continue;
if (destroy) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
if (ret < 0)
continue;
}

nr_freed++;
ret = __jbd2_journal_remove_checkpoint(jh);
if (ret) {
*released = true;
break;
@@ -488,7 +392,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,

if (need_resched())
break;
} while (jh != last_jh && *nr_to_scan);
} while (jh != last_jh);

return nr_freed;
}
@@ -506,11 +410,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
unsigned long *nr_to_scan)
{
transaction_t *transaction, *last_transaction, *next_transaction;
bool released;
bool __maybe_unused released;
tid_t first_tid = 0, last_tid = 0, next_tid = 0;
tid_t tid = 0;
unsigned long nr_freed = 0;
unsigned long nr_scanned = *nr_to_scan;
unsigned long freed;

again:
spin_lock(&journal->j_list_lock);
@@ -539,19 +443,11 @@ again:
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
tid = transaction->t_tid;
released = false;

nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
nr_to_scan, &released);
if (*nr_to_scan == 0)
break;
if (need_resched() || spin_needbreak(&journal->j_list_lock))
break;
if (released)
continue;

nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
nr_to_scan, &released);
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
false, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
break;
if (need_resched() || spin_needbreak(&journal->j_list_lock))
@@ -572,9 +468,8 @@ again:
if (*nr_to_scan && next_tid)
goto again;
out:
nr_scanned -= *nr_to_scan;
trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
nr_freed, nr_scanned, next_tid);
nr_freed, next_tid);

return nr_freed;
}
@@ -590,7 +485,7 @@ out:
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
transaction_t *transaction, *last_transaction, *next_transaction;
int ret;
bool released;

transaction = journal->j_checkpoint_transactions;
if (!transaction)
@@ -601,8 +496,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
destroy);
journal_shrink_one_cp_list(transaction->t_checkpoint_list,
destroy, &released);
/*
* This function only frees up some memory if possible so we
* dont have an obligation to finish processing. Bail out if
@@ -610,23 +505,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
*/
if (need_resched())
return;
if (ret)
continue;
/*
* It is essential that we are as careful as in the case of
* t_checkpoint_list with removing the buffer from the list as
* we can possibly see not yet submitted buffers on io_list
*/
ret = journal_clean_one_cp_list(transaction->
t_checkpoint_io_list, destroy);
if (need_resched())
return;
/*
* Stop scanning if we couldn't free the transaction. This
* avoids pointless scanning of transactions which still
* weren't checkpointed.
*/
if (!ret)
if (!released)
return;
} while (transaction != last_transaction);
}

@@ -705,7 +589,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
jbd2_journal_put_journal_head(jh);

/* Is this transaction empty? */
if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
if (transaction->t_checkpoint_list)
return 0;

/*
@@ -736,6 +620,34 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
return 1;
}

/*
* Check the checkpoint buffer and try to remove it from the checkpoint
* list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
* it frees the transaction, 0 otherwise.
*
* This function is called with j_list_lock held.
*/
int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);

if (!trylock_buffer(bh))
return -EBUSY;
if (buffer_dirty(bh)) {
unlock_buffer(bh);
return -EBUSY;
}
unlock_buffer(bh);

/*
* Buffer is clean and the IO has finished (we held the buffer
* lock) so the checkpoint is done. We can safely remove the
* buffer from this transaction.
*/
JBUFFER_TRACE(jh, "remove from checkpoint list");
return __jbd2_journal_remove_checkpoint(jh);
}

/*
* journal_insert_checkpoint: put a committed buffer onto a checkpoint
* list so that we know when it is safe to clean the transaction out of
@@ -797,7 +709,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(transaction->t_forget == NULL);
J_ASSERT(transaction->t_shadow_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL);
J_ASSERT(transaction->t_checkpoint_io_list == NULL);
J_ASSERT(atomic_read(&transaction->t_updates) == 0);
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
@@ -1141,8 +1141,7 @@ restart_loop:
spin_lock(&journal->j_list_lock);
commit_transaction->t_state = T_FINISHED;
/* Check if the transaction can be dropped now that we are finished */
if (commit_transaction->t_checkpoint_list == NULL &&
commit_transaction->t_checkpoint_io_list == NULL) {
if (commit_transaction->t_checkpoint_list == NULL) {
__jbd2_journal_drop_transaction(journal, commit_transaction);
jbd2_journal_free_transaction(commit_transaction);
}
@@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
* Otherwise, if the buffer has been written to disk,
* it is safe to remove the checkpoint and drop it.
*/
if (!buffer_dirty(bh)) {
__jbd2_journal_remove_checkpoint(jh);
if (jbd2_journal_try_remove_checkpoint(jh) >= 0) {
spin_unlock(&journal->j_list_lock);
goto drop;
}
@@ -2100,35 +2099,6 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
__brelse(bh);
}

/*
* Called from jbd2_journal_try_to_free_buffers().
*
* Called under jh->b_state_lock
*/
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
{
struct journal_head *jh;

jh = bh2jh(bh);

if (buffer_locked(bh) || buffer_dirty(bh))
goto out;

if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
goto out;

spin_lock(&journal->j_list_lock);
if (jh->b_cp_transaction != NULL) {
/* written-back checkpointed metadata buffer */
JBUFFER_TRACE(jh, "remove from checkpoint list");
__jbd2_journal_remove_checkpoint(jh);
}
spin_unlock(&journal->j_list_lock);
out:
return;
}

/**
* jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
@@ -2186,7 +2156,13 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
continue;

spin_lock(&jh->b_state_lock);
__journal_try_to_free_buffer(journal, bh);
if (!jh->b_transaction && !jh->b_next_transaction) {
spin_lock(&journal->j_list_lock);
/* Remove written-back checkpointed metadata buffer */
if (jh->b_cp_transaction != NULL)
jbd2_journal_try_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
}
spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
if (buffer_jbd(bh))
@@ -613,12 +613,6 @@ struct transaction_s
*/
struct journal_head *t_checkpoint_list;

/*
* Doubly-linked circular list of all buffers submitted for IO while
* checkpointing. [j_list_lock]
*/
struct journal_head *t_checkpoint_io_list;

/*
* Doubly-linked circular list of metadata buffers being
* shadowed by log IO. The IO buffers on the iobuf list and
@@ -1449,6 +1443,7 @@ extern void jbd2_journal_commit_transaction(journal_t *);
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
int jbd2_journal_try_remove_checkpoint(struct journal_head *jh);
void jbd2_journal_destroy_checkpoint(journal_t *journal);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
@@ -462,11 +462,9 @@ TRACE_EVENT(jbd2_shrink_scan_exit,
TRACE_EVENT(jbd2_shrink_checkpoint_list,

TP_PROTO(journal_t *journal, tid_t first_tid, tid_t tid, tid_t last_tid,
unsigned long nr_freed, unsigned long nr_scanned,
tid_t next_tid),
unsigned long nr_freed, tid_t next_tid),

TP_ARGS(journal, first_tid, tid, last_tid, nr_freed,
nr_scanned, next_tid),
TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, next_tid),

TP_STRUCT__entry(
__field(dev_t, dev)
@@ -474,7 +472,6 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
__field(tid_t, tid)
__field(tid_t, last_tid)
__field(unsigned long, nr_freed)
__field(unsigned long, nr_scanned)
__field(tid_t, next_tid)
),

@@ -484,15 +481,14 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
__entry->tid = tid;
__entry->last_tid = last_tid;
__entry->nr_freed = nr_freed;
__entry->nr_scanned = nr_scanned;
__entry->next_tid = next_tid;
),

TP_printk("dev %d,%d shrink transaction %u-%u(%u) freed %lu "
"scanned %lu next transaction %u",
"next transaction %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->first_tid, __entry->tid, __entry->last_tid,
__entry->nr_freed, __entry->nr_scanned, __entry->next_tid)
__entry->nr_freed, __entry->next_tid)
);

#endif /* _TRACE_JBD2_H */