diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5ff85ecc6c3f..40d3bca6aaa4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2840,8 +2840,17 @@ update: processed->uptodate = uptodate; } -static void endio_readpage_update_page_status(struct page *page, bool uptodate, - u64 start, u32 len) +static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page) +{ + ASSERT(PageLocked(page)); + if (fs_info->sectorsize == PAGE_SIZE) + return; + + ASSERT(PagePrivate(page)); + btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE); +} + +static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) { struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); @@ -2857,7 +2866,12 @@ static void endio_readpage_update_page_status(struct page *page, bool uptodate, if (fs_info->sectorsize == PAGE_SIZE) unlock_page(page); - /* Subpage locking will be handled in later patches */ + else if (is_data_inode(page->mapping->host)) + /* + * For subpage data, unlock the page if we're the last reader. + * For subpage metadata, page lock is not utilized for read. + */ + btrfs_subpage_end_reader(fs_info, page, start, len); } /* @@ -2994,7 +3008,7 @@ readpage_ok: bio_offset += len; /* Update page status and unlock */ - endio_readpage_update_page_status(page, uptodate, start, len); + end_page_read(page, uptodate, start, len); endio_readpage_release_extent(&processed, BTRFS_I(inode), start, end, uptodate); } @@ -3263,6 +3277,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, unsigned int read_flags, u64 *prev_em_start) { struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 start = page_offset(page); const u64 end = start + PAGE_SIZE - 1; u64 cur = start; @@ -3282,7 +3297,8 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, ret = set_page_extent_mapped(page); if (ret < 0) { unlock_extent(tree, start, end); - SetPageError(page); + btrfs_page_set_error(fs_info, page, start, PAGE_SIZE); + unlock_page(page); goto out; } @@ -3290,6 +3306,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, if (cleancache_get_page(page) == 0) { BUG_ON(blocksize != PAGE_SIZE); unlock_extent(tree, start, end); + unlock_page(page); goto out; } } @@ -3306,6 +3323,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, kunmap_atomic(userpage); } } + begin_page_read(fs_info, page); while (cur <= end) { bool force_bio_submit = false; u64 disk_bytenr; @@ -3323,13 +3341,14 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, cur + iosize - 1, &cached); + end_page_read(page, true, cur, iosize); break; } em = __get_extent_map(inode, page, pg_offset, cur, end - cur + 1, em_cached); if (IS_ERR_OR_NULL(em)) { - SetPageError(page); unlock_extent(tree, cur, end); + end_page_read(page, false, cur, end + 1 - cur); break; } extent_offset = cur - em->start; @@ -3412,6 +3431,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, cur + iosize - 1, &cached); + end_page_read(page, true, cur, iosize); cur = cur + iosize; pg_offset += iosize; continue; @@ -3421,6 +3441,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); unlock_extent(tree, cur, cur + iosize - 1); + end_page_read(page, true, cur, iosize); cur = cur + iosize; pg_offset += iosize; continue; @@ -3429,8 +3450,8 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, * to date. Error out */ if (block_start == EXTENT_MAP_INLINE) { - SetPageError(page); unlock_extent(tree, cur, cur + iosize - 1); + end_page_read(page, false, cur, iosize); cur = cur + iosize; pg_offset += iosize; continue; @@ -3447,19 +3468,14 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, nr++; *bio_flags = this_bio_flag; } else { - SetPageError(page); unlock_extent(tree, cur, cur + iosize - 1); + end_page_read(page, false, cur, iosize); goto out; } cur = cur + iosize; pg_offset += iosize; } out: - if (!nr) { - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - } return ret; } diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 2c51ab71e000..c69049e7daa9 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -54,6 +54,8 @@ int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, spin_lock_init(&(*ret)->lock); if (type == BTRFS_SUBPAGE_METADATA) atomic_set(&(*ret)->eb_refs, 0); + else + atomic_set(&(*ret)->readers, 0); return 0; } @@ -102,6 +104,47 @@ void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info, atomic_dec(&subpage->eb_refs); } +static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + /* Basic checks */ + ASSERT(PagePrivate(page) && page->private); + ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && + IS_ALIGNED(len, fs_info->sectorsize)); + /* + * The range check only works for mapped page, we can still have + * unmapped page like dummy extent buffer pages. + */ + if (page->mapping) + ASSERT(page_offset(page) <= start && + start + len <= page_offset(page) + PAGE_SIZE); +} + +void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + const int nbits = len >> fs_info->sectorsize_bits; + int ret; + + btrfs_subpage_assert(fs_info, page, start, len); + + ret = atomic_add_return(nbits, &subpage->readers); + ASSERT(ret == nbits); +} + +void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + const int nbits = len >> fs_info->sectorsize_bits; + + btrfs_subpage_assert(fs_info, page, start, len); + ASSERT(atomic_read(&subpage->readers) >= nbits); + if (atomic_sub_and_test(nbits, &subpage->readers)) + unlock_page(page); +} + /* * Convert the [start, start + len) range into a u16 bitmap * @@ -113,18 +156,8 @@ static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info, const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits; const int nbits = len >> fs_info->sectorsize_bits; - /* Basic checks */ - ASSERT(PagePrivate(page) && page->private); - ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && - IS_ALIGNED(len, fs_info->sectorsize)); + btrfs_subpage_assert(fs_info, page, start, len); - /* - * The range check only works for mapped page, we can still have - * unmapped page like dummy extent buffer pages. - */ - if (page->mapping) - ASSERT(page_offset(page) <= start && - start + len <= page_offset(page) + PAGE_SIZE); /* * Here nbits can be 16, thus can go beyond u16 range. We make the * first left shift to be calculate in unsigned long (at least u32), diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index f3c5def313a1..b86a4881475d 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -29,6 +29,9 @@ struct btrfs_subpage { */ atomic_t eb_refs; /* Structures only used by data */ + struct { + atomic_t readers; + }; }; }; @@ -53,6 +56,11 @@ void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info, void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct page *page); +void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len); +void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len); + /* * Template for subpage related operations. *