Merge branch 'dax-4.10-iomap-pmd' into for-next
This commit is contained in:
Commit b649c42e25
fs/Kconfig
@@ -55,7 +55,6 @@ config FS_DAX_PMD
 	depends on FS_DAX
 	depends on ZONE_DEVICE
 	depends on TRANSPARENT_HUGEPAGE
-	depends on BROKEN
 
 endif # BLOCK
fs/dax.c (924 lines changed)
The diff for this file is not shown because of its size.
fs/ext2/file.c
@@ -38,7 +38,7 @@ static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		return 0; /* skip atime */
 
 	inode_lock_shared(inode);
-	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
 	inode_unlock_shared(inode);
 
 	file_accessed(iocb->ki_filp);
@@ -62,7 +62,7 @@ static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (ret)
 		goto out_unlock;
 
-	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		mark_inode_dirty(inode);
@@ -99,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
 
-	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
+	ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
 
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
@@ -107,27 +107,6 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return ret;
 }
 
-static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, unsigned int flags)
-{
-	struct inode *inode = file_inode(vma->vm_file);
-	struct ext2_inode_info *ei = EXT2_I(inode);
-	int ret;
-
-	if (flags & FAULT_FLAG_WRITE) {
-		sb_start_pagefault(inode->i_sb);
-		file_update_time(vma->vm_file);
-	}
-	down_read(&ei->dax_sem);
-
-	ret = dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block);
-
-	up_read(&ei->dax_sem);
-	if (flags & FAULT_FLAG_WRITE)
-		sb_end_pagefault(inode->i_sb);
-	return ret;
-}
-
 static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 		struct vm_fault *vmf)
 {
@@ -154,7 +133,11 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 
 static const struct vm_operations_struct ext2_dax_vm_ops = {
 	.fault		= ext2_dax_fault,
-	.pmd_fault	= ext2_dax_pmd_fault,
+	/*
+	 * .pmd_fault is not supported for DAX because allocation in ext2
+	 * cannot be reliably aligned to huge page sizes and so pmd faults
+	 * will always fail and fail back to regular faults.
+	 */
 	.page_mkwrite	= ext2_dax_fault,
 	.pfn_mkwrite	= ext2_dax_pfn_mkwrite,
 };
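Note: the replacement comment records why ext2 drops .pmd_fault entirely. A DAX PMD fault can only succeed when both the faulting virtual address and the backing block allocation are aligned to PMD_SIZE (2 MiB on x86-64). The following is a rough sketch of those preconditions; dax_pmd_possible() is a hypothetical helper written for illustration, not code from this patch or from the kernel:

	/*
	 * Illustrative only: the alignment a DAX PMD fault needs.  ext2's
	 * block allocator cannot guarantee the on-disk alignment, so this
	 * check would fail almost always there, and every PMD fault would
	 * just fall back to a 4KiB fault.
	 */
	static bool dax_pmd_possible(struct vm_area_struct *vma,
			unsigned long address, sector_t sector)
	{
		unsigned long pmd_addr = address & PMD_MASK;

		/* the whole 2MiB virtual range must fit inside the VMA */
		if (pmd_addr < vma->vm_start ||
		    pmd_addr + PMD_SIZE > vma->vm_end)
			return false;

		/* the backing blocks must be PMD-aligned on the device
		 * (512-byte sectors, hence the shift by 9) */
		if (sector & ((PMD_SIZE >> 9) - 1))
			return false;

		return true;
	}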
@@ -166,7 +149,7 @@ static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 	file_accessed(file);
 	vma->vm_ops = &ext2_dax_vm_ops;
-	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+	vma->vm_flags |= VM_MIXEDMAP;
 	return 0;
 }
 #else
fs/ext4/inode.c
@@ -767,6 +767,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 		ext4_update_bh_state(bh, map.m_flags);
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
+	} else if (ret == 0) {
+		/* hole case, need to fill in bh->b_size */
+		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 	}
 	return ret;
 }
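Note: the new branch matters to callers that probe with create == 0. Previously b_size was left untouched for holes, so a caller could not tell how far a hole extends. A hedged sketch of the calling pattern (probe_contig_range() is a hypothetical function, not kernel code):

	/*
	 * Illustrative only: a get_block_t caller passes the span it cares
	 * about in via b_size; _ext4_get_block() now trims it to a single
	 * contiguous extent or hole, so the caller learns how far it may
	 * skip before asking again.
	 */
	static size_t probe_contig_range(struct inode *inode, sector_t iblock,
					 size_t want_bytes)
	{
		struct buffer_head bh = { .b_size = want_bytes };

		if (ext4_get_block(inode, iblock, &bh, 0) < 0)
			return 0;
		/* buffer_mapped(&bh) distinguishes extent from hole; either
		 * way bh.b_size now covers one contiguous range */
		return bh.b_size;
	}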
fs/iomap.c
@@ -467,8 +467,9 @@ int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	offset = page_offset(page);
 	while (length > 0) {
-		ret = iomap_apply(inode, offset, length, IOMAP_WRITE,
-				ops, page, iomap_page_mkwrite_actor);
+		ret = iomap_apply(inode, offset, length,
+				IOMAP_WRITE | IOMAP_FAULT, ops, page,
+				iomap_page_mkwrite_actor);
 		if (unlikely(ret <= 0))
 			goto out_unlock;
 		offset += ret;
fs/xfs/xfs_aops.c
@@ -1298,8 +1298,7 @@ __xfs_get_blocks(
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
 	int			create,
-	bool			direct,
-	bool			dax_fault)
+	bool			direct)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1420,13 +1419,8 @@ __xfs_get_blocks(
 		if (ISUNWRITTEN(&imap))
 			set_buffer_unwritten(bh_result);
 		/* direct IO needs special help */
-		if (create) {
-			if (dax_fault)
-				ASSERT(!ISUNWRITTEN(&imap));
-			else
-				xfs_map_direct(inode, bh_result, &imap, offset,
-						is_cow);
-		}
+		if (create)
+			xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
 	}
 
 	/*
@@ -1466,7 +1460,7 @@ xfs_get_blocks(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, false);
 }
 
 int
@@ -1476,17 +1470,7 @@ xfs_get_blocks_direct(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
-}
-
-int
-xfs_get_blocks_dax_fault(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, true);
 }
 
 /*
fs/xfs/xfs_aops.h
@@ -59,9 +59,6 @@ int xfs_get_blocks(struct inode *inode, sector_t offset,
 		struct buffer_head *map_bh, int create);
 int	xfs_get_blocks_direct(struct inode *inode, sector_t offset,
 		struct buffer_head *map_bh, int create);
-int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
-		struct buffer_head *map_bh, int create);
-
 int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
 		ssize_t size, void *private);
 int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
fs/xfs/xfs_file.c
@@ -318,7 +318,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
+	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
@@ -653,7 +653,7 @@ xfs_file_dax_write(
 
 	trace_xfs_file_dax_write(ip, count, pos);
 
-	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		error = xfs_setfilesize(ip, pos, ret);
@@ -1474,7 +1474,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (IS_DAX(inode)) {
-		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1508,7 +1508,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1545,7 +1545,7 @@ xfs_filemap_pmd_fault(
 	}
 
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	ret = dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
+	ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (flags & FAULT_FLAG_WRITE)
include/linux/dax.h
@@ -8,21 +8,46 @@
 
 struct iomap_ops;
 
-/* We use lowest available exceptional entry bit for locking */
+/*
+ * We use lowest available bit in exceptional entry for locking, one bit for
+ * the entry size (PMD) and two more to tell us if the entry is a huge zero
+ * page (HZP) or an empty entry that is just used for locking.  In total four
+ * special bits.
+ *
+ * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and
+ * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
+ * block allocation.
+ */
+#define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
+#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
+#define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
+#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
 
-ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+static inline unsigned long dax_radix_sector(void *entry)
+{
+	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+}
+
+static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+{
+	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
+			((unsigned long)sector << RADIX_DAX_SHIFT) |
+			RADIX_DAX_ENTRY_LOCK);
+}
+
+ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		struct iomap_ops *ops);
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
-int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-		pgoff_t index, bool wake_all);
+		pgoff_t index, void *entry, bool wake_all);
 
 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
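Note: the four special bits plus the sector shift give a compact single-word encoding. The small userspace program below (illustrative only; it hard-codes RADIX_TREE_EXCEPTIONAL_SHIFT == 2 and RADIX_TREE_EXCEPTIONAL_ENTRY == 2, the values in this era's <linux/radix-tree.h>) demonstrates the round trip that dax_radix_locked_entry() and dax_radix_sector() perform:

	#include <stdio.h>

	/* mirror the kernel constants; values are an assumption noted above */
	#define RADIX_TREE_EXCEPTIONAL_ENTRY	2
	#define RADIX_TREE_EXCEPTIONAL_SHIFT	2

	#define RADIX_DAX_SHIFT		(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
	#define RADIX_DAX_ENTRY_LOCK	(1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
	#define RADIX_DAX_PMD		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))

	int main(void)
	{
		unsigned long sector = 5;
		/* encode: sector in the high bits, flags and lock bit below */
		unsigned long entry = RADIX_TREE_EXCEPTIONAL_ENTRY | RADIX_DAX_PMD |
				(sector << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK;

		/* decode: shift the special bits away to recover the sector */
		printf("entry = %#lx, sector = %lu, is_pmd = %d\n",
		       entry, entry >> RADIX_DAX_SHIFT,
		       !!(entry & RADIX_DAX_PMD));	/* prints 0x14e, 5, 1 */
		return 0;
	}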
@@ -48,15 +73,32 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
 }
 #endif
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
-int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
-		unsigned int flags, get_block_t);
-#else
-static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, unsigned int flags, get_block_t gb)
-{
-	return VM_FAULT_FALLBACK;
-}
+#ifdef CONFIG_FS_DAX_PMD
+static inline unsigned int dax_radix_order(void *entry)
+{
+	if ((unsigned long)entry & RADIX_DAX_PMD)
+		return PMD_SHIFT - PAGE_SHIFT;
+	return 0;
+}
+int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
+		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);
+#else
+static inline unsigned int dax_radix_order(void *entry)
+{
+	return 0;
+}
+static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmd, unsigned int flags,
+		struct iomap_ops *ops)
+{
+	return VM_FAULT_FALLBACK;
+}
 #endif
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)
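Note: dax_radix_order() reports the radix-tree order backing an entry, i.e. how many PAGE_SIZE pages it spans. A one-line illustration (dax_entry_pages() is a hypothetical helper, and the concrete numbers assume x86-64, where PMD_SHIFT is 21 and PAGE_SHIFT is 12):

	/* pages covered by one entry: order 9 for a PMD entry on x86-64,
	 * so 1 << 9 == 512 pages, i.e. 2 MiB; order 0 (one page) otherwise */
	static unsigned long dax_entry_pages(void *entry)
	{
		return 1UL << dax_radix_order(entry);
	}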
include/linux/iomap.h
@@ -49,6 +49,7 @@ struct iomap {
 #define IOMAP_WRITE		(1 << 0) /* writing, must allocate blocks */
 #define IOMAP_ZERO		(1 << 1) /* zeroing operation, may skip holes */
 #define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
+#define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 
 struct iomap_ops {
 	/*
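Note: the new IOMAP_FAULT flag lets an ->iomap_begin implementation tell page-fault callers (as in the iomap_page_mkwrite() change above) apart from ordinary writes. A hedged sketch of how a filesystem might consume it; example_iomap_begin() is hypothetical and none of its logic comes from this series:

	/* Sketch only: an ->iomap_begin that behaves differently under
	 * page fault, e.g. to pick a different locking or allocation
	 * strategy, since the faulting page is already locked. */
	static int example_iomap_begin(struct inode *inode, loff_t pos,
			loff_t length, unsigned flags, struct iomap *iomap)
	{
		if ((flags & IOMAP_FAULT) && (flags & IOMAP_WRITE)) {
			/* fault path: avoid operations that could recurse
			 * into reclaim and deadlock on the locked page */
		}
		/* then fill *iomap with the extent covering pos */
		return 0;
	}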
mm/filemap.c
@@ -137,13 +137,12 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		} else {
 			/* DAX can replace empty locked entry with a hole */
 			WARN_ON_ONCE(p !=
-				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
-					 RADIX_DAX_ENTRY_LOCK));
+					dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
 			/* DAX accounts exceptional entries as normal pages */
 			if (node)
 				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
+			dax_wake_mapping_entry_waiter(mapping, page->index, p,
 						      false);
 		}
 	}